{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998201546669865, "eval_steps": 400, "global_step": 12510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00047958755470295546, "grad_norm": null, "learning_rate": 0.0, "loss": 9.186, "step": 2 }, { "epoch": 0.0009591751094059109, "grad_norm": 41.87915802001953, "learning_rate": 0.0001, "loss": 9.1929, "step": 4 }, { "epoch": 0.0014387626641088663, "grad_norm": 26.778240203857422, "learning_rate": 9.998401151171157e-05, "loss": 7.9908, "step": 6 }, { "epoch": 0.0019183502188118218, "grad_norm": 17.092330932617188, "learning_rate": 9.996802302342314e-05, "loss": 5.8938, "step": 8 }, { "epoch": 0.002397937773514777, "grad_norm": 13.493156433105469, "learning_rate": 9.995203453513472e-05, "loss": 5.2688, "step": 10 }, { "epoch": 0.0028775253282177326, "grad_norm": 12.290217399597168, "learning_rate": 9.993604604684628e-05, "loss": 4.8495, "step": 12 }, { "epoch": 0.003357112882920688, "grad_norm": 9.30190372467041, "learning_rate": 9.992005755855785e-05, "loss": 4.341, "step": 14 }, { "epoch": 0.0038367004376236437, "grad_norm": 8.70991325378418, "learning_rate": 9.990406907026941e-05, "loss": 3.8891, "step": 16 }, { "epoch": 0.004316287992326599, "grad_norm": 7.046508312225342, "learning_rate": 9.988808058198098e-05, "loss": 3.6666, "step": 18 }, { "epoch": 0.004795875547029554, "grad_norm": 7.605278015136719, "learning_rate": 9.987209209369254e-05, "loss": 3.3926, "step": 20 }, { "epoch": 0.00527546310173251, "grad_norm": 6.254539489746094, "learning_rate": 9.985610360540412e-05, "loss": 3.1701, "step": 22 }, { "epoch": 0.005755050656435465, "grad_norm": 5.563010215759277, "learning_rate": 9.984011511711569e-05, "loss": 3.0436, "step": 24 }, { "epoch": 0.006234638211138421, "grad_norm": 5.891292095184326, "learning_rate": 9.982412662882725e-05, "loss": 2.8886, "step": 26 }, { "epoch": 0.006714225765841376, "grad_norm": 
5.831413269042969, "learning_rate": 9.980813814053882e-05, "loss": 2.7854, "step": 28 }, { "epoch": 0.007193813320544332, "grad_norm": 4.954245567321777, "learning_rate": 9.979214965225039e-05, "loss": 2.7262, "step": 30 }, { "epoch": 0.007673400875247287, "grad_norm": 5.818669319152832, "learning_rate": 9.977616116396195e-05, "loss": 2.6802, "step": 32 }, { "epoch": 0.008152988429950242, "grad_norm": 5.170840263366699, "learning_rate": 9.976017267567353e-05, "loss": 2.6787, "step": 34 }, { "epoch": 0.008632575984653197, "grad_norm": 5.347048282623291, "learning_rate": 9.97441841873851e-05, "loss": 2.6252, "step": 36 }, { "epoch": 0.009112163539356153, "grad_norm": 5.011410236358643, "learning_rate": 9.972819569909666e-05, "loss": 2.5843, "step": 38 }, { "epoch": 0.009591751094059109, "grad_norm": 4.359274864196777, "learning_rate": 9.971220721080823e-05, "loss": 2.5945, "step": 40 }, { "epoch": 0.010071338648762064, "grad_norm": 4.415558338165283, "learning_rate": 9.969621872251978e-05, "loss": 2.5937, "step": 42 }, { "epoch": 0.01055092620346502, "grad_norm": 4.351756572723389, "learning_rate": 9.968023023423136e-05, "loss": 2.5505, "step": 44 }, { "epoch": 0.011030513758167975, "grad_norm": 4.589517116546631, "learning_rate": 9.966424174594293e-05, "loss": 2.4764, "step": 46 }, { "epoch": 0.01151010131287093, "grad_norm": 3.287195920944214, "learning_rate": 9.964825325765449e-05, "loss": 2.4906, "step": 48 }, { "epoch": 0.011989688867573886, "grad_norm": 4.175435543060303, "learning_rate": 9.963226476936606e-05, "loss": 2.4864, "step": 50 }, { "epoch": 0.012469276422276842, "grad_norm": 3.40952467918396, "learning_rate": 9.961627628107762e-05, "loss": 2.4421, "step": 52 }, { "epoch": 0.012948863976979797, "grad_norm": 3.997380495071411, "learning_rate": 9.960028779278919e-05, "loss": 2.4169, "step": 54 }, { "epoch": 0.013428451531682753, "grad_norm": 3.4981138706207275, "learning_rate": 9.958429930450077e-05, "loss": 2.4151, "step": 56 }, { "epoch": 
0.013908039086385708, "grad_norm": 3.4333291053771973, "learning_rate": 9.956831081621233e-05, "loss": 2.3925, "step": 58 }, { "epoch": 0.014387626641088664, "grad_norm": 3.855370044708252, "learning_rate": 9.95523223279239e-05, "loss": 2.4055, "step": 60 }, { "epoch": 0.01486721419579162, "grad_norm": 3.3205909729003906, "learning_rate": 9.953633383963546e-05, "loss": 2.4241, "step": 62 }, { "epoch": 0.015346801750494575, "grad_norm": 3.5042200088500977, "learning_rate": 9.952034535134703e-05, "loss": 2.4053, "step": 64 }, { "epoch": 0.01582638930519753, "grad_norm": 3.7376601696014404, "learning_rate": 9.95043568630586e-05, "loss": 2.3935, "step": 66 }, { "epoch": 0.016305976859900484, "grad_norm": 3.2216756343841553, "learning_rate": 9.948836837477017e-05, "loss": 2.3533, "step": 68 }, { "epoch": 0.01678556441460344, "grad_norm": 3.4415132999420166, "learning_rate": 9.947237988648174e-05, "loss": 2.353, "step": 70 }, { "epoch": 0.017265151969306395, "grad_norm": 3.139700412750244, "learning_rate": 9.94563913981933e-05, "loss": 2.3406, "step": 72 }, { "epoch": 0.017744739524009352, "grad_norm": 3.3743584156036377, "learning_rate": 9.944040290990487e-05, "loss": 2.3941, "step": 74 }, { "epoch": 0.018224327078712306, "grad_norm": 3.6030659675598145, "learning_rate": 9.942441442161644e-05, "loss": 2.3372, "step": 76 }, { "epoch": 0.018703914633415263, "grad_norm": 3.3180689811706543, "learning_rate": 9.9408425933328e-05, "loss": 2.2878, "step": 78 }, { "epoch": 0.019183502188118217, "grad_norm": 3.350250720977783, "learning_rate": 9.939243744503958e-05, "loss": 2.26, "step": 80 }, { "epoch": 0.019663089742821174, "grad_norm": 3.2170333862304688, "learning_rate": 9.937644895675115e-05, "loss": 2.2594, "step": 82 }, { "epoch": 0.020142677297524128, "grad_norm": 2.8769371509552, "learning_rate": 9.936046046846271e-05, "loss": 2.1957, "step": 84 }, { "epoch": 0.020622264852227085, "grad_norm": 2.908954381942749, "learning_rate": 9.934447198017428e-05, "loss": 2.1265, 
"step": 86 }, { "epoch": 0.02110185240693004, "grad_norm": 2.8139522075653076, "learning_rate": 9.932848349188585e-05, "loss": 2.1725, "step": 88 }, { "epoch": 0.021581439961632996, "grad_norm": 2.705012798309326, "learning_rate": 9.931249500359741e-05, "loss": 2.2002, "step": 90 }, { "epoch": 0.02206102751633595, "grad_norm": 2.8362064361572266, "learning_rate": 9.929650651530899e-05, "loss": 2.1742, "step": 92 }, { "epoch": 0.022540615071038907, "grad_norm": 2.4432308673858643, "learning_rate": 9.928051802702056e-05, "loss": 2.1365, "step": 94 }, { "epoch": 0.02302020262574186, "grad_norm": 2.4180076122283936, "learning_rate": 9.926452953873212e-05, "loss": 2.1491, "step": 96 }, { "epoch": 0.02349979018044482, "grad_norm": 2.5836551189422607, "learning_rate": 9.924854105044369e-05, "loss": 2.0918, "step": 98 }, { "epoch": 0.023979377735147772, "grad_norm": 2.7202980518341064, "learning_rate": 9.923255256215525e-05, "loss": 2.1319, "step": 100 }, { "epoch": 0.02445896528985073, "grad_norm": 2.331252098083496, "learning_rate": 9.921656407386682e-05, "loss": 2.0991, "step": 102 }, { "epoch": 0.024938552844553683, "grad_norm": 2.5599703788757324, "learning_rate": 9.92005755855784e-05, "loss": 2.0856, "step": 104 }, { "epoch": 0.02541814039925664, "grad_norm": 2.4501640796661377, "learning_rate": 9.918458709728996e-05, "loss": 2.0635, "step": 106 }, { "epoch": 0.025897727953959594, "grad_norm": 2.85457444190979, "learning_rate": 9.916859860900153e-05, "loss": 2.0999, "step": 108 }, { "epoch": 0.02637731550866255, "grad_norm": 2.2711305618286133, "learning_rate": 9.91526101207131e-05, "loss": 2.1371, "step": 110 }, { "epoch": 0.026856903063365505, "grad_norm": 2.3988406658172607, "learning_rate": 9.913662163242466e-05, "loss": 2.0606, "step": 112 }, { "epoch": 0.027336490618068462, "grad_norm": 2.6506006717681885, "learning_rate": 9.912063314413623e-05, "loss": 2.0981, "step": 114 }, { "epoch": 0.027816078172771416, "grad_norm": 2.0897698402404785, "learning_rate": 
9.910464465584779e-05, "loss": 2.0666, "step": 116 }, { "epoch": 0.028295665727474374, "grad_norm": 2.0126383304595947, "learning_rate": 9.908865616755936e-05, "loss": 2.0864, "step": 118 }, { "epoch": 0.028775253282177327, "grad_norm": 2.3770053386688232, "learning_rate": 9.907266767927092e-05, "loss": 2.0796, "step": 120 }, { "epoch": 0.029254840836880285, "grad_norm": 2.096311569213867, "learning_rate": 9.905667919098249e-05, "loss": 2.0379, "step": 122 }, { "epoch": 0.02973442839158324, "grad_norm": 2.5748302936553955, "learning_rate": 9.904069070269406e-05, "loss": 2.0907, "step": 124 }, { "epoch": 0.030214015946286196, "grad_norm": 2.0851612091064453, "learning_rate": 9.902470221440563e-05, "loss": 2.0433, "step": 126 }, { "epoch": 0.03069360350098915, "grad_norm": 2.5961525440216064, "learning_rate": 9.90087137261172e-05, "loss": 2.099, "step": 128 }, { "epoch": 0.031173191055692103, "grad_norm": 2.579314708709717, "learning_rate": 9.899272523782877e-05, "loss": 2.0994, "step": 130 }, { "epoch": 0.03165277861039506, "grad_norm": 2.3902342319488525, "learning_rate": 9.897673674954033e-05, "loss": 2.0561, "step": 132 }, { "epoch": 0.032132366165098014, "grad_norm": 2.5458385944366455, "learning_rate": 9.89607482612519e-05, "loss": 2.0395, "step": 134 }, { "epoch": 0.03261195371980097, "grad_norm": 2.160308599472046, "learning_rate": 9.894475977296346e-05, "loss": 2.1204, "step": 136 }, { "epoch": 0.03309154127450393, "grad_norm": 2.0382707118988037, "learning_rate": 9.892877128467504e-05, "loss": 2.1313, "step": 138 }, { "epoch": 0.03357112882920688, "grad_norm": 2.0661120414733887, "learning_rate": 9.891278279638661e-05, "loss": 2.0568, "step": 140 }, { "epoch": 0.034050716383909836, "grad_norm": 2.076857805252075, "learning_rate": 9.889679430809817e-05, "loss": 2.102, "step": 142 }, { "epoch": 0.03453030393861279, "grad_norm": 2.2664248943328857, "learning_rate": 9.888080581980974e-05, "loss": 2.0224, "step": 144 }, { "epoch": 0.03500989149331575, 
"grad_norm": 2.1755471229553223, "learning_rate": 9.88648173315213e-05, "loss": 2.0427, "step": 146 }, { "epoch": 0.035489479048018704, "grad_norm": 1.874707818031311, "learning_rate": 9.884882884323287e-05, "loss": 2.1581, "step": 148 }, { "epoch": 0.03596906660272166, "grad_norm": 1.8403527736663818, "learning_rate": 9.883284035494445e-05, "loss": 2.0546, "step": 150 }, { "epoch": 0.03644865415742461, "grad_norm": 1.9645122289657593, "learning_rate": 9.881685186665602e-05, "loss": 2.0816, "step": 152 }, { "epoch": 0.03692824171212757, "grad_norm": 2.118138074874878, "learning_rate": 9.880086337836758e-05, "loss": 2.018, "step": 154 }, { "epoch": 0.037407829266830527, "grad_norm": 1.7325690984725952, "learning_rate": 9.878487489007915e-05, "loss": 2.0723, "step": 156 }, { "epoch": 0.03788741682153348, "grad_norm": 2.266857624053955, "learning_rate": 9.876888640179071e-05, "loss": 2.0711, "step": 158 }, { "epoch": 0.038367004376236434, "grad_norm": 1.846144437789917, "learning_rate": 9.875289791350228e-05, "loss": 2.0807, "step": 160 }, { "epoch": 0.038846591930939395, "grad_norm": 1.8863399028778076, "learning_rate": 9.873690942521386e-05, "loss": 2.0804, "step": 162 }, { "epoch": 0.03932617948564235, "grad_norm": 1.9290143251419067, "learning_rate": 9.872092093692542e-05, "loss": 2.0735, "step": 164 }, { "epoch": 0.0398057670403453, "grad_norm": 1.7761247158050537, "learning_rate": 9.870493244863699e-05, "loss": 2.0673, "step": 166 }, { "epoch": 0.040285354595048256, "grad_norm": 1.7413686513900757, "learning_rate": 9.868894396034855e-05, "loss": 2.1205, "step": 168 }, { "epoch": 0.04076494214975122, "grad_norm": 2.399038314819336, "learning_rate": 9.867295547206012e-05, "loss": 2.0487, "step": 170 }, { "epoch": 0.04124452970445417, "grad_norm": 2.2538692951202393, "learning_rate": 9.865696698377169e-05, "loss": 2.0692, "step": 172 }, { "epoch": 0.041724117259157124, "grad_norm": 1.8458192348480225, "learning_rate": 9.864097849548327e-05, "loss": 1.9711, "step": 
174 }, { "epoch": 0.04220370481386008, "grad_norm": 1.9570268392562866, "learning_rate": 9.862499000719483e-05, "loss": 2.0086, "step": 176 }, { "epoch": 0.04268329236856304, "grad_norm": 1.6991487741470337, "learning_rate": 9.86090015189064e-05, "loss": 2.0152, "step": 178 }, { "epoch": 0.04316287992326599, "grad_norm": 1.7585821151733398, "learning_rate": 9.859301303061796e-05, "loss": 2.109, "step": 180 }, { "epoch": 0.043642467477968946, "grad_norm": 1.7372496128082275, "learning_rate": 9.857702454232953e-05, "loss": 2.0334, "step": 182 }, { "epoch": 0.0441220550326719, "grad_norm": 1.6850683689117432, "learning_rate": 9.85610360540411e-05, "loss": 2.0298, "step": 184 }, { "epoch": 0.04460164258737486, "grad_norm": 1.9417377710342407, "learning_rate": 9.854504756575266e-05, "loss": 2.0335, "step": 186 }, { "epoch": 0.045081230142077815, "grad_norm": 1.8367892503738403, "learning_rate": 9.852905907746423e-05, "loss": 2.078, "step": 188 }, { "epoch": 0.04556081769678077, "grad_norm": 1.9329580068588257, "learning_rate": 9.851307058917579e-05, "loss": 1.9824, "step": 190 }, { "epoch": 0.04604040525148372, "grad_norm": 1.7819116115570068, "learning_rate": 9.849708210088736e-05, "loss": 2.0503, "step": 192 }, { "epoch": 0.046519992806186676, "grad_norm": 1.8635042905807495, "learning_rate": 9.848109361259892e-05, "loss": 2.0396, "step": 194 }, { "epoch": 0.04699958036088964, "grad_norm": 1.9340475797653198, "learning_rate": 9.84651051243105e-05, "loss": 2.0166, "step": 196 }, { "epoch": 0.04747916791559259, "grad_norm": 1.6574290990829468, "learning_rate": 9.844911663602207e-05, "loss": 2.0473, "step": 198 }, { "epoch": 0.047958755470295544, "grad_norm": 2.0224714279174805, "learning_rate": 9.843312814773363e-05, "loss": 2.0539, "step": 200 }, { "epoch": 0.0484383430249985, "grad_norm": 1.8588634729385376, "learning_rate": 9.84171396594452e-05, "loss": 2.0414, "step": 202 }, { "epoch": 0.04891793057970146, "grad_norm": 1.6053677797317505, "learning_rate": 
9.840115117115676e-05, "loss": 2.0001, "step": 204 }, { "epoch": 0.04939751813440441, "grad_norm": 1.603960394859314, "learning_rate": 9.838516268286834e-05, "loss": 2.0125, "step": 206 }, { "epoch": 0.049877105689107366, "grad_norm": 1.5317060947418213, "learning_rate": 9.836917419457991e-05, "loss": 1.9929, "step": 208 }, { "epoch": 0.05035669324381032, "grad_norm": 1.7243187427520752, "learning_rate": 9.835318570629148e-05, "loss": 2.0833, "step": 210 }, { "epoch": 0.05083628079851328, "grad_norm": 1.611373782157898, "learning_rate": 9.833719721800304e-05, "loss": 2.0257, "step": 212 }, { "epoch": 0.051315868353216235, "grad_norm": 1.9812567234039307, "learning_rate": 9.83212087297146e-05, "loss": 2.0725, "step": 214 }, { "epoch": 0.05179545590791919, "grad_norm": 1.7286604642868042, "learning_rate": 9.830522024142617e-05, "loss": 2.0777, "step": 216 }, { "epoch": 0.05227504346262214, "grad_norm": 1.6793160438537598, "learning_rate": 9.828923175313775e-05, "loss": 2.0173, "step": 218 }, { "epoch": 0.0527546310173251, "grad_norm": 1.8709471225738525, "learning_rate": 9.827324326484932e-05, "loss": 2.0824, "step": 220 }, { "epoch": 0.05323421857202806, "grad_norm": 1.670653223991394, "learning_rate": 9.825725477656088e-05, "loss": 2.0217, "step": 222 }, { "epoch": 0.05371380612673101, "grad_norm": 1.7317931652069092, "learning_rate": 9.824126628827245e-05, "loss": 1.999, "step": 224 }, { "epoch": 0.054193393681433964, "grad_norm": 1.5993953943252563, "learning_rate": 9.822527779998401e-05, "loss": 2.0222, "step": 226 }, { "epoch": 0.054672981236136925, "grad_norm": 1.8333159685134888, "learning_rate": 9.820928931169558e-05, "loss": 1.9743, "step": 228 }, { "epoch": 0.05515256879083988, "grad_norm": 1.6943684816360474, "learning_rate": 9.819330082340716e-05, "loss": 1.9864, "step": 230 }, { "epoch": 0.05563215634554283, "grad_norm": 1.5798134803771973, "learning_rate": 9.817731233511872e-05, "loss": 2.0526, "step": 232 }, { "epoch": 0.056111743900245786, 
"grad_norm": 1.8624980449676514, "learning_rate": 9.816132384683029e-05, "loss": 2.013, "step": 234 }, { "epoch": 0.05659133145494875, "grad_norm": 1.670074462890625, "learning_rate": 9.814533535854186e-05, "loss": 2.0309, "step": 236 }, { "epoch": 0.0570709190096517, "grad_norm": 1.3447343111038208, "learning_rate": 9.812934687025342e-05, "loss": 1.9984, "step": 238 }, { "epoch": 0.057550506564354655, "grad_norm": 1.5524078607559204, "learning_rate": 9.811335838196499e-05, "loss": 2.0196, "step": 240 }, { "epoch": 0.05803009411905761, "grad_norm": 1.7860229015350342, "learning_rate": 9.809736989367657e-05, "loss": 1.9399, "step": 242 }, { "epoch": 0.05850968167376057, "grad_norm": 1.5522782802581787, "learning_rate": 9.808138140538813e-05, "loss": 1.9966, "step": 244 }, { "epoch": 0.05898926922846352, "grad_norm": 1.5307941436767578, "learning_rate": 9.80653929170997e-05, "loss": 1.992, "step": 246 }, { "epoch": 0.05946885678316648, "grad_norm": 1.5058282613754272, "learning_rate": 9.804940442881126e-05, "loss": 2.02, "step": 248 }, { "epoch": 0.05994844433786943, "grad_norm": 1.7620549201965332, "learning_rate": 9.803341594052283e-05, "loss": 2.039, "step": 250 }, { "epoch": 0.06042803189257239, "grad_norm": 1.5112171173095703, "learning_rate": 9.80174274522344e-05, "loss": 1.9949, "step": 252 }, { "epoch": 0.060907619447275345, "grad_norm": 1.4734177589416504, "learning_rate": 9.800143896394597e-05, "loss": 1.9812, "step": 254 }, { "epoch": 0.0613872070019783, "grad_norm": 1.3453019857406616, "learning_rate": 9.798545047565754e-05, "loss": 1.9964, "step": 256 }, { "epoch": 0.06186679455668125, "grad_norm": 1.4692628383636475, "learning_rate": 9.79694619873691e-05, "loss": 2.0019, "step": 258 }, { "epoch": 0.062346382111384206, "grad_norm": 1.5329688787460327, "learning_rate": 9.795347349908066e-05, "loss": 1.9776, "step": 260 }, { "epoch": 0.06282596966608717, "grad_norm": 1.5684436559677124, "learning_rate": 9.793748501079222e-05, "loss": 2.0404, "step": 262 }, 
{ "epoch": 0.06330555722079012, "grad_norm": 1.2867313623428345, "learning_rate": 9.79214965225038e-05, "loss": 2.0364, "step": 264 }, { "epoch": 0.06378514477549307, "grad_norm": 1.450502872467041, "learning_rate": 9.790550803421537e-05, "loss": 2.0454, "step": 266 }, { "epoch": 0.06426473233019603, "grad_norm": 1.3149186372756958, "learning_rate": 9.788951954592693e-05, "loss": 2.0234, "step": 268 }, { "epoch": 0.06474431988489898, "grad_norm": 1.2291542291641235, "learning_rate": 9.78735310576385e-05, "loss": 1.9817, "step": 270 }, { "epoch": 0.06522390743960194, "grad_norm": 1.4603662490844727, "learning_rate": 9.785754256935007e-05, "loss": 1.9952, "step": 272 }, { "epoch": 0.0657034949943049, "grad_norm": 1.5469989776611328, "learning_rate": 9.784155408106163e-05, "loss": 2.0207, "step": 274 }, { "epoch": 0.06618308254900786, "grad_norm": 1.3526504039764404, "learning_rate": 9.782556559277321e-05, "loss": 2.0133, "step": 276 }, { "epoch": 0.06666267010371081, "grad_norm": 1.3773233890533447, "learning_rate": 9.780957710448478e-05, "loss": 2.0037, "step": 278 }, { "epoch": 0.06714225765841376, "grad_norm": 1.5665323734283447, "learning_rate": 9.779358861619634e-05, "loss": 2.0246, "step": 280 }, { "epoch": 0.06762184521311672, "grad_norm": 1.4199268817901611, "learning_rate": 9.777760012790791e-05, "loss": 1.9918, "step": 282 }, { "epoch": 0.06810143276781967, "grad_norm": 1.3533822298049927, "learning_rate": 9.776161163961947e-05, "loss": 1.9351, "step": 284 }, { "epoch": 0.06858102032252263, "grad_norm": 1.3868097066879272, "learning_rate": 9.774562315133104e-05, "loss": 1.9626, "step": 286 }, { "epoch": 0.06906060787722558, "grad_norm": 1.7091712951660156, "learning_rate": 9.772963466304262e-05, "loss": 2.0068, "step": 288 }, { "epoch": 0.06954019543192855, "grad_norm": 1.2914841175079346, "learning_rate": 9.771364617475418e-05, "loss": 2.0229, "step": 290 }, { "epoch": 0.0700197829866315, "grad_norm": 1.375750184059143, "learning_rate": 
9.769765768646575e-05, "loss": 2.0374, "step": 292 }, { "epoch": 0.07049937054133446, "grad_norm": 1.5672591924667358, "learning_rate": 9.768166919817732e-05, "loss": 1.9825, "step": 294 }, { "epoch": 0.07097895809603741, "grad_norm": 1.4626381397247314, "learning_rate": 9.766568070988888e-05, "loss": 2.0257, "step": 296 }, { "epoch": 0.07145854565074036, "grad_norm": 1.3994699716567993, "learning_rate": 9.764969222160045e-05, "loss": 1.9971, "step": 298 }, { "epoch": 0.07193813320544332, "grad_norm": 1.4914413690567017, "learning_rate": 9.763370373331203e-05, "loss": 1.9984, "step": 300 }, { "epoch": 0.07241772076014627, "grad_norm": 1.470712423324585, "learning_rate": 9.761771524502359e-05, "loss": 2.0083, "step": 302 }, { "epoch": 0.07289730831484922, "grad_norm": 1.496848464012146, "learning_rate": 9.760172675673516e-05, "loss": 1.9728, "step": 304 }, { "epoch": 0.07337689586955219, "grad_norm": 1.5250974893569946, "learning_rate": 9.758573826844672e-05, "loss": 2.0366, "step": 306 }, { "epoch": 0.07385648342425515, "grad_norm": 1.5960869789123535, "learning_rate": 9.756974978015829e-05, "loss": 2.0246, "step": 308 }, { "epoch": 0.0743360709789581, "grad_norm": 1.485356330871582, "learning_rate": 9.755376129186986e-05, "loss": 2.0496, "step": 310 }, { "epoch": 0.07481565853366105, "grad_norm": 1.780368685722351, "learning_rate": 9.753777280358143e-05, "loss": 1.9831, "step": 312 }, { "epoch": 0.075295246088364, "grad_norm": 1.5884053707122803, "learning_rate": 9.7521784315293e-05, "loss": 2.0197, "step": 314 }, { "epoch": 0.07577483364306696, "grad_norm": 1.557611107826233, "learning_rate": 9.750579582700457e-05, "loss": 2.0042, "step": 316 }, { "epoch": 0.07625442119776991, "grad_norm": 1.6271719932556152, "learning_rate": 9.748980733871613e-05, "loss": 1.9927, "step": 318 }, { "epoch": 0.07673400875247287, "grad_norm": 1.6771419048309326, "learning_rate": 9.74738188504277e-05, "loss": 2.0104, "step": 320 }, { "epoch": 0.07721359630717582, "grad_norm": 
1.4173927307128906, "learning_rate": 9.745783036213926e-05, "loss": 2.0032, "step": 322 }, { "epoch": 0.07769318386187879, "grad_norm": 2.060178279876709, "learning_rate": 9.744184187385084e-05, "loss": 1.9805, "step": 324 }, { "epoch": 0.07817277141658174, "grad_norm": 1.2423216104507446, "learning_rate": 9.742585338556241e-05, "loss": 2.0156, "step": 326 }, { "epoch": 0.0786523589712847, "grad_norm": 1.590680718421936, "learning_rate": 9.740986489727397e-05, "loss": 1.9942, "step": 328 }, { "epoch": 0.07913194652598765, "grad_norm": 1.8672218322753906, "learning_rate": 9.739387640898554e-05, "loss": 1.9727, "step": 330 }, { "epoch": 0.0796115340806906, "grad_norm": 1.3553147315979004, "learning_rate": 9.737788792069709e-05, "loss": 2.0167, "step": 332 }, { "epoch": 0.08009112163539356, "grad_norm": 2.167755126953125, "learning_rate": 9.736189943240867e-05, "loss": 2.0275, "step": 334 }, { "epoch": 0.08057070919009651, "grad_norm": 1.3995441198349, "learning_rate": 9.734591094412024e-05, "loss": 2.0052, "step": 336 }, { "epoch": 0.08105029674479947, "grad_norm": 1.7762105464935303, "learning_rate": 9.73299224558318e-05, "loss": 1.9753, "step": 338 }, { "epoch": 0.08152988429950243, "grad_norm": 1.4403513669967651, "learning_rate": 9.731393396754337e-05, "loss": 1.9261, "step": 340 }, { "epoch": 0.08200947185420539, "grad_norm": 1.2762502431869507, "learning_rate": 9.729794547925493e-05, "loss": 1.9789, "step": 342 }, { "epoch": 0.08248905940890834, "grad_norm": 1.7073395252227783, "learning_rate": 9.72819569909665e-05, "loss": 1.9704, "step": 344 }, { "epoch": 0.0829686469636113, "grad_norm": 1.182620644569397, "learning_rate": 9.726596850267808e-05, "loss": 1.9989, "step": 346 }, { "epoch": 0.08344823451831425, "grad_norm": 1.595080018043518, "learning_rate": 9.724998001438964e-05, "loss": 1.9953, "step": 348 }, { "epoch": 0.0839278220730172, "grad_norm": 1.1806150674819946, "learning_rate": 9.723399152610121e-05, "loss": 2.0098, "step": 350 }, { "epoch": 
0.08440740962772016, "grad_norm": 1.704587459564209, "learning_rate": 9.721800303781278e-05, "loss": 1.9809, "step": 352 }, { "epoch": 0.08488699718242311, "grad_norm": 1.2604211568832397, "learning_rate": 9.720201454952434e-05, "loss": 1.9069, "step": 354 }, { "epoch": 0.08536658473712608, "grad_norm": 1.3360824584960938, "learning_rate": 9.71860260612359e-05, "loss": 1.9582, "step": 356 }, { "epoch": 0.08584617229182903, "grad_norm": 1.6204489469528198, "learning_rate": 9.717003757294749e-05, "loss": 2.0044, "step": 358 }, { "epoch": 0.08632575984653199, "grad_norm": 1.501020908355713, "learning_rate": 9.715404908465905e-05, "loss": 1.9765, "step": 360 }, { "epoch": 0.08680534740123494, "grad_norm": 1.1495616436004639, "learning_rate": 9.713806059637062e-05, "loss": 2.0312, "step": 362 }, { "epoch": 0.08728493495593789, "grad_norm": 1.862821102142334, "learning_rate": 9.712207210808218e-05, "loss": 1.9382, "step": 364 }, { "epoch": 0.08776452251064085, "grad_norm": 1.2811983823776245, "learning_rate": 9.710608361979375e-05, "loss": 1.9525, "step": 366 }, { "epoch": 0.0882441100653438, "grad_norm": 1.3205124139785767, "learning_rate": 9.709009513150531e-05, "loss": 2.0031, "step": 368 }, { "epoch": 0.08872369762004675, "grad_norm": 1.2900264263153076, "learning_rate": 9.70741066432169e-05, "loss": 1.9068, "step": 370 }, { "epoch": 0.08920328517474972, "grad_norm": 1.2781169414520264, "learning_rate": 9.705811815492846e-05, "loss": 2.0105, "step": 372 }, { "epoch": 0.08968287272945268, "grad_norm": 1.5174392461776733, "learning_rate": 9.704212966664003e-05, "loss": 1.919, "step": 374 }, { "epoch": 0.09016246028415563, "grad_norm": 1.4147112369537354, "learning_rate": 9.702614117835159e-05, "loss": 2.0072, "step": 376 }, { "epoch": 0.09064204783885858, "grad_norm": 1.2113193273544312, "learning_rate": 9.701015269006316e-05, "loss": 1.9774, "step": 378 }, { "epoch": 0.09112163539356154, "grad_norm": 1.347078800201416, "learning_rate": 9.699416420177472e-05, "loss": 
1.9176, "step": 380 }, { "epoch": 0.09160122294826449, "grad_norm": 1.373464584350586, "learning_rate": 9.69781757134863e-05, "loss": 1.9594, "step": 382 }, { "epoch": 0.09208081050296744, "grad_norm": 1.4861701726913452, "learning_rate": 9.696218722519787e-05, "loss": 1.988, "step": 384 }, { "epoch": 0.0925603980576704, "grad_norm": 1.1796787977218628, "learning_rate": 9.694619873690943e-05, "loss": 1.9725, "step": 386 }, { "epoch": 0.09303998561237335, "grad_norm": 1.7563891410827637, "learning_rate": 9.6930210248621e-05, "loss": 1.9376, "step": 388 }, { "epoch": 0.09351957316707632, "grad_norm": 1.1446044445037842, "learning_rate": 9.691422176033256e-05, "loss": 1.9425, "step": 390 }, { "epoch": 0.09399916072177927, "grad_norm": 1.5360944271087646, "learning_rate": 9.689823327204413e-05, "loss": 1.9819, "step": 392 }, { "epoch": 0.09447874827648223, "grad_norm": 1.5220248699188232, "learning_rate": 9.688224478375571e-05, "loss": 2.0304, "step": 394 }, { "epoch": 0.09495833583118518, "grad_norm": 1.41269052028656, "learning_rate": 9.686625629546727e-05, "loss": 2.0074, "step": 396 }, { "epoch": 0.09543792338588813, "grad_norm": 1.2587449550628662, "learning_rate": 9.685026780717884e-05, "loss": 1.9987, "step": 398 }, { "epoch": 0.09591751094059109, "grad_norm": 1.3371970653533936, "learning_rate": 9.68342793188904e-05, "loss": 2.0015, "step": 400 }, { "epoch": 0.09591751094059109, "eval_loss": 1.9252121448516846, "eval_runtime": 331.093, "eval_samples_per_second": 403.056, "eval_steps_per_second": 12.598, "step": 400 }, { "epoch": 0.09639709849529404, "grad_norm": 1.3168896436691284, "learning_rate": 9.681829083060197e-05, "loss": 1.97, "step": 402 }, { "epoch": 0.096876686049997, "grad_norm": 1.4563161134719849, "learning_rate": 9.680230234231354e-05, "loss": 1.9936, "step": 404 }, { "epoch": 0.09735627360469996, "grad_norm": 1.4465227127075195, "learning_rate": 9.67863138540251e-05, "loss": 1.9528, "step": 406 }, { "epoch": 0.09783586115940292, "grad_norm": 
1.439718246459961, "learning_rate": 9.677032536573667e-05, "loss": 1.9818, "step": 408 }, { "epoch": 0.09831544871410587, "grad_norm": 1.7008998394012451, "learning_rate": 9.675433687744823e-05, "loss": 2.0152, "step": 410 }, { "epoch": 0.09879503626880883, "grad_norm": 1.293601393699646, "learning_rate": 9.67383483891598e-05, "loss": 1.9634, "step": 412 }, { "epoch": 0.09927462382351178, "grad_norm": 2.1239864826202393, "learning_rate": 9.672235990087138e-05, "loss": 1.9633, "step": 414 }, { "epoch": 0.09975421137821473, "grad_norm": 1.2996041774749756, "learning_rate": 9.670637141258295e-05, "loss": 1.9126, "step": 416 }, { "epoch": 0.10023379893291769, "grad_norm": 1.6497595310211182, "learning_rate": 9.669038292429451e-05, "loss": 1.945, "step": 418 }, { "epoch": 0.10071338648762064, "grad_norm": 1.8146965503692627, "learning_rate": 9.667439443600608e-05, "loss": 1.9923, "step": 420 }, { "epoch": 0.10119297404232361, "grad_norm": 1.5439316034317017, "learning_rate": 9.665840594771764e-05, "loss": 1.9613, "step": 422 }, { "epoch": 0.10167256159702656, "grad_norm": 1.5164133310317993, "learning_rate": 9.664241745942921e-05, "loss": 2.0473, "step": 424 }, { "epoch": 0.10215214915172952, "grad_norm": 1.4498145580291748, "learning_rate": 9.662642897114079e-05, "loss": 1.9571, "step": 426 }, { "epoch": 0.10263173670643247, "grad_norm": 1.315183401107788, "learning_rate": 9.661044048285235e-05, "loss": 1.9338, "step": 428 }, { "epoch": 0.10311132426113542, "grad_norm": 1.3200712203979492, "learning_rate": 9.659445199456392e-05, "loss": 1.9254, "step": 430 }, { "epoch": 0.10359091181583838, "grad_norm": 1.253326177597046, "learning_rate": 9.657846350627548e-05, "loss": 2.0371, "step": 432 }, { "epoch": 0.10407049937054133, "grad_norm": 1.4737728834152222, "learning_rate": 9.656247501798705e-05, "loss": 1.9589, "step": 434 }, { "epoch": 0.10455008692524428, "grad_norm": 1.2018907070159912, "learning_rate": 9.654648652969862e-05, "loss": 1.9754, "step": 436 }, { "epoch": 
0.10502967447994725, "grad_norm": 1.2446662187576294, "learning_rate": 9.65304980414102e-05, "loss": 1.9818, "step": 438 }, { "epoch": 0.1055092620346502, "grad_norm": 1.623217225074768, "learning_rate": 9.651450955312176e-05, "loss": 1.9574, "step": 440 }, { "epoch": 0.10598884958935316, "grad_norm": 1.262779712677002, "learning_rate": 9.649852106483333e-05, "loss": 1.9579, "step": 442 }, { "epoch": 0.10646843714405611, "grad_norm": 1.2270114421844482, "learning_rate": 9.648253257654489e-05, "loss": 1.9772, "step": 444 }, { "epoch": 0.10694802469875907, "grad_norm": 1.2315534353256226, "learning_rate": 9.646654408825646e-05, "loss": 1.9336, "step": 446 }, { "epoch": 0.10742761225346202, "grad_norm": 1.3502308130264282, "learning_rate": 9.645055559996802e-05, "loss": 1.9441, "step": 448 }, { "epoch": 0.10790719980816497, "grad_norm": 1.5277208089828491, "learning_rate": 9.64345671116796e-05, "loss": 1.9462, "step": 450 }, { "epoch": 0.10838678736286793, "grad_norm": 1.536509394645691, "learning_rate": 9.641857862339117e-05, "loss": 1.9502, "step": 452 }, { "epoch": 0.10886637491757088, "grad_norm": 1.4916837215423584, "learning_rate": 9.640259013510273e-05, "loss": 1.9322, "step": 454 }, { "epoch": 0.10934596247227385, "grad_norm": 1.329494833946228, "learning_rate": 9.63866016468143e-05, "loss": 1.9693, "step": 456 }, { "epoch": 0.1098255500269768, "grad_norm": 1.3029866218566895, "learning_rate": 9.637061315852587e-05, "loss": 1.9783, "step": 458 }, { "epoch": 0.11030513758167976, "grad_norm": 1.4716542959213257, "learning_rate": 9.635462467023743e-05, "loss": 1.9386, "step": 460 }, { "epoch": 0.11078472513638271, "grad_norm": 1.2512433528900146, "learning_rate": 9.633863618194901e-05, "loss": 1.9574, "step": 462 }, { "epoch": 0.11126431269108567, "grad_norm": 1.3634129762649536, "learning_rate": 9.632264769366058e-05, "loss": 1.9858, "step": 464 }, { "epoch": 0.11174390024578862, "grad_norm": 1.3121025562286377, "learning_rate": 9.630665920537214e-05, "loss": 
1.9287, "step": 466 }, { "epoch": 0.11222348780049157, "grad_norm": 1.5207208395004272, "learning_rate": 9.629067071708371e-05, "loss": 1.9496, "step": 468 }, { "epoch": 0.11270307535519453, "grad_norm": 1.2897610664367676, "learning_rate": 9.627468222879527e-05, "loss": 1.9052, "step": 470 }, { "epoch": 0.1131826629098975, "grad_norm": 1.1859676837921143, "learning_rate": 9.625869374050684e-05, "loss": 1.9538, "step": 472 }, { "epoch": 0.11366225046460045, "grad_norm": 1.223778486251831, "learning_rate": 9.624270525221842e-05, "loss": 1.9465, "step": 474 }, { "epoch": 0.1141418380193034, "grad_norm": 1.4020237922668457, "learning_rate": 9.622671676392998e-05, "loss": 1.9435, "step": 476 }, { "epoch": 0.11462142557400636, "grad_norm": 1.3018676042556763, "learning_rate": 9.621072827564154e-05, "loss": 1.9264, "step": 478 }, { "epoch": 0.11510101312870931, "grad_norm": 1.5312923192977905, "learning_rate": 9.61947397873531e-05, "loss": 1.9581, "step": 480 }, { "epoch": 0.11558060068341226, "grad_norm": 1.5727242231369019, "learning_rate": 9.617875129906467e-05, "loss": 1.968, "step": 482 }, { "epoch": 0.11606018823811522, "grad_norm": 1.2110865116119385, "learning_rate": 9.616276281077625e-05, "loss": 1.9456, "step": 484 }, { "epoch": 0.11653977579281817, "grad_norm": 1.276063323020935, "learning_rate": 9.614677432248781e-05, "loss": 1.9601, "step": 486 }, { "epoch": 0.11701936334752114, "grad_norm": 1.6699234247207642, "learning_rate": 9.613078583419938e-05, "loss": 1.949, "step": 488 }, { "epoch": 0.11749895090222409, "grad_norm": 1.3957699537277222, "learning_rate": 9.611479734591094e-05, "loss": 1.9638, "step": 490 }, { "epoch": 0.11797853845692705, "grad_norm": 1.4786585569381714, "learning_rate": 9.609880885762251e-05, "loss": 1.8961, "step": 492 }, { "epoch": 0.11845812601163, "grad_norm": 1.1373939514160156, "learning_rate": 9.608282036933408e-05, "loss": 1.9323, "step": 494 }, { "epoch": 0.11893771356633295, "grad_norm": 1.3426014184951782, "learning_rate": 
9.606683188104565e-05, "loss": 1.9726, "step": 496 }, { "epoch": 0.11941730112103591, "grad_norm": 1.4167038202285767, "learning_rate": 9.605084339275722e-05, "loss": 1.9328, "step": 498 }, { "epoch": 0.11989688867573886, "grad_norm": 1.221078872680664, "learning_rate": 9.603485490446879e-05, "loss": 1.9628, "step": 500 }, { "epoch": 0.12037647623044181, "grad_norm": 1.2866795063018799, "learning_rate": 9.601886641618035e-05, "loss": 1.9379, "step": 502 }, { "epoch": 0.12085606378514478, "grad_norm": 1.4759160280227661, "learning_rate": 9.600287792789192e-05, "loss": 1.9454, "step": 504 }, { "epoch": 0.12133565133984774, "grad_norm": 1.3871409893035889, "learning_rate": 9.598688943960348e-05, "loss": 1.9354, "step": 506 }, { "epoch": 0.12181523889455069, "grad_norm": 1.7413160800933838, "learning_rate": 9.597090095131506e-05, "loss": 1.9148, "step": 508 }, { "epoch": 0.12229482644925364, "grad_norm": 1.2414860725402832, "learning_rate": 9.595491246302663e-05, "loss": 1.9676, "step": 510 }, { "epoch": 0.1227744140039566, "grad_norm": 1.0617872476577759, "learning_rate": 9.59389239747382e-05, "loss": 1.9442, "step": 512 }, { "epoch": 0.12325400155865955, "grad_norm": 1.5308327674865723, "learning_rate": 9.592293548644976e-05, "loss": 2.0263, "step": 514 }, { "epoch": 0.1237335891133625, "grad_norm": 1.5236148834228516, "learning_rate": 9.590694699816133e-05, "loss": 1.9259, "step": 516 }, { "epoch": 0.12421317666806546, "grad_norm": 2.5736546516418457, "learning_rate": 9.589095850987289e-05, "loss": 1.9429, "step": 518 }, { "epoch": 0.12469276422276841, "grad_norm": 1.2423032522201538, "learning_rate": 9.587497002158447e-05, "loss": 1.9679, "step": 520 }, { "epoch": 0.12517235177747138, "grad_norm": 1.2832344770431519, "learning_rate": 9.585898153329604e-05, "loss": 1.9247, "step": 522 }, { "epoch": 0.12565193933217433, "grad_norm": 1.8385121822357178, "learning_rate": 9.58429930450076e-05, "loss": 2.0164, "step": 524 }, { "epoch": 0.1261315268868773, "grad_norm": 
1.287920355796814, "learning_rate": 9.582700455671917e-05, "loss": 1.9194, "step": 526 }, { "epoch": 0.12661111444158024, "grad_norm": 1.3395719528198242, "learning_rate": 9.581101606843073e-05, "loss": 1.9189, "step": 528 }, { "epoch": 0.1270907019962832, "grad_norm": 1.6566509008407593, "learning_rate": 9.57950275801423e-05, "loss": 1.879, "step": 530 }, { "epoch": 0.12757028955098615, "grad_norm": 1.3947685956954956, "learning_rate": 9.577903909185388e-05, "loss": 1.9275, "step": 532 }, { "epoch": 0.1280498771056891, "grad_norm": 1.0772836208343506, "learning_rate": 9.576305060356544e-05, "loss": 1.9899, "step": 534 }, { "epoch": 0.12852946466039206, "grad_norm": 1.7339404821395874, "learning_rate": 9.574706211527701e-05, "loss": 1.9317, "step": 536 }, { "epoch": 0.129009052215095, "grad_norm": 1.143018364906311, "learning_rate": 9.573107362698858e-05, "loss": 1.9288, "step": 538 }, { "epoch": 0.12948863976979796, "grad_norm": 1.1285758018493652, "learning_rate": 9.571508513870014e-05, "loss": 1.9511, "step": 540 }, { "epoch": 0.12996822732450092, "grad_norm": 1.1610206365585327, "learning_rate": 9.56990966504117e-05, "loss": 1.9474, "step": 542 }, { "epoch": 0.13044781487920387, "grad_norm": 1.3246122598648071, "learning_rate": 9.568310816212329e-05, "loss": 1.9043, "step": 544 }, { "epoch": 0.13092740243390685, "grad_norm": 1.1753571033477783, "learning_rate": 9.566711967383485e-05, "loss": 2.0352, "step": 546 }, { "epoch": 0.1314069899886098, "grad_norm": 1.4533817768096924, "learning_rate": 9.565113118554642e-05, "loss": 1.909, "step": 548 }, { "epoch": 0.13188657754331276, "grad_norm": 1.3799842596054077, "learning_rate": 9.563514269725797e-05, "loss": 1.9411, "step": 550 }, { "epoch": 0.13236616509801571, "grad_norm": 1.2329798936843872, "learning_rate": 9.561915420896954e-05, "loss": 1.9152, "step": 552 }, { "epoch": 0.13284575265271867, "grad_norm": 1.2540152072906494, "learning_rate": 9.560316572068111e-05, "loss": 1.8669, "step": 554 }, { "epoch": 
0.13332534020742162, "grad_norm": 1.1107324361801147, "learning_rate": 9.558717723239268e-05, "loss": 1.9049, "step": 556 }, { "epoch": 0.13380492776212458, "grad_norm": 1.3846670389175415, "learning_rate": 9.557118874410425e-05, "loss": 1.9457, "step": 558 }, { "epoch": 0.13428451531682753, "grad_norm": 1.150028109550476, "learning_rate": 9.555520025581581e-05, "loss": 1.9138, "step": 560 }, { "epoch": 0.13476410287153048, "grad_norm": 1.2424002885818481, "learning_rate": 9.553921176752738e-05, "loss": 1.9336, "step": 562 }, { "epoch": 0.13524369042623344, "grad_norm": 1.3219919204711914, "learning_rate": 9.552322327923894e-05, "loss": 1.932, "step": 564 }, { "epoch": 0.1357232779809364, "grad_norm": 1.2750744819641113, "learning_rate": 9.550723479095052e-05, "loss": 2.0011, "step": 566 }, { "epoch": 0.13620286553563934, "grad_norm": 1.4798171520233154, "learning_rate": 9.549124630266209e-05, "loss": 1.9338, "step": 568 }, { "epoch": 0.1366824530903423, "grad_norm": 1.3065439462661743, "learning_rate": 9.547525781437365e-05, "loss": 1.9374, "step": 570 }, { "epoch": 0.13716204064504525, "grad_norm": 1.2051888704299927, "learning_rate": 9.545926932608522e-05, "loss": 1.9047, "step": 572 }, { "epoch": 0.1376416281997482, "grad_norm": 1.5030369758605957, "learning_rate": 9.544328083779678e-05, "loss": 1.9334, "step": 574 }, { "epoch": 0.13812121575445116, "grad_norm": 1.1668771505355835, "learning_rate": 9.542729234950835e-05, "loss": 1.9077, "step": 576 }, { "epoch": 0.1386008033091541, "grad_norm": 1.568035364151001, "learning_rate": 9.541130386121993e-05, "loss": 1.9084, "step": 578 }, { "epoch": 0.1390803908638571, "grad_norm": 1.1449460983276367, "learning_rate": 9.53953153729315e-05, "loss": 1.981, "step": 580 }, { "epoch": 0.13955997841856005, "grad_norm": 1.6607357263565063, "learning_rate": 9.537932688464306e-05, "loss": 1.9252, "step": 582 }, { "epoch": 0.140039565973263, "grad_norm": 1.1768040657043457, "learning_rate": 9.536333839635463e-05, "loss": 
1.9139, "step": 584 }, { "epoch": 0.14051915352796596, "grad_norm": 1.1166595220565796, "learning_rate": 9.534734990806619e-05, "loss": 1.9516, "step": 586 }, { "epoch": 0.1409987410826689, "grad_norm": 1.1225496530532837, "learning_rate": 9.533136141977776e-05, "loss": 1.9305, "step": 588 }, { "epoch": 0.14147832863737186, "grad_norm": 1.2023111581802368, "learning_rate": 9.531537293148934e-05, "loss": 1.8963, "step": 590 }, { "epoch": 0.14195791619207482, "grad_norm": 1.281446933746338, "learning_rate": 9.52993844432009e-05, "loss": 1.9369, "step": 592 }, { "epoch": 0.14243750374677777, "grad_norm": 1.2900079488754272, "learning_rate": 9.528339595491247e-05, "loss": 1.9217, "step": 594 }, { "epoch": 0.14291709130148073, "grad_norm": 1.2165719270706177, "learning_rate": 9.526740746662403e-05, "loss": 1.9439, "step": 596 }, { "epoch": 0.14339667885618368, "grad_norm": 1.1870462894439697, "learning_rate": 9.52514189783356e-05, "loss": 1.9215, "step": 598 }, { "epoch": 0.14387626641088663, "grad_norm": 1.1263521909713745, "learning_rate": 9.523543049004717e-05, "loss": 1.9429, "step": 600 }, { "epoch": 0.1443558539655896, "grad_norm": 1.333202600479126, "learning_rate": 9.521944200175875e-05, "loss": 1.962, "step": 602 }, { "epoch": 0.14483544152029254, "grad_norm": 1.1717902421951294, "learning_rate": 9.520345351347031e-05, "loss": 1.908, "step": 604 }, { "epoch": 0.1453150290749955, "grad_norm": 1.220607042312622, "learning_rate": 9.518746502518188e-05, "loss": 1.9544, "step": 606 }, { "epoch": 0.14579461662969845, "grad_norm": 1.467587947845459, "learning_rate": 9.517147653689344e-05, "loss": 1.9148, "step": 608 }, { "epoch": 0.1462742041844014, "grad_norm": 1.7173333168029785, "learning_rate": 9.515548804860501e-05, "loss": 1.9361, "step": 610 }, { "epoch": 0.14675379173910438, "grad_norm": 1.300430178642273, "learning_rate": 9.513949956031659e-05, "loss": 1.9045, "step": 612 }, { "epoch": 0.14723337929380734, "grad_norm": 1.2261357307434082, "learning_rate": 
9.512351107202815e-05, "loss": 1.9477, "step": 614 }, { "epoch": 0.1477129668485103, "grad_norm": 1.2258354425430298, "learning_rate": 9.510752258373972e-05, "loss": 1.8657, "step": 616 }, { "epoch": 0.14819255440321324, "grad_norm": 1.41975998878479, "learning_rate": 9.509153409545128e-05, "loss": 1.8941, "step": 618 }, { "epoch": 0.1486721419579162, "grad_norm": 1.4277480840682983, "learning_rate": 9.507554560716285e-05, "loss": 1.9199, "step": 620 }, { "epoch": 0.14915172951261915, "grad_norm": 1.136846899986267, "learning_rate": 9.505955711887442e-05, "loss": 1.8723, "step": 622 }, { "epoch": 0.1496313170673221, "grad_norm": 1.3811041116714478, "learning_rate": 9.504356863058598e-05, "loss": 1.9485, "step": 624 }, { "epoch": 0.15011090462202506, "grad_norm": 1.1356114149093628, "learning_rate": 9.502758014229755e-05, "loss": 1.9435, "step": 626 }, { "epoch": 0.150590492176728, "grad_norm": 1.5349056720733643, "learning_rate": 9.501159165400911e-05, "loss": 1.9403, "step": 628 }, { "epoch": 0.15107007973143097, "grad_norm": 1.3672250509262085, "learning_rate": 9.499560316572068e-05, "loss": 1.8985, "step": 630 }, { "epoch": 0.15154966728613392, "grad_norm": 1.3397271633148193, "learning_rate": 9.497961467743224e-05, "loss": 1.8921, "step": 632 }, { "epoch": 0.15202925484083687, "grad_norm": 1.5770150423049927, "learning_rate": 9.496362618914382e-05, "loss": 1.9249, "step": 634 }, { "epoch": 0.15250884239553983, "grad_norm": 2.0038909912109375, "learning_rate": 9.494763770085539e-05, "loss": 1.939, "step": 636 }, { "epoch": 0.15298842995024278, "grad_norm": 1.2619320154190063, "learning_rate": 9.493164921256696e-05, "loss": 1.9228, "step": 638 }, { "epoch": 0.15346801750494574, "grad_norm": 1.4982367753982544, "learning_rate": 9.491566072427852e-05, "loss": 1.9649, "step": 640 }, { "epoch": 0.1539476050596487, "grad_norm": 1.1893073320388794, "learning_rate": 9.489967223599009e-05, "loss": 1.8909, "step": 642 }, { "epoch": 0.15442719261435164, "grad_norm": 
1.3155988454818726, "learning_rate": 9.488368374770165e-05, "loss": 1.9062, "step": 644 }, { "epoch": 0.15490678016905463, "grad_norm": 1.359671950340271, "learning_rate": 9.486769525941323e-05, "loss": 1.9257, "step": 646 }, { "epoch": 0.15538636772375758, "grad_norm": 1.2156246900558472, "learning_rate": 9.48517067711248e-05, "loss": 1.8975, "step": 648 }, { "epoch": 0.15586595527846053, "grad_norm": 1.5443341732025146, "learning_rate": 9.483571828283636e-05, "loss": 1.9279, "step": 650 }, { "epoch": 0.1563455428331635, "grad_norm": 1.138866901397705, "learning_rate": 9.481972979454793e-05, "loss": 1.8996, "step": 652 }, { "epoch": 0.15682513038786644, "grad_norm": 1.634689450263977, "learning_rate": 9.48037413062595e-05, "loss": 1.9253, "step": 654 }, { "epoch": 0.1573047179425694, "grad_norm": 1.0367298126220703, "learning_rate": 9.478775281797106e-05, "loss": 1.9442, "step": 656 }, { "epoch": 0.15778430549727235, "grad_norm": 1.3804144859313965, "learning_rate": 9.477176432968264e-05, "loss": 1.9101, "step": 658 }, { "epoch": 0.1582638930519753, "grad_norm": 1.4692952632904053, "learning_rate": 9.47557758413942e-05, "loss": 1.9217, "step": 660 }, { "epoch": 0.15874348060667826, "grad_norm": 1.273321509361267, "learning_rate": 9.473978735310577e-05, "loss": 1.9443, "step": 662 }, { "epoch": 0.1592230681613812, "grad_norm": 1.311980128288269, "learning_rate": 9.472379886481734e-05, "loss": 1.9346, "step": 664 }, { "epoch": 0.15970265571608416, "grad_norm": 1.0210908651351929, "learning_rate": 9.47078103765289e-05, "loss": 1.9183, "step": 666 }, { "epoch": 0.16018224327078712, "grad_norm": 1.1315797567367554, "learning_rate": 9.469182188824047e-05, "loss": 1.8879, "step": 668 }, { "epoch": 0.16066183082549007, "grad_norm": 1.1723376512527466, "learning_rate": 9.467583339995205e-05, "loss": 1.9464, "step": 670 }, { "epoch": 0.16114141838019302, "grad_norm": 1.3254740238189697, "learning_rate": 9.465984491166361e-05, "loss": 1.9207, "step": 672 }, { "epoch": 
0.16162100593489598, "grad_norm": 1.2056156396865845, "learning_rate": 9.464385642337518e-05, "loss": 1.8803, "step": 674 }, { "epoch": 0.16210059348959893, "grad_norm": 1.33647882938385, "learning_rate": 9.462786793508674e-05, "loss": 1.9514, "step": 676 }, { "epoch": 0.1625801810443019, "grad_norm": 1.4209730625152588, "learning_rate": 9.461187944679831e-05, "loss": 1.9183, "step": 678 }, { "epoch": 0.16305976859900487, "grad_norm": 1.6771619319915771, "learning_rate": 9.459589095850988e-05, "loss": 1.9661, "step": 680 }, { "epoch": 0.16353935615370782, "grad_norm": 1.2272751331329346, "learning_rate": 9.457990247022145e-05, "loss": 1.9021, "step": 682 }, { "epoch": 0.16401894370841077, "grad_norm": 1.1933482885360718, "learning_rate": 9.456391398193302e-05, "loss": 1.955, "step": 684 }, { "epoch": 0.16449853126311373, "grad_norm": 1.1291298866271973, "learning_rate": 9.454792549364459e-05, "loss": 1.9346, "step": 686 }, { "epoch": 0.16497811881781668, "grad_norm": 1.0653220415115356, "learning_rate": 9.453193700535615e-05, "loss": 1.8785, "step": 688 }, { "epoch": 0.16545770637251964, "grad_norm": 1.2784061431884766, "learning_rate": 9.451594851706772e-05, "loss": 1.9188, "step": 690 }, { "epoch": 0.1659372939272226, "grad_norm": 1.2165429592132568, "learning_rate": 9.449996002877928e-05, "loss": 1.8728, "step": 692 }, { "epoch": 0.16641688148192554, "grad_norm": 1.8687474727630615, "learning_rate": 9.448397154049086e-05, "loss": 1.9342, "step": 694 }, { "epoch": 0.1668964690366285, "grad_norm": 1.1216782331466675, "learning_rate": 9.446798305220241e-05, "loss": 1.8986, "step": 696 }, { "epoch": 0.16737605659133145, "grad_norm": 1.129507064819336, "learning_rate": 9.445199456391398e-05, "loss": 1.8913, "step": 698 }, { "epoch": 0.1678556441460344, "grad_norm": 1.2432583570480347, "learning_rate": 9.443600607562555e-05, "loss": 1.9273, "step": 700 }, { "epoch": 0.16833523170073736, "grad_norm": 1.3295261859893799, "learning_rate": 9.442001758733711e-05, "loss": 
1.8826, "step": 702 }, { "epoch": 0.1688148192554403, "grad_norm": 1.3097573518753052, "learning_rate": 9.440402909904869e-05, "loss": 1.9765, "step": 704 }, { "epoch": 0.16929440681014327, "grad_norm": 1.286086916923523, "learning_rate": 9.438804061076026e-05, "loss": 1.8904, "step": 706 }, { "epoch": 0.16977399436484622, "grad_norm": 1.3315508365631104, "learning_rate": 9.437205212247182e-05, "loss": 1.8934, "step": 708 }, { "epoch": 0.17025358191954917, "grad_norm": 1.0868085622787476, "learning_rate": 9.435606363418339e-05, "loss": 1.9332, "step": 710 }, { "epoch": 0.17073316947425216, "grad_norm": 1.2563633918762207, "learning_rate": 9.434007514589495e-05, "loss": 1.8989, "step": 712 }, { "epoch": 0.1712127570289551, "grad_norm": 1.1288738250732422, "learning_rate": 9.432408665760652e-05, "loss": 1.8819, "step": 714 }, { "epoch": 0.17169234458365806, "grad_norm": 1.0369620323181152, "learning_rate": 9.43080981693181e-05, "loss": 1.9099, "step": 716 }, { "epoch": 0.17217193213836102, "grad_norm": 1.4581665992736816, "learning_rate": 9.429210968102966e-05, "loss": 1.9444, "step": 718 }, { "epoch": 0.17265151969306397, "grad_norm": 1.8353164196014404, "learning_rate": 9.427612119274123e-05, "loss": 1.9396, "step": 720 }, { "epoch": 0.17313110724776692, "grad_norm": 1.455346941947937, "learning_rate": 9.42601327044528e-05, "loss": 1.9334, "step": 722 }, { "epoch": 0.17361069480246988, "grad_norm": 1.0840576887130737, "learning_rate": 9.424414421616436e-05, "loss": 1.9387, "step": 724 }, { "epoch": 0.17409028235717283, "grad_norm": 1.127699375152588, "learning_rate": 9.422815572787593e-05, "loss": 1.8832, "step": 726 }, { "epoch": 0.17456986991187579, "grad_norm": 1.5089322328567505, "learning_rate": 9.42121672395875e-05, "loss": 1.9514, "step": 728 }, { "epoch": 0.17504945746657874, "grad_norm": 1.5981113910675049, "learning_rate": 9.419617875129907e-05, "loss": 1.8832, "step": 730 }, { "epoch": 0.1755290450212817, "grad_norm": 1.3719141483306885, "learning_rate": 
9.418019026301064e-05, "loss": 1.937, "step": 732 }, { "epoch": 0.17600863257598465, "grad_norm": 1.2148356437683105, "learning_rate": 9.41642017747222e-05, "loss": 1.8763, "step": 734 }, { "epoch": 0.1764882201306876, "grad_norm": 1.0717540979385376, "learning_rate": 9.414821328643377e-05, "loss": 1.9186, "step": 736 }, { "epoch": 0.17696780768539055, "grad_norm": 1.4850674867630005, "learning_rate": 9.413222479814533e-05, "loss": 1.8841, "step": 738 }, { "epoch": 0.1774473952400935, "grad_norm": 1.2698620557785034, "learning_rate": 9.411623630985691e-05, "loss": 1.9498, "step": 740 }, { "epoch": 0.17792698279479646, "grad_norm": 1.394752860069275, "learning_rate": 9.410024782156848e-05, "loss": 1.9464, "step": 742 }, { "epoch": 0.17840657034949944, "grad_norm": 1.0521491765975952, "learning_rate": 9.408425933328005e-05, "loss": 1.8693, "step": 744 }, { "epoch": 0.1788861579042024, "grad_norm": 1.1926231384277344, "learning_rate": 9.406827084499161e-05, "loss": 1.9351, "step": 746 }, { "epoch": 0.17936574545890535, "grad_norm": 1.922950267791748, "learning_rate": 9.405228235670318e-05, "loss": 1.8759, "step": 748 }, { "epoch": 0.1798453330136083, "grad_norm": 1.3787970542907715, "learning_rate": 9.403629386841474e-05, "loss": 1.8733, "step": 750 }, { "epoch": 0.18032492056831126, "grad_norm": 1.8921425342559814, "learning_rate": 9.402030538012632e-05, "loss": 1.8732, "step": 752 }, { "epoch": 0.1808045081230142, "grad_norm": 1.850492000579834, "learning_rate": 9.400431689183789e-05, "loss": 1.9433, "step": 754 }, { "epoch": 0.18128409567771717, "grad_norm": 1.2897714376449585, "learning_rate": 9.398832840354945e-05, "loss": 1.895, "step": 756 }, { "epoch": 0.18176368323242012, "grad_norm": 1.5924084186553955, "learning_rate": 9.397233991526102e-05, "loss": 1.8931, "step": 758 }, { "epoch": 0.18224327078712307, "grad_norm": 1.2997933626174927, "learning_rate": 9.395635142697258e-05, "loss": 1.908, "step": 760 }, { "epoch": 0.18272285834182603, "grad_norm": 
1.4838957786560059, "learning_rate": 9.394036293868415e-05, "loss": 1.9407, "step": 762 }, { "epoch": 0.18320244589652898, "grad_norm": 1.4512441158294678, "learning_rate": 9.392437445039573e-05, "loss": 1.9359, "step": 764 }, { "epoch": 0.18368203345123194, "grad_norm": 1.2330009937286377, "learning_rate": 9.39083859621073e-05, "loss": 1.8995, "step": 766 }, { "epoch": 0.1841616210059349, "grad_norm": 1.0685012340545654, "learning_rate": 9.389239747381885e-05, "loss": 1.8694, "step": 768 }, { "epoch": 0.18464120856063784, "grad_norm": 1.3663227558135986, "learning_rate": 9.387640898553041e-05, "loss": 1.8651, "step": 770 }, { "epoch": 0.1851207961153408, "grad_norm": 2.041266679763794, "learning_rate": 9.386042049724198e-05, "loss": 1.9084, "step": 772 }, { "epoch": 0.18560038367004375, "grad_norm": 1.6762182712554932, "learning_rate": 9.384443200895356e-05, "loss": 1.8664, "step": 774 }, { "epoch": 0.1860799712247467, "grad_norm": 1.1576430797576904, "learning_rate": 9.382844352066512e-05, "loss": 1.95, "step": 776 }, { "epoch": 0.18655955877944969, "grad_norm": 1.9010225534439087, "learning_rate": 9.381245503237669e-05, "loss": 1.9053, "step": 778 }, { "epoch": 0.18703914633415264, "grad_norm": 1.1217994689941406, "learning_rate": 9.379646654408826e-05, "loss": 1.8912, "step": 780 }, { "epoch": 0.1875187338888556, "grad_norm": 1.1229023933410645, "learning_rate": 9.378047805579982e-05, "loss": 1.9232, "step": 782 }, { "epoch": 0.18799832144355855, "grad_norm": 0.9996882081031799, "learning_rate": 9.376448956751139e-05, "loss": 1.9129, "step": 784 }, { "epoch": 0.1884779089982615, "grad_norm": 1.3881341218948364, "learning_rate": 9.374850107922297e-05, "loss": 1.9242, "step": 786 }, { "epoch": 0.18895749655296445, "grad_norm": 1.1440995931625366, "learning_rate": 9.373251259093453e-05, "loss": 1.8845, "step": 788 }, { "epoch": 0.1894370841076674, "grad_norm": 1.5389996767044067, "learning_rate": 9.37165241026461e-05, "loss": 1.8427, "step": 790 }, { "epoch": 
0.18991667166237036, "grad_norm": 1.193466305732727, "learning_rate": 9.370053561435766e-05, "loss": 1.9153, "step": 792 }, { "epoch": 0.19039625921707332, "grad_norm": 1.1383545398712158, "learning_rate": 9.368454712606923e-05, "loss": 1.9732, "step": 794 }, { "epoch": 0.19087584677177627, "grad_norm": 2.147712469100952, "learning_rate": 9.36685586377808e-05, "loss": 1.8973, "step": 796 }, { "epoch": 0.19135543432647922, "grad_norm": 1.9236748218536377, "learning_rate": 9.365257014949237e-05, "loss": 1.8517, "step": 798 }, { "epoch": 0.19183502188118218, "grad_norm": 1.1967313289642334, "learning_rate": 9.363658166120394e-05, "loss": 1.8842, "step": 800 }, { "epoch": 0.19183502188118218, "eval_loss": 1.8737967014312744, "eval_runtime": 331.1126, "eval_samples_per_second": 403.032, "eval_steps_per_second": 12.597, "step": 800 }, { "epoch": 0.19231460943588513, "grad_norm": 1.4783129692077637, "learning_rate": 9.36205931729155e-05, "loss": 1.8999, "step": 802 }, { "epoch": 0.19279419699058808, "grad_norm": 2.122056484222412, "learning_rate": 9.360460468462707e-05, "loss": 1.8579, "step": 804 }, { "epoch": 0.19327378454529104, "grad_norm": 1.542930006980896, "learning_rate": 9.358861619633864e-05, "loss": 1.8974, "step": 806 }, { "epoch": 0.193753372099994, "grad_norm": 1.149114966392517, "learning_rate": 9.357262770805022e-05, "loss": 1.9245, "step": 808 }, { "epoch": 0.19423295965469697, "grad_norm": 1.7105470895767212, "learning_rate": 9.355663921976178e-05, "loss": 1.9382, "step": 810 }, { "epoch": 0.19471254720939993, "grad_norm": 1.2905925512313843, "learning_rate": 9.354065073147335e-05, "loss": 1.8965, "step": 812 }, { "epoch": 0.19519213476410288, "grad_norm": 1.2261406183242798, "learning_rate": 9.352466224318491e-05, "loss": 1.8945, "step": 814 }, { "epoch": 0.19567172231880584, "grad_norm": 1.1789605617523193, "learning_rate": 9.350867375489648e-05, "loss": 1.8845, "step": 816 }, { "epoch": 0.1961513098735088, "grad_norm": 1.365492582321167, 
"learning_rate": 9.349268526660804e-05, "loss": 1.9023, "step": 818 }, { "epoch": 0.19663089742821174, "grad_norm": 1.1342097520828247, "learning_rate": 9.347669677831962e-05, "loss": 1.8902, "step": 820 }, { "epoch": 0.1971104849829147, "grad_norm": 1.1547783613204956, "learning_rate": 9.346070829003119e-05, "loss": 1.9539, "step": 822 }, { "epoch": 0.19759007253761765, "grad_norm": 1.0069791078567505, "learning_rate": 9.344471980174275e-05, "loss": 1.869, "step": 824 }, { "epoch": 0.1980696600923206, "grad_norm": 1.7839009761810303, "learning_rate": 9.342873131345432e-05, "loss": 1.8903, "step": 826 }, { "epoch": 0.19854924764702356, "grad_norm": 1.5608015060424805, "learning_rate": 9.341274282516589e-05, "loss": 1.8626, "step": 828 }, { "epoch": 0.1990288352017265, "grad_norm": 1.3124170303344727, "learning_rate": 9.339675433687745e-05, "loss": 1.9267, "step": 830 }, { "epoch": 0.19950842275642947, "grad_norm": 1.289948582649231, "learning_rate": 9.338076584858903e-05, "loss": 1.9215, "step": 832 }, { "epoch": 0.19998801031113242, "grad_norm": 1.1613963842391968, "learning_rate": 9.33647773603006e-05, "loss": 1.8908, "step": 834 }, { "epoch": 0.20046759786583537, "grad_norm": 1.3437671661376953, "learning_rate": 9.334878887201216e-05, "loss": 1.8757, "step": 836 }, { "epoch": 0.20094718542053833, "grad_norm": 1.330276608467102, "learning_rate": 9.333280038372373e-05, "loss": 1.9356, "step": 838 }, { "epoch": 0.20142677297524128, "grad_norm": 1.1401090621948242, "learning_rate": 9.33168118954353e-05, "loss": 1.8715, "step": 840 }, { "epoch": 0.20190636052994423, "grad_norm": 1.4701188802719116, "learning_rate": 9.330082340714686e-05, "loss": 1.9208, "step": 842 }, { "epoch": 0.20238594808464722, "grad_norm": 1.7162588834762573, "learning_rate": 9.328483491885843e-05, "loss": 1.9226, "step": 844 }, { "epoch": 0.20286553563935017, "grad_norm": 1.3135123252868652, "learning_rate": 9.326884643056999e-05, "loss": 1.9438, "step": 846 }, { "epoch": 0.20334512319405312, 
"grad_norm": 1.6803627014160156, "learning_rate": 9.325285794228156e-05, "loss": 1.8911, "step": 848 }, { "epoch": 0.20382471074875608, "grad_norm": 2.0309066772460938, "learning_rate": 9.323686945399312e-05, "loss": 1.9425, "step": 850 }, { "epoch": 0.20430429830345903, "grad_norm": 1.3207412958145142, "learning_rate": 9.322088096570469e-05, "loss": 1.8689, "step": 852 }, { "epoch": 0.20478388585816198, "grad_norm": 1.19448983669281, "learning_rate": 9.320489247741627e-05, "loss": 1.9099, "step": 854 }, { "epoch": 0.20526347341286494, "grad_norm": 1.1181459426879883, "learning_rate": 9.318890398912783e-05, "loss": 1.9411, "step": 856 }, { "epoch": 0.2057430609675679, "grad_norm": 1.9483321905136108, "learning_rate": 9.31729155008394e-05, "loss": 1.8774, "step": 858 }, { "epoch": 0.20622264852227085, "grad_norm": 1.14616858959198, "learning_rate": 9.315692701255096e-05, "loss": 1.9428, "step": 860 }, { "epoch": 0.2067022360769738, "grad_norm": 1.1116836071014404, "learning_rate": 9.314093852426253e-05, "loss": 1.8763, "step": 862 }, { "epoch": 0.20718182363167675, "grad_norm": 1.839694619178772, "learning_rate": 9.31249500359741e-05, "loss": 1.8674, "step": 864 }, { "epoch": 0.2076614111863797, "grad_norm": 1.049057126045227, "learning_rate": 9.310896154768568e-05, "loss": 1.9023, "step": 866 }, { "epoch": 0.20814099874108266, "grad_norm": 1.0505375862121582, "learning_rate": 9.309297305939724e-05, "loss": 1.937, "step": 868 }, { "epoch": 0.20862058629578561, "grad_norm": 1.2693896293640137, "learning_rate": 9.30769845711088e-05, "loss": 1.8804, "step": 870 }, { "epoch": 0.20910017385048857, "grad_norm": 1.2242896556854248, "learning_rate": 9.306099608282037e-05, "loss": 1.882, "step": 872 }, { "epoch": 0.20957976140519152, "grad_norm": 1.211146354675293, "learning_rate": 9.304500759453194e-05, "loss": 1.9058, "step": 874 }, { "epoch": 0.2100593489598945, "grad_norm": 1.0868144035339355, "learning_rate": 9.30290191062435e-05, "loss": 1.842, "step": 876 }, { 
"epoch": 0.21053893651459746, "grad_norm": 1.9897221326828003, "learning_rate": 9.301303061795508e-05, "loss": 1.8967, "step": 878 }, { "epoch": 0.2110185240693004, "grad_norm": 1.3162264823913574, "learning_rate": 9.299704212966665e-05, "loss": 1.9145, "step": 880 }, { "epoch": 0.21149811162400337, "grad_norm": 1.0291255712509155, "learning_rate": 9.298105364137821e-05, "loss": 1.8772, "step": 882 }, { "epoch": 0.21197769917870632, "grad_norm": 1.7426282167434692, "learning_rate": 9.296506515308978e-05, "loss": 1.8159, "step": 884 }, { "epoch": 0.21245728673340927, "grad_norm": 1.2758121490478516, "learning_rate": 9.294907666480135e-05, "loss": 1.9179, "step": 886 }, { "epoch": 0.21293687428811223, "grad_norm": 0.9720642566680908, "learning_rate": 9.293308817651291e-05, "loss": 1.8626, "step": 888 }, { "epoch": 0.21341646184281518, "grad_norm": 1.320380687713623, "learning_rate": 9.291709968822449e-05, "loss": 1.9218, "step": 890 }, { "epoch": 0.21389604939751813, "grad_norm": 1.2245665788650513, "learning_rate": 9.290111119993606e-05, "loss": 1.931, "step": 892 }, { "epoch": 0.2143756369522211, "grad_norm": 1.1743690967559814, "learning_rate": 9.288512271164762e-05, "loss": 1.9257, "step": 894 }, { "epoch": 0.21485522450692404, "grad_norm": 2.031343460083008, "learning_rate": 9.286913422335919e-05, "loss": 1.8498, "step": 896 }, { "epoch": 0.215334812061627, "grad_norm": 1.2709555625915527, "learning_rate": 9.285314573507075e-05, "loss": 1.9387, "step": 898 }, { "epoch": 0.21581439961632995, "grad_norm": 2.348764657974243, "learning_rate": 9.283715724678232e-05, "loss": 1.8977, "step": 900 }, { "epoch": 0.2162939871710329, "grad_norm": 1.1253505945205688, "learning_rate": 9.28211687584939e-05, "loss": 1.9273, "step": 902 }, { "epoch": 0.21677357472573586, "grad_norm": 1.2091413736343384, "learning_rate": 9.280518027020546e-05, "loss": 1.8872, "step": 904 }, { "epoch": 0.2172531622804388, "grad_norm": 1.5402253866195679, "learning_rate": 9.278919178191703e-05, 
"loss": 1.8952, "step": 906 }, { "epoch": 0.21773274983514176, "grad_norm": 1.4927297830581665, "learning_rate": 9.27732032936286e-05, "loss": 1.9321, "step": 908 }, { "epoch": 0.21821233738984475, "grad_norm": 1.688975214958191, "learning_rate": 9.275721480534016e-05, "loss": 1.9219, "step": 910 }, { "epoch": 0.2186919249445477, "grad_norm": 1.2269971370697021, "learning_rate": 9.274122631705173e-05, "loss": 1.8948, "step": 912 }, { "epoch": 0.21917151249925065, "grad_norm": 1.4145207405090332, "learning_rate": 9.272523782876329e-05, "loss": 1.8709, "step": 914 }, { "epoch": 0.2196511000539536, "grad_norm": 1.3676304817199707, "learning_rate": 9.270924934047486e-05, "loss": 1.9081, "step": 916 }, { "epoch": 0.22013068760865656, "grad_norm": 1.897584080696106, "learning_rate": 9.269326085218642e-05, "loss": 1.8789, "step": 918 }, { "epoch": 0.22061027516335951, "grad_norm": 1.3016343116760254, "learning_rate": 9.267727236389799e-05, "loss": 1.8617, "step": 920 }, { "epoch": 0.22108986271806247, "grad_norm": 2.0249593257904053, "learning_rate": 9.266128387560956e-05, "loss": 1.893, "step": 922 }, { "epoch": 0.22156945027276542, "grad_norm": 1.1152557134628296, "learning_rate": 9.264529538732113e-05, "loss": 1.8667, "step": 924 }, { "epoch": 0.22204903782746838, "grad_norm": 1.0178502798080444, "learning_rate": 9.26293068990327e-05, "loss": 1.8917, "step": 926 }, { "epoch": 0.22252862538217133, "grad_norm": 1.760991096496582, "learning_rate": 9.261331841074427e-05, "loss": 1.8881, "step": 928 }, { "epoch": 0.22300821293687428, "grad_norm": 1.328386664390564, "learning_rate": 9.259732992245583e-05, "loss": 1.9016, "step": 930 }, { "epoch": 0.22348780049157724, "grad_norm": 1.205684781074524, "learning_rate": 9.25813414341674e-05, "loss": 1.9362, "step": 932 }, { "epoch": 0.2239673880462802, "grad_norm": 1.2836710214614868, "learning_rate": 9.256535294587896e-05, "loss": 1.8582, "step": 934 }, { "epoch": 0.22444697560098315, "grad_norm": 1.0976121425628662, 
"learning_rate": 9.254936445759054e-05, "loss": 1.9313, "step": 936 }, { "epoch": 0.2249265631556861, "grad_norm": 1.1915277242660522, "learning_rate": 9.253337596930211e-05, "loss": 1.8415, "step": 938 }, { "epoch": 0.22540615071038905, "grad_norm": 1.461254596710205, "learning_rate": 9.251738748101367e-05, "loss": 1.9341, "step": 940 }, { "epoch": 0.22588573826509203, "grad_norm": 1.0394477844238281, "learning_rate": 9.250139899272524e-05, "loss": 1.8848, "step": 942 }, { "epoch": 0.226365325819795, "grad_norm": 0.9759736657142639, "learning_rate": 9.24854105044368e-05, "loss": 1.8848, "step": 944 }, { "epoch": 0.22684491337449794, "grad_norm": 1.0541903972625732, "learning_rate": 9.246942201614837e-05, "loss": 1.9019, "step": 946 }, { "epoch": 0.2273245009292009, "grad_norm": 2.1612563133239746, "learning_rate": 9.245343352785995e-05, "loss": 1.8982, "step": 948 }, { "epoch": 0.22780408848390385, "grad_norm": 1.2894749641418457, "learning_rate": 9.243744503957152e-05, "loss": 1.814, "step": 950 }, { "epoch": 0.2282836760386068, "grad_norm": 1.776828646659851, "learning_rate": 9.242145655128308e-05, "loss": 1.8694, "step": 952 }, { "epoch": 0.22876326359330976, "grad_norm": 1.5055408477783203, "learning_rate": 9.240546806299465e-05, "loss": 1.9386, "step": 954 }, { "epoch": 0.2292428511480127, "grad_norm": 1.0839173793792725, "learning_rate": 9.238947957470621e-05, "loss": 1.8295, "step": 956 }, { "epoch": 0.22972243870271566, "grad_norm": 1.2553919553756714, "learning_rate": 9.237349108641778e-05, "loss": 1.9403, "step": 958 }, { "epoch": 0.23020202625741862, "grad_norm": 1.0689080953598022, "learning_rate": 9.235750259812936e-05, "loss": 1.9079, "step": 960 }, { "epoch": 0.23068161381212157, "grad_norm": 1.142225742340088, "learning_rate": 9.234151410984092e-05, "loss": 1.9123, "step": 962 }, { "epoch": 0.23116120136682453, "grad_norm": 1.0817763805389404, "learning_rate": 9.232552562155249e-05, "loss": 1.9249, "step": 964 }, { "epoch": 0.23164078892152748, 
"grad_norm": 1.6216228008270264, "learning_rate": 9.230953713326406e-05, "loss": 1.8643, "step": 966 }, { "epoch": 0.23212037647623043, "grad_norm": 1.8095309734344482, "learning_rate": 9.229354864497562e-05, "loss": 1.8589, "step": 968 }, { "epoch": 0.2325999640309334, "grad_norm": 1.2040364742279053, "learning_rate": 9.227756015668719e-05, "loss": 1.8601, "step": 970 }, { "epoch": 0.23307955158563634, "grad_norm": 1.32833731174469, "learning_rate": 9.226157166839877e-05, "loss": 1.9115, "step": 972 }, { "epoch": 0.2335591391403393, "grad_norm": 1.5752009153366089, "learning_rate": 9.224558318011033e-05, "loss": 1.828, "step": 974 }, { "epoch": 0.23403872669504228, "grad_norm": 1.1846336126327515, "learning_rate": 9.22295946918219e-05, "loss": 1.9457, "step": 976 }, { "epoch": 0.23451831424974523, "grad_norm": 1.1441166400909424, "learning_rate": 9.221360620353346e-05, "loss": 1.9665, "step": 978 }, { "epoch": 0.23499790180444818, "grad_norm": 1.1384873390197754, "learning_rate": 9.219761771524503e-05, "loss": 1.8771, "step": 980 }, { "epoch": 0.23547748935915114, "grad_norm": 1.1287832260131836, "learning_rate": 9.21816292269566e-05, "loss": 1.852, "step": 982 }, { "epoch": 0.2359570769138541, "grad_norm": 1.0329426527023315, "learning_rate": 9.216564073866817e-05, "loss": 1.8373, "step": 984 }, { "epoch": 0.23643666446855705, "grad_norm": 1.8852229118347168, "learning_rate": 9.214965225037973e-05, "loss": 1.8895, "step": 986 }, { "epoch": 0.23691625202326, "grad_norm": 1.2329435348510742, "learning_rate": 9.213366376209129e-05, "loss": 1.9235, "step": 988 }, { "epoch": 0.23739583957796295, "grad_norm": 1.3080620765686035, "learning_rate": 9.211767527380286e-05, "loss": 1.8742, "step": 990 }, { "epoch": 0.2378754271326659, "grad_norm": 1.029453158378601, "learning_rate": 9.210168678551442e-05, "loss": 1.927, "step": 992 }, { "epoch": 0.23835501468736886, "grad_norm": 0.9611729979515076, "learning_rate": 9.2085698297226e-05, "loss": 1.9168, "step": 994 }, { 
"epoch": 0.23883460224207181, "grad_norm": 1.244159460067749, "learning_rate": 9.206970980893757e-05, "loss": 1.8754, "step": 996 }, { "epoch": 0.23931418979677477, "grad_norm": 0.998087465763092, "learning_rate": 9.205372132064913e-05, "loss": 1.8747, "step": 998 }, { "epoch": 0.23979377735147772, "grad_norm": 1.9934749603271484, "learning_rate": 9.20377328323607e-05, "loss": 1.8961, "step": 1000 }, { "epoch": 0.24027336490618068, "grad_norm": 1.1881060600280762, "learning_rate": 9.202174434407226e-05, "loss": 1.8976, "step": 1002 }, { "epoch": 0.24075295246088363, "grad_norm": 1.3279857635498047, "learning_rate": 9.200575585578384e-05, "loss": 1.8573, "step": 1004 }, { "epoch": 0.24123254001558658, "grad_norm": 1.3107773065567017, "learning_rate": 9.198976736749541e-05, "loss": 1.9404, "step": 1006 }, { "epoch": 0.24171212757028956, "grad_norm": 1.3390698432922363, "learning_rate": 9.197377887920698e-05, "loss": 1.945, "step": 1008 }, { "epoch": 0.24219171512499252, "grad_norm": 0.8757002949714661, "learning_rate": 9.195779039091854e-05, "loss": 1.8731, "step": 1010 }, { "epoch": 0.24267130267969547, "grad_norm": 1.0043641328811646, "learning_rate": 9.194180190263011e-05, "loss": 1.8971, "step": 1012 }, { "epoch": 0.24315089023439843, "grad_norm": 1.6496073007583618, "learning_rate": 9.192581341434167e-05, "loss": 1.9195, "step": 1014 }, { "epoch": 0.24363047778910138, "grad_norm": 1.1098390817642212, "learning_rate": 9.190982492605325e-05, "loss": 1.8963, "step": 1016 }, { "epoch": 0.24411006534380433, "grad_norm": 1.0610833168029785, "learning_rate": 9.189383643776482e-05, "loss": 1.8264, "step": 1018 }, { "epoch": 0.2445896528985073, "grad_norm": 1.1836704015731812, "learning_rate": 9.187784794947638e-05, "loss": 1.8701, "step": 1020 }, { "epoch": 0.24506924045321024, "grad_norm": 1.1351152658462524, "learning_rate": 9.186185946118795e-05, "loss": 1.9257, "step": 1022 }, { "epoch": 0.2455488280079132, "grad_norm": 1.0919548273086548, "learning_rate": 
9.184587097289951e-05, "loss": 1.9367, "step": 1024 }, { "epoch": 0.24602841556261615, "grad_norm": 1.1491820812225342, "learning_rate": 9.182988248461108e-05, "loss": 1.8289, "step": 1026 }, { "epoch": 0.2465080031173191, "grad_norm": 1.1131271123886108, "learning_rate": 9.181389399632266e-05, "loss": 1.8965, "step": 1028 }, { "epoch": 0.24698759067202206, "grad_norm": 1.0305322408676147, "learning_rate": 9.179790550803423e-05, "loss": 1.8733, "step": 1030 }, { "epoch": 0.247467178226725, "grad_norm": 0.9863612651824951, "learning_rate": 9.178191701974579e-05, "loss": 1.8618, "step": 1032 }, { "epoch": 0.24794676578142796, "grad_norm": 1.0042669773101807, "learning_rate": 9.176592853145736e-05, "loss": 1.9096, "step": 1034 }, { "epoch": 0.24842635333613092, "grad_norm": 1.1164077520370483, "learning_rate": 9.174994004316892e-05, "loss": 1.8846, "step": 1036 }, { "epoch": 0.24890594089083387, "grad_norm": 1.1574684381484985, "learning_rate": 9.173395155488049e-05, "loss": 1.9016, "step": 1038 }, { "epoch": 0.24938552844553682, "grad_norm": 1.1588337421417236, "learning_rate": 9.171796306659207e-05, "loss": 1.9299, "step": 1040 }, { "epoch": 0.2498651160002398, "grad_norm": 1.566069483757019, "learning_rate": 9.170197457830363e-05, "loss": 1.9121, "step": 1042 }, { "epoch": 0.25034470355494276, "grad_norm": 1.10956609249115, "learning_rate": 9.16859860900152e-05, "loss": 1.9441, "step": 1044 }, { "epoch": 0.2508242911096457, "grad_norm": 1.5847198963165283, "learning_rate": 9.166999760172676e-05, "loss": 1.9221, "step": 1046 }, { "epoch": 0.25130387866434867, "grad_norm": 0.9756669998168945, "learning_rate": 9.165400911343833e-05, "loss": 1.8398, "step": 1048 }, { "epoch": 0.2517834662190516, "grad_norm": 1.0864949226379395, "learning_rate": 9.16380206251499e-05, "loss": 1.9022, "step": 1050 }, { "epoch": 0.2522630537737546, "grad_norm": 1.1762267351150513, "learning_rate": 9.162203213686148e-05, "loss": 1.8913, "step": 1052 }, { "epoch": 0.25274264132845753, 
"grad_norm": 1.0905719995498657, "learning_rate": 9.160604364857304e-05, "loss": 1.9078, "step": 1054 }, { "epoch": 0.2532222288831605, "grad_norm": 1.730722188949585, "learning_rate": 9.15900551602846e-05, "loss": 1.8932, "step": 1056 }, { "epoch": 0.25370181643786344, "grad_norm": 1.7810372114181519, "learning_rate": 9.157406667199617e-05, "loss": 1.9604, "step": 1058 }, { "epoch": 0.2541814039925664, "grad_norm": 1.0169857740402222, "learning_rate": 9.155807818370772e-05, "loss": 1.8951, "step": 1060 }, { "epoch": 0.25466099154726934, "grad_norm": 1.2882864475250244, "learning_rate": 9.15420896954193e-05, "loss": 1.8937, "step": 1062 }, { "epoch": 0.2551405791019723, "grad_norm": 1.1796954870224, "learning_rate": 9.152610120713087e-05, "loss": 1.8659, "step": 1064 }, { "epoch": 0.25562016665667525, "grad_norm": 1.8156334161758423, "learning_rate": 9.151011271884243e-05, "loss": 1.8667, "step": 1066 }, { "epoch": 0.2560997542113782, "grad_norm": 0.9660361409187317, "learning_rate": 9.1494124230554e-05, "loss": 1.8754, "step": 1068 }, { "epoch": 0.25657934176608116, "grad_norm": 1.384482741355896, "learning_rate": 9.147813574226557e-05, "loss": 1.8692, "step": 1070 }, { "epoch": 0.2570589293207841, "grad_norm": 1.5964986085891724, "learning_rate": 9.146214725397713e-05, "loss": 1.863, "step": 1072 }, { "epoch": 0.25753851687548707, "grad_norm": 0.9485005140304565, "learning_rate": 9.144615876568871e-05, "loss": 1.906, "step": 1074 }, { "epoch": 0.25801810443019, "grad_norm": 0.9855512380599976, "learning_rate": 9.143017027740028e-05, "loss": 1.9022, "step": 1076 }, { "epoch": 0.258497691984893, "grad_norm": 1.0390692949295044, "learning_rate": 9.141418178911184e-05, "loss": 1.8877, "step": 1078 }, { "epoch": 0.25897727953959593, "grad_norm": 1.0565491914749146, "learning_rate": 9.139819330082341e-05, "loss": 1.8069, "step": 1080 }, { "epoch": 0.2594568670942989, "grad_norm": 1.02567458152771, "learning_rate": 9.138220481253497e-05, "loss": 1.8979, "step": 1082 }, 
{ "epoch": 0.25993645464900184, "grad_norm": 1.0196717977523804, "learning_rate": 9.136621632424654e-05, "loss": 1.7874, "step": 1084 }, { "epoch": 0.2604160422037048, "grad_norm": 1.3616623878479004, "learning_rate": 9.135022783595812e-05, "loss": 1.8864, "step": 1086 }, { "epoch": 0.26089562975840774, "grad_norm": 1.0823595523834229, "learning_rate": 9.133423934766968e-05, "loss": 1.8813, "step": 1088 }, { "epoch": 0.2613752173131107, "grad_norm": 1.056580662727356, "learning_rate": 9.131825085938125e-05, "loss": 1.8577, "step": 1090 }, { "epoch": 0.2618548048678137, "grad_norm": 1.3077820539474487, "learning_rate": 9.130226237109282e-05, "loss": 1.8538, "step": 1092 }, { "epoch": 0.26233439242251666, "grad_norm": 1.054772138595581, "learning_rate": 9.128627388280438e-05, "loss": 1.9251, "step": 1094 }, { "epoch": 0.2628139799772196, "grad_norm": 1.1024770736694336, "learning_rate": 9.127028539451595e-05, "loss": 1.844, "step": 1096 }, { "epoch": 0.26329356753192257, "grad_norm": 1.119834065437317, "learning_rate": 9.125429690622753e-05, "loss": 1.9102, "step": 1098 }, { "epoch": 0.2637731550866255, "grad_norm": 1.2952468395233154, "learning_rate": 9.123830841793909e-05, "loss": 1.89, "step": 1100 }, { "epoch": 0.2642527426413285, "grad_norm": 1.0840941667556763, "learning_rate": 9.122231992965066e-05, "loss": 1.883, "step": 1102 }, { "epoch": 0.26473233019603143, "grad_norm": 1.6319247484207153, "learning_rate": 9.120633144136222e-05, "loss": 1.9053, "step": 1104 }, { "epoch": 0.2652119177507344, "grad_norm": 1.2331438064575195, "learning_rate": 9.119034295307379e-05, "loss": 1.8971, "step": 1106 }, { "epoch": 0.26569150530543734, "grad_norm": 1.231180191040039, "learning_rate": 9.117435446478536e-05, "loss": 1.8328, "step": 1108 }, { "epoch": 0.2661710928601403, "grad_norm": 1.3904527425765991, "learning_rate": 9.115836597649693e-05, "loss": 1.9128, "step": 1110 }, { "epoch": 0.26665068041484324, "grad_norm": 1.7800018787384033, "learning_rate": 
9.11423774882085e-05, "loss": 1.8789, "step": 1112 }, { "epoch": 0.2671302679695462, "grad_norm": 1.095393180847168, "learning_rate": 9.112638899992007e-05, "loss": 1.9301, "step": 1114 }, { "epoch": 0.26760985552424915, "grad_norm": 1.3189587593078613, "learning_rate": 9.111040051163163e-05, "loss": 1.9273, "step": 1116 }, { "epoch": 0.2680894430789521, "grad_norm": 1.0807442665100098, "learning_rate": 9.10944120233432e-05, "loss": 1.8566, "step": 1118 }, { "epoch": 0.26856903063365506, "grad_norm": 0.9769893884658813, "learning_rate": 9.107842353505476e-05, "loss": 1.9116, "step": 1120 }, { "epoch": 0.269048618188358, "grad_norm": 1.3703863620758057, "learning_rate": 9.106243504676634e-05, "loss": 1.8523, "step": 1122 }, { "epoch": 0.26952820574306097, "grad_norm": 1.178132176399231, "learning_rate": 9.104644655847791e-05, "loss": 1.8789, "step": 1124 }, { "epoch": 0.2700077932977639, "grad_norm": 1.4955428838729858, "learning_rate": 9.103045807018947e-05, "loss": 1.9233, "step": 1126 }, { "epoch": 0.2704873808524669, "grad_norm": 1.0175281763076782, "learning_rate": 9.101446958190104e-05, "loss": 1.9104, "step": 1128 }, { "epoch": 0.27096696840716983, "grad_norm": 0.9274328351020813, "learning_rate": 9.09984810936126e-05, "loss": 1.8871, "step": 1130 }, { "epoch": 0.2714465559618728, "grad_norm": 0.985501229763031, "learning_rate": 9.098249260532417e-05, "loss": 1.8781, "step": 1132 }, { "epoch": 0.27192614351657574, "grad_norm": 1.6959666013717651, "learning_rate": 9.096650411703574e-05, "loss": 1.8818, "step": 1134 }, { "epoch": 0.2724057310712787, "grad_norm": 1.1157482862472534, "learning_rate": 9.09505156287473e-05, "loss": 1.8509, "step": 1136 }, { "epoch": 0.27288531862598164, "grad_norm": 1.145900011062622, "learning_rate": 9.093452714045887e-05, "loss": 1.8983, "step": 1138 }, { "epoch": 0.2733649061806846, "grad_norm": 0.9269135594367981, "learning_rate": 9.091853865217043e-05, "loss": 1.8661, "step": 1140 }, { "epoch": 0.27384449373538755, 
"grad_norm": 1.034123420715332, "learning_rate": 9.0902550163882e-05, "loss": 1.9048, "step": 1142 }, { "epoch": 0.2743240812900905, "grad_norm": 1.627294898033142, "learning_rate": 9.088656167559358e-05, "loss": 1.9668, "step": 1144 }, { "epoch": 0.27480366884479346, "grad_norm": 1.5464342832565308, "learning_rate": 9.087057318730514e-05, "loss": 1.8901, "step": 1146 }, { "epoch": 0.2752832563994964, "grad_norm": 0.9135870337486267, "learning_rate": 9.085458469901671e-05, "loss": 1.8205, "step": 1148 }, { "epoch": 0.27576284395419937, "grad_norm": 1.1122444868087769, "learning_rate": 9.083859621072828e-05, "loss": 1.8448, "step": 1150 }, { "epoch": 0.2762424315089023, "grad_norm": 1.0001667737960815, "learning_rate": 9.082260772243984e-05, "loss": 1.9232, "step": 1152 }, { "epoch": 0.2767220190636053, "grad_norm": 1.2342156171798706, "learning_rate": 9.080661923415141e-05, "loss": 1.8812, "step": 1154 }, { "epoch": 0.2772016066183082, "grad_norm": 1.019490122795105, "learning_rate": 9.079063074586299e-05, "loss": 1.92, "step": 1156 }, { "epoch": 0.27768119417301124, "grad_norm": 1.0130747556686401, "learning_rate": 9.077464225757455e-05, "loss": 1.8688, "step": 1158 }, { "epoch": 0.2781607817277142, "grad_norm": 0.999671995639801, "learning_rate": 9.075865376928612e-05, "loss": 1.8896, "step": 1160 }, { "epoch": 0.27864036928241714, "grad_norm": 1.0791696310043335, "learning_rate": 9.074266528099768e-05, "loss": 1.8711, "step": 1162 }, { "epoch": 0.2791199568371201, "grad_norm": 1.3686411380767822, "learning_rate": 9.072667679270925e-05, "loss": 1.8709, "step": 1164 }, { "epoch": 0.27959954439182305, "grad_norm": 1.0843201875686646, "learning_rate": 9.071068830442081e-05, "loss": 1.9129, "step": 1166 }, { "epoch": 0.280079131946526, "grad_norm": 0.9757197499275208, "learning_rate": 9.06946998161324e-05, "loss": 1.8608, "step": 1168 }, { "epoch": 0.28055871950122896, "grad_norm": 0.9084925651550293, "learning_rate": 9.067871132784396e-05, "loss": 1.8609, "step": 
1170 }, { "epoch": 0.2810383070559319, "grad_norm": 1.4326200485229492, "learning_rate": 9.066272283955553e-05, "loss": 1.8346, "step": 1172 }, { "epoch": 0.28151789461063487, "grad_norm": 0.9769284129142761, "learning_rate": 9.064673435126709e-05, "loss": 1.9052, "step": 1174 }, { "epoch": 0.2819974821653378, "grad_norm": 1.3293954133987427, "learning_rate": 9.063074586297866e-05, "loss": 1.815, "step": 1176 }, { "epoch": 0.2824770697200408, "grad_norm": 1.0619103908538818, "learning_rate": 9.061475737469022e-05, "loss": 1.921, "step": 1178 }, { "epoch": 0.28295665727474373, "grad_norm": 1.0049539804458618, "learning_rate": 9.05987688864018e-05, "loss": 1.9181, "step": 1180 }, { "epoch": 0.2834362448294467, "grad_norm": 1.0559053421020508, "learning_rate": 9.058278039811337e-05, "loss": 1.8676, "step": 1182 }, { "epoch": 0.28391583238414964, "grad_norm": 1.1607379913330078, "learning_rate": 9.056679190982493e-05, "loss": 1.8859, "step": 1184 }, { "epoch": 0.2843954199388526, "grad_norm": 1.1627625226974487, "learning_rate": 9.05508034215365e-05, "loss": 1.8512, "step": 1186 }, { "epoch": 0.28487500749355554, "grad_norm": 0.9043356776237488, "learning_rate": 9.053481493324806e-05, "loss": 1.8756, "step": 1188 }, { "epoch": 0.2853545950482585, "grad_norm": 1.328465461730957, "learning_rate": 9.051882644495963e-05, "loss": 1.8886, "step": 1190 }, { "epoch": 0.28583418260296145, "grad_norm": 0.9860932230949402, "learning_rate": 9.050283795667121e-05, "loss": 1.8469, "step": 1192 }, { "epoch": 0.2863137701576644, "grad_norm": 1.1470932960510254, "learning_rate": 9.048684946838278e-05, "loss": 1.9148, "step": 1194 }, { "epoch": 0.28679335771236736, "grad_norm": 0.9389126896858215, "learning_rate": 9.047086098009434e-05, "loss": 1.8605, "step": 1196 }, { "epoch": 0.2872729452670703, "grad_norm": 1.0993415117263794, "learning_rate": 9.04548724918059e-05, "loss": 1.8738, "step": 1198 }, { "epoch": 0.28775253282177327, "grad_norm": 1.0814725160598755, "learning_rate": 
9.043888400351747e-05, "loss": 1.9057, "step": 1200 }, { "epoch": 0.28775253282177327, "eval_loss": 1.8479559421539307, "eval_runtime": 331.2755, "eval_samples_per_second": 402.834, "eval_steps_per_second": 12.591, "step": 1200 }, { "epoch": 0.2882321203764762, "grad_norm": 1.522811770439148, "learning_rate": 9.042289551522904e-05, "loss": 1.8572, "step": 1202 }, { "epoch": 0.2887117079311792, "grad_norm": 1.2286044359207153, "learning_rate": 9.04069070269406e-05, "loss": 1.8478, "step": 1204 }, { "epoch": 0.2891912954858821, "grad_norm": 1.1168609857559204, "learning_rate": 9.039091853865217e-05, "loss": 1.9117, "step": 1206 }, { "epoch": 0.2896708830405851, "grad_norm": 1.1902549266815186, "learning_rate": 9.037493005036374e-05, "loss": 1.8844, "step": 1208 }, { "epoch": 0.29015047059528803, "grad_norm": 1.2014899253845215, "learning_rate": 9.03589415620753e-05, "loss": 1.8785, "step": 1210 }, { "epoch": 0.290630058149991, "grad_norm": 1.2109922170639038, "learning_rate": 9.034295307378688e-05, "loss": 1.8969, "step": 1212 }, { "epoch": 0.29110964570469394, "grad_norm": 1.3509944677352905, "learning_rate": 9.032696458549845e-05, "loss": 1.8877, "step": 1214 }, { "epoch": 0.2915892332593969, "grad_norm": 1.0125397443771362, "learning_rate": 9.031097609721001e-05, "loss": 1.935, "step": 1216 }, { "epoch": 0.29206882081409985, "grad_norm": 0.9948733448982239, "learning_rate": 9.029498760892158e-05, "loss": 1.8988, "step": 1218 }, { "epoch": 0.2925484083688028, "grad_norm": 0.9781015515327454, "learning_rate": 9.027899912063314e-05, "loss": 1.8686, "step": 1220 }, { "epoch": 0.29302799592350576, "grad_norm": 1.0050970315933228, "learning_rate": 9.026301063234471e-05, "loss": 1.8762, "step": 1222 }, { "epoch": 0.29350758347820877, "grad_norm": 1.0958921909332275, "learning_rate": 9.024702214405629e-05, "loss": 1.8171, "step": 1224 }, { "epoch": 0.2939871710329117, "grad_norm": 1.1794092655181885, "learning_rate": 9.023103365576785e-05, "loss": 1.9078, "step": 1226 }, 
{ "epoch": 0.2944667585876147, "grad_norm": 1.7846693992614746, "learning_rate": 9.021504516747942e-05, "loss": 1.8844, "step": 1228 }, { "epoch": 0.29494634614231763, "grad_norm": 1.303965449333191, "learning_rate": 9.019905667919098e-05, "loss": 1.8776, "step": 1230 }, { "epoch": 0.2954259336970206, "grad_norm": 1.00540030002594, "learning_rate": 9.018306819090255e-05, "loss": 1.8256, "step": 1232 }, { "epoch": 0.29590552125172354, "grad_norm": 1.658916711807251, "learning_rate": 9.016707970261412e-05, "loss": 1.9143, "step": 1234 }, { "epoch": 0.2963851088064265, "grad_norm": 1.1302499771118164, "learning_rate": 9.01510912143257e-05, "loss": 1.8809, "step": 1236 }, { "epoch": 0.29686469636112944, "grad_norm": 0.9792537093162537, "learning_rate": 9.013510272603726e-05, "loss": 1.9058, "step": 1238 }, { "epoch": 0.2973442839158324, "grad_norm": 1.1121571063995361, "learning_rate": 9.011911423774883e-05, "loss": 1.8519, "step": 1240 }, { "epoch": 0.29782387147053535, "grad_norm": 1.0954921245574951, "learning_rate": 9.010312574946039e-05, "loss": 1.8309, "step": 1242 }, { "epoch": 0.2983034590252383, "grad_norm": 1.4943294525146484, "learning_rate": 9.008713726117196e-05, "loss": 1.9341, "step": 1244 }, { "epoch": 0.29878304657994126, "grad_norm": 1.9447418451309204, "learning_rate": 9.007114877288352e-05, "loss": 1.8999, "step": 1246 }, { "epoch": 0.2992626341346442, "grad_norm": 1.4571460485458374, "learning_rate": 9.00551602845951e-05, "loss": 1.8428, "step": 1248 }, { "epoch": 0.29974222168934717, "grad_norm": 1.1601732969284058, "learning_rate": 9.003917179630667e-05, "loss": 1.8971, "step": 1250 }, { "epoch": 0.3002218092440501, "grad_norm": 1.3696157932281494, "learning_rate": 9.002318330801823e-05, "loss": 1.8559, "step": 1252 }, { "epoch": 0.3007013967987531, "grad_norm": 1.578497290611267, "learning_rate": 9.00071948197298e-05, "loss": 1.8542, "step": 1254 }, { "epoch": 0.301180984353456, "grad_norm": 1.5284955501556396, "learning_rate": 
8.999120633144137e-05, "loss": 1.8785, "step": 1256 }, { "epoch": 0.301660571908159, "grad_norm": 1.0061014890670776, "learning_rate": 8.997521784315293e-05, "loss": 1.877, "step": 1258 }, { "epoch": 0.30214015946286193, "grad_norm": 0.9771902561187744, "learning_rate": 8.995922935486451e-05, "loss": 1.8461, "step": 1260 }, { "epoch": 0.3026197470175649, "grad_norm": 1.4666539430618286, "learning_rate": 8.994324086657608e-05, "loss": 1.8521, "step": 1262 }, { "epoch": 0.30309933457226784, "grad_norm": 0.9875162243843079, "learning_rate": 8.992725237828764e-05, "loss": 1.884, "step": 1264 }, { "epoch": 0.3035789221269708, "grad_norm": 1.0740891695022583, "learning_rate": 8.991126388999921e-05, "loss": 1.8351, "step": 1266 }, { "epoch": 0.30405850968167375, "grad_norm": 1.1968547105789185, "learning_rate": 8.989527540171077e-05, "loss": 1.8659, "step": 1268 }, { "epoch": 0.3045380972363767, "grad_norm": 1.1654797792434692, "learning_rate": 8.987928691342234e-05, "loss": 1.8705, "step": 1270 }, { "epoch": 0.30501768479107966, "grad_norm": 1.0625569820404053, "learning_rate": 8.986329842513392e-05, "loss": 1.858, "step": 1272 }, { "epoch": 0.3054972723457826, "grad_norm": 1.2560423612594604, "learning_rate": 8.984730993684548e-05, "loss": 1.8597, "step": 1274 }, { "epoch": 0.30597685990048556, "grad_norm": 1.4128667116165161, "learning_rate": 8.983132144855705e-05, "loss": 1.8743, "step": 1276 }, { "epoch": 0.3064564474551885, "grad_norm": 1.0925734043121338, "learning_rate": 8.98153329602686e-05, "loss": 1.792, "step": 1278 }, { "epoch": 0.3069360350098915, "grad_norm": 1.8337174654006958, "learning_rate": 8.979934447198017e-05, "loss": 1.8939, "step": 1280 }, { "epoch": 0.3074156225645944, "grad_norm": 1.0772267580032349, "learning_rate": 8.978335598369175e-05, "loss": 1.8893, "step": 1282 }, { "epoch": 0.3078952101192974, "grad_norm": 1.1308804750442505, "learning_rate": 8.976736749540331e-05, "loss": 1.8681, "step": 1284 }, { "epoch": 0.30837479767400033, 
"grad_norm": 1.0090506076812744, "learning_rate": 8.975137900711488e-05, "loss": 1.8453, "step": 1286 }, { "epoch": 0.3088543852287033, "grad_norm": 0.9463669061660767, "learning_rate": 8.973539051882644e-05, "loss": 1.8864, "step": 1288 }, { "epoch": 0.3093339727834063, "grad_norm": 1.1371676921844482, "learning_rate": 8.971940203053801e-05, "loss": 1.818, "step": 1290 }, { "epoch": 0.30981356033810925, "grad_norm": 1.0438905954360962, "learning_rate": 8.970341354224958e-05, "loss": 1.8551, "step": 1292 }, { "epoch": 0.3102931478928122, "grad_norm": 1.0140902996063232, "learning_rate": 8.968742505396116e-05, "loss": 1.9298, "step": 1294 }, { "epoch": 0.31077273544751516, "grad_norm": 1.0990973711013794, "learning_rate": 8.967143656567272e-05, "loss": 1.876, "step": 1296 }, { "epoch": 0.3112523230022181, "grad_norm": 0.9518069624900818, "learning_rate": 8.965544807738429e-05, "loss": 1.8924, "step": 1298 }, { "epoch": 0.31173191055692107, "grad_norm": 0.9441239237785339, "learning_rate": 8.963945958909585e-05, "loss": 1.8399, "step": 1300 }, { "epoch": 0.312211498111624, "grad_norm": 1.177436351776123, "learning_rate": 8.962347110080742e-05, "loss": 1.8439, "step": 1302 }, { "epoch": 0.312691085666327, "grad_norm": 1.0487161874771118, "learning_rate": 8.960748261251898e-05, "loss": 1.8533, "step": 1304 }, { "epoch": 0.3131706732210299, "grad_norm": 1.0563266277313232, "learning_rate": 8.959149412423056e-05, "loss": 1.866, "step": 1306 }, { "epoch": 0.3136502607757329, "grad_norm": 1.3727524280548096, "learning_rate": 8.957550563594213e-05, "loss": 1.8813, "step": 1308 }, { "epoch": 0.31412984833043583, "grad_norm": 0.9706259369850159, "learning_rate": 8.95595171476537e-05, "loss": 1.917, "step": 1310 }, { "epoch": 0.3146094358851388, "grad_norm": 0.9451547265052795, "learning_rate": 8.954352865936526e-05, "loss": 1.871, "step": 1312 }, { "epoch": 0.31508902343984174, "grad_norm": 1.1103397607803345, "learning_rate": 8.952754017107683e-05, "loss": 1.9045, "step": 
1314 }, { "epoch": 0.3155686109945447, "grad_norm": 1.046226143836975, "learning_rate": 8.951155168278839e-05, "loss": 1.8897, "step": 1316 }, { "epoch": 0.31604819854924765, "grad_norm": 1.1267763376235962, "learning_rate": 8.949556319449997e-05, "loss": 1.8718, "step": 1318 }, { "epoch": 0.3165277861039506, "grad_norm": 1.1690788269042969, "learning_rate": 8.947957470621154e-05, "loss": 1.8562, "step": 1320 }, { "epoch": 0.31700737365865356, "grad_norm": 1.2786952257156372, "learning_rate": 8.94635862179231e-05, "loss": 1.8677, "step": 1322 }, { "epoch": 0.3174869612133565, "grad_norm": 1.2001302242279053, "learning_rate": 8.944759772963467e-05, "loss": 1.9014, "step": 1324 }, { "epoch": 0.31796654876805946, "grad_norm": 1.0998553037643433, "learning_rate": 8.943160924134623e-05, "loss": 1.846, "step": 1326 }, { "epoch": 0.3184461363227624, "grad_norm": 1.319162130355835, "learning_rate": 8.94156207530578e-05, "loss": 1.8246, "step": 1328 }, { "epoch": 0.3189257238774654, "grad_norm": 1.4629111289978027, "learning_rate": 8.939963226476938e-05, "loss": 1.9233, "step": 1330 }, { "epoch": 0.3194053114321683, "grad_norm": 1.0853009223937988, "learning_rate": 8.938364377648094e-05, "loss": 1.8496, "step": 1332 }, { "epoch": 0.3198848989868713, "grad_norm": 0.9647769927978516, "learning_rate": 8.936765528819251e-05, "loss": 1.8592, "step": 1334 }, { "epoch": 0.32036448654157423, "grad_norm": 0.8997206687927246, "learning_rate": 8.935166679990408e-05, "loss": 1.875, "step": 1336 }, { "epoch": 0.3208440740962772, "grad_norm": 1.0613595247268677, "learning_rate": 8.933567831161564e-05, "loss": 1.9196, "step": 1338 }, { "epoch": 0.32132366165098014, "grad_norm": 0.9635891914367676, "learning_rate": 8.931968982332721e-05, "loss": 1.8684, "step": 1340 }, { "epoch": 0.3218032492056831, "grad_norm": 1.04140305519104, "learning_rate": 8.930370133503879e-05, "loss": 1.8697, "step": 1342 }, { "epoch": 0.32228283676038605, "grad_norm": 0.8385540246963501, "learning_rate": 
8.928771284675035e-05, "loss": 1.8369, "step": 1344 }, { "epoch": 0.322762424315089, "grad_norm": 1.0665290355682373, "learning_rate": 8.927172435846192e-05, "loss": 1.8287, "step": 1346 }, { "epoch": 0.32324201186979196, "grad_norm": 1.0262112617492676, "learning_rate": 8.925573587017348e-05, "loss": 1.9034, "step": 1348 }, { "epoch": 0.3237215994244949, "grad_norm": 0.9956510663032532, "learning_rate": 8.923974738188504e-05, "loss": 1.8413, "step": 1350 }, { "epoch": 0.32420118697919786, "grad_norm": 1.751608967781067, "learning_rate": 8.922375889359661e-05, "loss": 1.8938, "step": 1352 }, { "epoch": 0.3246807745339008, "grad_norm": 1.551231861114502, "learning_rate": 8.920777040530818e-05, "loss": 1.8856, "step": 1354 }, { "epoch": 0.3251603620886038, "grad_norm": 0.9341421723365784, "learning_rate": 8.919178191701975e-05, "loss": 1.8288, "step": 1356 }, { "epoch": 0.3256399496433068, "grad_norm": 1.3362356424331665, "learning_rate": 8.917579342873131e-05, "loss": 1.8715, "step": 1358 }, { "epoch": 0.32611953719800973, "grad_norm": 0.8954707980155945, "learning_rate": 8.915980494044288e-05, "loss": 1.9312, "step": 1360 }, { "epoch": 0.3265991247527127, "grad_norm": 0.9015675187110901, "learning_rate": 8.914381645215444e-05, "loss": 1.8277, "step": 1362 }, { "epoch": 0.32707871230741564, "grad_norm": 1.0464507341384888, "learning_rate": 8.912782796386602e-05, "loss": 1.8308, "step": 1364 }, { "epoch": 0.3275582998621186, "grad_norm": 1.0412074327468872, "learning_rate": 8.911183947557759e-05, "loss": 1.8734, "step": 1366 }, { "epoch": 0.32803788741682155, "grad_norm": 1.438984751701355, "learning_rate": 8.909585098728915e-05, "loss": 1.9297, "step": 1368 }, { "epoch": 0.3285174749715245, "grad_norm": 1.328107476234436, "learning_rate": 8.907986249900072e-05, "loss": 1.8263, "step": 1370 }, { "epoch": 0.32899706252622746, "grad_norm": 1.1202678680419922, "learning_rate": 8.906387401071229e-05, "loss": 1.8591, "step": 1372 }, { "epoch": 0.3294766500809304, 
"grad_norm": 1.5344290733337402, "learning_rate": 8.904788552242385e-05, "loss": 1.9036, "step": 1374 }, { "epoch": 0.32995623763563336, "grad_norm": 1.1025265455245972, "learning_rate": 8.903189703413543e-05, "loss": 1.8322, "step": 1376 }, { "epoch": 0.3304358251903363, "grad_norm": 1.2690999507904053, "learning_rate": 8.9015908545847e-05, "loss": 1.8325, "step": 1378 }, { "epoch": 0.3309154127450393, "grad_norm": 1.1306064128875732, "learning_rate": 8.899992005755856e-05, "loss": 1.8397, "step": 1380 }, { "epoch": 0.3313950002997422, "grad_norm": 1.120040774345398, "learning_rate": 8.898393156927013e-05, "loss": 1.8822, "step": 1382 }, { "epoch": 0.3318745878544452, "grad_norm": 0.8903915882110596, "learning_rate": 8.896794308098169e-05, "loss": 1.8627, "step": 1384 }, { "epoch": 0.33235417540914813, "grad_norm": 1.0395910739898682, "learning_rate": 8.895195459269326e-05, "loss": 1.8342, "step": 1386 }, { "epoch": 0.3328337629638511, "grad_norm": 1.3505882024765015, "learning_rate": 8.893596610440484e-05, "loss": 1.9171, "step": 1388 }, { "epoch": 0.33331335051855404, "grad_norm": 0.9146899580955505, "learning_rate": 8.89199776161164e-05, "loss": 1.8902, "step": 1390 }, { "epoch": 0.333792938073257, "grad_norm": 1.2048488855361938, "learning_rate": 8.890398912782797e-05, "loss": 1.8729, "step": 1392 }, { "epoch": 0.33427252562795995, "grad_norm": 1.14456045627594, "learning_rate": 8.888800063953953e-05, "loss": 1.8535, "step": 1394 }, { "epoch": 0.3347521131826629, "grad_norm": 1.0320550203323364, "learning_rate": 8.88720121512511e-05, "loss": 1.8973, "step": 1396 }, { "epoch": 0.33523170073736586, "grad_norm": 1.747861623764038, "learning_rate": 8.885602366296267e-05, "loss": 1.8575, "step": 1398 }, { "epoch": 0.3357112882920688, "grad_norm": 1.1360749006271362, "learning_rate": 8.884003517467425e-05, "loss": 1.8756, "step": 1400 }, { "epoch": 0.33619087584677176, "grad_norm": 1.140502691268921, "learning_rate": 8.882404668638581e-05, "loss": 1.8773, "step": 
1402 }, { "epoch": 0.3366704634014747, "grad_norm": 0.960890531539917, "learning_rate": 8.880805819809738e-05, "loss": 1.8951, "step": 1404 }, { "epoch": 0.33715005095617767, "grad_norm": 0.9602674245834351, "learning_rate": 8.879206970980894e-05, "loss": 1.859, "step": 1406 }, { "epoch": 0.3376296385108806, "grad_norm": 1.1205930709838867, "learning_rate": 8.877608122152051e-05, "loss": 1.8881, "step": 1408 }, { "epoch": 0.3381092260655836, "grad_norm": 1.2184183597564697, "learning_rate": 8.876009273323207e-05, "loss": 1.8293, "step": 1410 }, { "epoch": 0.33858881362028653, "grad_norm": 1.01003897190094, "learning_rate": 8.874410424494365e-05, "loss": 1.8637, "step": 1412 }, { "epoch": 0.3390684011749895, "grad_norm": 0.9622764587402344, "learning_rate": 8.872811575665522e-05, "loss": 1.8476, "step": 1414 }, { "epoch": 0.33954798872969244, "grad_norm": 1.0376614332199097, "learning_rate": 8.871212726836678e-05, "loss": 1.9537, "step": 1416 }, { "epoch": 0.3400275762843954, "grad_norm": 1.1122199296951294, "learning_rate": 8.869613878007835e-05, "loss": 1.9089, "step": 1418 }, { "epoch": 0.34050716383909835, "grad_norm": 1.2202246189117432, "learning_rate": 8.868015029178992e-05, "loss": 1.8209, "step": 1420 }, { "epoch": 0.34098675139380136, "grad_norm": 0.9083878397941589, "learning_rate": 8.86641618035015e-05, "loss": 1.8927, "step": 1422 }, { "epoch": 0.3414663389485043, "grad_norm": 1.7316542863845825, "learning_rate": 8.864817331521305e-05, "loss": 1.9095, "step": 1424 }, { "epoch": 0.34194592650320726, "grad_norm": 0.9481311440467834, "learning_rate": 8.863218482692461e-05, "loss": 1.8321, "step": 1426 }, { "epoch": 0.3424255140579102, "grad_norm": 1.3150486946105957, "learning_rate": 8.861619633863618e-05, "loss": 1.8306, "step": 1428 }, { "epoch": 0.3429051016126132, "grad_norm": 1.1525537967681885, "learning_rate": 8.860020785034774e-05, "loss": 1.8898, "step": 1430 }, { "epoch": 0.3433846891673161, "grad_norm": 0.9735218286514282, "learning_rate": 
8.858421936205932e-05, "loss": 1.8769, "step": 1432 }, { "epoch": 0.3438642767220191, "grad_norm": 1.2486522197723389, "learning_rate": 8.856823087377089e-05, "loss": 1.8578, "step": 1434 }, { "epoch": 0.34434386427672203, "grad_norm": 1.1417350769042969, "learning_rate": 8.855224238548246e-05, "loss": 1.9377, "step": 1436 }, { "epoch": 0.344823451831425, "grad_norm": 0.8824297785758972, "learning_rate": 8.853625389719402e-05, "loss": 1.8632, "step": 1438 }, { "epoch": 0.34530303938612794, "grad_norm": 0.9795541167259216, "learning_rate": 8.852026540890559e-05, "loss": 1.9101, "step": 1440 }, { "epoch": 0.3457826269408309, "grad_norm": 0.950244128704071, "learning_rate": 8.850427692061715e-05, "loss": 1.9359, "step": 1442 }, { "epoch": 0.34626221449553385, "grad_norm": 0.9290071725845337, "learning_rate": 8.848828843232873e-05, "loss": 1.8148, "step": 1444 }, { "epoch": 0.3467418020502368, "grad_norm": 0.9999263882637024, "learning_rate": 8.84722999440403e-05, "loss": 1.8381, "step": 1446 }, { "epoch": 0.34722138960493976, "grad_norm": 0.920124351978302, "learning_rate": 8.845631145575186e-05, "loss": 1.9182, "step": 1448 }, { "epoch": 0.3477009771596427, "grad_norm": 1.1097139120101929, "learning_rate": 8.844032296746343e-05, "loss": 1.8682, "step": 1450 }, { "epoch": 0.34818056471434566, "grad_norm": 0.9645344018936157, "learning_rate": 8.8424334479175e-05, "loss": 1.858, "step": 1452 }, { "epoch": 0.3486601522690486, "grad_norm": 0.9093652963638306, "learning_rate": 8.840834599088656e-05, "loss": 1.8361, "step": 1454 }, { "epoch": 0.34913973982375157, "grad_norm": 0.9624741077423096, "learning_rate": 8.839235750259814e-05, "loss": 1.8472, "step": 1456 }, { "epoch": 0.3496193273784545, "grad_norm": 1.2425785064697266, "learning_rate": 8.83763690143097e-05, "loss": 1.9243, "step": 1458 }, { "epoch": 0.3500989149331575, "grad_norm": 1.424736499786377, "learning_rate": 8.836038052602127e-05, "loss": 1.9453, "step": 1460 }, { "epoch": 0.35057850248786043, 
"grad_norm": 0.937961220741272, "learning_rate": 8.834439203773284e-05, "loss": 1.9217, "step": 1462 }, { "epoch": 0.3510580900425634, "grad_norm": 1.8000656366348267, "learning_rate": 8.83284035494444e-05, "loss": 1.8543, "step": 1464 }, { "epoch": 0.35153767759726634, "grad_norm": 1.0813064575195312, "learning_rate": 8.831241506115597e-05, "loss": 1.8663, "step": 1466 }, { "epoch": 0.3520172651519693, "grad_norm": 0.9548282623291016, "learning_rate": 8.829642657286755e-05, "loss": 1.8449, "step": 1468 }, { "epoch": 0.35249685270667225, "grad_norm": 0.9620568156242371, "learning_rate": 8.828043808457911e-05, "loss": 1.843, "step": 1470 }, { "epoch": 0.3529764402613752, "grad_norm": 1.0314240455627441, "learning_rate": 8.826444959629068e-05, "loss": 1.8589, "step": 1472 }, { "epoch": 0.35345602781607816, "grad_norm": 0.9351568818092346, "learning_rate": 8.824846110800224e-05, "loss": 1.825, "step": 1474 }, { "epoch": 0.3539356153707811, "grad_norm": 0.992695152759552, "learning_rate": 8.823247261971381e-05, "loss": 1.875, "step": 1476 }, { "epoch": 0.35441520292548406, "grad_norm": 1.100189208984375, "learning_rate": 8.821648413142538e-05, "loss": 1.8495, "step": 1478 }, { "epoch": 0.354894790480187, "grad_norm": 0.8025861978530884, "learning_rate": 8.820049564313695e-05, "loss": 1.8481, "step": 1480 }, { "epoch": 0.35537437803488997, "grad_norm": 1.6580811738967896, "learning_rate": 8.818450715484852e-05, "loss": 1.8489, "step": 1482 }, { "epoch": 0.3558539655895929, "grad_norm": 1.198567271232605, "learning_rate": 8.816851866656009e-05, "loss": 1.8739, "step": 1484 }, { "epoch": 0.3563335531442959, "grad_norm": 0.9481224417686462, "learning_rate": 8.815253017827165e-05, "loss": 1.8795, "step": 1486 }, { "epoch": 0.3568131406989989, "grad_norm": 1.3052680492401123, "learning_rate": 8.813654168998322e-05, "loss": 1.8707, "step": 1488 }, { "epoch": 0.35729272825370184, "grad_norm": 1.1563316583633423, "learning_rate": 8.812055320169478e-05, "loss": 1.8337, "step": 
1490 }, { "epoch": 0.3577723158084048, "grad_norm": 1.2986749410629272, "learning_rate": 8.810456471340636e-05, "loss": 1.8454, "step": 1492 }, { "epoch": 0.35825190336310775, "grad_norm": 0.9479560256004333, "learning_rate": 8.808857622511793e-05, "loss": 1.8295, "step": 1494 }, { "epoch": 0.3587314909178107, "grad_norm": 1.0035783052444458, "learning_rate": 8.807258773682948e-05, "loss": 1.8932, "step": 1496 }, { "epoch": 0.35921107847251366, "grad_norm": 1.1547349691390991, "learning_rate": 8.805659924854105e-05, "loss": 1.7977, "step": 1498 }, { "epoch": 0.3596906660272166, "grad_norm": 1.4455478191375732, "learning_rate": 8.804061076025261e-05, "loss": 1.8681, "step": 1500 }, { "epoch": 0.36017025358191956, "grad_norm": 0.8527921438217163, "learning_rate": 8.802462227196419e-05, "loss": 1.8627, "step": 1502 }, { "epoch": 0.3606498411366225, "grad_norm": 1.464475393295288, "learning_rate": 8.800863378367576e-05, "loss": 1.8873, "step": 1504 }, { "epoch": 0.36112942869132547, "grad_norm": 1.652496576309204, "learning_rate": 8.799264529538732e-05, "loss": 1.827, "step": 1506 }, { "epoch": 0.3616090162460284, "grad_norm": 1.5386677980422974, "learning_rate": 8.797665680709889e-05, "loss": 1.851, "step": 1508 }, { "epoch": 0.3620886038007314, "grad_norm": 1.2024132013320923, "learning_rate": 8.796066831881045e-05, "loss": 1.8781, "step": 1510 }, { "epoch": 0.36256819135543433, "grad_norm": 0.947281002998352, "learning_rate": 8.794467983052202e-05, "loss": 1.8301, "step": 1512 }, { "epoch": 0.3630477789101373, "grad_norm": 0.9266417622566223, "learning_rate": 8.79286913422336e-05, "loss": 1.8662, "step": 1514 }, { "epoch": 0.36352736646484024, "grad_norm": 0.9370485544204712, "learning_rate": 8.791270285394516e-05, "loss": 1.8611, "step": 1516 }, { "epoch": 0.3640069540195432, "grad_norm": 0.9214601516723633, "learning_rate": 8.789671436565673e-05, "loss": 1.8413, "step": 1518 }, { "epoch": 0.36448654157424615, "grad_norm": 0.9784045815467834, "learning_rate": 
8.78807258773683e-05, "loss": 1.855, "step": 1520 }, { "epoch": 0.3649661291289491, "grad_norm": 0.9996565580368042, "learning_rate": 8.786473738907986e-05, "loss": 1.8723, "step": 1522 }, { "epoch": 0.36544571668365206, "grad_norm": 1.0385538339614868, "learning_rate": 8.784874890079143e-05, "loss": 1.8576, "step": 1524 }, { "epoch": 0.365925304238355, "grad_norm": 0.8806433081626892, "learning_rate": 8.7832760412503e-05, "loss": 1.7696, "step": 1526 }, { "epoch": 0.36640489179305796, "grad_norm": 1.0767042636871338, "learning_rate": 8.781677192421457e-05, "loss": 1.8607, "step": 1528 }, { "epoch": 0.3668844793477609, "grad_norm": 0.932468056678772, "learning_rate": 8.780078343592614e-05, "loss": 1.858, "step": 1530 }, { "epoch": 0.36736406690246387, "grad_norm": 1.0166387557983398, "learning_rate": 8.77847949476377e-05, "loss": 1.8826, "step": 1532 }, { "epoch": 0.3678436544571668, "grad_norm": 0.9883090853691101, "learning_rate": 8.776880645934927e-05, "loss": 1.8415, "step": 1534 }, { "epoch": 0.3683232420118698, "grad_norm": 1.0378657579421997, "learning_rate": 8.775281797106084e-05, "loss": 1.821, "step": 1536 }, { "epoch": 0.36880282956657273, "grad_norm": 0.9686614871025085, "learning_rate": 8.773682948277241e-05, "loss": 1.8278, "step": 1538 }, { "epoch": 0.3692824171212757, "grad_norm": 0.969861626625061, "learning_rate": 8.772084099448398e-05, "loss": 1.8131, "step": 1540 }, { "epoch": 0.36976200467597864, "grad_norm": 1.0103886127471924, "learning_rate": 8.770485250619555e-05, "loss": 1.876, "step": 1542 }, { "epoch": 0.3702415922306816, "grad_norm": 1.084994912147522, "learning_rate": 8.768886401790711e-05, "loss": 1.8672, "step": 1544 }, { "epoch": 0.37072117978538455, "grad_norm": 0.9051440954208374, "learning_rate": 8.767287552961868e-05, "loss": 1.8332, "step": 1546 }, { "epoch": 0.3712007673400875, "grad_norm": 0.9737153053283691, "learning_rate": 8.765688704133024e-05, "loss": 1.8148, "step": 1548 }, { "epoch": 0.37168035489479045, "grad_norm": 
0.9959837794303894, "learning_rate": 8.764089855304182e-05, "loss": 1.8322, "step": 1550 }, { "epoch": 0.3721599424494934, "grad_norm": 1.0898813009262085, "learning_rate": 8.762491006475339e-05, "loss": 1.7983, "step": 1552 }, { "epoch": 0.3726395300041964, "grad_norm": 1.050705075263977, "learning_rate": 8.760892157646495e-05, "loss": 1.8538, "step": 1554 }, { "epoch": 0.37311911755889937, "grad_norm": 1.0105609893798828, "learning_rate": 8.759293308817652e-05, "loss": 1.8664, "step": 1556 }, { "epoch": 0.3735987051136023, "grad_norm": 1.1131259202957153, "learning_rate": 8.757694459988808e-05, "loss": 1.833, "step": 1558 }, { "epoch": 0.3740782926683053, "grad_norm": 1.0372366905212402, "learning_rate": 8.756095611159965e-05, "loss": 1.8441, "step": 1560 }, { "epoch": 0.37455788022300823, "grad_norm": 1.1000142097473145, "learning_rate": 8.754496762331123e-05, "loss": 1.8436, "step": 1562 }, { "epoch": 0.3750374677777112, "grad_norm": 1.0711838006973267, "learning_rate": 8.75289791350228e-05, "loss": 1.8683, "step": 1564 }, { "epoch": 0.37551705533241414, "grad_norm": 1.1873761415481567, "learning_rate": 8.751299064673436e-05, "loss": 1.8728, "step": 1566 }, { "epoch": 0.3759966428871171, "grad_norm": 1.033284068107605, "learning_rate": 8.749700215844591e-05, "loss": 1.8674, "step": 1568 }, { "epoch": 0.37647623044182005, "grad_norm": 0.8765760660171509, "learning_rate": 8.748101367015748e-05, "loss": 1.9325, "step": 1570 }, { "epoch": 0.376955817996523, "grad_norm": 1.1346018314361572, "learning_rate": 8.746502518186906e-05, "loss": 1.8292, "step": 1572 }, { "epoch": 0.37743540555122596, "grad_norm": 1.012105107307434, "learning_rate": 8.744903669358062e-05, "loss": 1.8468, "step": 1574 }, { "epoch": 0.3779149931059289, "grad_norm": 0.8835533261299133, "learning_rate": 8.743304820529219e-05, "loss": 1.838, "step": 1576 }, { "epoch": 0.37839458066063186, "grad_norm": 1.095618724822998, "learning_rate": 8.741705971700376e-05, "loss": 1.8758, "step": 1578 }, { 
"epoch": 0.3788741682153348, "grad_norm": 1.3384482860565186, "learning_rate": 8.740107122871532e-05, "loss": 1.8784, "step": 1580 }, { "epoch": 0.37935375577003777, "grad_norm": 1.3704334497451782, "learning_rate": 8.738508274042689e-05, "loss": 1.8323, "step": 1582 }, { "epoch": 0.3798333433247407, "grad_norm": 1.3065412044525146, "learning_rate": 8.736909425213847e-05, "loss": 1.8716, "step": 1584 }, { "epoch": 0.3803129308794437, "grad_norm": 1.1596449613571167, "learning_rate": 8.735310576385003e-05, "loss": 1.8384, "step": 1586 }, { "epoch": 0.38079251843414663, "grad_norm": 0.9154991507530212, "learning_rate": 8.73371172755616e-05, "loss": 1.8502, "step": 1588 }, { "epoch": 0.3812721059888496, "grad_norm": 1.015302062034607, "learning_rate": 8.732112878727316e-05, "loss": 1.8749, "step": 1590 }, { "epoch": 0.38175169354355254, "grad_norm": 0.9783477783203125, "learning_rate": 8.730514029898473e-05, "loss": 1.7676, "step": 1592 }, { "epoch": 0.3822312810982555, "grad_norm": 1.0758490562438965, "learning_rate": 8.72891518106963e-05, "loss": 1.8277, "step": 1594 }, { "epoch": 0.38271086865295845, "grad_norm": 1.145180583000183, "learning_rate": 8.727316332240787e-05, "loss": 1.904, "step": 1596 }, { "epoch": 0.3831904562076614, "grad_norm": 1.14093816280365, "learning_rate": 8.725717483411944e-05, "loss": 1.8406, "step": 1598 }, { "epoch": 0.38367004376236435, "grad_norm": 0.9228721261024475, "learning_rate": 8.7241186345831e-05, "loss": 1.8935, "step": 1600 }, { "epoch": 0.38367004376236435, "eval_loss": 1.826748251914978, "eval_runtime": 331.1939, "eval_samples_per_second": 402.933, "eval_steps_per_second": 12.594, "step": 1600 }, { "epoch": 0.3841496313170673, "grad_norm": 0.8792436122894287, "learning_rate": 8.722519785754257e-05, "loss": 1.8424, "step": 1602 }, { "epoch": 0.38462921887177026, "grad_norm": 1.1386128664016724, "learning_rate": 8.720920936925414e-05, "loss": 1.8394, "step": 1604 }, { "epoch": 0.3851088064264732, "grad_norm": 
0.9437385201454163, "learning_rate": 8.719322088096572e-05, "loss": 1.8233, "step": 1606 }, { "epoch": 0.38558839398117617, "grad_norm": 0.9490256309509277, "learning_rate": 8.717723239267728e-05, "loss": 1.8002, "step": 1608 }, { "epoch": 0.3860679815358791, "grad_norm": 1.0651062726974487, "learning_rate": 8.716124390438885e-05, "loss": 1.8337, "step": 1610 }, { "epoch": 0.3865475690905821, "grad_norm": 0.9213554263114929, "learning_rate": 8.714525541610041e-05, "loss": 1.8098, "step": 1612 }, { "epoch": 0.38702715664528503, "grad_norm": 0.971231997013092, "learning_rate": 8.712926692781198e-05, "loss": 1.8407, "step": 1614 }, { "epoch": 0.387506744199988, "grad_norm": 1.0127501487731934, "learning_rate": 8.711327843952354e-05, "loss": 1.866, "step": 1616 }, { "epoch": 0.38798633175469094, "grad_norm": 1.047533392906189, "learning_rate": 8.709728995123512e-05, "loss": 1.8395, "step": 1618 }, { "epoch": 0.38846591930939395, "grad_norm": 1.0203245878219604, "learning_rate": 8.708130146294669e-05, "loss": 1.8305, "step": 1620 }, { "epoch": 0.3889455068640969, "grad_norm": 1.1769187450408936, "learning_rate": 8.706531297465826e-05, "loss": 1.8447, "step": 1622 }, { "epoch": 0.38942509441879986, "grad_norm": 0.9305581450462341, "learning_rate": 8.704932448636982e-05, "loss": 1.8527, "step": 1624 }, { "epoch": 0.3899046819735028, "grad_norm": 1.1632441282272339, "learning_rate": 8.703333599808139e-05, "loss": 1.9161, "step": 1626 }, { "epoch": 0.39038426952820576, "grad_norm": 1.4080644845962524, "learning_rate": 8.701734750979295e-05, "loss": 1.8857, "step": 1628 }, { "epoch": 0.3908638570829087, "grad_norm": 1.018717885017395, "learning_rate": 8.700135902150453e-05, "loss": 1.8977, "step": 1630 }, { "epoch": 0.39134344463761167, "grad_norm": 0.9083398580551147, "learning_rate": 8.69853705332161e-05, "loss": 1.8749, "step": 1632 }, { "epoch": 0.3918230321923146, "grad_norm": 1.1681653261184692, "learning_rate": 8.696938204492766e-05, "loss": 1.8608, "step": 1634 }, { 
"epoch": 0.3923026197470176, "grad_norm": 1.4223008155822754, "learning_rate": 8.695339355663923e-05, "loss": 1.8316, "step": 1636 }, { "epoch": 0.39278220730172053, "grad_norm": 0.9041860103607178, "learning_rate": 8.69374050683508e-05, "loss": 1.8408, "step": 1638 }, { "epoch": 0.3932617948564235, "grad_norm": 1.059019684791565, "learning_rate": 8.692141658006236e-05, "loss": 1.9068, "step": 1640 }, { "epoch": 0.39374138241112644, "grad_norm": 1.1327712535858154, "learning_rate": 8.690542809177393e-05, "loss": 1.8561, "step": 1642 }, { "epoch": 0.3942209699658294, "grad_norm": 1.1966285705566406, "learning_rate": 8.688943960348549e-05, "loss": 1.9008, "step": 1644 }, { "epoch": 0.39470055752053235, "grad_norm": 1.0451692342758179, "learning_rate": 8.687345111519706e-05, "loss": 1.7992, "step": 1646 }, { "epoch": 0.3951801450752353, "grad_norm": 0.9073315858840942, "learning_rate": 8.685746262690862e-05, "loss": 1.8382, "step": 1648 }, { "epoch": 0.39565973262993825, "grad_norm": 1.4907424449920654, "learning_rate": 8.684147413862019e-05, "loss": 1.8762, "step": 1650 }, { "epoch": 0.3961393201846412, "grad_norm": 0.9932870864868164, "learning_rate": 8.682548565033177e-05, "loss": 1.8704, "step": 1652 }, { "epoch": 0.39661890773934416, "grad_norm": 1.2524325847625732, "learning_rate": 8.680949716204333e-05, "loss": 1.8471, "step": 1654 }, { "epoch": 0.3970984952940471, "grad_norm": 0.9633117318153381, "learning_rate": 8.67935086737549e-05, "loss": 1.8305, "step": 1656 }, { "epoch": 0.39757808284875007, "grad_norm": 0.9102377891540527, "learning_rate": 8.677752018546646e-05, "loss": 1.8363, "step": 1658 }, { "epoch": 0.398057670403453, "grad_norm": 0.9749413132667542, "learning_rate": 8.676153169717803e-05, "loss": 1.9093, "step": 1660 }, { "epoch": 0.398537257958156, "grad_norm": 1.1303660869598389, "learning_rate": 8.67455432088896e-05, "loss": 1.8184, "step": 1662 }, { "epoch": 0.39901684551285893, "grad_norm": 1.0233136415481567, "learning_rate": 
8.672955472060118e-05, "loss": 1.8071, "step": 1664 }, { "epoch": 0.3994964330675619, "grad_norm": 1.6510919332504272, "learning_rate": 8.671356623231274e-05, "loss": 1.8101, "step": 1666 }, { "epoch": 0.39997602062226484, "grad_norm": 0.9247718453407288, "learning_rate": 8.669757774402431e-05, "loss": 1.7965, "step": 1668 }, { "epoch": 0.4004556081769678, "grad_norm": 1.061901569366455, "learning_rate": 8.668158925573587e-05, "loss": 1.8146, "step": 1670 }, { "epoch": 0.40093519573167075, "grad_norm": 0.8460214734077454, "learning_rate": 8.666560076744744e-05, "loss": 1.8634, "step": 1672 }, { "epoch": 0.4014147832863737, "grad_norm": 1.0196794271469116, "learning_rate": 8.6649612279159e-05, "loss": 1.7926, "step": 1674 }, { "epoch": 0.40189437084107665, "grad_norm": 0.8597721457481384, "learning_rate": 8.663362379087058e-05, "loss": 1.84, "step": 1676 }, { "epoch": 0.4023739583957796, "grad_norm": 1.000961422920227, "learning_rate": 8.661763530258215e-05, "loss": 1.8286, "step": 1678 }, { "epoch": 0.40285354595048256, "grad_norm": 1.3278608322143555, "learning_rate": 8.660164681429371e-05, "loss": 1.7985, "step": 1680 }, { "epoch": 0.4033331335051855, "grad_norm": 0.9014906883239746, "learning_rate": 8.658565832600528e-05, "loss": 1.8155, "step": 1682 }, { "epoch": 0.40381272105988847, "grad_norm": 1.1079460382461548, "learning_rate": 8.656966983771685e-05, "loss": 1.8425, "step": 1684 }, { "epoch": 0.4042923086145915, "grad_norm": 0.9410251975059509, "learning_rate": 8.655368134942841e-05, "loss": 1.8032, "step": 1686 }, { "epoch": 0.40477189616929443, "grad_norm": 0.9663451910018921, "learning_rate": 8.653769286113999e-05, "loss": 1.8668, "step": 1688 }, { "epoch": 0.4052514837239974, "grad_norm": 0.9886740446090698, "learning_rate": 8.652170437285156e-05, "loss": 1.8869, "step": 1690 }, { "epoch": 0.40573107127870034, "grad_norm": 0.8532866835594177, "learning_rate": 8.650571588456312e-05, "loss": 1.832, "step": 1692 }, { "epoch": 0.4062106588334033, 
"grad_norm": 1.0138763189315796, "learning_rate": 8.648972739627469e-05, "loss": 1.8611, "step": 1694 }, { "epoch": 0.40669024638810625, "grad_norm": 0.8680122494697571, "learning_rate": 8.647373890798625e-05, "loss": 1.8479, "step": 1696 }, { "epoch": 0.4071698339428092, "grad_norm": 0.9747514724731445, "learning_rate": 8.645775041969782e-05, "loss": 1.8441, "step": 1698 }, { "epoch": 0.40764942149751215, "grad_norm": 0.8733306527137756, "learning_rate": 8.64417619314094e-05, "loss": 1.8089, "step": 1700 }, { "epoch": 0.4081290090522151, "grad_norm": 0.8219605684280396, "learning_rate": 8.642577344312096e-05, "loss": 1.84, "step": 1702 }, { "epoch": 0.40860859660691806, "grad_norm": 0.8218125104904175, "learning_rate": 8.640978495483253e-05, "loss": 1.8979, "step": 1704 }, { "epoch": 0.409088184161621, "grad_norm": 0.842439591884613, "learning_rate": 8.63937964665441e-05, "loss": 1.8212, "step": 1706 }, { "epoch": 0.40956777171632397, "grad_norm": 1.188188910484314, "learning_rate": 8.637780797825566e-05, "loss": 1.857, "step": 1708 }, { "epoch": 0.4100473592710269, "grad_norm": 1.0158064365386963, "learning_rate": 8.636181948996723e-05, "loss": 1.8474, "step": 1710 }, { "epoch": 0.4105269468257299, "grad_norm": 1.1295950412750244, "learning_rate": 8.63458310016788e-05, "loss": 1.835, "step": 1712 }, { "epoch": 0.41100653438043283, "grad_norm": 0.8737322092056274, "learning_rate": 8.632984251339036e-05, "loss": 1.8164, "step": 1714 }, { "epoch": 0.4114861219351358, "grad_norm": 1.2849684953689575, "learning_rate": 8.631385402510192e-05, "loss": 1.8109, "step": 1716 }, { "epoch": 0.41196570948983874, "grad_norm": 0.9931622743606567, "learning_rate": 8.629786553681349e-05, "loss": 1.8315, "step": 1718 }, { "epoch": 0.4124452970445417, "grad_norm": 0.9484555125236511, "learning_rate": 8.628187704852506e-05, "loss": 1.8156, "step": 1720 }, { "epoch": 0.41292488459924465, "grad_norm": 1.3674583435058594, "learning_rate": 8.626588856023663e-05, "loss": 1.8767, "step": 
1722 }, { "epoch": 0.4134044721539476, "grad_norm": 1.4415162801742554, "learning_rate": 8.62499000719482e-05, "loss": 1.868, "step": 1724 }, { "epoch": 0.41388405970865055, "grad_norm": 0.9017748236656189, "learning_rate": 8.623391158365977e-05, "loss": 1.8091, "step": 1726 }, { "epoch": 0.4143636472633535, "grad_norm": 0.9315550327301025, "learning_rate": 8.621792309537133e-05, "loss": 1.8918, "step": 1728 }, { "epoch": 0.41484323481805646, "grad_norm": 0.9897119998931885, "learning_rate": 8.62019346070829e-05, "loss": 1.8122, "step": 1730 }, { "epoch": 0.4153228223727594, "grad_norm": 1.0621271133422852, "learning_rate": 8.618594611879446e-05, "loss": 1.9017, "step": 1732 }, { "epoch": 0.41580240992746237, "grad_norm": 1.0777676105499268, "learning_rate": 8.616995763050604e-05, "loss": 1.8679, "step": 1734 }, { "epoch": 0.4162819974821653, "grad_norm": 0.9771785140037537, "learning_rate": 8.615396914221761e-05, "loss": 1.8841, "step": 1736 }, { "epoch": 0.4167615850368683, "grad_norm": 0.9452453851699829, "learning_rate": 8.613798065392917e-05, "loss": 1.8577, "step": 1738 }, { "epoch": 0.41724117259157123, "grad_norm": 0.8741773962974548, "learning_rate": 8.612199216564074e-05, "loss": 1.8637, "step": 1740 }, { "epoch": 0.4177207601462742, "grad_norm": 0.9313515424728394, "learning_rate": 8.61060036773523e-05, "loss": 1.8782, "step": 1742 }, { "epoch": 0.41820034770097714, "grad_norm": 1.0112303495407104, "learning_rate": 8.609001518906387e-05, "loss": 1.8602, "step": 1744 }, { "epoch": 0.4186799352556801, "grad_norm": 1.123449683189392, "learning_rate": 8.607402670077545e-05, "loss": 1.8342, "step": 1746 }, { "epoch": 0.41915952281038305, "grad_norm": 0.8778249621391296, "learning_rate": 8.605803821248702e-05, "loss": 1.8155, "step": 1748 }, { "epoch": 0.419639110365086, "grad_norm": 1.030681848526001, "learning_rate": 8.604204972419858e-05, "loss": 1.8482, "step": 1750 }, { "epoch": 0.420118697919789, "grad_norm": 1.1203950643539429, "learning_rate": 
8.602606123591015e-05, "loss": 1.8318, "step": 1752 }, { "epoch": 0.42059828547449196, "grad_norm": 1.0247114896774292, "learning_rate": 8.601007274762171e-05, "loss": 1.8934, "step": 1754 }, { "epoch": 0.4210778730291949, "grad_norm": 1.0385669469833374, "learning_rate": 8.599408425933328e-05, "loss": 1.8441, "step": 1756 }, { "epoch": 0.42155746058389787, "grad_norm": 1.0732425451278687, "learning_rate": 8.597809577104486e-05, "loss": 1.8398, "step": 1758 }, { "epoch": 0.4220370481386008, "grad_norm": 0.9328681230545044, "learning_rate": 8.596210728275642e-05, "loss": 1.8776, "step": 1760 }, { "epoch": 0.4225166356933038, "grad_norm": 1.0273592472076416, "learning_rate": 8.594611879446799e-05, "loss": 1.8274, "step": 1762 }, { "epoch": 0.42299622324800673, "grad_norm": 0.9712184071540833, "learning_rate": 8.593013030617956e-05, "loss": 1.9164, "step": 1764 }, { "epoch": 0.4234758108027097, "grad_norm": 1.4277074337005615, "learning_rate": 8.591414181789112e-05, "loss": 1.8177, "step": 1766 }, { "epoch": 0.42395539835741264, "grad_norm": 0.9203393459320068, "learning_rate": 8.589815332960269e-05, "loss": 1.7562, "step": 1768 }, { "epoch": 0.4244349859121156, "grad_norm": 0.965216338634491, "learning_rate": 8.588216484131427e-05, "loss": 1.7924, "step": 1770 }, { "epoch": 0.42491457346681855, "grad_norm": 0.9520439505577087, "learning_rate": 8.586617635302583e-05, "loss": 1.8304, "step": 1772 }, { "epoch": 0.4253941610215215, "grad_norm": 1.2402479648590088, "learning_rate": 8.58501878647374e-05, "loss": 1.8956, "step": 1774 }, { "epoch": 0.42587374857622445, "grad_norm": 0.9644680023193359, "learning_rate": 8.583419937644896e-05, "loss": 1.8525, "step": 1776 }, { "epoch": 0.4263533361309274, "grad_norm": 0.9554945230484009, "learning_rate": 8.581821088816053e-05, "loss": 1.8296, "step": 1778 }, { "epoch": 0.42683292368563036, "grad_norm": 0.8657746911048889, "learning_rate": 8.58022223998721e-05, "loss": 1.8434, "step": 1780 }, { "epoch": 0.4273125112403333, 
"grad_norm": 1.0457285642623901, "learning_rate": 8.578623391158367e-05, "loss": 1.8223, "step": 1782 }, { "epoch": 0.42779209879503627, "grad_norm": 1.1848336458206177, "learning_rate": 8.577024542329524e-05, "loss": 1.9083, "step": 1784 }, { "epoch": 0.4282716863497392, "grad_norm": 0.9375316500663757, "learning_rate": 8.575425693500679e-05, "loss": 1.8582, "step": 1786 }, { "epoch": 0.4287512739044422, "grad_norm": 1.2311869859695435, "learning_rate": 8.573826844671836e-05, "loss": 1.7791, "step": 1788 }, { "epoch": 0.42923086145914513, "grad_norm": 1.2337474822998047, "learning_rate": 8.572227995842992e-05, "loss": 1.8519, "step": 1790 }, { "epoch": 0.4297104490138481, "grad_norm": 1.2230091094970703, "learning_rate": 8.57062914701415e-05, "loss": 1.8997, "step": 1792 }, { "epoch": 0.43019003656855104, "grad_norm": 1.349648356437683, "learning_rate": 8.569030298185307e-05, "loss": 1.7994, "step": 1794 }, { "epoch": 0.430669624123254, "grad_norm": 0.9987906813621521, "learning_rate": 8.567431449356463e-05, "loss": 1.8148, "step": 1796 }, { "epoch": 0.43114921167795695, "grad_norm": 1.0619721412658691, "learning_rate": 8.56583260052762e-05, "loss": 1.8694, "step": 1798 }, { "epoch": 0.4316287992326599, "grad_norm": 0.8959397673606873, "learning_rate": 8.564233751698777e-05, "loss": 1.8654, "step": 1800 }, { "epoch": 0.43210838678736285, "grad_norm": 0.8970352411270142, "learning_rate": 8.562634902869934e-05, "loss": 1.8457, "step": 1802 }, { "epoch": 0.4325879743420658, "grad_norm": 1.078121304512024, "learning_rate": 8.561036054041091e-05, "loss": 1.8365, "step": 1804 }, { "epoch": 0.43306756189676876, "grad_norm": 1.2307698726654053, "learning_rate": 8.559437205212248e-05, "loss": 1.8008, "step": 1806 }, { "epoch": 0.4335471494514717, "grad_norm": 0.9452076554298401, "learning_rate": 8.557838356383404e-05, "loss": 1.8224, "step": 1808 }, { "epoch": 0.43402673700617467, "grad_norm": 1.6369608640670776, "learning_rate": 8.556239507554561e-05, "loss": 1.8731, 
"step": 1810 }, { "epoch": 0.4345063245608776, "grad_norm": 1.0903210639953613, "learning_rate": 8.554640658725717e-05, "loss": 1.8029, "step": 1812 }, { "epoch": 0.4349859121155806, "grad_norm": 0.9056496620178223, "learning_rate": 8.553041809896875e-05, "loss": 1.8379, "step": 1814 }, { "epoch": 0.43546549967028353, "grad_norm": 1.2796868085861206, "learning_rate": 8.551442961068032e-05, "loss": 1.8001, "step": 1816 }, { "epoch": 0.43594508722498654, "grad_norm": 1.0481796264648438, "learning_rate": 8.549844112239188e-05, "loss": 1.885, "step": 1818 }, { "epoch": 0.4364246747796895, "grad_norm": 1.0999383926391602, "learning_rate": 8.548245263410345e-05, "loss": 1.8079, "step": 1820 }, { "epoch": 0.43690426233439245, "grad_norm": 0.8575541973114014, "learning_rate": 8.546646414581501e-05, "loss": 1.8859, "step": 1822 }, { "epoch": 0.4373838498890954, "grad_norm": 0.800172746181488, "learning_rate": 8.545047565752658e-05, "loss": 1.8446, "step": 1824 }, { "epoch": 0.43786343744379835, "grad_norm": 0.8454898595809937, "learning_rate": 8.543448716923816e-05, "loss": 1.8282, "step": 1826 }, { "epoch": 0.4383430249985013, "grad_norm": 1.0416619777679443, "learning_rate": 8.541849868094973e-05, "loss": 1.8402, "step": 1828 }, { "epoch": 0.43882261255320426, "grad_norm": 1.00301992893219, "learning_rate": 8.540251019266129e-05, "loss": 1.8493, "step": 1830 }, { "epoch": 0.4393022001079072, "grad_norm": 0.8389581441879272, "learning_rate": 8.538652170437286e-05, "loss": 1.7953, "step": 1832 }, { "epoch": 0.43978178766261017, "grad_norm": 1.0400092601776123, "learning_rate": 8.537053321608442e-05, "loss": 1.8097, "step": 1834 }, { "epoch": 0.4402613752173131, "grad_norm": 0.8985393047332764, "learning_rate": 8.535454472779599e-05, "loss": 1.8257, "step": 1836 }, { "epoch": 0.4407409627720161, "grad_norm": 0.9387130737304688, "learning_rate": 8.533855623950757e-05, "loss": 1.8286, "step": 1838 }, { "epoch": 0.44122055032671903, "grad_norm": 0.9773204326629639, 
"learning_rate": 8.532256775121913e-05, "loss": 1.7939, "step": 1840 }, { "epoch": 0.441700137881422, "grad_norm": 1.0605615377426147, "learning_rate": 8.53065792629307e-05, "loss": 1.9087, "step": 1842 }, { "epoch": 0.44217972543612494, "grad_norm": 1.2595386505126953, "learning_rate": 8.529059077464226e-05, "loss": 1.8226, "step": 1844 }, { "epoch": 0.4426593129908279, "grad_norm": 1.5531840324401855, "learning_rate": 8.527460228635383e-05, "loss": 1.8388, "step": 1846 }, { "epoch": 0.44313890054553084, "grad_norm": 1.1801584959030151, "learning_rate": 8.52586137980654e-05, "loss": 1.8368, "step": 1848 }, { "epoch": 0.4436184881002338, "grad_norm": 1.2791333198547363, "learning_rate": 8.524262530977698e-05, "loss": 1.8364, "step": 1850 }, { "epoch": 0.44409807565493675, "grad_norm": 1.5799909830093384, "learning_rate": 8.522663682148854e-05, "loss": 1.862, "step": 1852 }, { "epoch": 0.4445776632096397, "grad_norm": 0.9895210862159729, "learning_rate": 8.52106483332001e-05, "loss": 1.8158, "step": 1854 }, { "epoch": 0.44505725076434266, "grad_norm": 0.8515310287475586, "learning_rate": 8.519465984491167e-05, "loss": 1.858, "step": 1856 }, { "epoch": 0.4455368383190456, "grad_norm": 1.193617582321167, "learning_rate": 8.517867135662324e-05, "loss": 1.8558, "step": 1858 }, { "epoch": 0.44601642587374857, "grad_norm": 0.8488537669181824, "learning_rate": 8.51626828683348e-05, "loss": 1.8311, "step": 1860 }, { "epoch": 0.4464960134284515, "grad_norm": 0.9646802544593811, "learning_rate": 8.514669438004637e-05, "loss": 1.8617, "step": 1862 }, { "epoch": 0.4469756009831545, "grad_norm": 1.0033082962036133, "learning_rate": 8.513070589175794e-05, "loss": 1.8288, "step": 1864 }, { "epoch": 0.44745518853785743, "grad_norm": 1.0534225702285767, "learning_rate": 8.51147174034695e-05, "loss": 1.8704, "step": 1866 }, { "epoch": 0.4479347760925604, "grad_norm": 0.8781608939170837, "learning_rate": 8.509872891518107e-05, "loss": 1.8323, "step": 1868 }, { "epoch": 
0.44841436364726334, "grad_norm": 1.0080018043518066, "learning_rate": 8.508274042689263e-05, "loss": 1.849, "step": 1870 }, { "epoch": 0.4488939512019663, "grad_norm": 0.881230890750885, "learning_rate": 8.506675193860421e-05, "loss": 1.8602, "step": 1872 }, { "epoch": 0.44937353875666924, "grad_norm": 0.902185320854187, "learning_rate": 8.505076345031578e-05, "loss": 1.7635, "step": 1874 }, { "epoch": 0.4498531263113722, "grad_norm": 1.0849958658218384, "learning_rate": 8.503477496202734e-05, "loss": 1.8833, "step": 1876 }, { "epoch": 0.45033271386607515, "grad_norm": 1.0188125371932983, "learning_rate": 8.501878647373891e-05, "loss": 1.8526, "step": 1878 }, { "epoch": 0.4508123014207781, "grad_norm": 0.9507432579994202, "learning_rate": 8.500279798545047e-05, "loss": 1.8474, "step": 1880 }, { "epoch": 0.45129188897548106, "grad_norm": 0.8854891657829285, "learning_rate": 8.498680949716204e-05, "loss": 1.7979, "step": 1882 }, { "epoch": 0.45177147653018407, "grad_norm": 0.9255229234695435, "learning_rate": 8.497082100887362e-05, "loss": 1.8515, "step": 1884 }, { "epoch": 0.452251064084887, "grad_norm": 1.2057191133499146, "learning_rate": 8.495483252058518e-05, "loss": 1.787, "step": 1886 }, { "epoch": 0.45273065163959, "grad_norm": 1.0205113887786865, "learning_rate": 8.493884403229675e-05, "loss": 1.8297, "step": 1888 }, { "epoch": 0.45321023919429293, "grad_norm": 0.9802207946777344, "learning_rate": 8.492285554400832e-05, "loss": 1.8452, "step": 1890 }, { "epoch": 0.4536898267489959, "grad_norm": 1.145207405090332, "learning_rate": 8.490686705571988e-05, "loss": 1.8521, "step": 1892 }, { "epoch": 0.45416941430369884, "grad_norm": 1.0552637577056885, "learning_rate": 8.489087856743145e-05, "loss": 1.8991, "step": 1894 }, { "epoch": 0.4546490018584018, "grad_norm": 0.9638448357582092, "learning_rate": 8.487489007914303e-05, "loss": 1.811, "step": 1896 }, { "epoch": 0.45512858941310474, "grad_norm": 0.9441689848899841, "learning_rate": 8.485890159085459e-05, 
"loss": 1.8459, "step": 1898 }, { "epoch": 0.4556081769678077, "grad_norm": 0.8479006290435791, "learning_rate": 8.484291310256616e-05, "loss": 1.8073, "step": 1900 }, { "epoch": 0.45608776452251065, "grad_norm": 1.2420101165771484, "learning_rate": 8.482692461427772e-05, "loss": 1.8567, "step": 1902 }, { "epoch": 0.4565673520772136, "grad_norm": 1.5604170560836792, "learning_rate": 8.481093612598929e-05, "loss": 1.8346, "step": 1904 }, { "epoch": 0.45704693963191656, "grad_norm": 0.8790860772132874, "learning_rate": 8.479494763770086e-05, "loss": 1.8265, "step": 1906 }, { "epoch": 0.4575265271866195, "grad_norm": 0.840888261795044, "learning_rate": 8.477895914941243e-05, "loss": 1.8317, "step": 1908 }, { "epoch": 0.45800611474132247, "grad_norm": 0.9074258804321289, "learning_rate": 8.4762970661124e-05, "loss": 1.8075, "step": 1910 }, { "epoch": 0.4584857022960254, "grad_norm": 1.2067028284072876, "learning_rate": 8.474698217283557e-05, "loss": 1.8188, "step": 1912 }, { "epoch": 0.4589652898507284, "grad_norm": 0.9563915133476257, "learning_rate": 8.473099368454713e-05, "loss": 1.8445, "step": 1914 }, { "epoch": 0.45944487740543133, "grad_norm": 0.9805328845977783, "learning_rate": 8.47150051962587e-05, "loss": 1.8167, "step": 1916 }, { "epoch": 0.4599244649601343, "grad_norm": 0.9611313343048096, "learning_rate": 8.469901670797026e-05, "loss": 1.8727, "step": 1918 }, { "epoch": 0.46040405251483724, "grad_norm": 0.8399175405502319, "learning_rate": 8.468302821968184e-05, "loss": 1.8492, "step": 1920 }, { "epoch": 0.4608836400695402, "grad_norm": 0.810640275478363, "learning_rate": 8.466703973139341e-05, "loss": 1.719, "step": 1922 }, { "epoch": 0.46136322762424314, "grad_norm": 0.8514111042022705, "learning_rate": 8.465105124310497e-05, "loss": 1.8178, "step": 1924 }, { "epoch": 0.4618428151789461, "grad_norm": 0.9439705610275269, "learning_rate": 8.463506275481654e-05, "loss": 1.823, "step": 1926 }, { "epoch": 0.46232240273364905, "grad_norm": 0.9182960987091064, 
"learning_rate": 8.46190742665281e-05, "loss": 1.8254, "step": 1928 }, { "epoch": 0.462801990288352, "grad_norm": 0.8051034808158875, "learning_rate": 8.460308577823967e-05, "loss": 1.9049, "step": 1930 }, { "epoch": 0.46328157784305496, "grad_norm": 1.007118582725525, "learning_rate": 8.458709728995124e-05, "loss": 1.906, "step": 1932 }, { "epoch": 0.4637611653977579, "grad_norm": 0.8031899929046631, "learning_rate": 8.45711088016628e-05, "loss": 1.8521, "step": 1934 }, { "epoch": 0.46424075295246087, "grad_norm": 0.9369514584541321, "learning_rate": 8.455512031337437e-05, "loss": 1.841, "step": 1936 }, { "epoch": 0.4647203405071638, "grad_norm": 0.7785843014717102, "learning_rate": 8.453913182508593e-05, "loss": 1.8563, "step": 1938 }, { "epoch": 0.4651999280618668, "grad_norm": 0.9110360741615295, "learning_rate": 8.45231433367975e-05, "loss": 1.84, "step": 1940 }, { "epoch": 0.46567951561656973, "grad_norm": 1.0959868431091309, "learning_rate": 8.450715484850908e-05, "loss": 1.8457, "step": 1942 }, { "epoch": 0.4661591031712727, "grad_norm": 0.8966951370239258, "learning_rate": 8.449116636022064e-05, "loss": 1.8438, "step": 1944 }, { "epoch": 0.46663869072597564, "grad_norm": 0.978128969669342, "learning_rate": 8.447517787193221e-05, "loss": 1.8577, "step": 1946 }, { "epoch": 0.4671182782806786, "grad_norm": 0.874697744846344, "learning_rate": 8.445918938364378e-05, "loss": 1.8398, "step": 1948 }, { "epoch": 0.4675978658353816, "grad_norm": 0.8804771900177002, "learning_rate": 8.444320089535534e-05, "loss": 1.8121, "step": 1950 }, { "epoch": 0.46807745339008455, "grad_norm": 0.9832872748374939, "learning_rate": 8.442721240706691e-05, "loss": 1.8154, "step": 1952 }, { "epoch": 0.4685570409447875, "grad_norm": 1.1364140510559082, "learning_rate": 8.441122391877849e-05, "loss": 1.8762, "step": 1954 }, { "epoch": 0.46903662849949046, "grad_norm": 1.1568024158477783, "learning_rate": 8.439523543049005e-05, "loss": 1.8338, "step": 1956 }, { "epoch": 
0.4695162160541934, "grad_norm": 0.9282981157302856, "learning_rate": 8.437924694220162e-05, "loss": 1.8622, "step": 1958 }, { "epoch": 0.46999580360889637, "grad_norm": 0.899114191532135, "learning_rate": 8.436325845391318e-05, "loss": 1.8899, "step": 1960 }, { "epoch": 0.4704753911635993, "grad_norm": 1.5354300737380981, "learning_rate": 8.434726996562475e-05, "loss": 1.8335, "step": 1962 }, { "epoch": 0.4709549787183023, "grad_norm": 0.9688185453414917, "learning_rate": 8.433128147733632e-05, "loss": 1.8921, "step": 1964 }, { "epoch": 0.47143456627300523, "grad_norm": 0.9213922023773193, "learning_rate": 8.43152929890479e-05, "loss": 1.836, "step": 1966 }, { "epoch": 0.4719141538277082, "grad_norm": 0.8238869905471802, "learning_rate": 8.429930450075946e-05, "loss": 1.8616, "step": 1968 }, { "epoch": 0.47239374138241114, "grad_norm": 1.0595403909683228, "learning_rate": 8.428331601247103e-05, "loss": 1.7882, "step": 1970 }, { "epoch": 0.4728733289371141, "grad_norm": 0.9482323527336121, "learning_rate": 8.426732752418259e-05, "loss": 1.8198, "step": 1972 }, { "epoch": 0.47335291649181704, "grad_norm": 1.2731683254241943, "learning_rate": 8.425133903589416e-05, "loss": 1.8435, "step": 1974 }, { "epoch": 0.47383250404652, "grad_norm": 0.9282073974609375, "learning_rate": 8.423535054760572e-05, "loss": 1.8288, "step": 1976 }, { "epoch": 0.47431209160122295, "grad_norm": 0.8841426372528076, "learning_rate": 8.42193620593173e-05, "loss": 1.8025, "step": 1978 }, { "epoch": 0.4747916791559259, "grad_norm": 1.0703173875808716, "learning_rate": 8.420337357102887e-05, "loss": 1.8723, "step": 1980 }, { "epoch": 0.47527126671062886, "grad_norm": 1.5237516164779663, "learning_rate": 8.418738508274043e-05, "loss": 1.8705, "step": 1982 }, { "epoch": 0.4757508542653318, "grad_norm": 0.9846270084381104, "learning_rate": 8.4171396594452e-05, "loss": 1.8161, "step": 1984 }, { "epoch": 0.47623044182003477, "grad_norm": 1.0854849815368652, "learning_rate": 8.415540810616356e-05, 
"loss": 1.8412, "step": 1986 }, { "epoch": 0.4767100293747377, "grad_norm": 1.1480991840362549, "learning_rate": 8.413941961787513e-05, "loss": 1.8525, "step": 1988 }, { "epoch": 0.4771896169294407, "grad_norm": 1.5457807779312134, "learning_rate": 8.412343112958671e-05, "loss": 1.8558, "step": 1990 }, { "epoch": 0.47766920448414363, "grad_norm": 1.2047770023345947, "learning_rate": 8.410744264129828e-05, "loss": 1.8293, "step": 1992 }, { "epoch": 0.4781487920388466, "grad_norm": 1.1364389657974243, "learning_rate": 8.409145415300984e-05, "loss": 1.8122, "step": 1994 }, { "epoch": 0.47862837959354954, "grad_norm": 1.5275181531906128, "learning_rate": 8.407546566472141e-05, "loss": 1.9244, "step": 1996 }, { "epoch": 0.4791079671482525, "grad_norm": 0.73636394739151, "learning_rate": 8.405947717643297e-05, "loss": 1.8131, "step": 1998 }, { "epoch": 0.47958755470295544, "grad_norm": 1.2514373064041138, "learning_rate": 8.404348868814454e-05, "loss": 1.8203, "step": 2000 }, { "epoch": 0.47958755470295544, "eval_loss": 1.817832350730896, "eval_runtime": 331.2218, "eval_samples_per_second": 402.899, "eval_steps_per_second": 12.593, "step": 2000 }, { "epoch": 0.4800671422576584, "grad_norm": 0.9685493111610413, "learning_rate": 8.402750019985612e-05, "loss": 1.8022, "step": 2002 }, { "epoch": 0.48054672981236135, "grad_norm": 1.0509530305862427, "learning_rate": 8.401151171156767e-05, "loss": 1.8145, "step": 2004 }, { "epoch": 0.4810263173670643, "grad_norm": 0.9474796652793884, "learning_rate": 8.399552322327924e-05, "loss": 1.8567, "step": 2006 }, { "epoch": 0.48150590492176726, "grad_norm": 1.100654125213623, "learning_rate": 8.39795347349908e-05, "loss": 1.8224, "step": 2008 }, { "epoch": 0.4819854924764702, "grad_norm": 0.8159398436546326, "learning_rate": 8.396354624670238e-05, "loss": 1.8005, "step": 2010 }, { "epoch": 0.48246508003117317, "grad_norm": 1.1211557388305664, "learning_rate": 8.394755775841395e-05, "loss": 1.8996, "step": 2012 }, { "epoch": 
0.4829446675858761, "grad_norm": 0.9183927774429321, "learning_rate": 8.393156927012551e-05, "loss": 1.8338, "step": 2014 }, { "epoch": 0.48342425514057913, "grad_norm": 0.8458068370819092, "learning_rate": 8.391558078183708e-05, "loss": 1.8315, "step": 2016 }, { "epoch": 0.4839038426952821, "grad_norm": 0.7304191589355469, "learning_rate": 8.389959229354864e-05, "loss": 1.8184, "step": 2018 }, { "epoch": 0.48438343024998504, "grad_norm": 0.9224951863288879, "learning_rate": 8.388360380526021e-05, "loss": 1.8115, "step": 2020 }, { "epoch": 0.484863017804688, "grad_norm": 1.3673295974731445, "learning_rate": 8.386761531697179e-05, "loss": 1.8378, "step": 2022 }, { "epoch": 0.48534260535939094, "grad_norm": 0.9152979254722595, "learning_rate": 8.385162682868335e-05, "loss": 1.8455, "step": 2024 }, { "epoch": 0.4858221929140939, "grad_norm": 1.1985877752304077, "learning_rate": 8.383563834039492e-05, "loss": 1.8103, "step": 2026 }, { "epoch": 0.48630178046879685, "grad_norm": 0.8775197863578796, "learning_rate": 8.381964985210649e-05, "loss": 1.8493, "step": 2028 }, { "epoch": 0.4867813680234998, "grad_norm": 1.049750566482544, "learning_rate": 8.380366136381805e-05, "loss": 1.8323, "step": 2030 }, { "epoch": 0.48726095557820276, "grad_norm": 1.471175193786621, "learning_rate": 8.378767287552962e-05, "loss": 1.8553, "step": 2032 }, { "epoch": 0.4877405431329057, "grad_norm": 0.7510826587677002, "learning_rate": 8.37716843872412e-05, "loss": 1.7948, "step": 2034 }, { "epoch": 0.48822013068760867, "grad_norm": 0.8959689140319824, "learning_rate": 8.375569589895276e-05, "loss": 1.8274, "step": 2036 }, { "epoch": 0.4886997182423116, "grad_norm": 0.8847079277038574, "learning_rate": 8.373970741066433e-05, "loss": 1.8225, "step": 2038 }, { "epoch": 0.4891793057970146, "grad_norm": 0.9970871210098267, "learning_rate": 8.372371892237589e-05, "loss": 1.8472, "step": 2040 }, { "epoch": 0.48965889335171753, "grad_norm": 0.7939209342002869, "learning_rate": 8.370773043408746e-05, 
"loss": 1.8842, "step": 2042 }, { "epoch": 0.4901384809064205, "grad_norm": 1.1339771747589111, "learning_rate": 8.369174194579902e-05, "loss": 1.8354, "step": 2044 }, { "epoch": 0.49061806846112344, "grad_norm": 0.9350486397743225, "learning_rate": 8.36757534575106e-05, "loss": 1.8036, "step": 2046 }, { "epoch": 0.4910976560158264, "grad_norm": 0.8644850850105286, "learning_rate": 8.365976496922217e-05, "loss": 1.8181, "step": 2048 }, { "epoch": 0.49157724357052934, "grad_norm": 0.7497583627700806, "learning_rate": 8.364377648093373e-05, "loss": 1.8225, "step": 2050 }, { "epoch": 0.4920568311252323, "grad_norm": 0.8808600306510925, "learning_rate": 8.36277879926453e-05, "loss": 1.8245, "step": 2052 }, { "epoch": 0.49253641867993525, "grad_norm": 1.0755715370178223, "learning_rate": 8.361179950435687e-05, "loss": 1.7921, "step": 2054 }, { "epoch": 0.4930160062346382, "grad_norm": 0.8944563269615173, "learning_rate": 8.359581101606843e-05, "loss": 1.8522, "step": 2056 }, { "epoch": 0.49349559378934116, "grad_norm": 0.8937315344810486, "learning_rate": 8.357982252778001e-05, "loss": 1.8574, "step": 2058 }, { "epoch": 0.4939751813440441, "grad_norm": 0.9179726243019104, "learning_rate": 8.356383403949158e-05, "loss": 1.8389, "step": 2060 }, { "epoch": 0.49445476889874707, "grad_norm": 0.7784353494644165, "learning_rate": 8.354784555120314e-05, "loss": 1.7612, "step": 2062 }, { "epoch": 0.49493435645345, "grad_norm": 0.9600009918212891, "learning_rate": 8.353185706291471e-05, "loss": 1.8673, "step": 2064 }, { "epoch": 0.495413944008153, "grad_norm": 0.8424609303474426, "learning_rate": 8.351586857462627e-05, "loss": 1.8305, "step": 2066 }, { "epoch": 0.4958935315628559, "grad_norm": 0.7456308603286743, "learning_rate": 8.349988008633784e-05, "loss": 1.8407, "step": 2068 }, { "epoch": 0.4963731191175589, "grad_norm": 0.6956048607826233, "learning_rate": 8.348389159804942e-05, "loss": 1.8076, "step": 2070 }, { "epoch": 0.49685270667226183, "grad_norm": 
0.9036033153533936, "learning_rate": 8.346790310976098e-05, "loss": 1.8582, "step": 2072 }, { "epoch": 0.4973322942269648, "grad_norm": 0.9136260151863098, "learning_rate": 8.345191462147255e-05, "loss": 1.8536, "step": 2074 }, { "epoch": 0.49781188178166774, "grad_norm": 0.8295150995254517, "learning_rate": 8.343592613318412e-05, "loss": 1.8506, "step": 2076 }, { "epoch": 0.4982914693363707, "grad_norm": 0.8571803569793701, "learning_rate": 8.341993764489567e-05, "loss": 1.8388, "step": 2078 }, { "epoch": 0.49877105689107365, "grad_norm": 1.1190805435180664, "learning_rate": 8.340394915660725e-05, "loss": 1.7698, "step": 2080 }, { "epoch": 0.49925064444577666, "grad_norm": 0.93168044090271, "learning_rate": 8.338796066831881e-05, "loss": 1.7829, "step": 2082 }, { "epoch": 0.4997302320004796, "grad_norm": 0.9064432382583618, "learning_rate": 8.337197218003038e-05, "loss": 1.8195, "step": 2084 }, { "epoch": 0.5002098195551825, "grad_norm": 0.9492483735084534, "learning_rate": 8.335598369174194e-05, "loss": 1.8761, "step": 2086 }, { "epoch": 0.5006894071098855, "grad_norm": 0.9211865067481995, "learning_rate": 8.333999520345351e-05, "loss": 1.8933, "step": 2088 }, { "epoch": 0.5011689946645884, "grad_norm": 1.0944627523422241, "learning_rate": 8.332400671516508e-05, "loss": 1.8496, "step": 2090 }, { "epoch": 0.5016485822192914, "grad_norm": 0.9225418567657471, "learning_rate": 8.330801822687666e-05, "loss": 1.8587, "step": 2092 }, { "epoch": 0.5021281697739943, "grad_norm": 0.9156660437583923, "learning_rate": 8.329202973858822e-05, "loss": 1.868, "step": 2094 }, { "epoch": 0.5026077573286973, "grad_norm": 0.9641127586364746, "learning_rate": 8.327604125029979e-05, "loss": 1.8337, "step": 2096 }, { "epoch": 0.5030873448834002, "grad_norm": 1.1805025339126587, "learning_rate": 8.326005276201135e-05, "loss": 1.8166, "step": 2098 }, { "epoch": 0.5035669324381032, "grad_norm": 1.0341885089874268, "learning_rate": 8.324406427372292e-05, "loss": 1.8015, "step": 2100 }, { 
"epoch": 0.5040465199928061, "grad_norm": 0.9048952460289001, "learning_rate": 8.322807578543448e-05, "loss": 1.8408, "step": 2102 }, { "epoch": 0.5045261075475092, "grad_norm": 0.9217455387115479, "learning_rate": 8.321208729714606e-05, "loss": 1.7949, "step": 2104 }, { "epoch": 0.505005695102212, "grad_norm": 0.8602827191352844, "learning_rate": 8.319609880885763e-05, "loss": 1.8431, "step": 2106 }, { "epoch": 0.5054852826569151, "grad_norm": 0.9383849501609802, "learning_rate": 8.31801103205692e-05, "loss": 1.8076, "step": 2108 }, { "epoch": 0.505964870211618, "grad_norm": 0.8477554321289062, "learning_rate": 8.316412183228076e-05, "loss": 1.8283, "step": 2110 }, { "epoch": 0.506444457766321, "grad_norm": 0.9301886558532715, "learning_rate": 8.314813334399233e-05, "loss": 1.8495, "step": 2112 }, { "epoch": 0.5069240453210239, "grad_norm": 0.9696794152259827, "learning_rate": 8.313214485570389e-05, "loss": 1.8151, "step": 2114 }, { "epoch": 0.5074036328757269, "grad_norm": 0.7927327752113342, "learning_rate": 8.311615636741547e-05, "loss": 1.7679, "step": 2116 }, { "epoch": 0.5078832204304299, "grad_norm": 0.9555681943893433, "learning_rate": 8.310016787912704e-05, "loss": 1.8374, "step": 2118 }, { "epoch": 0.5083628079851328, "grad_norm": 0.9167666435241699, "learning_rate": 8.30841793908386e-05, "loss": 1.7939, "step": 2120 }, { "epoch": 0.5088423955398358, "grad_norm": 1.1139322519302368, "learning_rate": 8.306819090255017e-05, "loss": 1.7713, "step": 2122 }, { "epoch": 0.5093219830945387, "grad_norm": 1.1044987440109253, "learning_rate": 8.305220241426173e-05, "loss": 1.8255, "step": 2124 }, { "epoch": 0.5098015706492417, "grad_norm": 0.823825478553772, "learning_rate": 8.30362139259733e-05, "loss": 1.8759, "step": 2126 }, { "epoch": 0.5102811582039446, "grad_norm": 0.8174486756324768, "learning_rate": 8.302022543768488e-05, "loss": 1.8288, "step": 2128 }, { "epoch": 0.5107607457586476, "grad_norm": 1.07783842086792, "learning_rate": 8.300423694939644e-05, 
"loss": 1.789, "step": 2130 }, { "epoch": 0.5112403333133505, "grad_norm": 0.8728610277175903, "learning_rate": 8.298824846110801e-05, "loss": 1.8307, "step": 2132 }, { "epoch": 0.5117199208680535, "grad_norm": 0.7414913773536682, "learning_rate": 8.297225997281958e-05, "loss": 1.8055, "step": 2134 }, { "epoch": 0.5121995084227564, "grad_norm": 0.9684213399887085, "learning_rate": 8.295627148453114e-05, "loss": 1.8368, "step": 2136 }, { "epoch": 0.5126790959774594, "grad_norm": 0.742603600025177, "learning_rate": 8.294028299624271e-05, "loss": 1.8376, "step": 2138 }, { "epoch": 0.5131586835321623, "grad_norm": 1.113700270652771, "learning_rate": 8.292429450795429e-05, "loss": 1.8153, "step": 2140 }, { "epoch": 0.5136382710868653, "grad_norm": 0.9710909724235535, "learning_rate": 8.290830601966585e-05, "loss": 1.8241, "step": 2142 }, { "epoch": 0.5141178586415682, "grad_norm": 0.831440269947052, "learning_rate": 8.289231753137742e-05, "loss": 1.8345, "step": 2144 }, { "epoch": 0.5145974461962712, "grad_norm": 0.8449375629425049, "learning_rate": 8.287632904308898e-05, "loss": 1.8497, "step": 2146 }, { "epoch": 0.5150770337509741, "grad_norm": 1.0221365690231323, "learning_rate": 8.286034055480055e-05, "loss": 1.8556, "step": 2148 }, { "epoch": 0.5155566213056771, "grad_norm": 0.9965918660163879, "learning_rate": 8.284435206651211e-05, "loss": 1.8132, "step": 2150 }, { "epoch": 0.51603620886038, "grad_norm": 0.7504016160964966, "learning_rate": 8.282836357822368e-05, "loss": 1.8544, "step": 2152 }, { "epoch": 0.516515796415083, "grad_norm": 0.815392792224884, "learning_rate": 8.281237508993525e-05, "loss": 1.8244, "step": 2154 }, { "epoch": 0.516995383969786, "grad_norm": 0.7610076069831848, "learning_rate": 8.279638660164681e-05, "loss": 1.7912, "step": 2156 }, { "epoch": 0.517474971524489, "grad_norm": 0.868376612663269, "learning_rate": 8.278039811335838e-05, "loss": 1.8121, "step": 2158 }, { "epoch": 0.5179545590791919, "grad_norm": 0.7683380246162415, 
"learning_rate": 8.276440962506994e-05, "loss": 1.8636, "step": 2160 }, { "epoch": 0.5184341466338949, "grad_norm": 0.8600881099700928, "learning_rate": 8.274842113678152e-05, "loss": 1.7974, "step": 2162 }, { "epoch": 0.5189137341885978, "grad_norm": 0.8804888725280762, "learning_rate": 8.273243264849309e-05, "loss": 1.8452, "step": 2164 }, { "epoch": 0.5193933217433008, "grad_norm": 0.8438601493835449, "learning_rate": 8.271644416020465e-05, "loss": 1.832, "step": 2166 }, { "epoch": 0.5198729092980037, "grad_norm": 0.7845274806022644, "learning_rate": 8.270045567191622e-05, "loss": 1.7987, "step": 2168 }, { "epoch": 0.5203524968527067, "grad_norm": 0.8616762161254883, "learning_rate": 8.268446718362779e-05, "loss": 1.8066, "step": 2170 }, { "epoch": 0.5208320844074096, "grad_norm": 1.109925389289856, "learning_rate": 8.266847869533935e-05, "loss": 1.7939, "step": 2172 }, { "epoch": 0.5213116719621126, "grad_norm": 1.2303601503372192, "learning_rate": 8.265249020705093e-05, "loss": 1.8272, "step": 2174 }, { "epoch": 0.5217912595168155, "grad_norm": 0.8999001979827881, "learning_rate": 8.26365017187625e-05, "loss": 1.8182, "step": 2176 }, { "epoch": 0.5222708470715185, "grad_norm": 1.1958459615707397, "learning_rate": 8.262051323047406e-05, "loss": 1.8048, "step": 2178 }, { "epoch": 0.5227504346262214, "grad_norm": 1.3294223546981812, "learning_rate": 8.260452474218563e-05, "loss": 1.8194, "step": 2180 }, { "epoch": 0.5232300221809244, "grad_norm": 0.868995726108551, "learning_rate": 8.258853625389719e-05, "loss": 1.8339, "step": 2182 }, { "epoch": 0.5237096097356274, "grad_norm": 1.1017285585403442, "learning_rate": 8.257254776560876e-05, "loss": 1.8378, "step": 2184 }, { "epoch": 0.5241891972903303, "grad_norm": 1.0961867570877075, "learning_rate": 8.255655927732034e-05, "loss": 1.8085, "step": 2186 }, { "epoch": 0.5246687848450333, "grad_norm": 0.8379493355751038, "learning_rate": 8.25405707890319e-05, "loss": 1.81, "step": 2188 }, { "epoch": 0.5251483723997362, 
"grad_norm": 1.0546656847000122, "learning_rate": 8.252458230074347e-05, "loss": 1.7725, "step": 2190 }, { "epoch": 0.5256279599544392, "grad_norm": 0.7546354532241821, "learning_rate": 8.250859381245504e-05, "loss": 1.7822, "step": 2192 }, { "epoch": 0.5261075475091421, "grad_norm": 0.7526242733001709, "learning_rate": 8.24926053241666e-05, "loss": 1.788, "step": 2194 }, { "epoch": 0.5265871350638451, "grad_norm": 0.7855644226074219, "learning_rate": 8.247661683587817e-05, "loss": 1.8111, "step": 2196 }, { "epoch": 0.527066722618548, "grad_norm": 0.7986014485359192, "learning_rate": 8.246062834758975e-05, "loss": 1.7744, "step": 2198 }, { "epoch": 0.527546310173251, "grad_norm": 0.8527908325195312, "learning_rate": 8.244463985930131e-05, "loss": 1.8863, "step": 2200 }, { "epoch": 0.5280258977279539, "grad_norm": 0.7832108736038208, "learning_rate": 8.242865137101288e-05, "loss": 1.8437, "step": 2202 }, { "epoch": 0.528505485282657, "grad_norm": 0.7310022711753845, "learning_rate": 8.241266288272444e-05, "loss": 1.8095, "step": 2204 }, { "epoch": 0.5289850728373598, "grad_norm": 0.877179741859436, "learning_rate": 8.239667439443601e-05, "loss": 1.8423, "step": 2206 }, { "epoch": 0.5294646603920629, "grad_norm": 0.7367309331893921, "learning_rate": 8.238068590614757e-05, "loss": 1.8789, "step": 2208 }, { "epoch": 0.5299442479467658, "grad_norm": 0.8306716680526733, "learning_rate": 8.236469741785915e-05, "loss": 1.8148, "step": 2210 }, { "epoch": 0.5304238355014688, "grad_norm": 0.9970924854278564, "learning_rate": 8.234870892957072e-05, "loss": 1.8372, "step": 2212 }, { "epoch": 0.5309034230561717, "grad_norm": 0.9853947758674622, "learning_rate": 8.233272044128228e-05, "loss": 1.7911, "step": 2214 }, { "epoch": 0.5313830106108747, "grad_norm": 0.9442545771598816, "learning_rate": 8.231673195299385e-05, "loss": 1.8808, "step": 2216 }, { "epoch": 0.5318625981655776, "grad_norm": 0.8420426249504089, "learning_rate": 8.230074346470542e-05, "loss": 1.8169, "step": 2218 
}, { "epoch": 0.5323421857202806, "grad_norm": 0.7420244216918945, "learning_rate": 8.2284754976417e-05, "loss": 1.8499, "step": 2220 }, { "epoch": 0.5328217732749835, "grad_norm": 0.8869497179985046, "learning_rate": 8.226876648812856e-05, "loss": 1.8322, "step": 2222 }, { "epoch": 0.5333013608296865, "grad_norm": 0.804044246673584, "learning_rate": 8.225277799984011e-05, "loss": 1.7984, "step": 2224 }, { "epoch": 0.5337809483843894, "grad_norm": 0.9107562303543091, "learning_rate": 8.223678951155168e-05, "loss": 1.8061, "step": 2226 }, { "epoch": 0.5342605359390924, "grad_norm": 0.9454451203346252, "learning_rate": 8.222080102326324e-05, "loss": 1.7985, "step": 2228 }, { "epoch": 0.5347401234937953, "grad_norm": 1.1177397966384888, "learning_rate": 8.220481253497482e-05, "loss": 1.7976, "step": 2230 }, { "epoch": 0.5352197110484983, "grad_norm": 1.0931987762451172, "learning_rate": 8.218882404668639e-05, "loss": 1.8597, "step": 2232 }, { "epoch": 0.5356992986032012, "grad_norm": 0.7651786804199219, "learning_rate": 8.217283555839796e-05, "loss": 1.8259, "step": 2234 }, { "epoch": 0.5361788861579042, "grad_norm": 0.7684177756309509, "learning_rate": 8.215684707010952e-05, "loss": 1.8532, "step": 2236 }, { "epoch": 0.5366584737126071, "grad_norm": 0.7531395554542542, "learning_rate": 8.214085858182109e-05, "loss": 1.8259, "step": 2238 }, { "epoch": 0.5371380612673101, "grad_norm": 0.7877879738807678, "learning_rate": 8.212487009353265e-05, "loss": 1.7853, "step": 2240 }, { "epoch": 0.537617648822013, "grad_norm": 0.8469850420951843, "learning_rate": 8.210888160524423e-05, "loss": 1.8086, "step": 2242 }, { "epoch": 0.538097236376716, "grad_norm": 1.200268030166626, "learning_rate": 8.20928931169558e-05, "loss": 1.8307, "step": 2244 }, { "epoch": 0.5385768239314189, "grad_norm": 0.8724194169044495, "learning_rate": 8.207690462866736e-05, "loss": 1.8163, "step": 2246 }, { "epoch": 0.5390564114861219, "grad_norm": 0.8582920432090759, "learning_rate": 
8.206091614037893e-05, "loss": 1.8379, "step": 2248 }, { "epoch": 0.5395359990408249, "grad_norm": 0.9777652025222778, "learning_rate": 8.20449276520905e-05, "loss": 1.8206, "step": 2250 }, { "epoch": 0.5400155865955278, "grad_norm": 0.7773507833480835, "learning_rate": 8.202893916380206e-05, "loss": 1.7793, "step": 2252 }, { "epoch": 0.5404951741502309, "grad_norm": 0.8182154893875122, "learning_rate": 8.201295067551364e-05, "loss": 1.8531, "step": 2254 }, { "epoch": 0.5409747617049337, "grad_norm": 0.9722955822944641, "learning_rate": 8.19969621872252e-05, "loss": 1.8132, "step": 2256 }, { "epoch": 0.5414543492596368, "grad_norm": 1.0997123718261719, "learning_rate": 8.198097369893677e-05, "loss": 1.8275, "step": 2258 }, { "epoch": 0.5419339368143397, "grad_norm": 0.80416339635849, "learning_rate": 8.196498521064834e-05, "loss": 1.8394, "step": 2260 }, { "epoch": 0.5424135243690427, "grad_norm": 0.7239370346069336, "learning_rate": 8.19489967223599e-05, "loss": 1.8251, "step": 2262 }, { "epoch": 0.5428931119237456, "grad_norm": 0.8200324773788452, "learning_rate": 8.193300823407147e-05, "loss": 1.8269, "step": 2264 }, { "epoch": 0.5433726994784486, "grad_norm": 0.7871708869934082, "learning_rate": 8.191701974578305e-05, "loss": 1.8402, "step": 2266 }, { "epoch": 0.5438522870331515, "grad_norm": 1.1177228689193726, "learning_rate": 8.190103125749461e-05, "loss": 1.828, "step": 2268 }, { "epoch": 0.5443318745878545, "grad_norm": 0.814207911491394, "learning_rate": 8.188504276920618e-05, "loss": 1.8151, "step": 2270 }, { "epoch": 0.5448114621425574, "grad_norm": 0.85468989610672, "learning_rate": 8.186905428091774e-05, "loss": 1.8072, "step": 2272 }, { "epoch": 0.5452910496972604, "grad_norm": 0.863826334476471, "learning_rate": 8.185306579262931e-05, "loss": 1.798, "step": 2274 }, { "epoch": 0.5457706372519633, "grad_norm": 0.8793989419937134, "learning_rate": 8.183707730434088e-05, "loss": 1.8487, "step": 2276 }, { "epoch": 0.5462502248066663, "grad_norm": 
0.8887887597084045, "learning_rate": 8.182108881605246e-05, "loss": 1.8155, "step": 2278 }, { "epoch": 0.5467298123613692, "grad_norm": 0.9089771509170532, "learning_rate": 8.180510032776402e-05, "loss": 1.8773, "step": 2280 }, { "epoch": 0.5472093999160722, "grad_norm": 0.7721947431564331, "learning_rate": 8.178911183947559e-05, "loss": 1.7947, "step": 2282 }, { "epoch": 0.5476889874707751, "grad_norm": 0.9575307965278625, "learning_rate": 8.177312335118715e-05, "loss": 1.8396, "step": 2284 }, { "epoch": 0.5481685750254781, "grad_norm": 1.1602458953857422, "learning_rate": 8.175713486289872e-05, "loss": 1.879, "step": 2286 }, { "epoch": 0.548648162580181, "grad_norm": 0.9841418266296387, "learning_rate": 8.174114637461028e-05, "loss": 1.7561, "step": 2288 }, { "epoch": 0.549127750134884, "grad_norm": 0.9794514179229736, "learning_rate": 8.172515788632186e-05, "loss": 1.8176, "step": 2290 }, { "epoch": 0.5496073376895869, "grad_norm": 0.8870856165885925, "learning_rate": 8.170916939803343e-05, "loss": 1.7982, "step": 2292 }, { "epoch": 0.5500869252442899, "grad_norm": 0.7453255653381348, "learning_rate": 8.1693180909745e-05, "loss": 1.8546, "step": 2294 }, { "epoch": 0.5505665127989928, "grad_norm": 0.8425607681274414, "learning_rate": 8.167719242145655e-05, "loss": 1.8013, "step": 2296 }, { "epoch": 0.5510461003536958, "grad_norm": 0.8117558360099792, "learning_rate": 8.166120393316811e-05, "loss": 1.8678, "step": 2298 }, { "epoch": 0.5515256879083987, "grad_norm": 0.8181368112564087, "learning_rate": 8.164521544487969e-05, "loss": 1.8365, "step": 2300 }, { "epoch": 0.5520052754631017, "grad_norm": 0.7324437499046326, "learning_rate": 8.162922695659126e-05, "loss": 1.7889, "step": 2302 }, { "epoch": 0.5524848630178046, "grad_norm": 0.766448438167572, "learning_rate": 8.161323846830282e-05, "loss": 1.7847, "step": 2304 }, { "epoch": 0.5529644505725076, "grad_norm": 0.9112101197242737, "learning_rate": 8.159724998001439e-05, "loss": 1.8376, "step": 2306 }, { 
"epoch": 0.5534440381272105, "grad_norm": 0.8366121053695679, "learning_rate": 8.158126149172595e-05, "loss": 1.8797, "step": 2308 }, { "epoch": 0.5539236256819136, "grad_norm": 0.7311052083969116, "learning_rate": 8.156527300343752e-05, "loss": 1.7761, "step": 2310 }, { "epoch": 0.5544032132366165, "grad_norm": 0.8404399752616882, "learning_rate": 8.15492845151491e-05, "loss": 1.7772, "step": 2312 }, { "epoch": 0.5548828007913195, "grad_norm": 1.5071115493774414, "learning_rate": 8.153329602686066e-05, "loss": 1.8618, "step": 2314 }, { "epoch": 0.5553623883460225, "grad_norm": 0.8672218918800354, "learning_rate": 8.151730753857223e-05, "loss": 1.7549, "step": 2316 }, { "epoch": 0.5558419759007254, "grad_norm": 0.8545186519622803, "learning_rate": 8.15013190502838e-05, "loss": 1.8171, "step": 2318 }, { "epoch": 0.5563215634554284, "grad_norm": 0.7870068550109863, "learning_rate": 8.148533056199536e-05, "loss": 1.8359, "step": 2320 }, { "epoch": 0.5568011510101313, "grad_norm": 0.9607504606246948, "learning_rate": 8.146934207370693e-05, "loss": 1.8375, "step": 2322 }, { "epoch": 0.5572807385648343, "grad_norm": 0.8286476135253906, "learning_rate": 8.145335358541851e-05, "loss": 1.8252, "step": 2324 }, { "epoch": 0.5577603261195372, "grad_norm": 0.9421722888946533, "learning_rate": 8.143736509713007e-05, "loss": 1.8594, "step": 2326 }, { "epoch": 0.5582399136742402, "grad_norm": 0.9053072929382324, "learning_rate": 8.142137660884164e-05, "loss": 1.8355, "step": 2328 }, { "epoch": 0.5587195012289431, "grad_norm": 0.9172499179840088, "learning_rate": 8.14053881205532e-05, "loss": 1.8495, "step": 2330 }, { "epoch": 0.5591990887836461, "grad_norm": 0.9131478667259216, "learning_rate": 8.138939963226477e-05, "loss": 1.7829, "step": 2332 }, { "epoch": 0.559678676338349, "grad_norm": 0.9228172898292542, "learning_rate": 8.137341114397634e-05, "loss": 1.8464, "step": 2334 }, { "epoch": 0.560158263893052, "grad_norm": 0.8376054763793945, "learning_rate": 
8.135742265568791e-05, "loss": 1.8168, "step": 2336 }, { "epoch": 0.5606378514477549, "grad_norm": 0.9498407244682312, "learning_rate": 8.134143416739948e-05, "loss": 1.8222, "step": 2338 }, { "epoch": 0.5611174390024579, "grad_norm": 0.7964874505996704, "learning_rate": 8.132544567911105e-05, "loss": 1.7831, "step": 2340 }, { "epoch": 0.5615970265571608, "grad_norm": 1.125036358833313, "learning_rate": 8.130945719082261e-05, "loss": 1.8174, "step": 2342 }, { "epoch": 0.5620766141118638, "grad_norm": 0.7301112413406372, "learning_rate": 8.129346870253418e-05, "loss": 1.8305, "step": 2344 }, { "epoch": 0.5625562016665667, "grad_norm": 0.75786292552948, "learning_rate": 8.127748021424574e-05, "loss": 1.7782, "step": 2346 }, { "epoch": 0.5630357892212697, "grad_norm": 0.8245341181755066, "learning_rate": 8.126149172595732e-05, "loss": 1.7814, "step": 2348 }, { "epoch": 0.5635153767759726, "grad_norm": 0.7744756937026978, "learning_rate": 8.124550323766889e-05, "loss": 1.8204, "step": 2350 }, { "epoch": 0.5639949643306756, "grad_norm": 0.8528450131416321, "learning_rate": 8.122951474938045e-05, "loss": 1.7682, "step": 2352 }, { "epoch": 0.5644745518853785, "grad_norm": 1.0421406030654907, "learning_rate": 8.121352626109202e-05, "loss": 1.8206, "step": 2354 }, { "epoch": 0.5649541394400815, "grad_norm": 1.0248520374298096, "learning_rate": 8.119753777280359e-05, "loss": 1.7683, "step": 2356 }, { "epoch": 0.5654337269947844, "grad_norm": 0.8575286269187927, "learning_rate": 8.118154928451515e-05, "loss": 1.7948, "step": 2358 }, { "epoch": 0.5659133145494875, "grad_norm": 0.8587629199028015, "learning_rate": 8.116556079622673e-05, "loss": 1.7509, "step": 2360 }, { "epoch": 0.5663929021041904, "grad_norm": 0.8476513028144836, "learning_rate": 8.11495723079383e-05, "loss": 1.7845, "step": 2362 }, { "epoch": 0.5668724896588934, "grad_norm": 1.18324875831604, "learning_rate": 8.113358381964986e-05, "loss": 1.8101, "step": 2364 }, { "epoch": 0.5673520772135963, "grad_norm": 
0.8868889212608337, "learning_rate": 8.111759533136143e-05, "loss": 1.7938, "step": 2366 }, { "epoch": 0.5678316647682993, "grad_norm": 1.0483042001724243, "learning_rate": 8.110160684307298e-05, "loss": 1.861, "step": 2368 }, { "epoch": 0.5683112523230022, "grad_norm": 1.0813654661178589, "learning_rate": 8.108561835478456e-05, "loss": 1.7902, "step": 2370 }, { "epoch": 0.5687908398777052, "grad_norm": 0.8076615333557129, "learning_rate": 8.106962986649612e-05, "loss": 1.8132, "step": 2372 }, { "epoch": 0.5692704274324081, "grad_norm": 0.7622639536857605, "learning_rate": 8.105364137820769e-05, "loss": 1.8414, "step": 2374 }, { "epoch": 0.5697500149871111, "grad_norm": 0.8489013314247131, "learning_rate": 8.103765288991926e-05, "loss": 1.8346, "step": 2376 }, { "epoch": 0.570229602541814, "grad_norm": 1.0544272661209106, "learning_rate": 8.102166440163082e-05, "loss": 1.8384, "step": 2378 }, { "epoch": 0.570709190096517, "grad_norm": 1.0994126796722412, "learning_rate": 8.100567591334239e-05, "loss": 1.8266, "step": 2380 }, { "epoch": 0.57118877765122, "grad_norm": 0.9946407079696655, "learning_rate": 8.098968742505397e-05, "loss": 1.844, "step": 2382 }, { "epoch": 0.5716683652059229, "grad_norm": 1.0281453132629395, "learning_rate": 8.097369893676553e-05, "loss": 1.8215, "step": 2384 }, { "epoch": 0.5721479527606259, "grad_norm": 0.9157495498657227, "learning_rate": 8.09577104484771e-05, "loss": 1.8137, "step": 2386 }, { "epoch": 0.5726275403153288, "grad_norm": 0.9379921555519104, "learning_rate": 8.094172196018866e-05, "loss": 1.7922, "step": 2388 }, { "epoch": 0.5731071278700318, "grad_norm": 1.1026980876922607, "learning_rate": 8.092573347190023e-05, "loss": 1.862, "step": 2390 }, { "epoch": 0.5735867154247347, "grad_norm": 0.9502924680709839, "learning_rate": 8.09097449836118e-05, "loss": 1.871, "step": 2392 }, { "epoch": 0.5740663029794377, "grad_norm": 0.8441060781478882, "learning_rate": 8.089375649532337e-05, "loss": 1.8536, "step": 2394 }, { "epoch": 
0.5745458905341406, "grad_norm": 0.8363927006721497, "learning_rate": 8.087776800703494e-05, "loss": 1.8048, "step": 2396 }, { "epoch": 0.5750254780888436, "grad_norm": 0.9717321395874023, "learning_rate": 8.08617795187465e-05, "loss": 1.8007, "step": 2398 }, { "epoch": 0.5755050656435465, "grad_norm": 0.8668280243873596, "learning_rate": 8.084579103045807e-05, "loss": 1.805, "step": 2400 }, { "epoch": 0.5755050656435465, "eval_loss": 1.7996037006378174, "eval_runtime": 331.1732, "eval_samples_per_second": 402.958, "eval_steps_per_second": 12.595, "step": 2400 }, { "epoch": 0.5759846531982495, "grad_norm": 1.0002896785736084, "learning_rate": 8.082980254216964e-05, "loss": 1.8243, "step": 2402 }, { "epoch": 0.5764642407529524, "grad_norm": 1.1271153688430786, "learning_rate": 8.08138140538812e-05, "loss": 1.8249, "step": 2404 }, { "epoch": 0.5769438283076554, "grad_norm": 0.8835245370864868, "learning_rate": 8.079782556559278e-05, "loss": 1.7621, "step": 2406 }, { "epoch": 0.5774234158623583, "grad_norm": 1.061867117881775, "learning_rate": 8.078183707730435e-05, "loss": 1.8029, "step": 2408 }, { "epoch": 0.5779030034170614, "grad_norm": 0.8314512968063354, "learning_rate": 8.076584858901591e-05, "loss": 1.8274, "step": 2410 }, { "epoch": 0.5783825909717643, "grad_norm": 1.0367372035980225, "learning_rate": 8.074986010072748e-05, "loss": 1.8333, "step": 2412 }, { "epoch": 0.5788621785264673, "grad_norm": 0.9081689119338989, "learning_rate": 8.073387161243904e-05, "loss": 1.818, "step": 2414 }, { "epoch": 0.5793417660811702, "grad_norm": 0.8859843015670776, "learning_rate": 8.071788312415062e-05, "loss": 1.7767, "step": 2416 }, { "epoch": 0.5798213536358732, "grad_norm": 0.8254542350769043, "learning_rate": 8.070189463586219e-05, "loss": 1.7891, "step": 2418 }, { "epoch": 0.5803009411905761, "grad_norm": 0.7077252864837646, "learning_rate": 8.068590614757376e-05, "loss": 1.7652, "step": 2420 }, { "epoch": 0.5807805287452791, "grad_norm": 0.7632471919059753, 
"learning_rate": 8.066991765928532e-05, "loss": 1.8262, "step": 2422 }, { "epoch": 0.581260116299982, "grad_norm": 0.9953832030296326, "learning_rate": 8.065392917099689e-05, "loss": 1.7694, "step": 2424 }, { "epoch": 0.581739703854685, "grad_norm": 0.9403093457221985, "learning_rate": 8.063794068270845e-05, "loss": 1.8139, "step": 2426 }, { "epoch": 0.5822192914093879, "grad_norm": 0.7380833029747009, "learning_rate": 8.062195219442003e-05, "loss": 1.87, "step": 2428 }, { "epoch": 0.5826988789640909, "grad_norm": 0.9159483909606934, "learning_rate": 8.06059637061316e-05, "loss": 1.8004, "step": 2430 }, { "epoch": 0.5831784665187938, "grad_norm": 1.2316815853118896, "learning_rate": 8.058997521784316e-05, "loss": 1.8138, "step": 2432 }, { "epoch": 0.5836580540734968, "grad_norm": 0.9223155975341797, "learning_rate": 8.057398672955473e-05, "loss": 1.7883, "step": 2434 }, { "epoch": 0.5841376416281997, "grad_norm": 0.7929102778434753, "learning_rate": 8.05579982412663e-05, "loss": 1.8284, "step": 2436 }, { "epoch": 0.5846172291829027, "grad_norm": 0.9628846645355225, "learning_rate": 8.054200975297786e-05, "loss": 1.7771, "step": 2438 }, { "epoch": 0.5850968167376056, "grad_norm": 0.7841970920562744, "learning_rate": 8.052602126468944e-05, "loss": 1.8142, "step": 2440 }, { "epoch": 0.5855764042923086, "grad_norm": 0.7842591404914856, "learning_rate": 8.051003277640099e-05, "loss": 1.813, "step": 2442 }, { "epoch": 0.5860559918470115, "grad_norm": 0.8268870711326599, "learning_rate": 8.049404428811256e-05, "loss": 1.8566, "step": 2444 }, { "epoch": 0.5865355794017145, "grad_norm": 0.737072229385376, "learning_rate": 8.047805579982412e-05, "loss": 1.7968, "step": 2446 }, { "epoch": 0.5870151669564175, "grad_norm": 0.8399505019187927, "learning_rate": 8.046206731153569e-05, "loss": 1.858, "step": 2448 }, { "epoch": 0.5874947545111204, "grad_norm": 0.7225112915039062, "learning_rate": 8.044607882324727e-05, "loss": 1.808, "step": 2450 }, { "epoch": 0.5879743420658234, 
"grad_norm": 0.8332202434539795, "learning_rate": 8.043009033495883e-05, "loss": 1.8244, "step": 2452 }, { "epoch": 0.5884539296205263, "grad_norm": 1.0801018476486206, "learning_rate": 8.04141018466704e-05, "loss": 1.8124, "step": 2454 }, { "epoch": 0.5889335171752293, "grad_norm": 0.9548690319061279, "learning_rate": 8.039811335838197e-05, "loss": 1.8545, "step": 2456 }, { "epoch": 0.5894131047299322, "grad_norm": 0.8765873908996582, "learning_rate": 8.038212487009353e-05, "loss": 1.8269, "step": 2458 }, { "epoch": 0.5898926922846353, "grad_norm": 0.7783584594726562, "learning_rate": 8.03661363818051e-05, "loss": 1.7882, "step": 2460 }, { "epoch": 0.5903722798393382, "grad_norm": 1.058979868888855, "learning_rate": 8.035014789351668e-05, "loss": 1.8687, "step": 2462 }, { "epoch": 0.5908518673940412, "grad_norm": 0.8436552882194519, "learning_rate": 8.033415940522824e-05, "loss": 1.8133, "step": 2464 }, { "epoch": 0.5913314549487441, "grad_norm": 0.7766116261482239, "learning_rate": 8.031817091693981e-05, "loss": 1.8197, "step": 2466 }, { "epoch": 0.5918110425034471, "grad_norm": 0.7245225310325623, "learning_rate": 8.030218242865137e-05, "loss": 1.8204, "step": 2468 }, { "epoch": 0.59229063005815, "grad_norm": 0.7852773070335388, "learning_rate": 8.028619394036294e-05, "loss": 1.8318, "step": 2470 }, { "epoch": 0.592770217612853, "grad_norm": 0.7157665491104126, "learning_rate": 8.02702054520745e-05, "loss": 1.8015, "step": 2472 }, { "epoch": 0.5932498051675559, "grad_norm": 0.7644356489181519, "learning_rate": 8.025421696378608e-05, "loss": 1.8273, "step": 2474 }, { "epoch": 0.5937293927222589, "grad_norm": 0.7434369325637817, "learning_rate": 8.023822847549765e-05, "loss": 1.8433, "step": 2476 }, { "epoch": 0.5942089802769618, "grad_norm": 0.7399640679359436, "learning_rate": 8.022223998720921e-05, "loss": 1.8207, "step": 2478 }, { "epoch": 0.5946885678316648, "grad_norm": 0.8645913600921631, "learning_rate": 8.020625149892078e-05, "loss": 1.8238, "step": 2480 
}, { "epoch": 0.5951681553863677, "grad_norm": 0.9867789149284363, "learning_rate": 8.019026301063235e-05, "loss": 1.8251, "step": 2482 }, { "epoch": 0.5956477429410707, "grad_norm": 0.7316281795501709, "learning_rate": 8.017427452234391e-05, "loss": 1.832, "step": 2484 }, { "epoch": 0.5961273304957736, "grad_norm": 0.7543156147003174, "learning_rate": 8.015828603405549e-05, "loss": 1.8011, "step": 2486 }, { "epoch": 0.5966069180504766, "grad_norm": 0.7637927532196045, "learning_rate": 8.014229754576706e-05, "loss": 1.7927, "step": 2488 }, { "epoch": 0.5970865056051795, "grad_norm": 0.8989391326904297, "learning_rate": 8.012630905747862e-05, "loss": 1.8331, "step": 2490 }, { "epoch": 0.5975660931598825, "grad_norm": 0.7246668934822083, "learning_rate": 8.011032056919019e-05, "loss": 1.8688, "step": 2492 }, { "epoch": 0.5980456807145854, "grad_norm": 0.7925149202346802, "learning_rate": 8.009433208090175e-05, "loss": 1.8586, "step": 2494 }, { "epoch": 0.5985252682692884, "grad_norm": 0.730944037437439, "learning_rate": 8.007834359261332e-05, "loss": 1.8073, "step": 2496 }, { "epoch": 0.5990048558239913, "grad_norm": 0.7384891510009766, "learning_rate": 8.00623551043249e-05, "loss": 1.7553, "step": 2498 }, { "epoch": 0.5994844433786943, "grad_norm": 0.8395422697067261, "learning_rate": 8.004636661603646e-05, "loss": 1.8046, "step": 2500 }, { "epoch": 0.5999640309333972, "grad_norm": 0.6851178407669067, "learning_rate": 8.003037812774803e-05, "loss": 1.7485, "step": 2502 }, { "epoch": 0.6004436184881002, "grad_norm": 0.7051516175270081, "learning_rate": 8.00143896394596e-05, "loss": 1.7638, "step": 2504 }, { "epoch": 0.6009232060428031, "grad_norm": 0.8066529035568237, "learning_rate": 7.999840115117116e-05, "loss": 1.8114, "step": 2506 }, { "epoch": 0.6014027935975061, "grad_norm": 0.7157605886459351, "learning_rate": 7.998241266288273e-05, "loss": 1.7968, "step": 2508 }, { "epoch": 0.601882381152209, "grad_norm": 0.7422674298286438, "learning_rate": 
7.99664241745943e-05, "loss": 1.7696, "step": 2510 }, { "epoch": 0.602361968706912, "grad_norm": 0.7892662286758423, "learning_rate": 7.995043568630587e-05, "loss": 1.836, "step": 2512 }, { "epoch": 0.6028415562616151, "grad_norm": 0.857159435749054, "learning_rate": 7.993444719801742e-05, "loss": 1.8131, "step": 2514 }, { "epoch": 0.603321143816318, "grad_norm": 0.7988845109939575, "learning_rate": 7.991845870972899e-05, "loss": 1.8503, "step": 2516 }, { "epoch": 0.603800731371021, "grad_norm": 0.8914473652839661, "learning_rate": 7.990247022144056e-05, "loss": 1.8027, "step": 2518 }, { "epoch": 0.6042803189257239, "grad_norm": 0.8884130120277405, "learning_rate": 7.988648173315214e-05, "loss": 1.8479, "step": 2520 }, { "epoch": 0.6047599064804269, "grad_norm": 0.7094120383262634, "learning_rate": 7.98704932448637e-05, "loss": 1.7961, "step": 2522 }, { "epoch": 0.6052394940351298, "grad_norm": 0.9467212557792664, "learning_rate": 7.985450475657527e-05, "loss": 1.8384, "step": 2524 }, { "epoch": 0.6057190815898328, "grad_norm": 0.8544528484344482, "learning_rate": 7.983851626828683e-05, "loss": 1.7886, "step": 2526 }, { "epoch": 0.6061986691445357, "grad_norm": 0.7958546876907349, "learning_rate": 7.98225277799984e-05, "loss": 1.8696, "step": 2528 }, { "epoch": 0.6066782566992387, "grad_norm": 0.7515583038330078, "learning_rate": 7.980653929170996e-05, "loss": 1.7767, "step": 2530 }, { "epoch": 0.6071578442539416, "grad_norm": 1.1828036308288574, "learning_rate": 7.979055080342154e-05, "loss": 1.8244, "step": 2532 }, { "epoch": 0.6076374318086446, "grad_norm": 0.9078673124313354, "learning_rate": 7.977456231513311e-05, "loss": 1.8324, "step": 2534 }, { "epoch": 0.6081170193633475, "grad_norm": 0.8681762218475342, "learning_rate": 7.975857382684467e-05, "loss": 1.8194, "step": 2536 }, { "epoch": 0.6085966069180505, "grad_norm": 0.7300459146499634, "learning_rate": 7.974258533855624e-05, "loss": 1.8166, "step": 2538 }, { "epoch": 0.6090761944727534, "grad_norm": 
0.752516508102417, "learning_rate": 7.97265968502678e-05, "loss": 1.842, "step": 2540 }, { "epoch": 0.6095557820274564, "grad_norm": 0.956609845161438, "learning_rate": 7.971060836197937e-05, "loss": 1.8427, "step": 2542 }, { "epoch": 0.6100353695821593, "grad_norm": 0.8300648331642151, "learning_rate": 7.969461987369095e-05, "loss": 1.8837, "step": 2544 }, { "epoch": 0.6105149571368623, "grad_norm": 0.7574582695960999, "learning_rate": 7.967863138540252e-05, "loss": 1.8116, "step": 2546 }, { "epoch": 0.6109945446915652, "grad_norm": 0.7463668584823608, "learning_rate": 7.966264289711408e-05, "loss": 1.7763, "step": 2548 }, { "epoch": 0.6114741322462682, "grad_norm": 0.7550466060638428, "learning_rate": 7.964665440882565e-05, "loss": 1.8285, "step": 2550 }, { "epoch": 0.6119537198009711, "grad_norm": 0.730358362197876, "learning_rate": 7.963066592053721e-05, "loss": 1.8164, "step": 2552 }, { "epoch": 0.6124333073556741, "grad_norm": 1.0106319189071655, "learning_rate": 7.961467743224878e-05, "loss": 1.8412, "step": 2554 }, { "epoch": 0.612912894910377, "grad_norm": 1.0240299701690674, "learning_rate": 7.959868894396036e-05, "loss": 1.7716, "step": 2556 }, { "epoch": 0.61339248246508, "grad_norm": 0.797468364238739, "learning_rate": 7.958270045567192e-05, "loss": 1.8468, "step": 2558 }, { "epoch": 0.613872070019783, "grad_norm": 0.8344363570213318, "learning_rate": 7.956671196738349e-05, "loss": 1.8133, "step": 2560 }, { "epoch": 0.614351657574486, "grad_norm": 1.0396696329116821, "learning_rate": 7.955072347909506e-05, "loss": 1.806, "step": 2562 }, { "epoch": 0.6148312451291889, "grad_norm": 0.733478307723999, "learning_rate": 7.953473499080662e-05, "loss": 1.813, "step": 2564 }, { "epoch": 0.6153108326838919, "grad_norm": 0.8606105446815491, "learning_rate": 7.951874650251819e-05, "loss": 1.8325, "step": 2566 }, { "epoch": 0.6157904202385948, "grad_norm": 0.7484846115112305, "learning_rate": 7.950275801422977e-05, "loss": 1.7912, "step": 2568 }, { "epoch": 
0.6162700077932978, "grad_norm": 0.9288269877433777, "learning_rate": 7.948676952594133e-05, "loss": 1.7559, "step": 2570 }, { "epoch": 0.6167495953480007, "grad_norm": 0.7969287037849426, "learning_rate": 7.94707810376529e-05, "loss": 1.8259, "step": 2572 }, { "epoch": 0.6172291829027037, "grad_norm": 1.1840615272521973, "learning_rate": 7.945479254936446e-05, "loss": 1.8195, "step": 2574 }, { "epoch": 0.6177087704574066, "grad_norm": 1.0825062990188599, "learning_rate": 7.943880406107603e-05, "loss": 1.8449, "step": 2576 }, { "epoch": 0.6181883580121096, "grad_norm": 0.6929304003715515, "learning_rate": 7.94228155727876e-05, "loss": 1.8397, "step": 2578 }, { "epoch": 0.6186679455668126, "grad_norm": 1.172890305519104, "learning_rate": 7.940682708449917e-05, "loss": 1.7401, "step": 2580 }, { "epoch": 0.6191475331215155, "grad_norm": 0.7225690484046936, "learning_rate": 7.939083859621074e-05, "loss": 1.8399, "step": 2582 }, { "epoch": 0.6196271206762185, "grad_norm": 0.8053535223007202, "learning_rate": 7.93748501079223e-05, "loss": 1.7744, "step": 2584 }, { "epoch": 0.6201067082309214, "grad_norm": 0.7899699211120605, "learning_rate": 7.935886161963386e-05, "loss": 1.7979, "step": 2586 }, { "epoch": 0.6205862957856244, "grad_norm": 0.8230764269828796, "learning_rate": 7.934287313134542e-05, "loss": 1.8048, "step": 2588 }, { "epoch": 0.6210658833403273, "grad_norm": 0.7939730882644653, "learning_rate": 7.9326884643057e-05, "loss": 1.8164, "step": 2590 }, { "epoch": 0.6215454708950303, "grad_norm": 0.9377521872520447, "learning_rate": 7.931089615476857e-05, "loss": 1.8322, "step": 2592 }, { "epoch": 0.6220250584497332, "grad_norm": 0.7793135643005371, "learning_rate": 7.929490766648013e-05, "loss": 1.769, "step": 2594 }, { "epoch": 0.6225046460044362, "grad_norm": 0.8014957308769226, "learning_rate": 7.92789191781917e-05, "loss": 1.8074, "step": 2596 }, { "epoch": 0.6229842335591391, "grad_norm": 0.8260568380355835, "learning_rate": 7.926293068990327e-05, "loss": 
1.821, "step": 2598 }, { "epoch": 0.6234638211138421, "grad_norm": 0.7368150353431702, "learning_rate": 7.924694220161483e-05, "loss": 1.8022, "step": 2600 }, { "epoch": 0.623943408668545, "grad_norm": 0.8161789178848267, "learning_rate": 7.923095371332641e-05, "loss": 1.8185, "step": 2602 }, { "epoch": 0.624422996223248, "grad_norm": 0.9167683124542236, "learning_rate": 7.921496522503798e-05, "loss": 1.8486, "step": 2604 }, { "epoch": 0.6249025837779509, "grad_norm": 1.0788770914077759, "learning_rate": 7.919897673674954e-05, "loss": 1.8556, "step": 2606 }, { "epoch": 0.625382171332654, "grad_norm": 0.8483620882034302, "learning_rate": 7.918298824846111e-05, "loss": 1.789, "step": 2608 }, { "epoch": 0.6258617588873568, "grad_norm": 1.056351900100708, "learning_rate": 7.916699976017267e-05, "loss": 1.8043, "step": 2610 }, { "epoch": 0.6263413464420599, "grad_norm": 0.9254528880119324, "learning_rate": 7.915101127188425e-05, "loss": 1.7544, "step": 2612 }, { "epoch": 0.6268209339967628, "grad_norm": 0.8012200593948364, "learning_rate": 7.913502278359582e-05, "loss": 1.8052, "step": 2614 }, { "epoch": 0.6273005215514658, "grad_norm": 1.03676176071167, "learning_rate": 7.911903429530738e-05, "loss": 1.8515, "step": 2616 }, { "epoch": 0.6277801091061687, "grad_norm": 0.8429546356201172, "learning_rate": 7.910304580701895e-05, "loss": 1.8755, "step": 2618 }, { "epoch": 0.6282596966608717, "grad_norm": 0.9590525031089783, "learning_rate": 7.908705731873052e-05, "loss": 1.8174, "step": 2620 }, { "epoch": 0.6287392842155746, "grad_norm": 0.79049152135849, "learning_rate": 7.907106883044208e-05, "loss": 1.8554, "step": 2622 }, { "epoch": 0.6292188717702776, "grad_norm": 0.6966078281402588, "learning_rate": 7.905508034215366e-05, "loss": 1.7979, "step": 2624 }, { "epoch": 0.6296984593249805, "grad_norm": 0.7394617795944214, "learning_rate": 7.903909185386523e-05, "loss": 1.7483, "step": 2626 }, { "epoch": 0.6301780468796835, "grad_norm": 0.7278834581375122, "learning_rate": 
7.902310336557679e-05, "loss": 1.8912, "step": 2628 }, { "epoch": 0.6306576344343864, "grad_norm": 0.8591573238372803, "learning_rate": 7.900711487728836e-05, "loss": 1.7824, "step": 2630 }, { "epoch": 0.6311372219890894, "grad_norm": 0.7513567805290222, "learning_rate": 7.899112638899992e-05, "loss": 1.8126, "step": 2632 }, { "epoch": 0.6316168095437923, "grad_norm": 0.8000609278678894, "learning_rate": 7.897513790071149e-05, "loss": 1.7668, "step": 2634 }, { "epoch": 0.6320963970984953, "grad_norm": 0.7733408808708191, "learning_rate": 7.895914941242307e-05, "loss": 1.833, "step": 2636 }, { "epoch": 0.6325759846531982, "grad_norm": 0.8133561611175537, "learning_rate": 7.894316092413463e-05, "loss": 1.8406, "step": 2638 }, { "epoch": 0.6330555722079012, "grad_norm": 0.868613064289093, "learning_rate": 7.89271724358462e-05, "loss": 1.7792, "step": 2640 }, { "epoch": 0.6335351597626041, "grad_norm": 0.7906621694564819, "learning_rate": 7.891118394755776e-05, "loss": 1.7715, "step": 2642 }, { "epoch": 0.6340147473173071, "grad_norm": 0.8647173047065735, "learning_rate": 7.889519545926933e-05, "loss": 1.8322, "step": 2644 }, { "epoch": 0.6344943348720101, "grad_norm": 0.869770884513855, "learning_rate": 7.88792069709809e-05, "loss": 1.8091, "step": 2646 }, { "epoch": 0.634973922426713, "grad_norm": 0.7323320508003235, "learning_rate": 7.886321848269248e-05, "loss": 1.8215, "step": 2648 }, { "epoch": 0.635453509981416, "grad_norm": 1.1495035886764526, "learning_rate": 7.884722999440404e-05, "loss": 1.8296, "step": 2650 }, { "epoch": 0.6359330975361189, "grad_norm": 0.795081377029419, "learning_rate": 7.883124150611561e-05, "loss": 1.7409, "step": 2652 }, { "epoch": 0.6364126850908219, "grad_norm": 0.9633416533470154, "learning_rate": 7.881525301782717e-05, "loss": 1.7927, "step": 2654 }, { "epoch": 0.6368922726455248, "grad_norm": 1.325184941291809, "learning_rate": 7.879926452953874e-05, "loss": 1.8059, "step": 2656 }, { "epoch": 0.6373718602002278, "grad_norm": 
0.8147624731063843, "learning_rate": 7.87832760412503e-05, "loss": 1.8058, "step": 2658 }, { "epoch": 0.6378514477549307, "grad_norm": 0.8177316188812256, "learning_rate": 7.876728755296187e-05, "loss": 1.7824, "step": 2660 }, { "epoch": 0.6383310353096338, "grad_norm": 0.8800510764122009, "learning_rate": 7.875129906467344e-05, "loss": 1.8397, "step": 2662 }, { "epoch": 0.6388106228643367, "grad_norm": 0.8333784937858582, "learning_rate": 7.8735310576385e-05, "loss": 1.7881, "step": 2664 }, { "epoch": 0.6392902104190397, "grad_norm": 0.7791417241096497, "learning_rate": 7.871932208809657e-05, "loss": 1.783, "step": 2666 }, { "epoch": 0.6397697979737426, "grad_norm": 1.0761479139328003, "learning_rate": 7.870333359980813e-05, "loss": 1.8441, "step": 2668 }, { "epoch": 0.6402493855284456, "grad_norm": 0.9761336445808411, "learning_rate": 7.868734511151971e-05, "loss": 1.7605, "step": 2670 }, { "epoch": 0.6407289730831485, "grad_norm": 0.6732177138328552, "learning_rate": 7.867135662323128e-05, "loss": 1.7823, "step": 2672 }, { "epoch": 0.6412085606378515, "grad_norm": 0.8765828609466553, "learning_rate": 7.865536813494284e-05, "loss": 1.847, "step": 2674 }, { "epoch": 0.6416881481925544, "grad_norm": 0.7555404901504517, "learning_rate": 7.863937964665441e-05, "loss": 1.8538, "step": 2676 }, { "epoch": 0.6421677357472574, "grad_norm": 0.8509027361869812, "learning_rate": 7.862339115836597e-05, "loss": 1.8054, "step": 2678 }, { "epoch": 0.6426473233019603, "grad_norm": 0.7729858756065369, "learning_rate": 7.860740267007754e-05, "loss": 1.7999, "step": 2680 }, { "epoch": 0.6431269108566633, "grad_norm": 0.8906476497650146, "learning_rate": 7.859141418178912e-05, "loss": 1.8532, "step": 2682 }, { "epoch": 0.6436064984113662, "grad_norm": 0.8677057027816772, "learning_rate": 7.857542569350069e-05, "loss": 1.8366, "step": 2684 }, { "epoch": 0.6440860859660692, "grad_norm": 0.8123737573623657, "learning_rate": 7.855943720521225e-05, "loss": 1.8387, "step": 2686 }, { 
"epoch": 0.6445656735207721, "grad_norm": 0.7428637146949768, "learning_rate": 7.854344871692382e-05, "loss": 1.8432, "step": 2688 }, { "epoch": 0.6450452610754751, "grad_norm": 0.9231522083282471, "learning_rate": 7.852746022863538e-05, "loss": 1.8491, "step": 2690 }, { "epoch": 0.645524848630178, "grad_norm": 0.7590905427932739, "learning_rate": 7.851147174034695e-05, "loss": 1.8116, "step": 2692 }, { "epoch": 0.646004436184881, "grad_norm": 0.89023756980896, "learning_rate": 7.849548325205853e-05, "loss": 1.8272, "step": 2694 }, { "epoch": 0.6464840237395839, "grad_norm": 0.876691460609436, "learning_rate": 7.847949476377009e-05, "loss": 1.7477, "step": 2696 }, { "epoch": 0.6469636112942869, "grad_norm": 0.7389882802963257, "learning_rate": 7.846350627548166e-05, "loss": 1.7698, "step": 2698 }, { "epoch": 0.6474431988489898, "grad_norm": 0.7794725894927979, "learning_rate": 7.844751778719322e-05, "loss": 1.7917, "step": 2700 }, { "epoch": 0.6479227864036928, "grad_norm": 0.6935551166534424, "learning_rate": 7.843152929890479e-05, "loss": 1.8075, "step": 2702 }, { "epoch": 0.6484023739583957, "grad_norm": 0.9052838087081909, "learning_rate": 7.841554081061636e-05, "loss": 1.8495, "step": 2704 }, { "epoch": 0.6488819615130987, "grad_norm": 0.8804116249084473, "learning_rate": 7.839955232232794e-05, "loss": 1.8618, "step": 2706 }, { "epoch": 0.6493615490678016, "grad_norm": 0.7541095018386841, "learning_rate": 7.83835638340395e-05, "loss": 1.824, "step": 2708 }, { "epoch": 0.6498411366225046, "grad_norm": 0.7736824154853821, "learning_rate": 7.836757534575107e-05, "loss": 1.7591, "step": 2710 }, { "epoch": 0.6503207241772077, "grad_norm": 0.6916802525520325, "learning_rate": 7.835158685746263e-05, "loss": 1.7959, "step": 2712 }, { "epoch": 0.6508003117319106, "grad_norm": 0.8041627407073975, "learning_rate": 7.83355983691742e-05, "loss": 1.8173, "step": 2714 }, { "epoch": 0.6512798992866136, "grad_norm": 0.8053293824195862, "learning_rate": 7.831960988088576e-05, 
"loss": 1.7852, "step": 2716 }, { "epoch": 0.6517594868413165, "grad_norm": 0.7801178097724915, "learning_rate": 7.830362139259734e-05, "loss": 1.8141, "step": 2718 }, { "epoch": 0.6522390743960195, "grad_norm": 0.7386508584022522, "learning_rate": 7.828763290430891e-05, "loss": 1.8227, "step": 2720 }, { "epoch": 0.6527186619507224, "grad_norm": 0.801705002784729, "learning_rate": 7.827164441602047e-05, "loss": 1.7615, "step": 2722 }, { "epoch": 0.6531982495054254, "grad_norm": 0.7257188558578491, "learning_rate": 7.825565592773204e-05, "loss": 1.7875, "step": 2724 }, { "epoch": 0.6536778370601283, "grad_norm": 0.8389003872871399, "learning_rate": 7.82396674394436e-05, "loss": 1.8023, "step": 2726 }, { "epoch": 0.6541574246148313, "grad_norm": 0.8134016394615173, "learning_rate": 7.822367895115517e-05, "loss": 1.8703, "step": 2728 }, { "epoch": 0.6546370121695342, "grad_norm": 0.6485612392425537, "learning_rate": 7.820769046286675e-05, "loss": 1.8201, "step": 2730 }, { "epoch": 0.6551165997242372, "grad_norm": 0.739435076713562, "learning_rate": 7.81917019745783e-05, "loss": 1.801, "step": 2732 }, { "epoch": 0.6555961872789401, "grad_norm": 1.0640904903411865, "learning_rate": 7.817571348628987e-05, "loss": 1.8185, "step": 2734 }, { "epoch": 0.6560757748336431, "grad_norm": 0.7865619659423828, "learning_rate": 7.815972499800143e-05, "loss": 1.7455, "step": 2736 }, { "epoch": 0.656555362388346, "grad_norm": 0.7308880686759949, "learning_rate": 7.8143736509713e-05, "loss": 1.7839, "step": 2738 }, { "epoch": 0.657034949943049, "grad_norm": 0.919751763343811, "learning_rate": 7.812774802142458e-05, "loss": 1.7748, "step": 2740 }, { "epoch": 0.6575145374977519, "grad_norm": 0.8834330439567566, "learning_rate": 7.811175953313614e-05, "loss": 1.8864, "step": 2742 }, { "epoch": 0.6579941250524549, "grad_norm": 0.7200546264648438, "learning_rate": 7.809577104484771e-05, "loss": 1.8179, "step": 2744 }, { "epoch": 0.6584737126071578, "grad_norm": 0.6773492097854614, 
"learning_rate": 7.807978255655928e-05, "loss": 1.796, "step": 2746 }, { "epoch": 0.6589533001618608, "grad_norm": 0.9566057920455933, "learning_rate": 7.806379406827084e-05, "loss": 1.7924, "step": 2748 }, { "epoch": 0.6594328877165637, "grad_norm": 0.7815729379653931, "learning_rate": 7.804780557998241e-05, "loss": 1.8487, "step": 2750 }, { "epoch": 0.6599124752712667, "grad_norm": 0.7578445672988892, "learning_rate": 7.803181709169399e-05, "loss": 1.7562, "step": 2752 }, { "epoch": 0.6603920628259696, "grad_norm": 0.9069249033927917, "learning_rate": 7.801582860340555e-05, "loss": 1.7729, "step": 2754 }, { "epoch": 0.6608716503806726, "grad_norm": 0.7550199627876282, "learning_rate": 7.799984011511712e-05, "loss": 1.8422, "step": 2756 }, { "epoch": 0.6613512379353755, "grad_norm": 0.7798390984535217, "learning_rate": 7.798385162682868e-05, "loss": 1.8313, "step": 2758 }, { "epoch": 0.6618308254900785, "grad_norm": 1.0727492570877075, "learning_rate": 7.796786313854025e-05, "loss": 1.8408, "step": 2760 }, { "epoch": 0.6623104130447814, "grad_norm": 0.8099225163459778, "learning_rate": 7.795187465025182e-05, "loss": 1.8356, "step": 2762 }, { "epoch": 0.6627900005994845, "grad_norm": 0.7574849724769592, "learning_rate": 7.79358861619634e-05, "loss": 1.8167, "step": 2764 }, { "epoch": 0.6632695881541874, "grad_norm": 0.8986452221870422, "learning_rate": 7.791989767367496e-05, "loss": 1.8373, "step": 2766 }, { "epoch": 0.6637491757088904, "grad_norm": 0.8803322315216064, "learning_rate": 7.790390918538653e-05, "loss": 1.7725, "step": 2768 }, { "epoch": 0.6642287632635933, "grad_norm": 0.8181229829788208, "learning_rate": 7.788792069709809e-05, "loss": 1.7845, "step": 2770 }, { "epoch": 0.6647083508182963, "grad_norm": 0.6683567762374878, "learning_rate": 7.787193220880966e-05, "loss": 1.8047, "step": 2772 }, { "epoch": 0.6651879383729992, "grad_norm": 1.0457181930541992, "learning_rate": 7.785594372052122e-05, "loss": 1.8581, "step": 2774 }, { "epoch": 
0.6656675259277022, "grad_norm": 0.8004958629608154, "learning_rate": 7.78399552322328e-05, "loss": 1.8624, "step": 2776 }, { "epoch": 0.6661471134824052, "grad_norm": 0.8720858693122864, "learning_rate": 7.782396674394437e-05, "loss": 1.8149, "step": 2778 }, { "epoch": 0.6666267010371081, "grad_norm": 1.0170738697052002, "learning_rate": 7.780797825565593e-05, "loss": 1.7645, "step": 2780 }, { "epoch": 0.6671062885918111, "grad_norm": 0.8186866044998169, "learning_rate": 7.77919897673675e-05, "loss": 1.7772, "step": 2782 }, { "epoch": 0.667585876146514, "grad_norm": 0.8334677815437317, "learning_rate": 7.777600127907907e-05, "loss": 1.7922, "step": 2784 }, { "epoch": 0.668065463701217, "grad_norm": 1.0382202863693237, "learning_rate": 7.776001279079063e-05, "loss": 1.8689, "step": 2786 }, { "epoch": 0.6685450512559199, "grad_norm": 0.7566665410995483, "learning_rate": 7.774402430250221e-05, "loss": 1.811, "step": 2788 }, { "epoch": 0.6690246388106229, "grad_norm": 0.7071545720100403, "learning_rate": 7.772803581421378e-05, "loss": 1.7751, "step": 2790 }, { "epoch": 0.6695042263653258, "grad_norm": 0.7393055558204651, "learning_rate": 7.771204732592534e-05, "loss": 1.7786, "step": 2792 }, { "epoch": 0.6699838139200288, "grad_norm": 0.8369373679161072, "learning_rate": 7.769605883763691e-05, "loss": 1.7774, "step": 2794 }, { "epoch": 0.6704634014747317, "grad_norm": 0.7549570798873901, "learning_rate": 7.768007034934847e-05, "loss": 1.845, "step": 2796 }, { "epoch": 0.6709429890294347, "grad_norm": 0.8199227452278137, "learning_rate": 7.766408186106004e-05, "loss": 1.8225, "step": 2798 }, { "epoch": 0.6714225765841376, "grad_norm": 0.8445465564727783, "learning_rate": 7.764809337277162e-05, "loss": 1.8125, "step": 2800 }, { "epoch": 0.6714225765841376, "eval_loss": 1.7878366708755493, "eval_runtime": 331.2548, "eval_samples_per_second": 402.859, "eval_steps_per_second": 12.592, "step": 2800 }, { "epoch": 0.6719021641388406, "grad_norm": 0.8423258662223816, 
"learning_rate": 7.763210488448318e-05, "loss": 1.7964, "step": 2802 }, { "epoch": 0.6723817516935435, "grad_norm": 0.7697099447250366, "learning_rate": 7.761611639619474e-05, "loss": 1.7984, "step": 2804 }, { "epoch": 0.6728613392482465, "grad_norm": 0.8488075137138367, "learning_rate": 7.76001279079063e-05, "loss": 1.772, "step": 2806 }, { "epoch": 0.6733409268029494, "grad_norm": 0.8002026677131653, "learning_rate": 7.758413941961788e-05, "loss": 1.7839, "step": 2808 }, { "epoch": 0.6738205143576524, "grad_norm": 0.6913251280784607, "learning_rate": 7.756815093132945e-05, "loss": 1.7867, "step": 2810 }, { "epoch": 0.6743001019123553, "grad_norm": 0.7700398564338684, "learning_rate": 7.755216244304101e-05, "loss": 1.7686, "step": 2812 }, { "epoch": 0.6747796894670584, "grad_norm": 0.9794091582298279, "learning_rate": 7.753617395475258e-05, "loss": 1.8096, "step": 2814 }, { "epoch": 0.6752592770217613, "grad_norm": 0.8876166343688965, "learning_rate": 7.752018546646414e-05, "loss": 1.8383, "step": 2816 }, { "epoch": 0.6757388645764643, "grad_norm": 1.0019450187683105, "learning_rate": 7.750419697817571e-05, "loss": 1.7584, "step": 2818 }, { "epoch": 0.6762184521311672, "grad_norm": 0.8695688843727112, "learning_rate": 7.748820848988729e-05, "loss": 1.7339, "step": 2820 }, { "epoch": 0.6766980396858702, "grad_norm": 0.8881654143333435, "learning_rate": 7.747222000159885e-05, "loss": 1.8518, "step": 2822 }, { "epoch": 0.6771776272405731, "grad_norm": 1.0527842044830322, "learning_rate": 7.745623151331042e-05, "loss": 1.8325, "step": 2824 }, { "epoch": 0.6776572147952761, "grad_norm": 1.0231884717941284, "learning_rate": 7.744024302502199e-05, "loss": 1.8236, "step": 2826 }, { "epoch": 0.678136802349979, "grad_norm": 1.0572279691696167, "learning_rate": 7.742425453673355e-05, "loss": 1.7777, "step": 2828 }, { "epoch": 0.678616389904682, "grad_norm": 0.830051600933075, "learning_rate": 7.740826604844512e-05, "loss": 1.8359, "step": 2830 }, { "epoch": 
0.6790959774593849, "grad_norm": 0.8113921880722046, "learning_rate": 7.73922775601567e-05, "loss": 1.8349, "step": 2832 }, { "epoch": 0.6795755650140879, "grad_norm": 1.149129033088684, "learning_rate": 7.737628907186826e-05, "loss": 1.7984, "step": 2834 }, { "epoch": 0.6800551525687908, "grad_norm": 0.6593514680862427, "learning_rate": 7.736030058357983e-05, "loss": 1.7687, "step": 2836 }, { "epoch": 0.6805347401234938, "grad_norm": 0.7904682159423828, "learning_rate": 7.734431209529139e-05, "loss": 1.8559, "step": 2838 }, { "epoch": 0.6810143276781967, "grad_norm": 0.7587451338768005, "learning_rate": 7.732832360700296e-05, "loss": 1.7332, "step": 2840 }, { "epoch": 0.6814939152328997, "grad_norm": 0.8387437462806702, "learning_rate": 7.731233511871452e-05, "loss": 1.8274, "step": 2842 }, { "epoch": 0.6819735027876027, "grad_norm": 0.8549642562866211, "learning_rate": 7.72963466304261e-05, "loss": 1.7971, "step": 2844 }, { "epoch": 0.6824530903423056, "grad_norm": 0.747821569442749, "learning_rate": 7.728035814213767e-05, "loss": 1.819, "step": 2846 }, { "epoch": 0.6829326778970086, "grad_norm": 0.714723527431488, "learning_rate": 7.726436965384924e-05, "loss": 1.7672, "step": 2848 }, { "epoch": 0.6834122654517115, "grad_norm": 0.6630494594573975, "learning_rate": 7.72483811655608e-05, "loss": 1.7984, "step": 2850 }, { "epoch": 0.6838918530064145, "grad_norm": 0.9868255257606506, "learning_rate": 7.723239267727237e-05, "loss": 1.7681, "step": 2852 }, { "epoch": 0.6843714405611174, "grad_norm": 0.8597872257232666, "learning_rate": 7.721640418898393e-05, "loss": 1.8349, "step": 2854 }, { "epoch": 0.6848510281158204, "grad_norm": 0.7714802622795105, "learning_rate": 7.720041570069551e-05, "loss": 1.7588, "step": 2856 }, { "epoch": 0.6853306156705233, "grad_norm": 0.7528527975082397, "learning_rate": 7.718442721240708e-05, "loss": 1.8244, "step": 2858 }, { "epoch": 0.6858102032252263, "grad_norm": 0.8679733872413635, "learning_rate": 7.716843872411864e-05, "loss": 
1.7991, "step": 2860 }, { "epoch": 0.6862897907799292, "grad_norm": 0.7884739637374878, "learning_rate": 7.715245023583021e-05, "loss": 1.8139, "step": 2862 }, { "epoch": 0.6867693783346323, "grad_norm": 0.7325998544692993, "learning_rate": 7.713646174754177e-05, "loss": 1.7384, "step": 2864 }, { "epoch": 0.6872489658893352, "grad_norm": 0.8004255294799805, "learning_rate": 7.712047325925334e-05, "loss": 1.8009, "step": 2866 }, { "epoch": 0.6877285534440382, "grad_norm": 0.895677924156189, "learning_rate": 7.710448477096492e-05, "loss": 1.8406, "step": 2868 }, { "epoch": 0.6882081409987411, "grad_norm": 0.9671145081520081, "learning_rate": 7.708849628267649e-05, "loss": 1.8787, "step": 2870 }, { "epoch": 0.6886877285534441, "grad_norm": 1.0479432344436646, "learning_rate": 7.707250779438805e-05, "loss": 1.8591, "step": 2872 }, { "epoch": 0.689167316108147, "grad_norm": 0.9324403405189514, "learning_rate": 7.705651930609962e-05, "loss": 1.838, "step": 2874 }, { "epoch": 0.68964690366285, "grad_norm": 1.3607088327407837, "learning_rate": 7.704053081781118e-05, "loss": 1.7993, "step": 2876 }, { "epoch": 0.6901264912175529, "grad_norm": 0.7732170820236206, "learning_rate": 7.702454232952275e-05, "loss": 1.8177, "step": 2878 }, { "epoch": 0.6906060787722559, "grad_norm": 0.7650914192199707, "learning_rate": 7.700855384123431e-05, "loss": 1.7916, "step": 2880 }, { "epoch": 0.6910856663269588, "grad_norm": 0.872480571269989, "learning_rate": 7.699256535294588e-05, "loss": 1.8165, "step": 2882 }, { "epoch": 0.6915652538816618, "grad_norm": 0.703552782535553, "learning_rate": 7.697657686465744e-05, "loss": 1.7857, "step": 2884 }, { "epoch": 0.6920448414363647, "grad_norm": 0.6962745785713196, "learning_rate": 7.696058837636901e-05, "loss": 1.821, "step": 2886 }, { "epoch": 0.6925244289910677, "grad_norm": 0.7239725589752197, "learning_rate": 7.694459988808058e-05, "loss": 1.786, "step": 2888 }, { "epoch": 0.6930040165457706, "grad_norm": 0.8397719264030457, "learning_rate": 
7.692861139979216e-05, "loss": 1.8069, "step": 2890 }, { "epoch": 0.6934836041004736, "grad_norm": 0.7454531192779541, "learning_rate": 7.691262291150372e-05, "loss": 1.831, "step": 2892 }, { "epoch": 0.6939631916551765, "grad_norm": 0.8239006996154785, "learning_rate": 7.689663442321529e-05, "loss": 1.8404, "step": 2894 }, { "epoch": 0.6944427792098795, "grad_norm": 0.8327688574790955, "learning_rate": 7.688064593492685e-05, "loss": 1.8126, "step": 2896 }, { "epoch": 0.6949223667645824, "grad_norm": 0.75544673204422, "learning_rate": 7.686465744663842e-05, "loss": 1.7369, "step": 2898 }, { "epoch": 0.6954019543192854, "grad_norm": 0.8838487863540649, "learning_rate": 7.684866895834998e-05, "loss": 1.7988, "step": 2900 }, { "epoch": 0.6958815418739883, "grad_norm": 0.7779418230056763, "learning_rate": 7.683268047006156e-05, "loss": 1.8188, "step": 2902 }, { "epoch": 0.6963611294286913, "grad_norm": 0.8739464282989502, "learning_rate": 7.681669198177313e-05, "loss": 1.7346, "step": 2904 }, { "epoch": 0.6968407169833942, "grad_norm": 0.8234220743179321, "learning_rate": 7.68007034934847e-05, "loss": 1.7572, "step": 2906 }, { "epoch": 0.6973203045380972, "grad_norm": 1.0147874355316162, "learning_rate": 7.678471500519626e-05, "loss": 1.8507, "step": 2908 }, { "epoch": 0.6977998920928002, "grad_norm": 0.8401166796684265, "learning_rate": 7.676872651690783e-05, "loss": 1.7809, "step": 2910 }, { "epoch": 0.6982794796475031, "grad_norm": 0.6976987719535828, "learning_rate": 7.675273802861939e-05, "loss": 1.8089, "step": 2912 }, { "epoch": 0.6987590672022062, "grad_norm": 0.7806252241134644, "learning_rate": 7.673674954033097e-05, "loss": 1.7777, "step": 2914 }, { "epoch": 0.699238654756909, "grad_norm": 0.8260698318481445, "learning_rate": 7.672076105204254e-05, "loss": 1.7549, "step": 2916 }, { "epoch": 0.6997182423116121, "grad_norm": 0.8416153192520142, "learning_rate": 7.67047725637541e-05, "loss": 1.7671, "step": 2918 }, { "epoch": 0.700197829866315, "grad_norm": 
0.6992304921150208, "learning_rate": 7.668878407546567e-05, "loss": 1.7704, "step": 2920 }, { "epoch": 0.700677417421018, "grad_norm": 0.7262789011001587, "learning_rate": 7.667279558717723e-05, "loss": 1.7624, "step": 2922 }, { "epoch": 0.7011570049757209, "grad_norm": 0.6783840656280518, "learning_rate": 7.66568070988888e-05, "loss": 1.8442, "step": 2924 }, { "epoch": 0.7016365925304239, "grad_norm": 0.8316023945808411, "learning_rate": 7.664081861060038e-05, "loss": 1.8295, "step": 2926 }, { "epoch": 0.7021161800851268, "grad_norm": 1.0261812210083008, "learning_rate": 7.662483012231194e-05, "loss": 1.8309, "step": 2928 }, { "epoch": 0.7025957676398298, "grad_norm": 0.9694039821624756, "learning_rate": 7.660884163402351e-05, "loss": 1.7615, "step": 2930 }, { "epoch": 0.7030753551945327, "grad_norm": 0.799229621887207, "learning_rate": 7.659285314573508e-05, "loss": 1.7911, "step": 2932 }, { "epoch": 0.7035549427492357, "grad_norm": 0.7558437585830688, "learning_rate": 7.657686465744664e-05, "loss": 1.761, "step": 2934 }, { "epoch": 0.7040345303039386, "grad_norm": 0.6804102659225464, "learning_rate": 7.656087616915821e-05, "loss": 1.7999, "step": 2936 }, { "epoch": 0.7045141178586416, "grad_norm": 0.928348183631897, "learning_rate": 7.654488768086979e-05, "loss": 1.7956, "step": 2938 }, { "epoch": 0.7049937054133445, "grad_norm": 0.6873201131820679, "learning_rate": 7.652889919258135e-05, "loss": 1.7722, "step": 2940 }, { "epoch": 0.7054732929680475, "grad_norm": 0.6956375241279602, "learning_rate": 7.651291070429292e-05, "loss": 1.7838, "step": 2942 }, { "epoch": 0.7059528805227504, "grad_norm": 0.7363762855529785, "learning_rate": 7.649692221600448e-05, "loss": 1.8102, "step": 2944 }, { "epoch": 0.7064324680774534, "grad_norm": 1.0677250623703003, "learning_rate": 7.648093372771605e-05, "loss": 1.8606, "step": 2946 }, { "epoch": 0.7069120556321563, "grad_norm": 0.873247504234314, "learning_rate": 7.646494523942762e-05, "loss": 1.8632, "step": 2948 }, { 
"epoch": 0.7073916431868593, "grad_norm": 0.7901259064674377, "learning_rate": 7.644895675113918e-05, "loss": 1.8248, "step": 2950 }, { "epoch": 0.7078712307415622, "grad_norm": 0.9635350108146667, "learning_rate": 7.643296826285075e-05, "loss": 1.8174, "step": 2952 }, { "epoch": 0.7083508182962652, "grad_norm": 0.7184690833091736, "learning_rate": 7.641697977456231e-05, "loss": 1.8385, "step": 2954 }, { "epoch": 0.7088304058509681, "grad_norm": 0.7637758255004883, "learning_rate": 7.640099128627388e-05, "loss": 1.8036, "step": 2956 }, { "epoch": 0.7093099934056711, "grad_norm": 0.7588894963264465, "learning_rate": 7.638500279798544e-05, "loss": 1.7884, "step": 2958 }, { "epoch": 0.709789580960374, "grad_norm": 1.065736174583435, "learning_rate": 7.636901430969702e-05, "loss": 1.8133, "step": 2960 }, { "epoch": 0.710269168515077, "grad_norm": 0.8562358617782593, "learning_rate": 7.635302582140859e-05, "loss": 1.8084, "step": 2962 }, { "epoch": 0.7107487560697799, "grad_norm": 0.8096973299980164, "learning_rate": 7.633703733312015e-05, "loss": 1.8065, "step": 2964 }, { "epoch": 0.711228343624483, "grad_norm": 0.7101355791091919, "learning_rate": 7.632104884483172e-05, "loss": 1.877, "step": 2966 }, { "epoch": 0.7117079311791858, "grad_norm": 0.7988585829734802, "learning_rate": 7.630506035654329e-05, "loss": 1.7904, "step": 2968 }, { "epoch": 0.7121875187338889, "grad_norm": 0.9437737464904785, "learning_rate": 7.628907186825485e-05, "loss": 1.7947, "step": 2970 }, { "epoch": 0.7126671062885918, "grad_norm": 0.8623157739639282, "learning_rate": 7.627308337996643e-05, "loss": 1.8371, "step": 2972 }, { "epoch": 0.7131466938432948, "grad_norm": 0.7251987457275391, "learning_rate": 7.6257094891678e-05, "loss": 1.7937, "step": 2974 }, { "epoch": 0.7136262813979978, "grad_norm": 0.8412840962409973, "learning_rate": 7.624110640338956e-05, "loss": 1.8174, "step": 2976 }, { "epoch": 0.7141058689527007, "grad_norm": 0.7923659682273865, "learning_rate": 7.622511791510113e-05, 
"loss": 1.7505, "step": 2978 }, { "epoch": 0.7145854565074037, "grad_norm": 0.7865984439849854, "learning_rate": 7.62091294268127e-05, "loss": 1.8057, "step": 2980 }, { "epoch": 0.7150650440621066, "grad_norm": 1.0081987380981445, "learning_rate": 7.619314093852426e-05, "loss": 1.8315, "step": 2982 }, { "epoch": 0.7155446316168096, "grad_norm": 0.8400549292564392, "learning_rate": 7.617715245023584e-05, "loss": 1.849, "step": 2984 }, { "epoch": 0.7160242191715125, "grad_norm": 0.7921363115310669, "learning_rate": 7.61611639619474e-05, "loss": 1.7816, "step": 2986 }, { "epoch": 0.7165038067262155, "grad_norm": 0.8683953881263733, "learning_rate": 7.614517547365897e-05, "loss": 1.7954, "step": 2988 }, { "epoch": 0.7169833942809184, "grad_norm": 0.8292727470397949, "learning_rate": 7.612918698537054e-05, "loss": 1.7836, "step": 2990 }, { "epoch": 0.7174629818356214, "grad_norm": 0.7929032444953918, "learning_rate": 7.61131984970821e-05, "loss": 1.7941, "step": 2992 }, { "epoch": 0.7179425693903243, "grad_norm": 0.7431273460388184, "learning_rate": 7.609721000879367e-05, "loss": 1.7483, "step": 2994 }, { "epoch": 0.7184221569450273, "grad_norm": 0.8853791952133179, "learning_rate": 7.608122152050525e-05, "loss": 1.8654, "step": 2996 }, { "epoch": 0.7189017444997302, "grad_norm": 0.7874630689620972, "learning_rate": 7.606523303221681e-05, "loss": 1.8021, "step": 2998 }, { "epoch": 0.7193813320544332, "grad_norm": 0.7033799290657043, "learning_rate": 7.604924454392838e-05, "loss": 1.8079, "step": 3000 }, { "epoch": 0.7198609196091361, "grad_norm": 0.8226549625396729, "learning_rate": 7.603325605563994e-05, "loss": 1.8956, "step": 3002 }, { "epoch": 0.7203405071638391, "grad_norm": 0.7610841393470764, "learning_rate": 7.601726756735151e-05, "loss": 1.8249, "step": 3004 }, { "epoch": 0.720820094718542, "grad_norm": 0.706194281578064, "learning_rate": 7.600127907906307e-05, "loss": 1.7772, "step": 3006 }, { "epoch": 0.721299682273245, "grad_norm": 0.762429416179657, 
"learning_rate": 7.598529059077465e-05, "loss": 1.8063, "step": 3008 }, { "epoch": 0.7217792698279479, "grad_norm": 0.8512184023857117, "learning_rate": 7.596930210248622e-05, "loss": 1.835, "step": 3010 }, { "epoch": 0.7222588573826509, "grad_norm": 0.6736253499984741, "learning_rate": 7.595331361419779e-05, "loss": 1.8539, "step": 3012 }, { "epoch": 0.7227384449373538, "grad_norm": 0.8465672135353088, "learning_rate": 7.593732512590935e-05, "loss": 1.8275, "step": 3014 }, { "epoch": 0.7232180324920569, "grad_norm": 0.6953487992286682, "learning_rate": 7.592133663762092e-05, "loss": 1.7986, "step": 3016 }, { "epoch": 0.7236976200467597, "grad_norm": 0.6745506525039673, "learning_rate": 7.59053481493325e-05, "loss": 1.7656, "step": 3018 }, { "epoch": 0.7241772076014628, "grad_norm": 0.7614222764968872, "learning_rate": 7.588935966104406e-05, "loss": 1.8127, "step": 3020 }, { "epoch": 0.7246567951561657, "grad_norm": 0.8112572431564331, "learning_rate": 7.587337117275563e-05, "loss": 1.7798, "step": 3022 }, { "epoch": 0.7251363827108687, "grad_norm": 0.8769838809967041, "learning_rate": 7.585738268446718e-05, "loss": 1.7595, "step": 3024 }, { "epoch": 0.7256159702655716, "grad_norm": 1.329817771911621, "learning_rate": 7.584139419617875e-05, "loss": 1.7928, "step": 3026 }, { "epoch": 0.7260955578202746, "grad_norm": 0.7580428123474121, "learning_rate": 7.582540570789032e-05, "loss": 1.8424, "step": 3028 }, { "epoch": 0.7265751453749775, "grad_norm": 0.7087814211845398, "learning_rate": 7.580941721960189e-05, "loss": 1.8085, "step": 3030 }, { "epoch": 0.7270547329296805, "grad_norm": 0.8580549359321594, "learning_rate": 7.579342873131346e-05, "loss": 1.8132, "step": 3032 }, { "epoch": 0.7275343204843834, "grad_norm": 0.6808536052703857, "learning_rate": 7.577744024302502e-05, "loss": 1.7417, "step": 3034 }, { "epoch": 0.7280139080390864, "grad_norm": 0.7284507155418396, "learning_rate": 7.576145175473659e-05, "loss": 1.7886, "step": 3036 }, { "epoch": 
0.7284934955937893, "grad_norm": 0.8210279941558838, "learning_rate": 7.574546326644815e-05, "loss": 1.7905, "step": 3038 }, { "epoch": 0.7289730831484923, "grad_norm": 0.6401440501213074, "learning_rate": 7.572947477815973e-05, "loss": 1.8388, "step": 3040 }, { "epoch": 0.7294526707031953, "grad_norm": 0.6954905986785889, "learning_rate": 7.57134862898713e-05, "loss": 1.7679, "step": 3042 }, { "epoch": 0.7299322582578982, "grad_norm": 0.9023464322090149, "learning_rate": 7.569749780158286e-05, "loss": 1.7963, "step": 3044 }, { "epoch": 0.7304118458126012, "grad_norm": 0.8698818683624268, "learning_rate": 7.568150931329443e-05, "loss": 1.7818, "step": 3046 }, { "epoch": 0.7308914333673041, "grad_norm": 0.898729681968689, "learning_rate": 7.5665520825006e-05, "loss": 1.8124, "step": 3048 }, { "epoch": 0.7313710209220071, "grad_norm": 0.8879214525222778, "learning_rate": 7.564953233671756e-05, "loss": 1.8182, "step": 3050 }, { "epoch": 0.73185060847671, "grad_norm": 0.7847311496734619, "learning_rate": 7.563354384842914e-05, "loss": 1.8279, "step": 3052 }, { "epoch": 0.732330196031413, "grad_norm": 0.7755553126335144, "learning_rate": 7.56175553601407e-05, "loss": 1.7556, "step": 3054 }, { "epoch": 0.7328097835861159, "grad_norm": 0.9336764216423035, "learning_rate": 7.560156687185227e-05, "loss": 1.8121, "step": 3056 }, { "epoch": 0.7332893711408189, "grad_norm": 0.7496489882469177, "learning_rate": 7.558557838356384e-05, "loss": 1.8106, "step": 3058 }, { "epoch": 0.7337689586955218, "grad_norm": 0.9008498191833496, "learning_rate": 7.55695898952754e-05, "loss": 1.7892, "step": 3060 }, { "epoch": 0.7342485462502248, "grad_norm": 0.7246591448783875, "learning_rate": 7.555360140698697e-05, "loss": 1.7858, "step": 3062 }, { "epoch": 0.7347281338049277, "grad_norm": 0.9854170083999634, "learning_rate": 7.553761291869855e-05, "loss": 1.7149, "step": 3064 }, { "epoch": 0.7352077213596308, "grad_norm": 0.8071674108505249, "learning_rate": 7.552162443041011e-05, "loss": 
1.8269, "step": 3066 }, { "epoch": 0.7356873089143336, "grad_norm": 0.8154370188713074, "learning_rate": 7.550563594212168e-05, "loss": 1.8153, "step": 3068 }, { "epoch": 0.7361668964690367, "grad_norm": 0.7501091361045837, "learning_rate": 7.548964745383324e-05, "loss": 1.7313, "step": 3070 }, { "epoch": 0.7366464840237396, "grad_norm": 0.8202069997787476, "learning_rate": 7.547365896554481e-05, "loss": 1.8583, "step": 3072 }, { "epoch": 0.7371260715784426, "grad_norm": 1.0328272581100464, "learning_rate": 7.545767047725638e-05, "loss": 1.7763, "step": 3074 }, { "epoch": 0.7376056591331455, "grad_norm": 0.8659077286720276, "learning_rate": 7.544168198896796e-05, "loss": 1.7829, "step": 3076 }, { "epoch": 0.7380852466878485, "grad_norm": 0.8723615407943726, "learning_rate": 7.542569350067952e-05, "loss": 1.7046, "step": 3078 }, { "epoch": 0.7385648342425514, "grad_norm": 0.8471003770828247, "learning_rate": 7.540970501239109e-05, "loss": 1.8041, "step": 3080 }, { "epoch": 0.7390444217972544, "grad_norm": 0.7223091721534729, "learning_rate": 7.539371652410265e-05, "loss": 1.7862, "step": 3082 }, { "epoch": 0.7395240093519573, "grad_norm": 0.691214919090271, "learning_rate": 7.537772803581422e-05, "loss": 1.8129, "step": 3084 }, { "epoch": 0.7400035969066603, "grad_norm": 0.7275089621543884, "learning_rate": 7.536173954752578e-05, "loss": 1.7073, "step": 3086 }, { "epoch": 0.7404831844613632, "grad_norm": 0.7480186820030212, "learning_rate": 7.534575105923736e-05, "loss": 1.77, "step": 3088 }, { "epoch": 0.7409627720160662, "grad_norm": 0.7633335590362549, "learning_rate": 7.532976257094893e-05, "loss": 1.8261, "step": 3090 }, { "epoch": 0.7414423595707691, "grad_norm": 0.710408627986908, "learning_rate": 7.53137740826605e-05, "loss": 1.7826, "step": 3092 }, { "epoch": 0.7419219471254721, "grad_norm": 0.7407817840576172, "learning_rate": 7.529778559437206e-05, "loss": 1.7823, "step": 3094 }, { "epoch": 0.742401534680175, "grad_norm": 0.7482059597969055, 
"learning_rate": 7.528179710608361e-05, "loss": 1.7446, "step": 3096 }, { "epoch": 0.742881122234878, "grad_norm": 1.002848744392395, "learning_rate": 7.526580861779519e-05, "loss": 1.834, "step": 3098 }, { "epoch": 0.7433607097895809, "grad_norm": 0.6763379573822021, "learning_rate": 7.524982012950676e-05, "loss": 1.7559, "step": 3100 }, { "epoch": 0.7438402973442839, "grad_norm": 0.6926894783973694, "learning_rate": 7.523383164121832e-05, "loss": 1.8128, "step": 3102 }, { "epoch": 0.7443198848989868, "grad_norm": 1.0382893085479736, "learning_rate": 7.521784315292989e-05, "loss": 1.8034, "step": 3104 }, { "epoch": 0.7447994724536898, "grad_norm": 0.798424482345581, "learning_rate": 7.520185466464145e-05, "loss": 1.8122, "step": 3106 }, { "epoch": 0.7452790600083928, "grad_norm": 0.815355658531189, "learning_rate": 7.518586617635302e-05, "loss": 1.7747, "step": 3108 }, { "epoch": 0.7457586475630957, "grad_norm": 0.7880761027336121, "learning_rate": 7.51698776880646e-05, "loss": 1.7676, "step": 3110 }, { "epoch": 0.7462382351177987, "grad_norm": 0.8055126070976257, "learning_rate": 7.515388919977617e-05, "loss": 1.8021, "step": 3112 }, { "epoch": 0.7467178226725016, "grad_norm": 0.730841875076294, "learning_rate": 7.513790071148773e-05, "loss": 1.8067, "step": 3114 }, { "epoch": 0.7471974102272047, "grad_norm": 0.6910733580589294, "learning_rate": 7.51219122231993e-05, "loss": 1.7908, "step": 3116 }, { "epoch": 0.7476769977819075, "grad_norm": 0.780958890914917, "learning_rate": 7.510592373491086e-05, "loss": 1.8133, "step": 3118 }, { "epoch": 0.7481565853366106, "grad_norm": 0.746619462966919, "learning_rate": 7.508993524662243e-05, "loss": 1.8146, "step": 3120 }, { "epoch": 0.7486361728913135, "grad_norm": 0.9138036370277405, "learning_rate": 7.507394675833401e-05, "loss": 1.7681, "step": 3122 }, { "epoch": 0.7491157604460165, "grad_norm": 0.8001703023910522, "learning_rate": 7.505795827004557e-05, "loss": 1.7732, "step": 3124 }, { "epoch": 0.7495953480007194, 
"grad_norm": 0.6671168208122253, "learning_rate": 7.504196978175714e-05, "loss": 1.7852, "step": 3126 }, { "epoch": 0.7500749355554224, "grad_norm": 0.8019338250160217, "learning_rate": 7.50259812934687e-05, "loss": 1.7884, "step": 3128 }, { "epoch": 0.7505545231101253, "grad_norm": 0.813353955745697, "learning_rate": 7.500999280518027e-05, "loss": 1.8029, "step": 3130 }, { "epoch": 0.7510341106648283, "grad_norm": 0.7796133160591125, "learning_rate": 7.499400431689184e-05, "loss": 1.8081, "step": 3132 }, { "epoch": 0.7515136982195312, "grad_norm": 0.6407152414321899, "learning_rate": 7.497801582860341e-05, "loss": 1.7621, "step": 3134 }, { "epoch": 0.7519932857742342, "grad_norm": 0.6809926629066467, "learning_rate": 7.496202734031498e-05, "loss": 1.7745, "step": 3136 }, { "epoch": 0.7524728733289371, "grad_norm": 0.7925243973731995, "learning_rate": 7.494603885202655e-05, "loss": 1.7781, "step": 3138 }, { "epoch": 0.7529524608836401, "grad_norm": 0.9123616218566895, "learning_rate": 7.493005036373811e-05, "loss": 1.7913, "step": 3140 }, { "epoch": 0.753432048438343, "grad_norm": 0.7136143445968628, "learning_rate": 7.491406187544968e-05, "loss": 1.8428, "step": 3142 }, { "epoch": 0.753911635993046, "grad_norm": 0.7649164199829102, "learning_rate": 7.489807338716124e-05, "loss": 1.7706, "step": 3144 }, { "epoch": 0.7543912235477489, "grad_norm": 0.6299951076507568, "learning_rate": 7.488208489887282e-05, "loss": 1.815, "step": 3146 }, { "epoch": 0.7548708111024519, "grad_norm": 0.807616114616394, "learning_rate": 7.486609641058439e-05, "loss": 1.7652, "step": 3148 }, { "epoch": 0.7553503986571548, "grad_norm": 0.7499390244483948, "learning_rate": 7.485010792229595e-05, "loss": 1.8399, "step": 3150 }, { "epoch": 0.7558299862118578, "grad_norm": 0.7142356634140015, "learning_rate": 7.483411943400752e-05, "loss": 1.8364, "step": 3152 }, { "epoch": 0.7563095737665607, "grad_norm": 0.8315350413322449, "learning_rate": 7.481813094571909e-05, "loss": 1.8281, "step": 3154 
}, { "epoch": 0.7567891613212637, "grad_norm": 0.7582898139953613, "learning_rate": 7.480214245743065e-05, "loss": 1.8309, "step": 3156 }, { "epoch": 0.7572687488759666, "grad_norm": 0.6691254377365112, "learning_rate": 7.478615396914223e-05, "loss": 1.773, "step": 3158 }, { "epoch": 0.7577483364306696, "grad_norm": 0.8355371952056885, "learning_rate": 7.47701654808538e-05, "loss": 1.8192, "step": 3160 }, { "epoch": 0.7582279239853725, "grad_norm": 0.9306164383888245, "learning_rate": 7.475417699256536e-05, "loss": 1.7598, "step": 3162 }, { "epoch": 0.7587075115400755, "grad_norm": 0.723198652267456, "learning_rate": 7.473818850427693e-05, "loss": 1.7807, "step": 3164 }, { "epoch": 0.7591870990947784, "grad_norm": 0.6665619015693665, "learning_rate": 7.472220001598849e-05, "loss": 1.7237, "step": 3166 }, { "epoch": 0.7596666866494814, "grad_norm": 0.9319331645965576, "learning_rate": 7.470621152770006e-05, "loss": 1.7886, "step": 3168 }, { "epoch": 0.7601462742041843, "grad_norm": 0.6910361647605896, "learning_rate": 7.469022303941162e-05, "loss": 1.8145, "step": 3170 }, { "epoch": 0.7606258617588874, "grad_norm": 0.838578999042511, "learning_rate": 7.467423455112319e-05, "loss": 1.8155, "step": 3172 }, { "epoch": 0.7611054493135904, "grad_norm": 0.8359108567237854, "learning_rate": 7.465824606283476e-05, "loss": 1.8014, "step": 3174 }, { "epoch": 0.7615850368682933, "grad_norm": 1.184833288192749, "learning_rate": 7.464225757454632e-05, "loss": 1.805, "step": 3176 }, { "epoch": 0.7620646244229963, "grad_norm": 0.7036992311477661, "learning_rate": 7.462626908625789e-05, "loss": 1.815, "step": 3178 }, { "epoch": 0.7625442119776992, "grad_norm": 0.8039335608482361, "learning_rate": 7.461028059796947e-05, "loss": 1.8538, "step": 3180 }, { "epoch": 0.7630237995324022, "grad_norm": 0.7638260722160339, "learning_rate": 7.459429210968103e-05, "loss": 1.8086, "step": 3182 }, { "epoch": 0.7635033870871051, "grad_norm": 0.7836443781852722, "learning_rate": 
7.45783036213926e-05, "loss": 1.8015, "step": 3184 }, { "epoch": 0.7639829746418081, "grad_norm": 0.7600427269935608, "learning_rate": 7.456231513310416e-05, "loss": 1.7768, "step": 3186 }, { "epoch": 0.764462562196511, "grad_norm": 0.7480159401893616, "learning_rate": 7.454632664481573e-05, "loss": 1.8143, "step": 3188 }, { "epoch": 0.764942149751214, "grad_norm": 1.0682438611984253, "learning_rate": 7.45303381565273e-05, "loss": 1.7973, "step": 3190 }, { "epoch": 0.7654217373059169, "grad_norm": 0.7279807925224304, "learning_rate": 7.451434966823887e-05, "loss": 1.7687, "step": 3192 }, { "epoch": 0.7659013248606199, "grad_norm": 0.8338211178779602, "learning_rate": 7.449836117995044e-05, "loss": 1.813, "step": 3194 }, { "epoch": 0.7663809124153228, "grad_norm": 0.9744908213615417, "learning_rate": 7.4482372691662e-05, "loss": 1.8493, "step": 3196 }, { "epoch": 0.7668604999700258, "grad_norm": 0.7694433927536011, "learning_rate": 7.446638420337357e-05, "loss": 1.8039, "step": 3198 }, { "epoch": 0.7673400875247287, "grad_norm": 0.8484742045402527, "learning_rate": 7.445039571508514e-05, "loss": 1.8324, "step": 3200 }, { "epoch": 0.7673400875247287, "eval_loss": 1.7848272323608398, "eval_runtime": 331.2507, "eval_samples_per_second": 402.864, "eval_steps_per_second": 12.592, "step": 3200 }, { "epoch": 0.7678196750794317, "grad_norm": 0.884623110294342, "learning_rate": 7.44344072267967e-05, "loss": 1.7887, "step": 3202 }, { "epoch": 0.7682992626341346, "grad_norm": 0.7301356196403503, "learning_rate": 7.441841873850828e-05, "loss": 1.811, "step": 3204 }, { "epoch": 0.7687788501888376, "grad_norm": 0.8791192770004272, "learning_rate": 7.440243025021985e-05, "loss": 1.7625, "step": 3206 }, { "epoch": 0.7692584377435405, "grad_norm": 0.8262335658073425, "learning_rate": 7.438644176193141e-05, "loss": 1.801, "step": 3208 }, { "epoch": 0.7697380252982435, "grad_norm": 0.8168545961380005, "learning_rate": 7.437045327364298e-05, "loss": 1.7976, "step": 3210 }, { "epoch": 
0.7702176128529464, "grad_norm": 0.7349648475646973, "learning_rate": 7.435446478535454e-05, "loss": 1.736, "step": 3212 }, { "epoch": 0.7706972004076494, "grad_norm": 0.8318861126899719, "learning_rate": 7.433847629706612e-05, "loss": 1.7923, "step": 3214 }, { "epoch": 0.7711767879623523, "grad_norm": 0.9523859620094299, "learning_rate": 7.432248780877769e-05, "loss": 1.7631, "step": 3216 }, { "epoch": 0.7716563755170553, "grad_norm": 0.8030902743339539, "learning_rate": 7.430649932048926e-05, "loss": 1.8011, "step": 3218 }, { "epoch": 0.7721359630717582, "grad_norm": 0.7280271649360657, "learning_rate": 7.429051083220082e-05, "loss": 1.8609, "step": 3220 }, { "epoch": 0.7726155506264613, "grad_norm": 0.765598475933075, "learning_rate": 7.427452234391239e-05, "loss": 1.7741, "step": 3222 }, { "epoch": 0.7730951381811642, "grad_norm": 0.7228988409042358, "learning_rate": 7.425853385562395e-05, "loss": 1.7672, "step": 3224 }, { "epoch": 0.7735747257358672, "grad_norm": 0.9624915719032288, "learning_rate": 7.424254536733553e-05, "loss": 1.8181, "step": 3226 }, { "epoch": 0.7740543132905701, "grad_norm": 0.8118737936019897, "learning_rate": 7.42265568790471e-05, "loss": 1.8601, "step": 3228 }, { "epoch": 0.7745339008452731, "grad_norm": 1.1571601629257202, "learning_rate": 7.421056839075866e-05, "loss": 1.7998, "step": 3230 }, { "epoch": 0.775013488399976, "grad_norm": 0.7497812509536743, "learning_rate": 7.419457990247023e-05, "loss": 1.8402, "step": 3232 }, { "epoch": 0.775493075954679, "grad_norm": 0.7567397952079773, "learning_rate": 7.41785914141818e-05, "loss": 1.8015, "step": 3234 }, { "epoch": 0.7759726635093819, "grad_norm": 0.7816744446754456, "learning_rate": 7.416260292589336e-05, "loss": 1.7933, "step": 3236 }, { "epoch": 0.7764522510640849, "grad_norm": 0.609933614730835, "learning_rate": 7.414661443760494e-05, "loss": 1.8177, "step": 3238 }, { "epoch": 0.7769318386187879, "grad_norm": 0.7790315747261047, "learning_rate": 7.41306259493165e-05, "loss": 
1.8343, "step": 3240 }, { "epoch": 0.7774114261734908, "grad_norm": 0.651598334312439, "learning_rate": 7.411463746102806e-05, "loss": 1.798, "step": 3242 }, { "epoch": 0.7778910137281938, "grad_norm": 0.7868808507919312, "learning_rate": 7.409864897273962e-05, "loss": 1.7726, "step": 3244 }, { "epoch": 0.7783706012828967, "grad_norm": 0.7679083347320557, "learning_rate": 7.408266048445119e-05, "loss": 1.8075, "step": 3246 }, { "epoch": 0.7788501888375997, "grad_norm": 0.7632908821105957, "learning_rate": 7.406667199616277e-05, "loss": 1.7825, "step": 3248 }, { "epoch": 0.7793297763923026, "grad_norm": 0.8223320841789246, "learning_rate": 7.405068350787433e-05, "loss": 1.7759, "step": 3250 }, { "epoch": 0.7798093639470056, "grad_norm": 0.81022047996521, "learning_rate": 7.40346950195859e-05, "loss": 1.8251, "step": 3252 }, { "epoch": 0.7802889515017085, "grad_norm": 0.8204407691955566, "learning_rate": 7.401870653129747e-05, "loss": 1.7648, "step": 3254 }, { "epoch": 0.7807685390564115, "grad_norm": 0.6493328809738159, "learning_rate": 7.400271804300903e-05, "loss": 1.7575, "step": 3256 }, { "epoch": 0.7812481266111144, "grad_norm": 0.7093895077705383, "learning_rate": 7.39867295547206e-05, "loss": 1.7907, "step": 3258 }, { "epoch": 0.7817277141658174, "grad_norm": 0.8470747470855713, "learning_rate": 7.397074106643218e-05, "loss": 1.8094, "step": 3260 }, { "epoch": 0.7822073017205203, "grad_norm": 0.6442354917526245, "learning_rate": 7.395475257814374e-05, "loss": 1.7708, "step": 3262 }, { "epoch": 0.7826868892752233, "grad_norm": 0.9636713266372681, "learning_rate": 7.393876408985531e-05, "loss": 1.8229, "step": 3264 }, { "epoch": 0.7831664768299262, "grad_norm": 0.8312327861785889, "learning_rate": 7.392277560156687e-05, "loss": 1.7628, "step": 3266 }, { "epoch": 0.7836460643846292, "grad_norm": 0.8050245046615601, "learning_rate": 7.390678711327844e-05, "loss": 1.8269, "step": 3268 }, { "epoch": 0.7841256519393321, "grad_norm": 0.7052808403968811, 
"learning_rate": 7.389079862499e-05, "loss": 1.7413, "step": 3270 }, { "epoch": 0.7846052394940352, "grad_norm": 0.7083160877227783, "learning_rate": 7.387481013670158e-05, "loss": 1.8369, "step": 3272 }, { "epoch": 0.785084827048738, "grad_norm": 0.725884735584259, "learning_rate": 7.385882164841315e-05, "loss": 1.8182, "step": 3274 }, { "epoch": 0.7855644146034411, "grad_norm": 0.8574575781822205, "learning_rate": 7.384283316012472e-05, "loss": 1.767, "step": 3276 }, { "epoch": 0.786044002158144, "grad_norm": 0.6999529004096985, "learning_rate": 7.382684467183628e-05, "loss": 1.7802, "step": 3278 }, { "epoch": 0.786523589712847, "grad_norm": 0.6409703493118286, "learning_rate": 7.381085618354785e-05, "loss": 1.7615, "step": 3280 }, { "epoch": 0.7870031772675499, "grad_norm": 0.9895511865615845, "learning_rate": 7.379486769525941e-05, "loss": 1.792, "step": 3282 }, { "epoch": 0.7874827648222529, "grad_norm": 1.2490204572677612, "learning_rate": 7.377887920697099e-05, "loss": 1.8093, "step": 3284 }, { "epoch": 0.7879623523769558, "grad_norm": 0.6821249127388, "learning_rate": 7.376289071868256e-05, "loss": 1.7874, "step": 3286 }, { "epoch": 0.7884419399316588, "grad_norm": 0.8021611571311951, "learning_rate": 7.374690223039412e-05, "loss": 1.744, "step": 3288 }, { "epoch": 0.7889215274863617, "grad_norm": 0.6431329250335693, "learning_rate": 7.373091374210569e-05, "loss": 1.8168, "step": 3290 }, { "epoch": 0.7894011150410647, "grad_norm": 0.818727970123291, "learning_rate": 7.371492525381725e-05, "loss": 1.7812, "step": 3292 }, { "epoch": 0.7898807025957676, "grad_norm": 0.8258451819419861, "learning_rate": 7.369893676552882e-05, "loss": 1.7623, "step": 3294 }, { "epoch": 0.7903602901504706, "grad_norm": 0.7029935717582703, "learning_rate": 7.36829482772404e-05, "loss": 1.7756, "step": 3296 }, { "epoch": 0.7908398777051735, "grad_norm": 0.7139607667922974, "learning_rate": 7.366695978895196e-05, "loss": 1.792, "step": 3298 }, { "epoch": 0.7913194652598765, 
"grad_norm": 0.7210775017738342, "learning_rate": 7.365097130066353e-05, "loss": 1.7972, "step": 3300 }, { "epoch": 0.7917990528145794, "grad_norm": 0.7938894033432007, "learning_rate": 7.36349828123751e-05, "loss": 1.783, "step": 3302 }, { "epoch": 0.7922786403692824, "grad_norm": 0.8393096327781677, "learning_rate": 7.361899432408666e-05, "loss": 1.7463, "step": 3304 }, { "epoch": 0.7927582279239854, "grad_norm": 0.6925516724586487, "learning_rate": 7.360300583579823e-05, "loss": 1.7664, "step": 3306 }, { "epoch": 0.7932378154786883, "grad_norm": 1.0660189390182495, "learning_rate": 7.358701734750981e-05, "loss": 1.7977, "step": 3308 }, { "epoch": 0.7937174030333913, "grad_norm": 0.7446070313453674, "learning_rate": 7.357102885922137e-05, "loss": 1.7843, "step": 3310 }, { "epoch": 0.7941969905880942, "grad_norm": 0.7842859625816345, "learning_rate": 7.355504037093294e-05, "loss": 1.7759, "step": 3312 }, { "epoch": 0.7946765781427972, "grad_norm": 0.8522348999977112, "learning_rate": 7.353905188264449e-05, "loss": 1.7962, "step": 3314 }, { "epoch": 0.7951561656975001, "grad_norm": 1.0387167930603027, "learning_rate": 7.352306339435606e-05, "loss": 1.7702, "step": 3316 }, { "epoch": 0.7956357532522031, "grad_norm": 0.7096536159515381, "learning_rate": 7.350707490606764e-05, "loss": 1.8035, "step": 3318 }, { "epoch": 0.796115340806906, "grad_norm": 0.7326920628547668, "learning_rate": 7.34910864177792e-05, "loss": 1.7849, "step": 3320 }, { "epoch": 0.7965949283616091, "grad_norm": 0.775649905204773, "learning_rate": 7.347509792949077e-05, "loss": 1.8459, "step": 3322 }, { "epoch": 0.797074515916312, "grad_norm": 0.725222110748291, "learning_rate": 7.345910944120233e-05, "loss": 1.7601, "step": 3324 }, { "epoch": 0.797554103471015, "grad_norm": 0.8618197441101074, "learning_rate": 7.34431209529139e-05, "loss": 1.8237, "step": 3326 }, { "epoch": 0.7980336910257179, "grad_norm": 0.8762163519859314, "learning_rate": 7.342713246462546e-05, "loss": 1.8236, "step": 3328 }, 
{ "epoch": 0.7985132785804209, "grad_norm": 0.8053518533706665, "learning_rate": 7.341114397633704e-05, "loss": 1.7939, "step": 3330 }, { "epoch": 0.7989928661351238, "grad_norm": 0.9238829016685486, "learning_rate": 7.339515548804861e-05, "loss": 1.7417, "step": 3332 }, { "epoch": 0.7994724536898268, "grad_norm": 0.734050452709198, "learning_rate": 7.337916699976017e-05, "loss": 1.8091, "step": 3334 }, { "epoch": 0.7999520412445297, "grad_norm": 0.7225842475891113, "learning_rate": 7.336317851147174e-05, "loss": 1.7773, "step": 3336 }, { "epoch": 0.8004316287992327, "grad_norm": 0.7738030552864075, "learning_rate": 7.33471900231833e-05, "loss": 1.7553, "step": 3338 }, { "epoch": 0.8009112163539356, "grad_norm": 0.7349827289581299, "learning_rate": 7.333120153489487e-05, "loss": 1.7955, "step": 3340 }, { "epoch": 0.8013908039086386, "grad_norm": 0.8458425998687744, "learning_rate": 7.331521304660645e-05, "loss": 1.769, "step": 3342 }, { "epoch": 0.8018703914633415, "grad_norm": 0.7268396615982056, "learning_rate": 7.329922455831802e-05, "loss": 1.7895, "step": 3344 }, { "epoch": 0.8023499790180445, "grad_norm": 0.7024621963500977, "learning_rate": 7.328323607002958e-05, "loss": 1.79, "step": 3346 }, { "epoch": 0.8028295665727474, "grad_norm": 0.8731728792190552, "learning_rate": 7.326724758174115e-05, "loss": 1.7699, "step": 3348 }, { "epoch": 0.8033091541274504, "grad_norm": 0.723365843296051, "learning_rate": 7.325125909345271e-05, "loss": 1.8703, "step": 3350 }, { "epoch": 0.8037887416821533, "grad_norm": 0.7114925384521484, "learning_rate": 7.323527060516428e-05, "loss": 1.7959, "step": 3352 }, { "epoch": 0.8042683292368563, "grad_norm": 0.70849609375, "learning_rate": 7.321928211687586e-05, "loss": 1.7913, "step": 3354 }, { "epoch": 0.8047479167915592, "grad_norm": 0.7906116843223572, "learning_rate": 7.320329362858742e-05, "loss": 1.7921, "step": 3356 }, { "epoch": 0.8052275043462622, "grad_norm": 0.7244399189949036, "learning_rate": 7.318730514029899e-05, 
"loss": 1.725, "step": 3358 }, { "epoch": 0.8057070919009651, "grad_norm": 0.7779842019081116, "learning_rate": 7.317131665201056e-05, "loss": 1.8328, "step": 3360 }, { "epoch": 0.8061866794556681, "grad_norm": 0.6836124062538147, "learning_rate": 7.315532816372212e-05, "loss": 1.8025, "step": 3362 }, { "epoch": 0.806666267010371, "grad_norm": 0.7371758222579956, "learning_rate": 7.313933967543369e-05, "loss": 1.7946, "step": 3364 }, { "epoch": 0.807145854565074, "grad_norm": 0.6931157112121582, "learning_rate": 7.312335118714527e-05, "loss": 1.8202, "step": 3366 }, { "epoch": 0.8076254421197769, "grad_norm": 0.7714723944664001, "learning_rate": 7.310736269885683e-05, "loss": 1.7654, "step": 3368 }, { "epoch": 0.80810502967448, "grad_norm": 0.7582923769950867, "learning_rate": 7.30913742105684e-05, "loss": 1.7561, "step": 3370 }, { "epoch": 0.808584617229183, "grad_norm": 0.8086176514625549, "learning_rate": 7.307538572227996e-05, "loss": 1.7855, "step": 3372 }, { "epoch": 0.8090642047838859, "grad_norm": 0.7114076614379883, "learning_rate": 7.305939723399153e-05, "loss": 1.7781, "step": 3374 }, { "epoch": 0.8095437923385889, "grad_norm": 0.7134729623794556, "learning_rate": 7.30434087457031e-05, "loss": 1.7504, "step": 3376 }, { "epoch": 0.8100233798932918, "grad_norm": 0.7734677195549011, "learning_rate": 7.302742025741467e-05, "loss": 1.8081, "step": 3378 }, { "epoch": 0.8105029674479948, "grad_norm": 0.6830224990844727, "learning_rate": 7.301143176912624e-05, "loss": 1.7177, "step": 3380 }, { "epoch": 0.8109825550026977, "grad_norm": 0.9041829109191895, "learning_rate": 7.29954432808378e-05, "loss": 1.8386, "step": 3382 }, { "epoch": 0.8114621425574007, "grad_norm": 0.6897194981575012, "learning_rate": 7.297945479254937e-05, "loss": 1.7953, "step": 3384 }, { "epoch": 0.8119417301121036, "grad_norm": 0.6677941679954529, "learning_rate": 7.296346630426092e-05, "loss": 1.7443, "step": 3386 }, { "epoch": 0.8124213176668066, "grad_norm": 1.0562522411346436, 
"learning_rate": 7.29474778159725e-05, "loss": 1.7835, "step": 3388 }, { "epoch": 0.8129009052215095, "grad_norm": 0.7301346659660339, "learning_rate": 7.293148932768407e-05, "loss": 1.7854, "step": 3390 }, { "epoch": 0.8133804927762125, "grad_norm": 0.7855016589164734, "learning_rate": 7.291550083939563e-05, "loss": 1.8283, "step": 3392 }, { "epoch": 0.8138600803309154, "grad_norm": 0.7802376747131348, "learning_rate": 7.28995123511072e-05, "loss": 1.789, "step": 3394 }, { "epoch": 0.8143396678856184, "grad_norm": 0.6985399127006531, "learning_rate": 7.288352386281877e-05, "loss": 1.7978, "step": 3396 }, { "epoch": 0.8148192554403213, "grad_norm": 0.8573805689811707, "learning_rate": 7.286753537453033e-05, "loss": 1.7949, "step": 3398 }, { "epoch": 0.8152988429950243, "grad_norm": 0.9617770314216614, "learning_rate": 7.285154688624191e-05, "loss": 1.826, "step": 3400 }, { "epoch": 0.8157784305497272, "grad_norm": 0.7437112331390381, "learning_rate": 7.283555839795348e-05, "loss": 1.8045, "step": 3402 }, { "epoch": 0.8162580181044302, "grad_norm": 0.8008386492729187, "learning_rate": 7.281956990966504e-05, "loss": 1.8271, "step": 3404 }, { "epoch": 0.8167376056591331, "grad_norm": 0.7610289454460144, "learning_rate": 7.280358142137661e-05, "loss": 1.8148, "step": 3406 }, { "epoch": 0.8172171932138361, "grad_norm": 0.8185227513313293, "learning_rate": 7.278759293308817e-05, "loss": 1.8032, "step": 3408 }, { "epoch": 0.817696780768539, "grad_norm": 1.1611098051071167, "learning_rate": 7.277160444479975e-05, "loss": 1.7799, "step": 3410 }, { "epoch": 0.818176368323242, "grad_norm": 0.6973974108695984, "learning_rate": 7.275561595651132e-05, "loss": 1.8613, "step": 3412 }, { "epoch": 0.8186559558779449, "grad_norm": 0.7533416748046875, "learning_rate": 7.273962746822288e-05, "loss": 1.8051, "step": 3414 }, { "epoch": 0.8191355434326479, "grad_norm": 1.279892921447754, "learning_rate": 7.272363897993445e-05, "loss": 1.7297, "step": 3416 }, { "epoch": 0.8196151309873508, 
"grad_norm": 0.9642564058303833, "learning_rate": 7.270765049164602e-05, "loss": 1.8081, "step": 3418 }, { "epoch": 0.8200947185420538, "grad_norm": 0.918080747127533, "learning_rate": 7.269166200335758e-05, "loss": 1.8036, "step": 3420 }, { "epoch": 0.8205743060967567, "grad_norm": 0.6568244695663452, "learning_rate": 7.267567351506916e-05, "loss": 1.8019, "step": 3422 }, { "epoch": 0.8210538936514598, "grad_norm": 0.731926441192627, "learning_rate": 7.265968502678073e-05, "loss": 1.8316, "step": 3424 }, { "epoch": 0.8215334812061627, "grad_norm": 0.8405430316925049, "learning_rate": 7.264369653849229e-05, "loss": 1.8321, "step": 3426 }, { "epoch": 0.8220130687608657, "grad_norm": 0.8710471987724304, "learning_rate": 7.262770805020386e-05, "loss": 1.8326, "step": 3428 }, { "epoch": 0.8224926563155686, "grad_norm": 0.8191102743148804, "learning_rate": 7.261171956191542e-05, "loss": 1.7867, "step": 3430 }, { "epoch": 0.8229722438702716, "grad_norm": 0.6755014061927795, "learning_rate": 7.259573107362699e-05, "loss": 1.7958, "step": 3432 }, { "epoch": 0.8234518314249745, "grad_norm": 0.7627677321434021, "learning_rate": 7.257974258533857e-05, "loss": 1.7739, "step": 3434 }, { "epoch": 0.8239314189796775, "grad_norm": 0.8865141868591309, "learning_rate": 7.256375409705013e-05, "loss": 1.82, "step": 3436 }, { "epoch": 0.8244110065343805, "grad_norm": 1.0364980697631836, "learning_rate": 7.25477656087617e-05, "loss": 1.8749, "step": 3438 }, { "epoch": 0.8248905940890834, "grad_norm": 1.0283490419387817, "learning_rate": 7.253177712047327e-05, "loss": 1.7697, "step": 3440 }, { "epoch": 0.8253701816437864, "grad_norm": 0.7547515630722046, "learning_rate": 7.251578863218483e-05, "loss": 1.7683, "step": 3442 }, { "epoch": 0.8258497691984893, "grad_norm": 0.9197484850883484, "learning_rate": 7.24998001438964e-05, "loss": 1.7891, "step": 3444 }, { "epoch": 0.8263293567531923, "grad_norm": 0.7391306757926941, "learning_rate": 7.248381165560798e-05, "loss": 1.7688, "step": 3446 
}, { "epoch": 0.8268089443078952, "grad_norm": 0.7901893258094788, "learning_rate": 7.246782316731954e-05, "loss": 1.8469, "step": 3448 }, { "epoch": 0.8272885318625982, "grad_norm": 0.6807388663291931, "learning_rate": 7.245183467903111e-05, "loss": 1.8384, "step": 3450 }, { "epoch": 0.8277681194173011, "grad_norm": 0.7452210187911987, "learning_rate": 7.243584619074267e-05, "loss": 1.7851, "step": 3452 }, { "epoch": 0.8282477069720041, "grad_norm": 0.6523199677467346, "learning_rate": 7.241985770245424e-05, "loss": 1.7732, "step": 3454 }, { "epoch": 0.828727294526707, "grad_norm": 0.7923526167869568, "learning_rate": 7.24038692141658e-05, "loss": 1.7479, "step": 3456 }, { "epoch": 0.82920688208141, "grad_norm": 0.8439397811889648, "learning_rate": 7.238788072587738e-05, "loss": 1.8002, "step": 3458 }, { "epoch": 0.8296864696361129, "grad_norm": 0.7616860270500183, "learning_rate": 7.237189223758894e-05, "loss": 1.8076, "step": 3460 }, { "epoch": 0.8301660571908159, "grad_norm": 0.7048877477645874, "learning_rate": 7.23559037493005e-05, "loss": 1.7422, "step": 3462 }, { "epoch": 0.8306456447455188, "grad_norm": 0.6754016280174255, "learning_rate": 7.233991526101207e-05, "loss": 1.7781, "step": 3464 }, { "epoch": 0.8311252323002218, "grad_norm": 0.6776092052459717, "learning_rate": 7.232392677272363e-05, "loss": 1.7885, "step": 3466 }, { "epoch": 0.8316048198549247, "grad_norm": 0.8335336446762085, "learning_rate": 7.230793828443521e-05, "loss": 1.7896, "step": 3468 }, { "epoch": 0.8320844074096277, "grad_norm": 0.7285019159317017, "learning_rate": 7.229194979614678e-05, "loss": 1.7789, "step": 3470 }, { "epoch": 0.8325639949643306, "grad_norm": 0.6907365918159485, "learning_rate": 7.227596130785834e-05, "loss": 1.7613, "step": 3472 }, { "epoch": 0.8330435825190337, "grad_norm": 0.6421952247619629, "learning_rate": 7.225997281956991e-05, "loss": 1.7962, "step": 3474 }, { "epoch": 0.8335231700737366, "grad_norm": 0.7867948412895203, "learning_rate": 
7.224398433128147e-05, "loss": 1.7799, "step": 3476 }, { "epoch": 0.8340027576284396, "grad_norm": 0.7450773119926453, "learning_rate": 7.222799584299304e-05, "loss": 1.7968, "step": 3478 }, { "epoch": 0.8344823451831425, "grad_norm": 0.7415729761123657, "learning_rate": 7.221200735470462e-05, "loss": 1.7754, "step": 3480 }, { "epoch": 0.8349619327378455, "grad_norm": 0.7813719511032104, "learning_rate": 7.219601886641619e-05, "loss": 1.8129, "step": 3482 }, { "epoch": 0.8354415202925484, "grad_norm": 0.9102758169174194, "learning_rate": 7.218003037812775e-05, "loss": 1.8013, "step": 3484 }, { "epoch": 0.8359211078472514, "grad_norm": 0.7490069270133972, "learning_rate": 7.216404188983932e-05, "loss": 1.8131, "step": 3486 }, { "epoch": 0.8364006954019543, "grad_norm": 0.7951523065567017, "learning_rate": 7.214805340155088e-05, "loss": 1.8088, "step": 3488 }, { "epoch": 0.8368802829566573, "grad_norm": 0.7305328249931335, "learning_rate": 7.213206491326245e-05, "loss": 1.7487, "step": 3490 }, { "epoch": 0.8373598705113602, "grad_norm": 0.6881986260414124, "learning_rate": 7.211607642497403e-05, "loss": 1.7859, "step": 3492 }, { "epoch": 0.8378394580660632, "grad_norm": 0.8157140612602234, "learning_rate": 7.21000879366856e-05, "loss": 1.8495, "step": 3494 }, { "epoch": 0.8383190456207661, "grad_norm": 0.9803712368011475, "learning_rate": 7.208409944839716e-05, "loss": 1.7618, "step": 3496 }, { "epoch": 0.8387986331754691, "grad_norm": 0.7217230200767517, "learning_rate": 7.206811096010872e-05, "loss": 1.7915, "step": 3498 }, { "epoch": 0.839278220730172, "grad_norm": 0.6947243213653564, "learning_rate": 7.205212247182029e-05, "loss": 1.7727, "step": 3500 }, { "epoch": 0.839757808284875, "grad_norm": 0.7628993988037109, "learning_rate": 7.203613398353186e-05, "loss": 1.7625, "step": 3502 }, { "epoch": 0.840237395839578, "grad_norm": 0.7505423426628113, "learning_rate": 7.202014549524344e-05, "loss": 1.7186, "step": 3504 }, { "epoch": 0.8407169833942809, "grad_norm": 
0.92620849609375, "learning_rate": 7.2004157006955e-05, "loss": 1.7925, "step": 3506 }, { "epoch": 0.8411965709489839, "grad_norm": 0.7513558268547058, "learning_rate": 7.198816851866657e-05, "loss": 1.7858, "step": 3508 }, { "epoch": 0.8416761585036868, "grad_norm": 0.7778917551040649, "learning_rate": 7.197218003037813e-05, "loss": 1.7527, "step": 3510 }, { "epoch": 0.8421557460583898, "grad_norm": 0.7385754585266113, "learning_rate": 7.19561915420897e-05, "loss": 1.7814, "step": 3512 }, { "epoch": 0.8426353336130927, "grad_norm": 0.7506041526794434, "learning_rate": 7.194020305380126e-05, "loss": 1.7617, "step": 3514 }, { "epoch": 0.8431149211677957, "grad_norm": 0.6702131628990173, "learning_rate": 7.192421456551284e-05, "loss": 1.7896, "step": 3516 }, { "epoch": 0.8435945087224986, "grad_norm": 0.8267406821250916, "learning_rate": 7.190822607722441e-05, "loss": 1.8107, "step": 3518 }, { "epoch": 0.8440740962772016, "grad_norm": 0.6582144498825073, "learning_rate": 7.189223758893597e-05, "loss": 1.7968, "step": 3520 }, { "epoch": 0.8445536838319045, "grad_norm": 0.8363116979598999, "learning_rate": 7.187624910064754e-05, "loss": 1.7906, "step": 3522 }, { "epoch": 0.8450332713866076, "grad_norm": 0.9731744527816772, "learning_rate": 7.18602606123591e-05, "loss": 1.8025, "step": 3524 }, { "epoch": 0.8455128589413105, "grad_norm": 0.691801130771637, "learning_rate": 7.184427212407067e-05, "loss": 1.8003, "step": 3526 }, { "epoch": 0.8459924464960135, "grad_norm": 0.7644620537757874, "learning_rate": 7.182828363578225e-05, "loss": 1.7995, "step": 3528 }, { "epoch": 0.8464720340507164, "grad_norm": 0.7591093182563782, "learning_rate": 7.181229514749382e-05, "loss": 1.7429, "step": 3530 }, { "epoch": 0.8469516216054194, "grad_norm": 0.8140478730201721, "learning_rate": 7.179630665920537e-05, "loss": 1.813, "step": 3532 }, { "epoch": 0.8474312091601223, "grad_norm": 0.7690044641494751, "learning_rate": 7.178031817091693e-05, "loss": 1.7965, "step": 3534 }, { "epoch": 
0.8479107967148253, "grad_norm": 0.7448798418045044, "learning_rate": 7.17643296826285e-05, "loss": 1.7959, "step": 3536 }, { "epoch": 0.8483903842695282, "grad_norm": 0.6890676617622375, "learning_rate": 7.174834119434008e-05, "loss": 1.7637, "step": 3538 }, { "epoch": 0.8488699718242312, "grad_norm": 0.8184501528739929, "learning_rate": 7.173235270605164e-05, "loss": 1.7529, "step": 3540 }, { "epoch": 0.8493495593789341, "grad_norm": 0.8835048675537109, "learning_rate": 7.171636421776321e-05, "loss": 1.754, "step": 3542 }, { "epoch": 0.8498291469336371, "grad_norm": 0.7899619936943054, "learning_rate": 7.170037572947478e-05, "loss": 1.8235, "step": 3544 }, { "epoch": 0.85030873448834, "grad_norm": 0.6529526710510254, "learning_rate": 7.168438724118634e-05, "loss": 1.7909, "step": 3546 }, { "epoch": 0.850788322043043, "grad_norm": 0.6888742446899414, "learning_rate": 7.166839875289791e-05, "loss": 1.7781, "step": 3548 }, { "epoch": 0.8512679095977459, "grad_norm": 0.7306722402572632, "learning_rate": 7.165241026460949e-05, "loss": 1.7654, "step": 3550 }, { "epoch": 0.8517474971524489, "grad_norm": 0.734072208404541, "learning_rate": 7.163642177632105e-05, "loss": 1.7707, "step": 3552 }, { "epoch": 0.8522270847071518, "grad_norm": 0.7796537280082703, "learning_rate": 7.162043328803262e-05, "loss": 1.7684, "step": 3554 }, { "epoch": 0.8527066722618548, "grad_norm": 0.7317808270454407, "learning_rate": 7.160444479974418e-05, "loss": 1.7256, "step": 3556 }, { "epoch": 0.8531862598165577, "grad_norm": 0.7617056965827942, "learning_rate": 7.158845631145575e-05, "loss": 1.835, "step": 3558 }, { "epoch": 0.8536658473712607, "grad_norm": 0.8613268733024597, "learning_rate": 7.157246782316732e-05, "loss": 1.7882, "step": 3560 }, { "epoch": 0.8541454349259636, "grad_norm": 0.6718327403068542, "learning_rate": 7.15564793348789e-05, "loss": 1.7462, "step": 3562 }, { "epoch": 0.8546250224806666, "grad_norm": 0.7175064086914062, "learning_rate": 7.154049084659046e-05, "loss": 
1.81, "step": 3564 }, { "epoch": 0.8551046100353695, "grad_norm": 0.6850622892379761, "learning_rate": 7.152450235830203e-05, "loss": 1.7711, "step": 3566 }, { "epoch": 0.8555841975900725, "grad_norm": 0.9632387757301331, "learning_rate": 7.150851387001359e-05, "loss": 1.7929, "step": 3568 }, { "epoch": 0.8560637851447755, "grad_norm": 0.7028648853302002, "learning_rate": 7.149252538172516e-05, "loss": 1.8325, "step": 3570 }, { "epoch": 0.8565433726994784, "grad_norm": 0.7197695970535278, "learning_rate": 7.147653689343672e-05, "loss": 1.7602, "step": 3572 }, { "epoch": 0.8570229602541815, "grad_norm": 0.7885989546775818, "learning_rate": 7.14605484051483e-05, "loss": 1.8288, "step": 3574 }, { "epoch": 0.8575025478088844, "grad_norm": 0.8167256712913513, "learning_rate": 7.144455991685987e-05, "loss": 1.7559, "step": 3576 }, { "epoch": 0.8579821353635874, "grad_norm": 0.7871191501617432, "learning_rate": 7.142857142857143e-05, "loss": 1.8399, "step": 3578 }, { "epoch": 0.8584617229182903, "grad_norm": 0.7267895340919495, "learning_rate": 7.1412582940283e-05, "loss": 1.8588, "step": 3580 }, { "epoch": 0.8589413104729933, "grad_norm": 0.8294629454612732, "learning_rate": 7.139659445199457e-05, "loss": 1.8273, "step": 3582 }, { "epoch": 0.8594208980276962, "grad_norm": 0.9186896681785583, "learning_rate": 7.138060596370613e-05, "loss": 1.8079, "step": 3584 }, { "epoch": 0.8599004855823992, "grad_norm": 0.7976474165916443, "learning_rate": 7.136461747541771e-05, "loss": 1.7655, "step": 3586 }, { "epoch": 0.8603800731371021, "grad_norm": 0.823332667350769, "learning_rate": 7.134862898712928e-05, "loss": 1.7516, "step": 3588 }, { "epoch": 0.8608596606918051, "grad_norm": 0.6516361236572266, "learning_rate": 7.133264049884084e-05, "loss": 1.7551, "step": 3590 }, { "epoch": 0.861339248246508, "grad_norm": 0.7900442481040955, "learning_rate": 7.131665201055241e-05, "loss": 1.7914, "step": 3592 }, { "epoch": 0.861818835801211, "grad_norm": 1.0410716533660889, 
"learning_rate": 7.130066352226397e-05, "loss": 1.7858, "step": 3594 }, { "epoch": 0.8622984233559139, "grad_norm": 0.7463567852973938, "learning_rate": 7.128467503397554e-05, "loss": 1.7727, "step": 3596 }, { "epoch": 0.8627780109106169, "grad_norm": 0.7418103218078613, "learning_rate": 7.126868654568712e-05, "loss": 1.7731, "step": 3598 }, { "epoch": 0.8632575984653198, "grad_norm": 0.8903999924659729, "learning_rate": 7.125269805739868e-05, "loss": 1.8019, "step": 3600 }, { "epoch": 0.8632575984653198, "eval_loss": 1.7702364921569824, "eval_runtime": 331.2659, "eval_samples_per_second": 402.846, "eval_steps_per_second": 12.591, "step": 3600 }, { "epoch": 0.8637371860200228, "grad_norm": 0.7423052191734314, "learning_rate": 7.123670956911025e-05, "loss": 1.7948, "step": 3602 }, { "epoch": 0.8642167735747257, "grad_norm": 0.7115236520767212, "learning_rate": 7.12207210808218e-05, "loss": 1.7746, "step": 3604 }, { "epoch": 0.8646963611294287, "grad_norm": 0.9478175640106201, "learning_rate": 7.120473259253338e-05, "loss": 1.7676, "step": 3606 }, { "epoch": 0.8651759486841316, "grad_norm": 0.7322227358818054, "learning_rate": 7.118874410424495e-05, "loss": 1.8074, "step": 3608 }, { "epoch": 0.8656555362388346, "grad_norm": 0.9429389834403992, "learning_rate": 7.117275561595651e-05, "loss": 1.7689, "step": 3610 }, { "epoch": 0.8661351237935375, "grad_norm": 0.7195963859558105, "learning_rate": 7.115676712766808e-05, "loss": 1.7818, "step": 3612 }, { "epoch": 0.8666147113482405, "grad_norm": 0.784024178981781, "learning_rate": 7.114077863937964e-05, "loss": 1.797, "step": 3614 }, { "epoch": 0.8670942989029434, "grad_norm": 0.7494034767150879, "learning_rate": 7.112479015109121e-05, "loss": 1.7859, "step": 3616 }, { "epoch": 0.8675738864576464, "grad_norm": 1.0515717267990112, "learning_rate": 7.110880166280279e-05, "loss": 1.7276, "step": 3618 }, { "epoch": 0.8680534740123493, "grad_norm": 0.6659417152404785, "learning_rate": 7.109281317451435e-05, "loss": 1.8053, 
"step": 3620 }, { "epoch": 0.8685330615670523, "grad_norm": 0.7618584036827087, "learning_rate": 7.107682468622592e-05, "loss": 1.8236, "step": 3622 }, { "epoch": 0.8690126491217552, "grad_norm": 0.7394659519195557, "learning_rate": 7.106083619793749e-05, "loss": 1.7773, "step": 3624 }, { "epoch": 0.8694922366764583, "grad_norm": 0.6517754793167114, "learning_rate": 7.104484770964905e-05, "loss": 1.8037, "step": 3626 }, { "epoch": 0.8699718242311612, "grad_norm": 0.9361416697502136, "learning_rate": 7.102885922136062e-05, "loss": 1.7741, "step": 3628 }, { "epoch": 0.8704514117858642, "grad_norm": 0.6966683864593506, "learning_rate": 7.10128707330722e-05, "loss": 1.8582, "step": 3630 }, { "epoch": 0.8709309993405671, "grad_norm": 0.7604095935821533, "learning_rate": 7.099688224478376e-05, "loss": 1.7946, "step": 3632 }, { "epoch": 0.8714105868952701, "grad_norm": 0.7040010094642639, "learning_rate": 7.098089375649533e-05, "loss": 1.7231, "step": 3634 }, { "epoch": 0.8718901744499731, "grad_norm": 0.6783366203308105, "learning_rate": 7.09649052682069e-05, "loss": 1.7771, "step": 3636 }, { "epoch": 0.872369762004676, "grad_norm": 0.6508252620697021, "learning_rate": 7.094891677991846e-05, "loss": 1.7633, "step": 3638 }, { "epoch": 0.872849349559379, "grad_norm": 0.7667587399482727, "learning_rate": 7.093292829163002e-05, "loss": 1.8097, "step": 3640 }, { "epoch": 0.8733289371140819, "grad_norm": 0.6357836723327637, "learning_rate": 7.09169398033416e-05, "loss": 1.7468, "step": 3642 }, { "epoch": 0.8738085246687849, "grad_norm": 0.7282921671867371, "learning_rate": 7.090095131505317e-05, "loss": 1.7993, "step": 3644 }, { "epoch": 0.8742881122234878, "grad_norm": 0.695743203163147, "learning_rate": 7.088496282676474e-05, "loss": 1.7426, "step": 3646 }, { "epoch": 0.8747676997781908, "grad_norm": 0.8403111696243286, "learning_rate": 7.08689743384763e-05, "loss": 1.7707, "step": 3648 }, { "epoch": 0.8752472873328937, "grad_norm": 0.8287842273712158, "learning_rate": 
7.085298585018787e-05, "loss": 1.8238, "step": 3650 }, { "epoch": 0.8757268748875967, "grad_norm": 0.6799539923667908, "learning_rate": 7.083699736189943e-05, "loss": 1.7492, "step": 3652 }, { "epoch": 0.8762064624422996, "grad_norm": 0.7174176573753357, "learning_rate": 7.082100887361101e-05, "loss": 1.7072, "step": 3654 }, { "epoch": 0.8766860499970026, "grad_norm": 0.736229419708252, "learning_rate": 7.080502038532258e-05, "loss": 1.7645, "step": 3656 }, { "epoch": 0.8771656375517055, "grad_norm": 0.706611692905426, "learning_rate": 7.078903189703414e-05, "loss": 1.7397, "step": 3658 }, { "epoch": 0.8776452251064085, "grad_norm": 0.6439035534858704, "learning_rate": 7.077304340874571e-05, "loss": 1.7366, "step": 3660 }, { "epoch": 0.8781248126611114, "grad_norm": 0.7397764921188354, "learning_rate": 7.075705492045727e-05, "loss": 1.8295, "step": 3662 }, { "epoch": 0.8786044002158144, "grad_norm": 0.7394357323646545, "learning_rate": 7.074106643216884e-05, "loss": 1.8332, "step": 3664 }, { "epoch": 0.8790839877705173, "grad_norm": 0.6792792081832886, "learning_rate": 7.072507794388042e-05, "loss": 1.8239, "step": 3666 }, { "epoch": 0.8795635753252203, "grad_norm": 0.8927785754203796, "learning_rate": 7.070908945559199e-05, "loss": 1.7963, "step": 3668 }, { "epoch": 0.8800431628799232, "grad_norm": 0.749718427658081, "learning_rate": 7.069310096730355e-05, "loss": 1.7878, "step": 3670 }, { "epoch": 0.8805227504346262, "grad_norm": 0.8195337057113647, "learning_rate": 7.067711247901512e-05, "loss": 1.8151, "step": 3672 }, { "epoch": 0.8810023379893291, "grad_norm": 0.6566280722618103, "learning_rate": 7.066112399072668e-05, "loss": 1.7847, "step": 3674 }, { "epoch": 0.8814819255440322, "grad_norm": 0.7958349585533142, "learning_rate": 7.064513550243825e-05, "loss": 1.7971, "step": 3676 }, { "epoch": 0.881961513098735, "grad_norm": 0.7316417694091797, "learning_rate": 7.062914701414981e-05, "loss": 1.8076, "step": 3678 }, { "epoch": 0.8824411006534381, "grad_norm": 
0.7380154132843018, "learning_rate": 7.061315852586138e-05, "loss": 1.7885, "step": 3680 }, { "epoch": 0.882920688208141, "grad_norm": 0.7790927886962891, "learning_rate": 7.059717003757295e-05, "loss": 1.7687, "step": 3682 }, { "epoch": 0.883400275762844, "grad_norm": 0.7839226722717285, "learning_rate": 7.058118154928451e-05, "loss": 1.8187, "step": 3684 }, { "epoch": 0.8838798633175469, "grad_norm": 0.7479340434074402, "learning_rate": 7.056519306099608e-05, "loss": 1.7369, "step": 3686 }, { "epoch": 0.8843594508722499, "grad_norm": 0.7620726227760315, "learning_rate": 7.054920457270766e-05, "loss": 1.813, "step": 3688 }, { "epoch": 0.8848390384269528, "grad_norm": 0.7418226599693298, "learning_rate": 7.053321608441922e-05, "loss": 1.7159, "step": 3690 }, { "epoch": 0.8853186259816558, "grad_norm": 0.8096802830696106, "learning_rate": 7.051722759613079e-05, "loss": 1.7214, "step": 3692 }, { "epoch": 0.8857982135363587, "grad_norm": 1.093063473701477, "learning_rate": 7.050123910784235e-05, "loss": 1.7905, "step": 3694 }, { "epoch": 0.8862778010910617, "grad_norm": 0.7746894955635071, "learning_rate": 7.048525061955392e-05, "loss": 1.8236, "step": 3696 }, { "epoch": 0.8867573886457646, "grad_norm": 0.6991764903068542, "learning_rate": 7.046926213126548e-05, "loss": 1.7572, "step": 3698 }, { "epoch": 0.8872369762004676, "grad_norm": 0.7457902431488037, "learning_rate": 7.045327364297706e-05, "loss": 1.7589, "step": 3700 }, { "epoch": 0.8877165637551706, "grad_norm": 0.6851925253868103, "learning_rate": 7.043728515468863e-05, "loss": 1.7801, "step": 3702 }, { "epoch": 0.8881961513098735, "grad_norm": 0.7776912450790405, "learning_rate": 7.04212966664002e-05, "loss": 1.7794, "step": 3704 }, { "epoch": 0.8886757388645765, "grad_norm": 0.6833451986312866, "learning_rate": 7.040530817811176e-05, "loss": 1.7839, "step": 3706 }, { "epoch": 0.8891553264192794, "grad_norm": 0.6418639421463013, "learning_rate": 7.038931968982333e-05, "loss": 1.7857, "step": 3708 }, { 
"epoch": 0.8896349139739824, "grad_norm": 0.8115004897117615, "learning_rate": 7.037333120153489e-05, "loss": 1.7761, "step": 3710 }, { "epoch": 0.8901145015286853, "grad_norm": 0.6363310813903809, "learning_rate": 7.035734271324647e-05, "loss": 1.7139, "step": 3712 }, { "epoch": 0.8905940890833883, "grad_norm": 0.7716100811958313, "learning_rate": 7.034135422495804e-05, "loss": 1.7862, "step": 3714 }, { "epoch": 0.8910736766380912, "grad_norm": 0.8117420673370361, "learning_rate": 7.03253657366696e-05, "loss": 1.8263, "step": 3716 }, { "epoch": 0.8915532641927942, "grad_norm": 0.6747289299964905, "learning_rate": 7.030937724838117e-05, "loss": 1.807, "step": 3718 }, { "epoch": 0.8920328517474971, "grad_norm": 0.7210219502449036, "learning_rate": 7.029338876009273e-05, "loss": 1.8023, "step": 3720 }, { "epoch": 0.8925124393022001, "grad_norm": 0.8126586079597473, "learning_rate": 7.02774002718043e-05, "loss": 1.7767, "step": 3722 }, { "epoch": 0.892992026856903, "grad_norm": 0.9656429886817932, "learning_rate": 7.026141178351588e-05, "loss": 1.758, "step": 3724 }, { "epoch": 0.893471614411606, "grad_norm": 0.814085841178894, "learning_rate": 7.024542329522744e-05, "loss": 1.7501, "step": 3726 }, { "epoch": 0.893951201966309, "grad_norm": 0.6692916750907898, "learning_rate": 7.022943480693901e-05, "loss": 1.801, "step": 3728 }, { "epoch": 0.894430789521012, "grad_norm": 0.8301209807395935, "learning_rate": 7.021344631865058e-05, "loss": 1.8012, "step": 3730 }, { "epoch": 0.8949103770757149, "grad_norm": 0.7027314901351929, "learning_rate": 7.019745783036214e-05, "loss": 1.8226, "step": 3732 }, { "epoch": 0.8953899646304179, "grad_norm": 0.756018340587616, "learning_rate": 7.018146934207371e-05, "loss": 1.7822, "step": 3734 }, { "epoch": 0.8958695521851208, "grad_norm": 0.7270020246505737, "learning_rate": 7.016548085378529e-05, "loss": 1.8626, "step": 3736 }, { "epoch": 0.8963491397398238, "grad_norm": 0.6810704469680786, "learning_rate": 7.014949236549685e-05, 
"loss": 1.7416, "step": 3738 }, { "epoch": 0.8968287272945267, "grad_norm": 0.6944155097007751, "learning_rate": 7.013350387720842e-05, "loss": 1.7364, "step": 3740 }, { "epoch": 0.8973083148492297, "grad_norm": 0.769107460975647, "learning_rate": 7.011751538891998e-05, "loss": 1.8456, "step": 3742 }, { "epoch": 0.8977879024039326, "grad_norm": 0.7342574000358582, "learning_rate": 7.010152690063155e-05, "loss": 1.7721, "step": 3744 }, { "epoch": 0.8982674899586356, "grad_norm": 0.9443333745002747, "learning_rate": 7.008553841234312e-05, "loss": 1.8345, "step": 3746 }, { "epoch": 0.8987470775133385, "grad_norm": 0.6666823029518127, "learning_rate": 7.00695499240547e-05, "loss": 1.8254, "step": 3748 }, { "epoch": 0.8992266650680415, "grad_norm": 0.802214503288269, "learning_rate": 7.005356143576625e-05, "loss": 1.8223, "step": 3750 }, { "epoch": 0.8997062526227444, "grad_norm": 0.6630362868309021, "learning_rate": 7.003757294747781e-05, "loss": 1.7894, "step": 3752 }, { "epoch": 0.9001858401774474, "grad_norm": 0.6564825177192688, "learning_rate": 7.002158445918938e-05, "loss": 1.7727, "step": 3754 }, { "epoch": 0.9006654277321503, "grad_norm": 0.689751148223877, "learning_rate": 7.000559597090094e-05, "loss": 1.7793, "step": 3756 }, { "epoch": 0.9011450152868533, "grad_norm": 0.6983111500740051, "learning_rate": 6.998960748261252e-05, "loss": 1.7641, "step": 3758 }, { "epoch": 0.9016246028415562, "grad_norm": 0.7889987230300903, "learning_rate": 6.997361899432409e-05, "loss": 1.8344, "step": 3760 }, { "epoch": 0.9021041903962592, "grad_norm": 0.8261160254478455, "learning_rate": 6.995763050603565e-05, "loss": 1.7606, "step": 3762 }, { "epoch": 0.9025837779509621, "grad_norm": 0.6871518492698669, "learning_rate": 6.994164201774722e-05, "loss": 1.762, "step": 3764 }, { "epoch": 0.9030633655056651, "grad_norm": 0.6509608626365662, "learning_rate": 6.992565352945879e-05, "loss": 1.7964, "step": 3766 }, { "epoch": 0.9035429530603681, "grad_norm": 0.6852083802223206, 
"learning_rate": 6.990966504117035e-05, "loss": 1.8473, "step": 3768 }, { "epoch": 0.904022540615071, "grad_norm": 0.6640275716781616, "learning_rate": 6.989367655288193e-05, "loss": 1.7842, "step": 3770 }, { "epoch": 0.904502128169774, "grad_norm": 0.6655310988426208, "learning_rate": 6.98776880645935e-05, "loss": 1.752, "step": 3772 }, { "epoch": 0.9049817157244769, "grad_norm": 0.6869705319404602, "learning_rate": 6.986169957630506e-05, "loss": 1.7466, "step": 3774 }, { "epoch": 0.90546130327918, "grad_norm": 0.7209716439247131, "learning_rate": 6.984571108801663e-05, "loss": 1.7483, "step": 3776 }, { "epoch": 0.9059408908338829, "grad_norm": 0.7495008707046509, "learning_rate": 6.98297225997282e-05, "loss": 1.7335, "step": 3778 }, { "epoch": 0.9064204783885859, "grad_norm": 0.6951792240142822, "learning_rate": 6.981373411143976e-05, "loss": 1.7713, "step": 3780 }, { "epoch": 0.9069000659432888, "grad_norm": 0.7701365947723389, "learning_rate": 6.979774562315134e-05, "loss": 1.7557, "step": 3782 }, { "epoch": 0.9073796534979918, "grad_norm": 0.6337345838546753, "learning_rate": 6.97817571348629e-05, "loss": 1.7809, "step": 3784 }, { "epoch": 0.9078592410526947, "grad_norm": 0.7861867547035217, "learning_rate": 6.976576864657447e-05, "loss": 1.7687, "step": 3786 }, { "epoch": 0.9083388286073977, "grad_norm": 0.7456669211387634, "learning_rate": 6.974978015828604e-05, "loss": 1.767, "step": 3788 }, { "epoch": 0.9088184161621006, "grad_norm": 0.9429383277893066, "learning_rate": 6.97337916699976e-05, "loss": 1.7706, "step": 3790 }, { "epoch": 0.9092980037168036, "grad_norm": 0.7808070778846741, "learning_rate": 6.971780318170917e-05, "loss": 1.7342, "step": 3792 }, { "epoch": 0.9097775912715065, "grad_norm": 0.7272949814796448, "learning_rate": 6.970181469342075e-05, "loss": 1.8016, "step": 3794 }, { "epoch": 0.9102571788262095, "grad_norm": 0.6962153315544128, "learning_rate": 6.968582620513231e-05, "loss": 1.806, "step": 3796 }, { "epoch": 0.9107367663809124, 
"grad_norm": 0.7808266878128052, "learning_rate": 6.966983771684388e-05, "loss": 1.7541, "step": 3798 }, { "epoch": 0.9112163539356154, "grad_norm": 0.7198723554611206, "learning_rate": 6.965384922855544e-05, "loss": 1.7798, "step": 3800 }, { "epoch": 0.9116959414903183, "grad_norm": 0.7509251832962036, "learning_rate": 6.963786074026701e-05, "loss": 1.8449, "step": 3802 }, { "epoch": 0.9121755290450213, "grad_norm": 0.8833305239677429, "learning_rate": 6.962187225197857e-05, "loss": 1.7243, "step": 3804 }, { "epoch": 0.9126551165997242, "grad_norm": 0.7926498055458069, "learning_rate": 6.960588376369015e-05, "loss": 1.822, "step": 3806 }, { "epoch": 0.9131347041544272, "grad_norm": 0.6801818609237671, "learning_rate": 6.958989527540172e-05, "loss": 1.8086, "step": 3808 }, { "epoch": 0.9136142917091301, "grad_norm": 0.619688868522644, "learning_rate": 6.957390678711329e-05, "loss": 1.7455, "step": 3810 }, { "epoch": 0.9140938792638331, "grad_norm": 0.6991743445396423, "learning_rate": 6.955791829882485e-05, "loss": 1.8115, "step": 3812 }, { "epoch": 0.914573466818536, "grad_norm": 0.7222469449043274, "learning_rate": 6.954192981053642e-05, "loss": 1.78, "step": 3814 }, { "epoch": 0.915053054373239, "grad_norm": 0.8038175106048584, "learning_rate": 6.9525941322248e-05, "loss": 1.7457, "step": 3816 }, { "epoch": 0.9155326419279419, "grad_norm": 0.7080976963043213, "learning_rate": 6.950995283395956e-05, "loss": 1.7974, "step": 3818 }, { "epoch": 0.9160122294826449, "grad_norm": 1.0029659271240234, "learning_rate": 6.949396434567113e-05, "loss": 1.8091, "step": 3820 }, { "epoch": 0.9164918170373478, "grad_norm": 0.7370526790618896, "learning_rate": 6.94779758573827e-05, "loss": 1.7753, "step": 3822 }, { "epoch": 0.9169714045920508, "grad_norm": 0.7352128028869629, "learning_rate": 6.946198736909425e-05, "loss": 1.7906, "step": 3824 }, { "epoch": 0.9174509921467537, "grad_norm": 0.9142953157424927, "learning_rate": 6.944599888080582e-05, "loss": 1.7866, "step": 3826 }, 
{ "epoch": 0.9179305797014568, "grad_norm": 0.6521286964416504, "learning_rate": 6.943001039251739e-05, "loss": 1.7913, "step": 3828 }, { "epoch": 0.9184101672561596, "grad_norm": 0.7110651135444641, "learning_rate": 6.941402190422896e-05, "loss": 1.7918, "step": 3830 }, { "epoch": 0.9188897548108627, "grad_norm": 0.6824669241905212, "learning_rate": 6.939803341594052e-05, "loss": 1.7985, "step": 3832 }, { "epoch": 0.9193693423655657, "grad_norm": 0.7059363126754761, "learning_rate": 6.938204492765209e-05, "loss": 1.8015, "step": 3834 }, { "epoch": 0.9198489299202686, "grad_norm": 0.6696851253509521, "learning_rate": 6.936605643936365e-05, "loss": 1.7482, "step": 3836 }, { "epoch": 0.9203285174749716, "grad_norm": 0.6899709701538086, "learning_rate": 6.935006795107523e-05, "loss": 1.8145, "step": 3838 }, { "epoch": 0.9208081050296745, "grad_norm": 0.7761172652244568, "learning_rate": 6.93340794627868e-05, "loss": 1.763, "step": 3840 }, { "epoch": 0.9212876925843775, "grad_norm": 0.7577086091041565, "learning_rate": 6.931809097449836e-05, "loss": 1.7888, "step": 3842 }, { "epoch": 0.9217672801390804, "grad_norm": 0.675665020942688, "learning_rate": 6.930210248620993e-05, "loss": 1.7597, "step": 3844 }, { "epoch": 0.9222468676937834, "grad_norm": 0.6748313307762146, "learning_rate": 6.92861139979215e-05, "loss": 1.7323, "step": 3846 }, { "epoch": 0.9227264552484863, "grad_norm": 0.7162947654724121, "learning_rate": 6.927012550963306e-05, "loss": 1.7303, "step": 3848 }, { "epoch": 0.9232060428031893, "grad_norm": 0.698582649230957, "learning_rate": 6.925413702134464e-05, "loss": 1.7597, "step": 3850 }, { "epoch": 0.9236856303578922, "grad_norm": 0.6932827234268188, "learning_rate": 6.92381485330562e-05, "loss": 1.7661, "step": 3852 }, { "epoch": 0.9241652179125952, "grad_norm": 0.6721543669700623, "learning_rate": 6.922216004476777e-05, "loss": 1.8082, "step": 3854 }, { "epoch": 0.9246448054672981, "grad_norm": 0.7535015344619751, "learning_rate": 
6.920617155647934e-05, "loss": 1.7872, "step": 3856 }, { "epoch": 0.9251243930220011, "grad_norm": 0.7165185809135437, "learning_rate": 6.91901830681909e-05, "loss": 1.8322, "step": 3858 }, { "epoch": 0.925603980576704, "grad_norm": 0.6856184601783752, "learning_rate": 6.917419457990247e-05, "loss": 1.7718, "step": 3860 }, { "epoch": 0.926083568131407, "grad_norm": 0.6940291523933411, "learning_rate": 6.915820609161405e-05, "loss": 1.778, "step": 3862 }, { "epoch": 0.9265631556861099, "grad_norm": 0.6882027387619019, "learning_rate": 6.914221760332561e-05, "loss": 1.7841, "step": 3864 }, { "epoch": 0.9270427432408129, "grad_norm": 0.6682393550872803, "learning_rate": 6.912622911503718e-05, "loss": 1.7895, "step": 3866 }, { "epoch": 0.9275223307955158, "grad_norm": 0.8339086771011353, "learning_rate": 6.911024062674874e-05, "loss": 1.8243, "step": 3868 }, { "epoch": 0.9280019183502188, "grad_norm": 0.8226137161254883, "learning_rate": 6.909425213846031e-05, "loss": 1.7929, "step": 3870 }, { "epoch": 0.9284815059049217, "grad_norm": 0.6548885107040405, "learning_rate": 6.907826365017188e-05, "loss": 1.7764, "step": 3872 }, { "epoch": 0.9289610934596247, "grad_norm": 0.9074609875679016, "learning_rate": 6.906227516188346e-05, "loss": 1.8063, "step": 3874 }, { "epoch": 0.9294406810143276, "grad_norm": 0.6804929971694946, "learning_rate": 6.904628667359502e-05, "loss": 1.7956, "step": 3876 }, { "epoch": 0.9299202685690307, "grad_norm": 0.8572920560836792, "learning_rate": 6.903029818530659e-05, "loss": 1.8679, "step": 3878 }, { "epoch": 0.9303998561237335, "grad_norm": 0.6091808080673218, "learning_rate": 6.901430969701815e-05, "loss": 1.7887, "step": 3880 }, { "epoch": 0.9308794436784366, "grad_norm": 0.7369956970214844, "learning_rate": 6.899832120872972e-05, "loss": 1.7731, "step": 3882 }, { "epoch": 0.9313590312331395, "grad_norm": 0.7605348229408264, "learning_rate": 6.898233272044128e-05, "loss": 1.7578, "step": 3884 }, { "epoch": 0.9318386187878425, "grad_norm": 
0.7946153283119202, "learning_rate": 6.896634423215286e-05, "loss": 1.7757, "step": 3886 }, { "epoch": 0.9323182063425454, "grad_norm": 0.832282543182373, "learning_rate": 6.895035574386443e-05, "loss": 1.7886, "step": 3888 }, { "epoch": 0.9327977938972484, "grad_norm": 0.8274644017219543, "learning_rate": 6.8934367255576e-05, "loss": 1.7579, "step": 3890 }, { "epoch": 0.9332773814519513, "grad_norm": 0.728166401386261, "learning_rate": 6.891837876728756e-05, "loss": 1.7674, "step": 3892 }, { "epoch": 0.9337569690066543, "grad_norm": 0.6928520202636719, "learning_rate": 6.890239027899913e-05, "loss": 1.7935, "step": 3894 }, { "epoch": 0.9342365565613572, "grad_norm": 0.8974084258079529, "learning_rate": 6.888640179071069e-05, "loss": 1.7579, "step": 3896 }, { "epoch": 0.9347161441160602, "grad_norm": 0.7827080488204956, "learning_rate": 6.887041330242226e-05, "loss": 1.7526, "step": 3898 }, { "epoch": 0.9351957316707632, "grad_norm": 0.7260924577713013, "learning_rate": 6.885442481413382e-05, "loss": 1.7631, "step": 3900 }, { "epoch": 0.9356753192254661, "grad_norm": 0.6685707569122314, "learning_rate": 6.883843632584539e-05, "loss": 1.7919, "step": 3902 }, { "epoch": 0.9361549067801691, "grad_norm": 0.8668267726898193, "learning_rate": 6.882244783755695e-05, "loss": 1.7797, "step": 3904 }, { "epoch": 0.936634494334872, "grad_norm": 1.0781723260879517, "learning_rate": 6.880645934926852e-05, "loss": 1.7332, "step": 3906 }, { "epoch": 0.937114081889575, "grad_norm": 0.8196031451225281, "learning_rate": 6.87904708609801e-05, "loss": 1.7697, "step": 3908 }, { "epoch": 0.9375936694442779, "grad_norm": 0.8321779370307922, "learning_rate": 6.877448237269167e-05, "loss": 1.7743, "step": 3910 }, { "epoch": 0.9380732569989809, "grad_norm": 0.6612207293510437, "learning_rate": 6.875849388440323e-05, "loss": 1.7548, "step": 3912 }, { "epoch": 0.9385528445536838, "grad_norm": 0.726118803024292, "learning_rate": 6.87425053961148e-05, "loss": 1.7857, "step": 3914 }, { "epoch": 
0.9390324321083868, "grad_norm": 0.7967771291732788, "learning_rate": 6.872651690782636e-05, "loss": 1.7392, "step": 3916 }, { "epoch": 0.9395120196630897, "grad_norm": 0.8730937838554382, "learning_rate": 6.871052841953793e-05, "loss": 1.8184, "step": 3918 }, { "epoch": 0.9399916072177927, "grad_norm": 0.7913429141044617, "learning_rate": 6.869453993124951e-05, "loss": 1.763, "step": 3920 }, { "epoch": 0.9404711947724956, "grad_norm": 0.8649652004241943, "learning_rate": 6.867855144296107e-05, "loss": 1.7902, "step": 3922 }, { "epoch": 0.9409507823271986, "grad_norm": 0.9297704696655273, "learning_rate": 6.866256295467264e-05, "loss": 1.7639, "step": 3924 }, { "epoch": 0.9414303698819015, "grad_norm": 0.769991397857666, "learning_rate": 6.86465744663842e-05, "loss": 1.7531, "step": 3926 }, { "epoch": 0.9419099574366046, "grad_norm": 0.7845906615257263, "learning_rate": 6.863058597809577e-05, "loss": 1.7463, "step": 3928 }, { "epoch": 0.9423895449913074, "grad_norm": 0.9675518274307251, "learning_rate": 6.861459748980734e-05, "loss": 1.7955, "step": 3930 }, { "epoch": 0.9428691325460105, "grad_norm": 0.9024473428726196, "learning_rate": 6.859860900151892e-05, "loss": 1.7572, "step": 3932 }, { "epoch": 0.9433487201007134, "grad_norm": 0.7597419619560242, "learning_rate": 6.858262051323048e-05, "loss": 1.7412, "step": 3934 }, { "epoch": 0.9438283076554164, "grad_norm": 0.7196982502937317, "learning_rate": 6.856663202494205e-05, "loss": 1.7935, "step": 3936 }, { "epoch": 0.9443078952101193, "grad_norm": 0.7551305890083313, "learning_rate": 6.855064353665361e-05, "loss": 1.7996, "step": 3938 }, { "epoch": 0.9447874827648223, "grad_norm": 0.9772862195968628, "learning_rate": 6.853465504836518e-05, "loss": 1.7251, "step": 3940 }, { "epoch": 0.9452670703195252, "grad_norm": 0.7666837573051453, "learning_rate": 6.851866656007674e-05, "loss": 1.7781, "step": 3942 }, { "epoch": 0.9457466578742282, "grad_norm": 0.6923601627349854, "learning_rate": 6.850267807178832e-05, 
"loss": 1.7556, "step": 3944 }, { "epoch": 0.9462262454289311, "grad_norm": 0.6713660955429077, "learning_rate": 6.848668958349989e-05, "loss": 1.8224, "step": 3946 }, { "epoch": 0.9467058329836341, "grad_norm": 0.7106120586395264, "learning_rate": 6.847070109521145e-05, "loss": 1.8198, "step": 3948 }, { "epoch": 0.947185420538337, "grad_norm": 0.6869609355926514, "learning_rate": 6.845471260692302e-05, "loss": 1.7188, "step": 3950 }, { "epoch": 0.94766500809304, "grad_norm": 0.7771978378295898, "learning_rate": 6.843872411863459e-05, "loss": 1.7264, "step": 3952 }, { "epoch": 0.9481445956477429, "grad_norm": 0.8270847797393799, "learning_rate": 6.842273563034615e-05, "loss": 1.7899, "step": 3954 }, { "epoch": 0.9486241832024459, "grad_norm": 0.667815089225769, "learning_rate": 6.840674714205773e-05, "loss": 1.7527, "step": 3956 }, { "epoch": 0.9491037707571488, "grad_norm": 0.7655726671218872, "learning_rate": 6.83907586537693e-05, "loss": 1.7419, "step": 3958 }, { "epoch": 0.9495833583118518, "grad_norm": 0.7035162448883057, "learning_rate": 6.837477016548086e-05, "loss": 1.755, "step": 3960 }, { "epoch": 0.9500629458665547, "grad_norm": 0.832826554775238, "learning_rate": 6.835878167719243e-05, "loss": 1.7961, "step": 3962 }, { "epoch": 0.9505425334212577, "grad_norm": 0.8059853315353394, "learning_rate": 6.8342793188904e-05, "loss": 1.7907, "step": 3964 }, { "epoch": 0.9510221209759607, "grad_norm": 0.8037518262863159, "learning_rate": 6.832680470061556e-05, "loss": 1.7414, "step": 3966 }, { "epoch": 0.9515017085306636, "grad_norm": 1.1219021081924438, "learning_rate": 6.831081621232712e-05, "loss": 1.8125, "step": 3968 }, { "epoch": 0.9519812960853666, "grad_norm": 0.7367568016052246, "learning_rate": 6.829482772403869e-05, "loss": 1.7437, "step": 3970 }, { "epoch": 0.9524608836400695, "grad_norm": 0.7045671939849854, "learning_rate": 6.827883923575026e-05, "loss": 1.7711, "step": 3972 }, { "epoch": 0.9529404711947725, "grad_norm": 0.7061318755149841, 
"learning_rate": 6.826285074746182e-05, "loss": 1.7017, "step": 3974 }, { "epoch": 0.9534200587494754, "grad_norm": 0.9860260486602783, "learning_rate": 6.824686225917339e-05, "loss": 1.7359, "step": 3976 }, { "epoch": 0.9538996463041785, "grad_norm": 0.8517863750457764, "learning_rate": 6.823087377088497e-05, "loss": 1.8438, "step": 3978 }, { "epoch": 0.9543792338588813, "grad_norm": 0.751642107963562, "learning_rate": 6.821488528259653e-05, "loss": 1.7935, "step": 3980 }, { "epoch": 0.9548588214135844, "grad_norm": 0.758598268032074, "learning_rate": 6.81988967943081e-05, "loss": 1.7603, "step": 3982 }, { "epoch": 0.9553384089682873, "grad_norm": 0.719031035900116, "learning_rate": 6.818290830601966e-05, "loss": 1.7804, "step": 3984 }, { "epoch": 0.9558179965229903, "grad_norm": 0.7542909383773804, "learning_rate": 6.816691981773123e-05, "loss": 1.8107, "step": 3986 }, { "epoch": 0.9562975840776932, "grad_norm": 0.7406023740768433, "learning_rate": 6.81509313294428e-05, "loss": 1.7498, "step": 3988 }, { "epoch": 0.9567771716323962, "grad_norm": 0.6547685265541077, "learning_rate": 6.813494284115437e-05, "loss": 1.784, "step": 3990 }, { "epoch": 0.9572567591870991, "grad_norm": 0.9153742790222168, "learning_rate": 6.811895435286594e-05, "loss": 1.7317, "step": 3992 }, { "epoch": 0.9577363467418021, "grad_norm": 0.6587347388267517, "learning_rate": 6.81029658645775e-05, "loss": 1.7936, "step": 3994 }, { "epoch": 0.958215934296505, "grad_norm": 0.7622671127319336, "learning_rate": 6.808697737628907e-05, "loss": 1.7814, "step": 3996 }, { "epoch": 0.958695521851208, "grad_norm": 0.6503249406814575, "learning_rate": 6.807098888800064e-05, "loss": 1.8072, "step": 3998 }, { "epoch": 0.9591751094059109, "grad_norm": 0.720059871673584, "learning_rate": 6.80550003997122e-05, "loss": 1.8189, "step": 4000 }, { "epoch": 0.9591751094059109, "eval_loss": 1.7650362253189087, "eval_runtime": 331.4509, "eval_samples_per_second": 402.621, "eval_steps_per_second": 12.584, "step": 
4000 }, { "epoch": 0.9596546969606139, "grad_norm": 0.6223319172859192, "learning_rate": 6.803901191142378e-05, "loss": 1.7865, "step": 4002 }, { "epoch": 0.9601342845153168, "grad_norm": 0.8339089155197144, "learning_rate": 6.802302342313535e-05, "loss": 1.8052, "step": 4004 }, { "epoch": 0.9606138720700198, "grad_norm": 0.7577294111251831, "learning_rate": 6.800703493484691e-05, "loss": 1.7613, "step": 4006 }, { "epoch": 0.9610934596247227, "grad_norm": 0.6510248780250549, "learning_rate": 6.799104644655848e-05, "loss": 1.7698, "step": 4008 }, { "epoch": 0.9615730471794257, "grad_norm": 0.6065031290054321, "learning_rate": 6.797505795827005e-05, "loss": 1.7961, "step": 4010 }, { "epoch": 0.9620526347341286, "grad_norm": 0.7158570885658264, "learning_rate": 6.795906946998162e-05, "loss": 1.8023, "step": 4012 }, { "epoch": 0.9625322222888316, "grad_norm": 0.7701272964477539, "learning_rate": 6.794308098169319e-05, "loss": 1.7678, "step": 4014 }, { "epoch": 0.9630118098435345, "grad_norm": 0.6569875478744507, "learning_rate": 6.792709249340476e-05, "loss": 1.7964, "step": 4016 }, { "epoch": 0.9634913973982375, "grad_norm": 0.6379656195640564, "learning_rate": 6.791110400511632e-05, "loss": 1.7608, "step": 4018 }, { "epoch": 0.9639709849529404, "grad_norm": 0.6209776997566223, "learning_rate": 6.789511551682789e-05, "loss": 1.7394, "step": 4020 }, { "epoch": 0.9644505725076434, "grad_norm": 0.7290825843811035, "learning_rate": 6.787912702853945e-05, "loss": 1.7736, "step": 4022 }, { "epoch": 0.9649301600623463, "grad_norm": 0.7011139988899231, "learning_rate": 6.786313854025103e-05, "loss": 1.7548, "step": 4024 }, { "epoch": 0.9654097476170493, "grad_norm": 0.6293589472770691, "learning_rate": 6.78471500519626e-05, "loss": 1.8181, "step": 4026 }, { "epoch": 0.9658893351717522, "grad_norm": 0.7070820331573486, "learning_rate": 6.783116156367416e-05, "loss": 1.8171, "step": 4028 }, { "epoch": 0.9663689227264552, "grad_norm": 0.709465742111206, "learning_rate": 
6.781517307538573e-05, "loss": 1.7688, "step": 4030 }, { "epoch": 0.9668485102811583, "grad_norm": 0.769922137260437, "learning_rate": 6.77991845870973e-05, "loss": 1.7737, "step": 4032 }, { "epoch": 0.9673280978358612, "grad_norm": 0.6863576769828796, "learning_rate": 6.778319609880886e-05, "loss": 1.7793, "step": 4034 }, { "epoch": 0.9678076853905642, "grad_norm": 0.716255247592926, "learning_rate": 6.776720761052044e-05, "loss": 1.74, "step": 4036 }, { "epoch": 0.9682872729452671, "grad_norm": 0.7149354815483093, "learning_rate": 6.7751219122232e-05, "loss": 1.8083, "step": 4038 }, { "epoch": 0.9687668604999701, "grad_norm": 0.7548081278800964, "learning_rate": 6.773523063394357e-05, "loss": 1.8015, "step": 4040 }, { "epoch": 0.969246448054673, "grad_norm": 0.83185213804245, "learning_rate": 6.771924214565512e-05, "loss": 1.7164, "step": 4042 }, { "epoch": 0.969726035609376, "grad_norm": 0.7239333987236023, "learning_rate": 6.770325365736669e-05, "loss": 1.768, "step": 4044 }, { "epoch": 0.9702056231640789, "grad_norm": 0.7250880002975464, "learning_rate": 6.768726516907827e-05, "loss": 1.7569, "step": 4046 }, { "epoch": 0.9706852107187819, "grad_norm": 0.7848089933395386, "learning_rate": 6.767127668078983e-05, "loss": 1.7412, "step": 4048 }, { "epoch": 0.9711647982734848, "grad_norm": 0.8475637435913086, "learning_rate": 6.76552881925014e-05, "loss": 1.7555, "step": 4050 }, { "epoch": 0.9716443858281878, "grad_norm": 0.7634351849555969, "learning_rate": 6.763929970421297e-05, "loss": 1.7701, "step": 4052 }, { "epoch": 0.9721239733828907, "grad_norm": 0.6980100274085999, "learning_rate": 6.762331121592453e-05, "loss": 1.7686, "step": 4054 }, { "epoch": 0.9726035609375937, "grad_norm": 0.7307558655738831, "learning_rate": 6.76073227276361e-05, "loss": 1.7409, "step": 4056 }, { "epoch": 0.9730831484922966, "grad_norm": 0.711893618106842, "learning_rate": 6.759133423934768e-05, "loss": 1.764, "step": 4058 }, { "epoch": 0.9735627360469996, "grad_norm": 
0.729195773601532, "learning_rate": 6.757534575105924e-05, "loss": 1.729, "step": 4060 }, { "epoch": 0.9740423236017025, "grad_norm": 0.979459285736084, "learning_rate": 6.755935726277081e-05, "loss": 1.8189, "step": 4062 }, { "epoch": 0.9745219111564055, "grad_norm": 0.7086123824119568, "learning_rate": 6.754336877448237e-05, "loss": 1.7714, "step": 4064 }, { "epoch": 0.9750014987111084, "grad_norm": 0.7240733504295349, "learning_rate": 6.752738028619394e-05, "loss": 1.7946, "step": 4066 }, { "epoch": 0.9754810862658114, "grad_norm": 0.7854089140892029, "learning_rate": 6.75113917979055e-05, "loss": 1.8451, "step": 4068 }, { "epoch": 0.9759606738205143, "grad_norm": 0.7492378950119019, "learning_rate": 6.749540330961708e-05, "loss": 1.7468, "step": 4070 }, { "epoch": 0.9764402613752173, "grad_norm": 0.6819878220558167, "learning_rate": 6.747941482132865e-05, "loss": 1.8244, "step": 4072 }, { "epoch": 0.9769198489299202, "grad_norm": 0.6691513061523438, "learning_rate": 6.746342633304022e-05, "loss": 1.7927, "step": 4074 }, { "epoch": 0.9773994364846232, "grad_norm": 0.7405861020088196, "learning_rate": 6.744743784475178e-05, "loss": 1.7484, "step": 4076 }, { "epoch": 0.9778790240393261, "grad_norm": 0.6340120434761047, "learning_rate": 6.743144935646335e-05, "loss": 1.7346, "step": 4078 }, { "epoch": 0.9783586115940291, "grad_norm": 0.7395811080932617, "learning_rate": 6.741546086817491e-05, "loss": 1.7895, "step": 4080 }, { "epoch": 0.978838199148732, "grad_norm": 0.6243704557418823, "learning_rate": 6.739947237988649e-05, "loss": 1.7729, "step": 4082 }, { "epoch": 0.9793177867034351, "grad_norm": 0.7743636965751648, "learning_rate": 6.738348389159806e-05, "loss": 1.8141, "step": 4084 }, { "epoch": 0.979797374258138, "grad_norm": 0.6898829340934753, "learning_rate": 6.736749540330962e-05, "loss": 1.7692, "step": 4086 }, { "epoch": 0.980276961812841, "grad_norm": 0.643641471862793, "learning_rate": 6.735150691502119e-05, "loss": 1.7511, "step": 4088 }, { "epoch": 
0.9807565493675439, "grad_norm": 1.0246528387069702, "learning_rate": 6.733551842673275e-05, "loss": 1.7639, "step": 4090 }, { "epoch": 0.9812361369222469, "grad_norm": 0.629093587398529, "learning_rate": 6.731952993844432e-05, "loss": 1.8132, "step": 4092 }, { "epoch": 0.9817157244769498, "grad_norm": 0.6685495972633362, "learning_rate": 6.73035414501559e-05, "loss": 1.8184, "step": 4094 }, { "epoch": 0.9821953120316528, "grad_norm": 0.8080766797065735, "learning_rate": 6.728755296186747e-05, "loss": 1.7426, "step": 4096 }, { "epoch": 0.9826748995863558, "grad_norm": 0.6481422781944275, "learning_rate": 6.727156447357903e-05, "loss": 1.7628, "step": 4098 }, { "epoch": 0.9831544871410587, "grad_norm": 0.7287728786468506, "learning_rate": 6.72555759852906e-05, "loss": 1.7629, "step": 4100 }, { "epoch": 0.9836340746957617, "grad_norm": 0.77411949634552, "learning_rate": 6.723958749700216e-05, "loss": 1.7485, "step": 4102 }, { "epoch": 0.9841136622504646, "grad_norm": 0.7039063572883606, "learning_rate": 6.722359900871373e-05, "loss": 1.7894, "step": 4104 }, { "epoch": 0.9845932498051676, "grad_norm": 0.6456789970397949, "learning_rate": 6.720761052042531e-05, "loss": 1.7964, "step": 4106 }, { "epoch": 0.9850728373598705, "grad_norm": 0.6734626293182373, "learning_rate": 6.719162203213687e-05, "loss": 1.8458, "step": 4108 }, { "epoch": 0.9855524249145735, "grad_norm": 0.7576918601989746, "learning_rate": 6.717563354384844e-05, "loss": 1.7753, "step": 4110 }, { "epoch": 0.9860320124692764, "grad_norm": 0.826819658279419, "learning_rate": 6.715964505556e-05, "loss": 1.6975, "step": 4112 }, { "epoch": 0.9865116000239794, "grad_norm": 0.677684485912323, "learning_rate": 6.714365656727156e-05, "loss": 1.7131, "step": 4114 }, { "epoch": 0.9869911875786823, "grad_norm": 0.8036590218544006, "learning_rate": 6.712766807898314e-05, "loss": 1.7727, "step": 4116 }, { "epoch": 0.9874707751333853, "grad_norm": 0.683920681476593, "learning_rate": 6.71116795906947e-05, "loss": 
1.7757, "step": 4118 }, { "epoch": 0.9879503626880882, "grad_norm": 0.7132298350334167, "learning_rate": 6.709569110240627e-05, "loss": 1.729, "step": 4120 }, { "epoch": 0.9884299502427912, "grad_norm": 0.7470959424972534, "learning_rate": 6.707970261411783e-05, "loss": 1.8369, "step": 4122 }, { "epoch": 0.9889095377974941, "grad_norm": 0.7283341884613037, "learning_rate": 6.70637141258294e-05, "loss": 1.7583, "step": 4124 }, { "epoch": 0.9893891253521971, "grad_norm": 1.0758384466171265, "learning_rate": 6.704772563754096e-05, "loss": 1.7271, "step": 4126 }, { "epoch": 0.9898687129069, "grad_norm": 0.6640116572380066, "learning_rate": 6.703173714925254e-05, "loss": 1.7842, "step": 4128 }, { "epoch": 0.990348300461603, "grad_norm": 0.7408930659294128, "learning_rate": 6.701574866096411e-05, "loss": 1.7553, "step": 4130 }, { "epoch": 0.990827888016306, "grad_norm": 0.7363208532333374, "learning_rate": 6.699976017267567e-05, "loss": 1.7821, "step": 4132 }, { "epoch": 0.991307475571009, "grad_norm": 0.7331721186637878, "learning_rate": 6.698377168438724e-05, "loss": 1.8172, "step": 4134 }, { "epoch": 0.9917870631257119, "grad_norm": 0.6729336977005005, "learning_rate": 6.69677831960988e-05, "loss": 1.7972, "step": 4136 }, { "epoch": 0.9922666506804149, "grad_norm": 0.9765750169754028, "learning_rate": 6.695179470781037e-05, "loss": 1.7999, "step": 4138 }, { "epoch": 0.9927462382351178, "grad_norm": 0.6997969746589661, "learning_rate": 6.693580621952195e-05, "loss": 1.7588, "step": 4140 }, { "epoch": 0.9932258257898208, "grad_norm": 0.7894157767295837, "learning_rate": 6.691981773123352e-05, "loss": 1.7273, "step": 4142 }, { "epoch": 0.9937054133445237, "grad_norm": 0.6557096242904663, "learning_rate": 6.690382924294508e-05, "loss": 1.8123, "step": 4144 }, { "epoch": 0.9941850008992267, "grad_norm": 1.0688025951385498, "learning_rate": 6.688784075465665e-05, "loss": 1.8237, "step": 4146 }, { "epoch": 0.9946645884539296, "grad_norm": 0.8423780202865601, "learning_rate": 
6.687185226636821e-05, "loss": 1.7828, "step": 4148 }, { "epoch": 0.9951441760086326, "grad_norm": 0.6653779745101929, "learning_rate": 6.685586377807978e-05, "loss": 1.7339, "step": 4150 }, { "epoch": 0.9956237635633355, "grad_norm": 0.7360559105873108, "learning_rate": 6.683987528979136e-05, "loss": 1.7278, "step": 4152 }, { "epoch": 0.9961033511180385, "grad_norm": 0.8668364882469177, "learning_rate": 6.682388680150292e-05, "loss": 1.7656, "step": 4154 }, { "epoch": 0.9965829386727414, "grad_norm": 0.75026935338974, "learning_rate": 6.680789831321449e-05, "loss": 1.8196, "step": 4156 }, { "epoch": 0.9970625262274444, "grad_norm": 0.7562640905380249, "learning_rate": 6.679190982492606e-05, "loss": 1.728, "step": 4158 }, { "epoch": 0.9975421137821473, "grad_norm": 0.7197509407997131, "learning_rate": 6.677592133663762e-05, "loss": 1.8016, "step": 4160 }, { "epoch": 0.9980217013368503, "grad_norm": 0.8321323990821838, "learning_rate": 6.675993284834919e-05, "loss": 1.7749, "step": 4162 }, { "epoch": 0.9985012888915533, "grad_norm": 0.626273512840271, "learning_rate": 6.674394436006077e-05, "loss": 1.8043, "step": 4164 }, { "epoch": 0.9989808764462562, "grad_norm": 0.7893314361572266, "learning_rate": 6.672795587177233e-05, "loss": 1.8401, "step": 4166 }, { "epoch": 0.9994604640009592, "grad_norm": 0.8364638090133667, "learning_rate": 6.67119673834839e-05, "loss": 1.8055, "step": 4168 }, { "epoch": 0.9999400515556621, "grad_norm": 0.779707133769989, "learning_rate": 6.669597889519546e-05, "loss": 1.7164, "step": 4170 }, { "epoch": 1.000419639110365, "grad_norm": 0.6913009881973267, "learning_rate": 6.667999040690703e-05, "loss": 1.7091, "step": 4172 }, { "epoch": 1.000899226665068, "grad_norm": 0.7264348268508911, "learning_rate": 6.66640019186186e-05, "loss": 1.7641, "step": 4174 }, { "epoch": 1.001378814219771, "grad_norm": 0.9708462357521057, "learning_rate": 6.664801343033017e-05, "loss": 1.7232, "step": 4176 }, { "epoch": 1.001858401774474, "grad_norm": 
0.7802194952964783, "learning_rate": 6.663202494204174e-05, "loss": 1.7185, "step": 4178 }, { "epoch": 1.0023379893291768, "grad_norm": 0.7536959648132324, "learning_rate": 6.66160364537533e-05, "loss": 1.6628, "step": 4180 }, { "epoch": 1.0028175768838798, "grad_norm": 0.7532178163528442, "learning_rate": 6.660004796546487e-05, "loss": 1.6824, "step": 4182 }, { "epoch": 1.0032971644385829, "grad_norm": 0.7447124123573303, "learning_rate": 6.658405947717644e-05, "loss": 1.7224, "step": 4184 }, { "epoch": 1.0037767519932859, "grad_norm": 0.726237416267395, "learning_rate": 6.6568070988888e-05, "loss": 1.7193, "step": 4186 }, { "epoch": 1.0042563395479887, "grad_norm": 0.6820405125617981, "learning_rate": 6.655208250059957e-05, "loss": 1.6897, "step": 4188 }, { "epoch": 1.0047359271026917, "grad_norm": 0.6583048105239868, "learning_rate": 6.653609401231113e-05, "loss": 1.7324, "step": 4190 }, { "epoch": 1.0052155146573947, "grad_norm": 0.6564170122146606, "learning_rate": 6.65201055240227e-05, "loss": 1.7142, "step": 4192 }, { "epoch": 1.0056951022120977, "grad_norm": 0.7796624898910522, "learning_rate": 6.650411703573427e-05, "loss": 1.6835, "step": 4194 }, { "epoch": 1.0061746897668005, "grad_norm": 0.6837286353111267, "learning_rate": 6.648812854744583e-05, "loss": 1.7771, "step": 4196 }, { "epoch": 1.0066542773215035, "grad_norm": 0.6986687779426575, "learning_rate": 6.647214005915741e-05, "loss": 1.7276, "step": 4198 }, { "epoch": 1.0071338648762065, "grad_norm": 1.1258236169815063, "learning_rate": 6.645615157086898e-05, "loss": 1.6806, "step": 4200 }, { "epoch": 1.0076134524309095, "grad_norm": 0.8054463267326355, "learning_rate": 6.644016308258054e-05, "loss": 1.7109, "step": 4202 }, { "epoch": 1.0080930399856123, "grad_norm": 0.7847676277160645, "learning_rate": 6.642417459429211e-05, "loss": 1.6962, "step": 4204 }, { "epoch": 1.0085726275403153, "grad_norm": 0.7899786829948425, "learning_rate": 6.640818610600367e-05, "loss": 1.7669, "step": 4206 }, { 
"epoch": 1.0090522150950183, "grad_norm": 0.9780293703079224, "learning_rate": 6.639219761771525e-05, "loss": 1.6867, "step": 4208 }, { "epoch": 1.0095318026497213, "grad_norm": 0.7009777426719666, "learning_rate": 6.637620912942682e-05, "loss": 1.7114, "step": 4210 }, { "epoch": 1.010011390204424, "grad_norm": 0.7924069762229919, "learning_rate": 6.636022064113838e-05, "loss": 1.7089, "step": 4212 }, { "epoch": 1.010490977759127, "grad_norm": 0.6933296918869019, "learning_rate": 6.634423215284995e-05, "loss": 1.7197, "step": 4214 }, { "epoch": 1.0109705653138301, "grad_norm": 0.7523556351661682, "learning_rate": 6.632824366456152e-05, "loss": 1.6983, "step": 4216 }, { "epoch": 1.0114501528685331, "grad_norm": 0.7605069279670715, "learning_rate": 6.631225517627308e-05, "loss": 1.7215, "step": 4218 }, { "epoch": 1.011929740423236, "grad_norm": 0.8972727060317993, "learning_rate": 6.629626668798466e-05, "loss": 1.6373, "step": 4220 }, { "epoch": 1.012409327977939, "grad_norm": 0.7813913822174072, "learning_rate": 6.628027819969623e-05, "loss": 1.6818, "step": 4222 }, { "epoch": 1.012888915532642, "grad_norm": 0.7711468935012817, "learning_rate": 6.626428971140779e-05, "loss": 1.7225, "step": 4224 }, { "epoch": 1.013368503087345, "grad_norm": 0.7135785818099976, "learning_rate": 6.624830122311936e-05, "loss": 1.7267, "step": 4226 }, { "epoch": 1.013848090642048, "grad_norm": 0.6751489639282227, "learning_rate": 6.623231273483092e-05, "loss": 1.7638, "step": 4228 }, { "epoch": 1.0143276781967507, "grad_norm": 0.6581051349639893, "learning_rate": 6.621632424654249e-05, "loss": 1.7396, "step": 4230 }, { "epoch": 1.0148072657514537, "grad_norm": 0.713753342628479, "learning_rate": 6.620033575825407e-05, "loss": 1.701, "step": 4232 }, { "epoch": 1.0152868533061568, "grad_norm": 0.6599774956703186, "learning_rate": 6.618434726996563e-05, "loss": 1.6973, "step": 4234 }, { "epoch": 1.0157664408608598, "grad_norm": 0.6521281599998474, "learning_rate": 6.61683587816772e-05, 
"loss": 1.6887, "step": 4236 }, { "epoch": 1.0162460284155626, "grad_norm": 0.9124394655227661, "learning_rate": 6.615237029338877e-05, "loss": 1.7059, "step": 4238 }, { "epoch": 1.0167256159702656, "grad_norm": 0.8495545983314514, "learning_rate": 6.613638180510033e-05, "loss": 1.7354, "step": 4240 }, { "epoch": 1.0172052035249686, "grad_norm": 0.8399738073348999, "learning_rate": 6.61203933168119e-05, "loss": 1.7229, "step": 4242 }, { "epoch": 1.0176847910796716, "grad_norm": 0.6856234073638916, "learning_rate": 6.610440482852348e-05, "loss": 1.6765, "step": 4244 }, { "epoch": 1.0181643786343744, "grad_norm": 0.708949625492096, "learning_rate": 6.608841634023504e-05, "loss": 1.7074, "step": 4246 }, { "epoch": 1.0186439661890774, "grad_norm": 0.7900472283363342, "learning_rate": 6.607242785194661e-05, "loss": 1.7416, "step": 4248 }, { "epoch": 1.0191235537437804, "grad_norm": 0.7472550272941589, "learning_rate": 6.605643936365817e-05, "loss": 1.682, "step": 4250 }, { "epoch": 1.0196031412984834, "grad_norm": 0.8097535371780396, "learning_rate": 6.604045087536974e-05, "loss": 1.725, "step": 4252 }, { "epoch": 1.0200827288531862, "grad_norm": 0.6540176272392273, "learning_rate": 6.60244623870813e-05, "loss": 1.7097, "step": 4254 }, { "epoch": 1.0205623164078892, "grad_norm": 0.6753392815589905, "learning_rate": 6.600847389879288e-05, "loss": 1.714, "step": 4256 }, { "epoch": 1.0210419039625922, "grad_norm": 0.7157773971557617, "learning_rate": 6.599248541050445e-05, "loss": 1.6886, "step": 4258 }, { "epoch": 1.0215214915172952, "grad_norm": 0.6526656746864319, "learning_rate": 6.5976496922216e-05, "loss": 1.7189, "step": 4260 }, { "epoch": 1.022001079071998, "grad_norm": 0.744939923286438, "learning_rate": 6.596050843392757e-05, "loss": 1.7086, "step": 4262 }, { "epoch": 1.022480666626701, "grad_norm": 0.7222669124603271, "learning_rate": 6.594451994563913e-05, "loss": 1.7179, "step": 4264 }, { "epoch": 1.022960254181404, "grad_norm": 0.6678929924964905, 
"learning_rate": 6.592853145735071e-05, "loss": 1.7541, "step": 4266 }, { "epoch": 1.023439841736107, "grad_norm": 0.7754931449890137, "learning_rate": 6.591254296906228e-05, "loss": 1.7117, "step": 4268 }, { "epoch": 1.0239194292908098, "grad_norm": 0.6831541657447815, "learning_rate": 6.589655448077384e-05, "loss": 1.7247, "step": 4270 }, { "epoch": 1.0243990168455128, "grad_norm": 0.7230755090713501, "learning_rate": 6.588056599248541e-05, "loss": 1.7295, "step": 4272 }, { "epoch": 1.0248786044002158, "grad_norm": 0.7501009106636047, "learning_rate": 6.586457750419698e-05, "loss": 1.6557, "step": 4274 }, { "epoch": 1.0253581919549188, "grad_norm": 0.6164395809173584, "learning_rate": 6.584858901590854e-05, "loss": 1.6939, "step": 4276 }, { "epoch": 1.0258377795096216, "grad_norm": 0.8173842430114746, "learning_rate": 6.583260052762012e-05, "loss": 1.7332, "step": 4278 }, { "epoch": 1.0263173670643246, "grad_norm": 1.045758843421936, "learning_rate": 6.581661203933169e-05, "loss": 1.7506, "step": 4280 }, { "epoch": 1.0267969546190276, "grad_norm": 0.7184283137321472, "learning_rate": 6.580062355104325e-05, "loss": 1.7379, "step": 4282 }, { "epoch": 1.0272765421737307, "grad_norm": 0.7512888312339783, "learning_rate": 6.578463506275482e-05, "loss": 1.6824, "step": 4284 }, { "epoch": 1.0277561297284334, "grad_norm": 0.7014705538749695, "learning_rate": 6.576864657446638e-05, "loss": 1.689, "step": 4286 }, { "epoch": 1.0282357172831365, "grad_norm": 0.6795244812965393, "learning_rate": 6.575265808617795e-05, "loss": 1.7172, "step": 4288 }, { "epoch": 1.0287153048378395, "grad_norm": 0.8605761528015137, "learning_rate": 6.573666959788953e-05, "loss": 1.7255, "step": 4290 }, { "epoch": 1.0291948923925425, "grad_norm": 0.6988500356674194, "learning_rate": 6.57206811096011e-05, "loss": 1.7079, "step": 4292 }, { "epoch": 1.0296744799472455, "grad_norm": 0.6985084414482117, "learning_rate": 6.570469262131266e-05, "loss": 1.6879, "step": 4294 }, { "epoch": 
1.0301540675019483, "grad_norm": 0.8748555183410645, "learning_rate": 6.568870413302422e-05, "loss": 1.7229, "step": 4296 }, { "epoch": 1.0306336550566513, "grad_norm": 1.0436627864837646, "learning_rate": 6.567271564473579e-05, "loss": 1.7329, "step": 4298 }, { "epoch": 1.0311132426113543, "grad_norm": 0.635516881942749, "learning_rate": 6.565672715644736e-05, "loss": 1.7111, "step": 4300 }, { "epoch": 1.0315928301660573, "grad_norm": 0.6965840458869934, "learning_rate": 6.564073866815894e-05, "loss": 1.6799, "step": 4302 }, { "epoch": 1.03207241772076, "grad_norm": 0.7360917925834656, "learning_rate": 6.56247501798705e-05, "loss": 1.7859, "step": 4304 }, { "epoch": 1.032552005275463, "grad_norm": 0.8468325138092041, "learning_rate": 6.560876169158207e-05, "loss": 1.6673, "step": 4306 }, { "epoch": 1.033031592830166, "grad_norm": 0.8641157746315002, "learning_rate": 6.559277320329363e-05, "loss": 1.74, "step": 4308 }, { "epoch": 1.033511180384869, "grad_norm": 0.7351201772689819, "learning_rate": 6.55767847150052e-05, "loss": 1.7611, "step": 4310 }, { "epoch": 1.033990767939572, "grad_norm": 0.8762410283088684, "learning_rate": 6.556079622671676e-05, "loss": 1.746, "step": 4312 }, { "epoch": 1.034470355494275, "grad_norm": 0.7924734354019165, "learning_rate": 6.554480773842834e-05, "loss": 1.7442, "step": 4314 }, { "epoch": 1.034949943048978, "grad_norm": 0.7252174019813538, "learning_rate": 6.552881925013991e-05, "loss": 1.7122, "step": 4316 }, { "epoch": 1.035429530603681, "grad_norm": 0.6781435608863831, "learning_rate": 6.551283076185147e-05, "loss": 1.7664, "step": 4318 }, { "epoch": 1.0359091181583837, "grad_norm": 0.8393166065216064, "learning_rate": 6.549684227356304e-05, "loss": 1.7306, "step": 4320 }, { "epoch": 1.0363887057130867, "grad_norm": 0.8006309866905212, "learning_rate": 6.54808537852746e-05, "loss": 1.7246, "step": 4322 }, { "epoch": 1.0368682932677897, "grad_norm": 0.741617739200592, "learning_rate": 6.546486529698617e-05, "loss": 1.6998, 
"step": 4324 }, { "epoch": 1.0373478808224927, "grad_norm": 0.7545636892318726, "learning_rate": 6.544887680869775e-05, "loss": 1.6767, "step": 4326 }, { "epoch": 1.0378274683771955, "grad_norm": 0.7685326337814331, "learning_rate": 6.543288832040932e-05, "loss": 1.6629, "step": 4328 }, { "epoch": 1.0383070559318985, "grad_norm": 0.6919386982917786, "learning_rate": 6.541689983212088e-05, "loss": 1.6935, "step": 4330 }, { "epoch": 1.0387866434866015, "grad_norm": 0.9721614122390747, "learning_rate": 6.540091134383243e-05, "loss": 1.7412, "step": 4332 }, { "epoch": 1.0392662310413046, "grad_norm": 1.1661304235458374, "learning_rate": 6.5384922855544e-05, "loss": 1.7065, "step": 4334 }, { "epoch": 1.0397458185960073, "grad_norm": 0.7312362790107727, "learning_rate": 6.536893436725558e-05, "loss": 1.7465, "step": 4336 }, { "epoch": 1.0402254061507104, "grad_norm": 0.6826890110969543, "learning_rate": 6.535294587896715e-05, "loss": 1.6859, "step": 4338 }, { "epoch": 1.0407049937054134, "grad_norm": 0.8888270258903503, "learning_rate": 6.533695739067871e-05, "loss": 1.7379, "step": 4340 }, { "epoch": 1.0411845812601164, "grad_norm": 0.7562370300292969, "learning_rate": 6.532096890239028e-05, "loss": 1.7144, "step": 4342 }, { "epoch": 1.0416641688148192, "grad_norm": 0.7284556031227112, "learning_rate": 6.530498041410184e-05, "loss": 1.7027, "step": 4344 }, { "epoch": 1.0421437563695222, "grad_norm": 0.7400043606758118, "learning_rate": 6.528899192581341e-05, "loss": 1.7419, "step": 4346 }, { "epoch": 1.0426233439242252, "grad_norm": 0.9061078429222107, "learning_rate": 6.527300343752499e-05, "loss": 1.69, "step": 4348 }, { "epoch": 1.0431029314789282, "grad_norm": 0.9193304777145386, "learning_rate": 6.525701494923655e-05, "loss": 1.6998, "step": 4350 }, { "epoch": 1.043582519033631, "grad_norm": 0.7385733127593994, "learning_rate": 6.524102646094812e-05, "loss": 1.6933, "step": 4352 }, { "epoch": 1.044062106588334, "grad_norm": 0.7465695142745972, "learning_rate": 
6.522503797265968e-05, "loss": 1.7213, "step": 4354 }, { "epoch": 1.044541694143037, "grad_norm": 0.7685785889625549, "learning_rate": 6.520904948437125e-05, "loss": 1.7037, "step": 4356 }, { "epoch": 1.04502128169774, "grad_norm": 0.7164414525032043, "learning_rate": 6.519306099608282e-05, "loss": 1.7356, "step": 4358 }, { "epoch": 1.045500869252443, "grad_norm": 0.8082914352416992, "learning_rate": 6.51770725077944e-05, "loss": 1.7266, "step": 4360 }, { "epoch": 1.0459804568071458, "grad_norm": 0.6872982382774353, "learning_rate": 6.516108401950596e-05, "loss": 1.7194, "step": 4362 }, { "epoch": 1.0464600443618488, "grad_norm": 0.6558045744895935, "learning_rate": 6.514509553121753e-05, "loss": 1.6936, "step": 4364 }, { "epoch": 1.0469396319165518, "grad_norm": 0.7074092626571655, "learning_rate": 6.512910704292909e-05, "loss": 1.7166, "step": 4366 }, { "epoch": 1.0474192194712548, "grad_norm": 0.6572328805923462, "learning_rate": 6.511311855464066e-05, "loss": 1.6577, "step": 4368 }, { "epoch": 1.0478988070259576, "grad_norm": 0.718651294708252, "learning_rate": 6.509713006635222e-05, "loss": 1.6932, "step": 4370 }, { "epoch": 1.0483783945806606, "grad_norm": 0.7581137418746948, "learning_rate": 6.50811415780638e-05, "loss": 1.7666, "step": 4372 }, { "epoch": 1.0488579821353636, "grad_norm": 0.8468576073646545, "learning_rate": 6.506515308977537e-05, "loss": 1.6848, "step": 4374 }, { "epoch": 1.0493375696900666, "grad_norm": 0.677077054977417, "learning_rate": 6.504916460148693e-05, "loss": 1.7278, "step": 4376 }, { "epoch": 1.0498171572447694, "grad_norm": 0.7245108485221863, "learning_rate": 6.50331761131985e-05, "loss": 1.7268, "step": 4378 }, { "epoch": 1.0502967447994724, "grad_norm": 0.7260501384735107, "learning_rate": 6.501718762491007e-05, "loss": 1.73, "step": 4380 }, { "epoch": 1.0507763323541754, "grad_norm": 0.7354005575180054, "learning_rate": 6.500119913662163e-05, "loss": 1.7152, "step": 4382 }, { "epoch": 1.0512559199088785, "grad_norm": 
0.7116497159004211, "learning_rate": 6.498521064833321e-05, "loss": 1.7015, "step": 4384 }, { "epoch": 1.0517355074635812, "grad_norm": 0.6879772543907166, "learning_rate": 6.496922216004478e-05, "loss": 1.7474, "step": 4386 }, { "epoch": 1.0522150950182843, "grad_norm": 0.6894081830978394, "learning_rate": 6.495323367175634e-05, "loss": 1.7004, "step": 4388 }, { "epoch": 1.0526946825729873, "grad_norm": 0.6086737513542175, "learning_rate": 6.493724518346791e-05, "loss": 1.6839, "step": 4390 }, { "epoch": 1.0531742701276903, "grad_norm": 0.7232165336608887, "learning_rate": 6.492125669517947e-05, "loss": 1.7081, "step": 4392 }, { "epoch": 1.053653857682393, "grad_norm": 0.7780346870422363, "learning_rate": 6.490526820689104e-05, "loss": 1.6745, "step": 4394 }, { "epoch": 1.054133445237096, "grad_norm": 0.779864490032196, "learning_rate": 6.488927971860262e-05, "loss": 1.7244, "step": 4396 }, { "epoch": 1.054613032791799, "grad_norm": 0.676195502281189, "learning_rate": 6.487329123031418e-05, "loss": 1.7764, "step": 4398 }, { "epoch": 1.055092620346502, "grad_norm": 0.6907246708869934, "learning_rate": 6.485730274202575e-05, "loss": 1.7153, "step": 4400 }, { "epoch": 1.055092620346502, "eval_loss": 1.761795997619629, "eval_runtime": 331.1236, "eval_samples_per_second": 403.019, "eval_steps_per_second": 12.597, "step": 4400 }, { "epoch": 1.0555722079012049, "grad_norm": 0.6307634115219116, "learning_rate": 6.484131425373732e-05, "loss": 1.65, "step": 4402 }, { "epoch": 1.0560517954559079, "grad_norm": 0.8325986862182617, "learning_rate": 6.482532576544888e-05, "loss": 1.6954, "step": 4404 }, { "epoch": 1.056531383010611, "grad_norm": 1.016687273979187, "learning_rate": 6.480933727716045e-05, "loss": 1.7518, "step": 4406 }, { "epoch": 1.057010970565314, "grad_norm": 0.6463380455970764, "learning_rate": 6.479334878887201e-05, "loss": 1.6942, "step": 4408 }, { "epoch": 1.0574905581200167, "grad_norm": 0.7540462613105774, "learning_rate": 6.477736030058358e-05, "loss": 
1.7233, "step": 4410 }, { "epoch": 1.0579701456747197, "grad_norm": 0.7246760129928589, "learning_rate": 6.476137181229514e-05, "loss": 1.6959, "step": 4412 }, { "epoch": 1.0584497332294227, "grad_norm": 0.6709578633308411, "learning_rate": 6.474538332400671e-05, "loss": 1.7209, "step": 4414 }, { "epoch": 1.0589293207841257, "grad_norm": 0.6796646118164062, "learning_rate": 6.472939483571829e-05, "loss": 1.7119, "step": 4416 }, { "epoch": 1.0594089083388285, "grad_norm": 0.7148873805999756, "learning_rate": 6.471340634742985e-05, "loss": 1.7019, "step": 4418 }, { "epoch": 1.0598884958935315, "grad_norm": 0.8111835718154907, "learning_rate": 6.469741785914142e-05, "loss": 1.7421, "step": 4420 }, { "epoch": 1.0603680834482345, "grad_norm": 0.6817417740821838, "learning_rate": 6.468142937085299e-05, "loss": 1.6729, "step": 4422 }, { "epoch": 1.0608476710029375, "grad_norm": 0.727281928062439, "learning_rate": 6.466544088256455e-05, "loss": 1.7532, "step": 4424 }, { "epoch": 1.0613272585576405, "grad_norm": 0.8568244576454163, "learning_rate": 6.464945239427612e-05, "loss": 1.6926, "step": 4426 }, { "epoch": 1.0618068461123433, "grad_norm": 0.805366575717926, "learning_rate": 6.46334639059877e-05, "loss": 1.7203, "step": 4428 }, { "epoch": 1.0622864336670463, "grad_norm": 0.6629754900932312, "learning_rate": 6.461747541769926e-05, "loss": 1.6456, "step": 4430 }, { "epoch": 1.0627660212217493, "grad_norm": 0.7171276211738586, "learning_rate": 6.460148692941083e-05, "loss": 1.7222, "step": 4432 }, { "epoch": 1.0632456087764521, "grad_norm": 0.6500433087348938, "learning_rate": 6.45854984411224e-05, "loss": 1.6989, "step": 4434 }, { "epoch": 1.0637251963311551, "grad_norm": 0.6208454370498657, "learning_rate": 6.456950995283396e-05, "loss": 1.7245, "step": 4436 }, { "epoch": 1.0642047838858582, "grad_norm": 0.9494984745979309, "learning_rate": 6.455352146454553e-05, "loss": 1.6824, "step": 4438 }, { "epoch": 1.0646843714405612, "grad_norm": 0.8178668022155762, 
"learning_rate": 6.45375329762571e-05, "loss": 1.706, "step": 4440 }, { "epoch": 1.0651639589952642, "grad_norm": 0.6723330616950989, "learning_rate": 6.452154448796867e-05, "loss": 1.7351, "step": 4442 }, { "epoch": 1.065643546549967, "grad_norm": 0.6764358282089233, "learning_rate": 6.450555599968024e-05, "loss": 1.6808, "step": 4444 }, { "epoch": 1.06612313410467, "grad_norm": 0.6584882140159607, "learning_rate": 6.44895675113918e-05, "loss": 1.744, "step": 4446 }, { "epoch": 1.066602721659373, "grad_norm": 0.7029091119766235, "learning_rate": 6.447357902310337e-05, "loss": 1.7342, "step": 4448 }, { "epoch": 1.067082309214076, "grad_norm": 0.7940102219581604, "learning_rate": 6.445759053481493e-05, "loss": 1.725, "step": 4450 }, { "epoch": 1.0675618967687788, "grad_norm": 0.8872503042221069, "learning_rate": 6.444160204652651e-05, "loss": 1.7148, "step": 4452 }, { "epoch": 1.0680414843234818, "grad_norm": 0.6552236080169678, "learning_rate": 6.442561355823808e-05, "loss": 1.7473, "step": 4454 }, { "epoch": 1.0685210718781848, "grad_norm": 0.7208357453346252, "learning_rate": 6.440962506994964e-05, "loss": 1.6878, "step": 4456 }, { "epoch": 1.0690006594328878, "grad_norm": 0.7114290595054626, "learning_rate": 6.439363658166121e-05, "loss": 1.6771, "step": 4458 }, { "epoch": 1.0694802469875906, "grad_norm": 0.717843234539032, "learning_rate": 6.437764809337277e-05, "loss": 1.7061, "step": 4460 }, { "epoch": 1.0699598345422936, "grad_norm": 0.7537991404533386, "learning_rate": 6.436165960508434e-05, "loss": 1.7247, "step": 4462 }, { "epoch": 1.0704394220969966, "grad_norm": 0.7215165495872498, "learning_rate": 6.434567111679592e-05, "loss": 1.73, "step": 4464 }, { "epoch": 1.0709190096516996, "grad_norm": 0.7188729643821716, "learning_rate": 6.432968262850749e-05, "loss": 1.7203, "step": 4466 }, { "epoch": 1.0713985972064024, "grad_norm": 0.7668144702911377, "learning_rate": 6.431369414021905e-05, "loss": 1.7302, "step": 4468 }, { "epoch": 1.0718781847611054, 
"grad_norm": 0.7654345631599426, "learning_rate": 6.429770565193062e-05, "loss": 1.703, "step": 4470 }, { "epoch": 1.0723577723158084, "grad_norm": 0.7126740217208862, "learning_rate": 6.428171716364218e-05, "loss": 1.7785, "step": 4472 }, { "epoch": 1.0728373598705114, "grad_norm": 0.830988883972168, "learning_rate": 6.426572867535375e-05, "loss": 1.7142, "step": 4474 }, { "epoch": 1.0733169474252142, "grad_norm": 0.7710103988647461, "learning_rate": 6.424974018706533e-05, "loss": 1.7194, "step": 4476 }, { "epoch": 1.0737965349799172, "grad_norm": 0.7210911512374878, "learning_rate": 6.423375169877688e-05, "loss": 1.7523, "step": 4478 }, { "epoch": 1.0742761225346202, "grad_norm": 0.894454300403595, "learning_rate": 6.421776321048845e-05, "loss": 1.6848, "step": 4480 }, { "epoch": 1.0747557100893232, "grad_norm": 0.640953004360199, "learning_rate": 6.420177472220001e-05, "loss": 1.6833, "step": 4482 }, { "epoch": 1.075235297644026, "grad_norm": 0.6473954319953918, "learning_rate": 6.418578623391158e-05, "loss": 1.7347, "step": 4484 }, { "epoch": 1.075714885198729, "grad_norm": 0.7278022170066833, "learning_rate": 6.416979774562316e-05, "loss": 1.7434, "step": 4486 }, { "epoch": 1.076194472753432, "grad_norm": 0.8176808953285217, "learning_rate": 6.415380925733472e-05, "loss": 1.7046, "step": 4488 }, { "epoch": 1.076674060308135, "grad_norm": 0.719761073589325, "learning_rate": 6.413782076904629e-05, "loss": 1.6743, "step": 4490 }, { "epoch": 1.077153647862838, "grad_norm": 0.8407365679740906, "learning_rate": 6.412183228075785e-05, "loss": 1.7464, "step": 4492 }, { "epoch": 1.0776332354175409, "grad_norm": 0.7349164485931396, "learning_rate": 6.410584379246942e-05, "loss": 1.6854, "step": 4494 }, { "epoch": 1.0781128229722439, "grad_norm": 0.7036625146865845, "learning_rate": 6.408985530418098e-05, "loss": 1.703, "step": 4496 }, { "epoch": 1.0785924105269469, "grad_norm": 0.7196708917617798, "learning_rate": 6.407386681589256e-05, "loss": 1.7079, "step": 4498 }, { 
"epoch": 1.0790719980816497, "grad_norm": 0.8162951469421387, "learning_rate": 6.405787832760413e-05, "loss": 1.7467, "step": 4500 }, { "epoch": 1.0795515856363527, "grad_norm": 0.6928543448448181, "learning_rate": 6.40418898393157e-05, "loss": 1.6795, "step": 4502 }, { "epoch": 1.0800311731910557, "grad_norm": 0.8259634375572205, "learning_rate": 6.402590135102726e-05, "loss": 1.7411, "step": 4504 }, { "epoch": 1.0805107607457587, "grad_norm": 0.7613701224327087, "learning_rate": 6.400991286273883e-05, "loss": 1.7721, "step": 4506 }, { "epoch": 1.0809903483004617, "grad_norm": 0.677270770072937, "learning_rate": 6.399392437445039e-05, "loss": 1.7214, "step": 4508 }, { "epoch": 1.0814699358551645, "grad_norm": 0.7859853506088257, "learning_rate": 6.397793588616197e-05, "loss": 1.6622, "step": 4510 }, { "epoch": 1.0819495234098675, "grad_norm": 1.0437628030776978, "learning_rate": 6.396194739787354e-05, "loss": 1.6708, "step": 4512 }, { "epoch": 1.0824291109645705, "grad_norm": 0.6739570498466492, "learning_rate": 6.39459589095851e-05, "loss": 1.7346, "step": 4514 }, { "epoch": 1.0829086985192735, "grad_norm": 0.6835720539093018, "learning_rate": 6.392997042129667e-05, "loss": 1.7153, "step": 4516 }, { "epoch": 1.0833882860739763, "grad_norm": 0.6924737095832825, "learning_rate": 6.391398193300823e-05, "loss": 1.728, "step": 4518 }, { "epoch": 1.0838678736286793, "grad_norm": 0.6678351759910583, "learning_rate": 6.38979934447198e-05, "loss": 1.7134, "step": 4520 }, { "epoch": 1.0843474611833823, "grad_norm": 0.6996366381645203, "learning_rate": 6.388200495643138e-05, "loss": 1.7106, "step": 4522 }, { "epoch": 1.0848270487380853, "grad_norm": 0.6908839344978333, "learning_rate": 6.386601646814295e-05, "loss": 1.6551, "step": 4524 }, { "epoch": 1.0853066362927881, "grad_norm": 0.6850778460502625, "learning_rate": 6.385002797985451e-05, "loss": 1.669, "step": 4526 }, { "epoch": 1.0857862238474911, "grad_norm": 0.7306576371192932, "learning_rate": 6.383403949156608e-05, 
"loss": 1.7237, "step": 4528 }, { "epoch": 1.0862658114021941, "grad_norm": 0.6825398802757263, "learning_rate": 6.381805100327764e-05, "loss": 1.7198, "step": 4530 }, { "epoch": 1.0867453989568971, "grad_norm": 0.724030613899231, "learning_rate": 6.380206251498921e-05, "loss": 1.7377, "step": 4532 }, { "epoch": 1.0872249865116, "grad_norm": 0.6779702305793762, "learning_rate": 6.378607402670079e-05, "loss": 1.7039, "step": 4534 }, { "epoch": 1.087704574066303, "grad_norm": 0.8602533936500549, "learning_rate": 6.377008553841235e-05, "loss": 1.7103, "step": 4536 }, { "epoch": 1.088184161621006, "grad_norm": 0.7362406849861145, "learning_rate": 6.375409705012392e-05, "loss": 1.7377, "step": 4538 }, { "epoch": 1.088663749175709, "grad_norm": 0.7997919917106628, "learning_rate": 6.373810856183548e-05, "loss": 1.7302, "step": 4540 }, { "epoch": 1.0891433367304117, "grad_norm": 0.7212596535682678, "learning_rate": 6.372212007354705e-05, "loss": 1.6876, "step": 4542 }, { "epoch": 1.0896229242851148, "grad_norm": 0.6244559288024902, "learning_rate": 6.370613158525862e-05, "loss": 1.7055, "step": 4544 }, { "epoch": 1.0901025118398178, "grad_norm": 0.7385662794113159, "learning_rate": 6.36901430969702e-05, "loss": 1.7458, "step": 4546 }, { "epoch": 1.0905820993945208, "grad_norm": 0.7338683009147644, "learning_rate": 6.367415460868176e-05, "loss": 1.735, "step": 4548 }, { "epoch": 1.0910616869492236, "grad_norm": 0.824506402015686, "learning_rate": 6.365816612039331e-05, "loss": 1.751, "step": 4550 }, { "epoch": 1.0915412745039266, "grad_norm": 0.806104838848114, "learning_rate": 6.364217763210488e-05, "loss": 1.7287, "step": 4552 }, { "epoch": 1.0920208620586296, "grad_norm": 0.8002044558525085, "learning_rate": 6.362618914381644e-05, "loss": 1.7296, "step": 4554 }, { "epoch": 1.0925004496133326, "grad_norm": 0.6430622339248657, "learning_rate": 6.361020065552802e-05, "loss": 1.7475, "step": 4556 }, { "epoch": 1.0929800371680356, "grad_norm": 0.7254200577735901, 
"learning_rate": 6.359421216723959e-05, "loss": 1.7428, "step": 4558 }, { "epoch": 1.0934596247227384, "grad_norm": 0.7203539609909058, "learning_rate": 6.357822367895115e-05, "loss": 1.7351, "step": 4560 }, { "epoch": 1.0939392122774414, "grad_norm": 0.6686127185821533, "learning_rate": 6.356223519066272e-05, "loss": 1.7162, "step": 4562 }, { "epoch": 1.0944187998321444, "grad_norm": 0.6446087956428528, "learning_rate": 6.354624670237429e-05, "loss": 1.673, "step": 4564 }, { "epoch": 1.0948983873868472, "grad_norm": 0.6905428171157837, "learning_rate": 6.353025821408585e-05, "loss": 1.7436, "step": 4566 }, { "epoch": 1.0953779749415502, "grad_norm": 0.7886167764663696, "learning_rate": 6.351426972579743e-05, "loss": 1.7104, "step": 4568 }, { "epoch": 1.0958575624962532, "grad_norm": 0.7387871742248535, "learning_rate": 6.3498281237509e-05, "loss": 1.7682, "step": 4570 }, { "epoch": 1.0963371500509562, "grad_norm": 0.717685878276825, "learning_rate": 6.348229274922056e-05, "loss": 1.6555, "step": 4572 }, { "epoch": 1.0968167376056592, "grad_norm": 0.6584152579307556, "learning_rate": 6.346630426093213e-05, "loss": 1.7102, "step": 4574 }, { "epoch": 1.097296325160362, "grad_norm": 0.7651646733283997, "learning_rate": 6.34503157726437e-05, "loss": 1.7405, "step": 4576 }, { "epoch": 1.097775912715065, "grad_norm": 0.6657854914665222, "learning_rate": 6.343432728435526e-05, "loss": 1.6836, "step": 4578 }, { "epoch": 1.098255500269768, "grad_norm": 0.7188881635665894, "learning_rate": 6.341833879606684e-05, "loss": 1.7015, "step": 4580 }, { "epoch": 1.098735087824471, "grad_norm": 0.6550967693328857, "learning_rate": 6.34023503077784e-05, "loss": 1.7036, "step": 4582 }, { "epoch": 1.0992146753791738, "grad_norm": 0.6782785654067993, "learning_rate": 6.338636181948997e-05, "loss": 1.7258, "step": 4584 }, { "epoch": 1.0996942629338768, "grad_norm": 0.7333585023880005, "learning_rate": 6.337037333120154e-05, "loss": 1.7037, "step": 4586 }, { "epoch": 1.1001738504885799, 
"grad_norm": 0.7684351801872253, "learning_rate": 6.33543848429131e-05, "loss": 1.725, "step": 4588 }, { "epoch": 1.1006534380432829, "grad_norm": 0.6743415594100952, "learning_rate": 6.333839635462467e-05, "loss": 1.7055, "step": 4590 }, { "epoch": 1.1011330255979856, "grad_norm": 0.7122042179107666, "learning_rate": 6.332240786633625e-05, "loss": 1.6839, "step": 4592 }, { "epoch": 1.1016126131526887, "grad_norm": 0.6784891486167908, "learning_rate": 6.330641937804781e-05, "loss": 1.6935, "step": 4594 }, { "epoch": 1.1020922007073917, "grad_norm": 0.7066177129745483, "learning_rate": 6.329043088975938e-05, "loss": 1.7456, "step": 4596 }, { "epoch": 1.1025717882620947, "grad_norm": 0.7803440690040588, "learning_rate": 6.327444240147094e-05, "loss": 1.7014, "step": 4598 }, { "epoch": 1.1030513758167975, "grad_norm": 0.691433310508728, "learning_rate": 6.325845391318251e-05, "loss": 1.6848, "step": 4600 }, { "epoch": 1.1035309633715005, "grad_norm": 0.7267906665802002, "learning_rate": 6.324246542489408e-05, "loss": 1.7016, "step": 4602 }, { "epoch": 1.1040105509262035, "grad_norm": 0.6324319243431091, "learning_rate": 6.322647693660565e-05, "loss": 1.709, "step": 4604 }, { "epoch": 1.1044901384809065, "grad_norm": 0.8127952218055725, "learning_rate": 6.321048844831722e-05, "loss": 1.7462, "step": 4606 }, { "epoch": 1.1049697260356093, "grad_norm": 0.679721474647522, "learning_rate": 6.319449996002879e-05, "loss": 1.7512, "step": 4608 }, { "epoch": 1.1054493135903123, "grad_norm": 0.7706676125526428, "learning_rate": 6.317851147174035e-05, "loss": 1.7409, "step": 4610 }, { "epoch": 1.1059289011450153, "grad_norm": 0.7034671902656555, "learning_rate": 6.316252298345192e-05, "loss": 1.694, "step": 4612 }, { "epoch": 1.1064084886997183, "grad_norm": 0.7045848965644836, "learning_rate": 6.31465344951635e-05, "loss": 1.7092, "step": 4614 }, { "epoch": 1.106888076254421, "grad_norm": 0.8075549602508545, "learning_rate": 6.313054600687506e-05, "loss": 1.7109, "step": 4616 
}, { "epoch": 1.107367663809124, "grad_norm": 0.7073401808738708, "learning_rate": 6.311455751858663e-05, "loss": 1.7838, "step": 4618 }, { "epoch": 1.1078472513638271, "grad_norm": 0.8403735160827637, "learning_rate": 6.30985690302982e-05, "loss": 1.7394, "step": 4620 }, { "epoch": 1.1083268389185301, "grad_norm": 0.7087125778198242, "learning_rate": 6.308258054200976e-05, "loss": 1.7415, "step": 4622 }, { "epoch": 1.1088064264732331, "grad_norm": 0.8912880420684814, "learning_rate": 6.306659205372132e-05, "loss": 1.7277, "step": 4624 }, { "epoch": 1.109286014027936, "grad_norm": 0.7419688105583191, "learning_rate": 6.305060356543289e-05, "loss": 1.7158, "step": 4626 }, { "epoch": 1.109765601582639, "grad_norm": 0.8332404494285583, "learning_rate": 6.303461507714446e-05, "loss": 1.7008, "step": 4628 }, { "epoch": 1.110245189137342, "grad_norm": 0.7866342663764954, "learning_rate": 6.301862658885602e-05, "loss": 1.7175, "step": 4630 }, { "epoch": 1.1107247766920447, "grad_norm": 0.7347488403320312, "learning_rate": 6.300263810056759e-05, "loss": 1.7185, "step": 4632 }, { "epoch": 1.1112043642467477, "grad_norm": 0.8284986615180969, "learning_rate": 6.298664961227915e-05, "loss": 1.7502, "step": 4634 }, { "epoch": 1.1116839518014507, "grad_norm": 0.7024006843566895, "learning_rate": 6.297066112399073e-05, "loss": 1.7342, "step": 4636 }, { "epoch": 1.1121635393561538, "grad_norm": 0.6544941067695618, "learning_rate": 6.29546726357023e-05, "loss": 1.7206, "step": 4638 }, { "epoch": 1.1126431269108568, "grad_norm": 0.6552453637123108, "learning_rate": 6.293868414741386e-05, "loss": 1.7005, "step": 4640 }, { "epoch": 1.1131227144655595, "grad_norm": 0.6397868394851685, "learning_rate": 6.292269565912543e-05, "loss": 1.7411, "step": 4642 }, { "epoch": 1.1136023020202626, "grad_norm": 0.6436076164245605, "learning_rate": 6.2906707170837e-05, "loss": 1.6973, "step": 4644 }, { "epoch": 1.1140818895749656, "grad_norm": 0.782526433467865, "learning_rate": 
6.289071868254856e-05, "loss": 1.7491, "step": 4646 }, { "epoch": 1.1145614771296686, "grad_norm": 0.8910605907440186, "learning_rate": 6.287473019426014e-05, "loss": 1.7076, "step": 4648 }, { "epoch": 1.1150410646843714, "grad_norm": 0.7535091042518616, "learning_rate": 6.28587417059717e-05, "loss": 1.7001, "step": 4650 }, { "epoch": 1.1155206522390744, "grad_norm": 0.7125601172447205, "learning_rate": 6.284275321768327e-05, "loss": 1.7253, "step": 4652 }, { "epoch": 1.1160002397937774, "grad_norm": 0.6817425489425659, "learning_rate": 6.282676472939484e-05, "loss": 1.6857, "step": 4654 }, { "epoch": 1.1164798273484804, "grad_norm": 0.6903069615364075, "learning_rate": 6.28107762411064e-05, "loss": 1.7351, "step": 4656 }, { "epoch": 1.1169594149031832, "grad_norm": 0.7218087911605835, "learning_rate": 6.279478775281797e-05, "loss": 1.744, "step": 4658 }, { "epoch": 1.1174390024578862, "grad_norm": 0.7162546515464783, "learning_rate": 6.277879926452955e-05, "loss": 1.7462, "step": 4660 }, { "epoch": 1.1179185900125892, "grad_norm": 0.68385249376297, "learning_rate": 6.276281077624111e-05, "loss": 1.7449, "step": 4662 }, { "epoch": 1.1183981775672922, "grad_norm": 0.7532578110694885, "learning_rate": 6.274682228795268e-05, "loss": 1.7605, "step": 4664 }, { "epoch": 1.118877765121995, "grad_norm": 0.7097329497337341, "learning_rate": 6.273083379966425e-05, "loss": 1.6793, "step": 4666 }, { "epoch": 1.119357352676698, "grad_norm": 0.7093682289123535, "learning_rate": 6.271484531137581e-05, "loss": 1.7733, "step": 4668 }, { "epoch": 1.119836940231401, "grad_norm": 0.7384554147720337, "learning_rate": 6.269885682308738e-05, "loss": 1.7607, "step": 4670 }, { "epoch": 1.120316527786104, "grad_norm": 0.6612635254859924, "learning_rate": 6.268286833479896e-05, "loss": 1.7017, "step": 4672 }, { "epoch": 1.1207961153408068, "grad_norm": 0.700837254524231, "learning_rate": 6.266687984651052e-05, "loss": 1.7278, "step": 4674 }, { "epoch": 1.1212757028955098, "grad_norm": 
0.7064563632011414, "learning_rate": 6.265089135822209e-05, "loss": 1.7164, "step": 4676 }, { "epoch": 1.1217552904502128, "grad_norm": 0.6974341869354248, "learning_rate": 6.263490286993365e-05, "loss": 1.7154, "step": 4678 }, { "epoch": 1.1222348780049158, "grad_norm": 0.6816765666007996, "learning_rate": 6.261891438164522e-05, "loss": 1.7451, "step": 4680 }, { "epoch": 1.1227144655596186, "grad_norm": 0.6444795727729797, "learning_rate": 6.260292589335678e-05, "loss": 1.739, "step": 4682 }, { "epoch": 1.1231940531143216, "grad_norm": 0.6648075580596924, "learning_rate": 6.258693740506836e-05, "loss": 1.7018, "step": 4684 }, { "epoch": 1.1236736406690246, "grad_norm": 0.734429121017456, "learning_rate": 6.257094891677993e-05, "loss": 1.7078, "step": 4686 }, { "epoch": 1.1241532282237277, "grad_norm": 0.7880716323852539, "learning_rate": 6.25549604284915e-05, "loss": 1.745, "step": 4688 }, { "epoch": 1.1246328157784307, "grad_norm": 0.618289053440094, "learning_rate": 6.253897194020306e-05, "loss": 1.734, "step": 4690 }, { "epoch": 1.1251124033331334, "grad_norm": 0.8015515208244324, "learning_rate": 6.252298345191463e-05, "loss": 1.7322, "step": 4692 }, { "epoch": 1.1255919908878365, "grad_norm": 0.8170024156570435, "learning_rate": 6.250699496362619e-05, "loss": 1.7486, "step": 4694 }, { "epoch": 1.1260715784425395, "grad_norm": 0.6493185758590698, "learning_rate": 6.249100647533776e-05, "loss": 1.6946, "step": 4696 }, { "epoch": 1.1265511659972423, "grad_norm": 0.890475332736969, "learning_rate": 6.247501798704932e-05, "loss": 1.7109, "step": 4698 }, { "epoch": 1.1270307535519453, "grad_norm": 0.6174802780151367, "learning_rate": 6.245902949876089e-05, "loss": 1.7118, "step": 4700 }, { "epoch": 1.1275103411066483, "grad_norm": 0.6479105353355408, "learning_rate": 6.244304101047245e-05, "loss": 1.7284, "step": 4702 }, { "epoch": 1.1279899286613513, "grad_norm": 0.6918498873710632, "learning_rate": 6.242705252218402e-05, "loss": 1.6891, "step": 4704 }, { "epoch": 
1.1284695162160543, "grad_norm": 0.753836452960968, "learning_rate": 6.24110640338956e-05, "loss": 1.675, "step": 4706 }, { "epoch": 1.128949103770757, "grad_norm": 0.7029423713684082, "learning_rate": 6.239507554560717e-05, "loss": 1.7097, "step": 4708 }, { "epoch": 1.12942869132546, "grad_norm": 0.6383736729621887, "learning_rate": 6.237908705731873e-05, "loss": 1.7219, "step": 4710 }, { "epoch": 1.129908278880163, "grad_norm": 0.9248202443122864, "learning_rate": 6.23630985690303e-05, "loss": 1.6982, "step": 4712 }, { "epoch": 1.130387866434866, "grad_norm": 0.9409369230270386, "learning_rate": 6.234711008074186e-05, "loss": 1.7513, "step": 4714 }, { "epoch": 1.130867453989569, "grad_norm": 0.8575685024261475, "learning_rate": 6.233112159245343e-05, "loss": 1.7143, "step": 4716 }, { "epoch": 1.131347041544272, "grad_norm": 0.6317620873451233, "learning_rate": 6.231513310416501e-05, "loss": 1.6762, "step": 4718 }, { "epoch": 1.131826629098975, "grad_norm": 0.7595640420913696, "learning_rate": 6.229914461587657e-05, "loss": 1.7036, "step": 4720 }, { "epoch": 1.132306216653678, "grad_norm": 0.9840017557144165, "learning_rate": 6.228315612758814e-05, "loss": 1.7223, "step": 4722 }, { "epoch": 1.1327858042083807, "grad_norm": 0.6589813232421875, "learning_rate": 6.22671676392997e-05, "loss": 1.7123, "step": 4724 }, { "epoch": 1.1332653917630837, "grad_norm": 0.7293727397918701, "learning_rate": 6.225117915101127e-05, "loss": 1.7207, "step": 4726 }, { "epoch": 1.1337449793177867, "grad_norm": 0.6597088575363159, "learning_rate": 6.223519066272284e-05, "loss": 1.6654, "step": 4728 }, { "epoch": 1.1342245668724897, "grad_norm": 0.7139222025871277, "learning_rate": 6.221920217443442e-05, "loss": 1.7285, "step": 4730 }, { "epoch": 1.1347041544271925, "grad_norm": 0.7042369246482849, "learning_rate": 6.220321368614598e-05, "loss": 1.7091, "step": 4732 }, { "epoch": 1.1351837419818955, "grad_norm": 0.689056396484375, "learning_rate": 6.218722519785755e-05, "loss": 1.7287, 
"step": 4734 }, { "epoch": 1.1356633295365985, "grad_norm": 0.7793980836868286, "learning_rate": 6.217123670956911e-05, "loss": 1.7082, "step": 4736 }, { "epoch": 1.1361429170913016, "grad_norm": 0.8730711340904236, "learning_rate": 6.215524822128068e-05, "loss": 1.7298, "step": 4738 }, { "epoch": 1.1366225046460043, "grad_norm": 0.6291846632957458, "learning_rate": 6.213925973299224e-05, "loss": 1.6677, "step": 4740 }, { "epoch": 1.1371020922007073, "grad_norm": 0.7937508225440979, "learning_rate": 6.212327124470382e-05, "loss": 1.7051, "step": 4742 }, { "epoch": 1.1375816797554104, "grad_norm": 0.6442207098007202, "learning_rate": 6.210728275641539e-05, "loss": 1.7299, "step": 4744 }, { "epoch": 1.1380612673101134, "grad_norm": 0.6772178411483765, "learning_rate": 6.209129426812695e-05, "loss": 1.7229, "step": 4746 }, { "epoch": 1.1385408548648162, "grad_norm": 0.663411021232605, "learning_rate": 6.207530577983852e-05, "loss": 1.6963, "step": 4748 }, { "epoch": 1.1390204424195192, "grad_norm": 0.6589503884315491, "learning_rate": 6.205931729155009e-05, "loss": 1.7407, "step": 4750 }, { "epoch": 1.1395000299742222, "grad_norm": 0.7092217206954956, "learning_rate": 6.204332880326165e-05, "loss": 1.6281, "step": 4752 }, { "epoch": 1.1399796175289252, "grad_norm": 0.6914910674095154, "learning_rate": 6.202734031497323e-05, "loss": 1.7172, "step": 4754 }, { "epoch": 1.1404592050836282, "grad_norm": 0.7197555303573608, "learning_rate": 6.20113518266848e-05, "loss": 1.7325, "step": 4756 }, { "epoch": 1.140938792638331, "grad_norm": 0.837527871131897, "learning_rate": 6.199536333839636e-05, "loss": 1.6953, "step": 4758 }, { "epoch": 1.141418380193034, "grad_norm": 0.7859463691711426, "learning_rate": 6.197937485010793e-05, "loss": 1.7259, "step": 4760 }, { "epoch": 1.141897967747737, "grad_norm": 0.7741965055465698, "learning_rate": 6.19633863618195e-05, "loss": 1.7113, "step": 4762 }, { "epoch": 1.1423775553024398, "grad_norm": 0.851879894733429, "learning_rate": 
6.194739787353106e-05, "loss": 1.7038, "step": 4764 }, { "epoch": 1.1428571428571428, "grad_norm": 0.7314482927322388, "learning_rate": 6.193140938524264e-05, "loss": 1.727, "step": 4766 }, { "epoch": 1.1433367304118458, "grad_norm": 0.7777717113494873, "learning_rate": 6.191542089695419e-05, "loss": 1.6862, "step": 4768 }, { "epoch": 1.1438163179665488, "grad_norm": 0.6634988784790039, "learning_rate": 6.189943240866576e-05, "loss": 1.7107, "step": 4770 }, { "epoch": 1.1442959055212518, "grad_norm": 0.5980605483055115, "learning_rate": 6.188344392037732e-05, "loss": 1.6972, "step": 4772 }, { "epoch": 1.1447754930759546, "grad_norm": 0.798463761806488, "learning_rate": 6.186745543208889e-05, "loss": 1.7076, "step": 4774 }, { "epoch": 1.1452550806306576, "grad_norm": 0.7322179675102234, "learning_rate": 6.185146694380047e-05, "loss": 1.6647, "step": 4776 }, { "epoch": 1.1457346681853606, "grad_norm": 0.7975754737854004, "learning_rate": 6.183547845551203e-05, "loss": 1.7223, "step": 4778 }, { "epoch": 1.1462142557400636, "grad_norm": 0.7841323614120483, "learning_rate": 6.18194899672236e-05, "loss": 1.7344, "step": 4780 }, { "epoch": 1.1466938432947664, "grad_norm": 0.7250811457633972, "learning_rate": 6.180350147893516e-05, "loss": 1.6843, "step": 4782 }, { "epoch": 1.1471734308494694, "grad_norm": 0.721772313117981, "learning_rate": 6.178751299064673e-05, "loss": 1.7218, "step": 4784 }, { "epoch": 1.1476530184041724, "grad_norm": 0.7378328442573547, "learning_rate": 6.17715245023583e-05, "loss": 1.6866, "step": 4786 }, { "epoch": 1.1481326059588755, "grad_norm": 0.6708845496177673, "learning_rate": 6.175553601406987e-05, "loss": 1.6958, "step": 4788 }, { "epoch": 1.1486121935135782, "grad_norm": 0.6357711553573608, "learning_rate": 6.173954752578144e-05, "loss": 1.6738, "step": 4790 }, { "epoch": 1.1490917810682812, "grad_norm": 0.7108901143074036, "learning_rate": 6.1723559037493e-05, "loss": 1.7027, "step": 4792 }, { "epoch": 1.1495713686229843, "grad_norm": 
0.7714745402336121, "learning_rate": 6.170757054920457e-05, "loss": 1.7267, "step": 4794 }, { "epoch": 1.1500509561776873, "grad_norm": 0.7255046367645264, "learning_rate": 6.169158206091614e-05, "loss": 1.7317, "step": 4796 }, { "epoch": 1.15053054373239, "grad_norm": 0.7257003784179688, "learning_rate": 6.16755935726277e-05, "loss": 1.7067, "step": 4798 }, { "epoch": 1.151010131287093, "grad_norm": 0.7384042143821716, "learning_rate": 6.165960508433928e-05, "loss": 1.7211, "step": 4800 }, { "epoch": 1.151010131287093, "eval_loss": 1.7552478313446045, "eval_runtime": 331.1657, "eval_samples_per_second": 402.967, "eval_steps_per_second": 12.595, "step": 4800 }, { "epoch": 1.151489718841796, "grad_norm": 0.6760810613632202, "learning_rate": 6.164361659605085e-05, "loss": 1.6686, "step": 4802 }, { "epoch": 1.151969306396499, "grad_norm": 0.8734385371208191, "learning_rate": 6.162762810776241e-05, "loss": 1.6887, "step": 4804 }, { "epoch": 1.1524488939512019, "grad_norm": 0.7974105477333069, "learning_rate": 6.161163961947398e-05, "loss": 1.6907, "step": 4806 }, { "epoch": 1.1529284815059049, "grad_norm": 0.7362575531005859, "learning_rate": 6.159565113118555e-05, "loss": 1.7178, "step": 4808 }, { "epoch": 1.1534080690606079, "grad_norm": 0.8543595671653748, "learning_rate": 6.157966264289712e-05, "loss": 1.7103, "step": 4810 }, { "epoch": 1.153887656615311, "grad_norm": 0.688814640045166, "learning_rate": 6.156367415460869e-05, "loss": 1.7003, "step": 4812 }, { "epoch": 1.1543672441700137, "grad_norm": 0.9102094769477844, "learning_rate": 6.154768566632026e-05, "loss": 1.7405, "step": 4814 }, { "epoch": 1.1548468317247167, "grad_norm": 0.7113800048828125, "learning_rate": 6.153169717803182e-05, "loss": 1.7099, "step": 4816 }, { "epoch": 1.1553264192794197, "grad_norm": 0.7126806378364563, "learning_rate": 6.151570868974339e-05, "loss": 1.7339, "step": 4818 }, { "epoch": 1.1558060068341227, "grad_norm": 0.6434633731842041, "learning_rate": 6.149972020145495e-05, 
"loss": 1.6866, "step": 4820 }, { "epoch": 1.1562855943888257, "grad_norm": 0.6139534115791321, "learning_rate": 6.148373171316653e-05, "loss": 1.7463, "step": 4822 }, { "epoch": 1.1567651819435285, "grad_norm": 0.6568921208381653, "learning_rate": 6.14677432248781e-05, "loss": 1.702, "step": 4824 }, { "epoch": 1.1572447694982315, "grad_norm": 0.7479694485664368, "learning_rate": 6.145175473658966e-05, "loss": 1.7134, "step": 4826 }, { "epoch": 1.1577243570529345, "grad_norm": 0.625961184501648, "learning_rate": 6.143576624830123e-05, "loss": 1.6892, "step": 4828 }, { "epoch": 1.1582039446076373, "grad_norm": 0.7195528149604797, "learning_rate": 6.14197777600128e-05, "loss": 1.6804, "step": 4830 }, { "epoch": 1.1586835321623403, "grad_norm": 0.6392335891723633, "learning_rate": 6.140378927172436e-05, "loss": 1.6786, "step": 4832 }, { "epoch": 1.1591631197170433, "grad_norm": 0.7118913531303406, "learning_rate": 6.138780078343594e-05, "loss": 1.7407, "step": 4834 }, { "epoch": 1.1596427072717463, "grad_norm": 0.6857004165649414, "learning_rate": 6.13718122951475e-05, "loss": 1.6911, "step": 4836 }, { "epoch": 1.1601222948264494, "grad_norm": 0.6446318030357361, "learning_rate": 6.135582380685907e-05, "loss": 1.6816, "step": 4838 }, { "epoch": 1.1606018823811521, "grad_norm": 0.7431190013885498, "learning_rate": 6.133983531857064e-05, "loss": 1.6896, "step": 4840 }, { "epoch": 1.1610814699358551, "grad_norm": 0.6484420895576477, "learning_rate": 6.132384683028219e-05, "loss": 1.6909, "step": 4842 }, { "epoch": 1.1615610574905582, "grad_norm": 0.7101963758468628, "learning_rate": 6.130785834199377e-05, "loss": 1.7027, "step": 4844 }, { "epoch": 1.1620406450452612, "grad_norm": 0.8890482783317566, "learning_rate": 6.129186985370533e-05, "loss": 1.7259, "step": 4846 }, { "epoch": 1.162520232599964, "grad_norm": 0.7744002938270569, "learning_rate": 6.12758813654169e-05, "loss": 1.7101, "step": 4848 }, { "epoch": 1.162999820154667, "grad_norm": 0.7358707189559937, 
"learning_rate": 6.125989287712847e-05, "loss": 1.722, "step": 4850 }, { "epoch": 1.16347940770937, "grad_norm": 0.6585420370101929, "learning_rate": 6.124390438884003e-05, "loss": 1.6536, "step": 4852 }, { "epoch": 1.163958995264073, "grad_norm": 0.6523721814155579, "learning_rate": 6.12279159005516e-05, "loss": 1.6968, "step": 4854 }, { "epoch": 1.1644385828187758, "grad_norm": 0.7298158407211304, "learning_rate": 6.121192741226318e-05, "loss": 1.737, "step": 4856 }, { "epoch": 1.1649181703734788, "grad_norm": 0.6012827157974243, "learning_rate": 6.119593892397474e-05, "loss": 1.7666, "step": 4858 }, { "epoch": 1.1653977579281818, "grad_norm": 0.651386022567749, "learning_rate": 6.117995043568631e-05, "loss": 1.7475, "step": 4860 }, { "epoch": 1.1658773454828848, "grad_norm": 0.7391490340232849, "learning_rate": 6.116396194739787e-05, "loss": 1.6691, "step": 4862 }, { "epoch": 1.1663569330375876, "grad_norm": 0.8554806113243103, "learning_rate": 6.114797345910944e-05, "loss": 1.7446, "step": 4864 }, { "epoch": 1.1668365205922906, "grad_norm": 0.7300620079040527, "learning_rate": 6.1131984970821e-05, "loss": 1.6672, "step": 4866 }, { "epoch": 1.1673161081469936, "grad_norm": 0.667506217956543, "learning_rate": 6.111599648253258e-05, "loss": 1.7655, "step": 4868 }, { "epoch": 1.1677956957016966, "grad_norm": 0.822966992855072, "learning_rate": 6.110000799424415e-05, "loss": 1.7354, "step": 4870 }, { "epoch": 1.1682752832563994, "grad_norm": 0.7643373012542725, "learning_rate": 6.108401950595572e-05, "loss": 1.6635, "step": 4872 }, { "epoch": 1.1687548708111024, "grad_norm": 0.6667726039886475, "learning_rate": 6.106803101766728e-05, "loss": 1.6849, "step": 4874 }, { "epoch": 1.1692344583658054, "grad_norm": 0.6724462509155273, "learning_rate": 6.105204252937885e-05, "loss": 1.7597, "step": 4876 }, { "epoch": 1.1697140459205084, "grad_norm": 0.6608595848083496, "learning_rate": 6.103605404109041e-05, "loss": 1.7553, "step": 4878 }, { "epoch": 1.1701936334752112, 
"grad_norm": 0.7224385142326355, "learning_rate": 6.1020065552801985e-05, "loss": 1.7292, "step": 4880 }, { "epoch": 1.1706732210299142, "grad_norm": 0.7194795608520508, "learning_rate": 6.100407706451355e-05, "loss": 1.6969, "step": 4882 }, { "epoch": 1.1711528085846172, "grad_norm": 0.6628981232643127, "learning_rate": 6.098808857622512e-05, "loss": 1.6617, "step": 4884 }, { "epoch": 1.1716323961393202, "grad_norm": 0.7371034622192383, "learning_rate": 6.097210008793669e-05, "loss": 1.7096, "step": 4886 }, { "epoch": 1.1721119836940233, "grad_norm": 0.7591552734375, "learning_rate": 6.0956111599648255e-05, "loss": 1.756, "step": 4888 }, { "epoch": 1.172591571248726, "grad_norm": 0.7261850833892822, "learning_rate": 6.094012311135983e-05, "loss": 1.7177, "step": 4890 }, { "epoch": 1.173071158803429, "grad_norm": 0.8448691964149475, "learning_rate": 6.092413462307139e-05, "loss": 1.6814, "step": 4892 }, { "epoch": 1.173550746358132, "grad_norm": 0.6818679571151733, "learning_rate": 6.090814613478296e-05, "loss": 1.7114, "step": 4894 }, { "epoch": 1.1740303339128348, "grad_norm": 0.6805614829063416, "learning_rate": 6.089215764649453e-05, "loss": 1.6834, "step": 4896 }, { "epoch": 1.1745099214675379, "grad_norm": 0.7612377405166626, "learning_rate": 6.08761691582061e-05, "loss": 1.7691, "step": 4898 }, { "epoch": 1.1749895090222409, "grad_norm": 0.7170482873916626, "learning_rate": 6.086018066991766e-05, "loss": 1.76, "step": 4900 }, { "epoch": 1.1754690965769439, "grad_norm": 0.7096856236457825, "learning_rate": 6.0844192181629235e-05, "loss": 1.7098, "step": 4902 }, { "epoch": 1.1759486841316469, "grad_norm": 0.6550337076187134, "learning_rate": 6.08282036933408e-05, "loss": 1.7128, "step": 4904 }, { "epoch": 1.1764282716863497, "grad_norm": 0.7104032635688782, "learning_rate": 6.0812215205052366e-05, "loss": 1.7504, "step": 4906 }, { "epoch": 1.1769078592410527, "grad_norm": 0.7091881036758423, "learning_rate": 6.079622671676394e-05, "loss": 1.6915, "step": 4908 
}, { "epoch": 1.1773874467957557, "grad_norm": 0.7774102687835693, "learning_rate": 6.0780238228475505e-05, "loss": 1.6691, "step": 4910 }, { "epoch": 1.1778670343504587, "grad_norm": 0.6729702949523926, "learning_rate": 6.076424974018707e-05, "loss": 1.7483, "step": 4912 }, { "epoch": 1.1783466219051615, "grad_norm": 0.633075475692749, "learning_rate": 6.074826125189863e-05, "loss": 1.7264, "step": 4914 }, { "epoch": 1.1788262094598645, "grad_norm": 0.7202550172805786, "learning_rate": 6.07322727636102e-05, "loss": 1.6922, "step": 4916 }, { "epoch": 1.1793057970145675, "grad_norm": 0.6954649686813354, "learning_rate": 6.071628427532177e-05, "loss": 1.6898, "step": 4918 }, { "epoch": 1.1797853845692705, "grad_norm": 0.9193169474601746, "learning_rate": 6.070029578703333e-05, "loss": 1.7052, "step": 4920 }, { "epoch": 1.1802649721239733, "grad_norm": 0.7124171257019043, "learning_rate": 6.0684307298744906e-05, "loss": 1.6613, "step": 4922 }, { "epoch": 1.1807445596786763, "grad_norm": 0.6504312753677368, "learning_rate": 6.066831881045647e-05, "loss": 1.7219, "step": 4924 }, { "epoch": 1.1812241472333793, "grad_norm": 0.719771146774292, "learning_rate": 6.065233032216804e-05, "loss": 1.7428, "step": 4926 }, { "epoch": 1.1817037347880823, "grad_norm": 0.7355853915214539, "learning_rate": 6.063634183387961e-05, "loss": 1.7146, "step": 4928 }, { "epoch": 1.1821833223427851, "grad_norm": 0.6585120558738708, "learning_rate": 6.0620353345591175e-05, "loss": 1.7158, "step": 4930 }, { "epoch": 1.1826629098974881, "grad_norm": 0.6757897138595581, "learning_rate": 6.060436485730274e-05, "loss": 1.7024, "step": 4932 }, { "epoch": 1.1831424974521911, "grad_norm": 0.7650914788246155, "learning_rate": 6.058837636901431e-05, "loss": 1.7333, "step": 4934 }, { "epoch": 1.1836220850068941, "grad_norm": 0.744437038898468, "learning_rate": 6.057238788072588e-05, "loss": 1.7196, "step": 4936 }, { "epoch": 1.184101672561597, "grad_norm": 0.6379473805427551, "learning_rate": 
6.0556399392437445e-05, "loss": 1.6646, "step": 4938 }, { "epoch": 1.1845812601163, "grad_norm": 0.7521306276321411, "learning_rate": 6.054041090414902e-05, "loss": 1.709, "step": 4940 }, { "epoch": 1.185060847671003, "grad_norm": 0.7326486706733704, "learning_rate": 6.052442241586058e-05, "loss": 1.7121, "step": 4942 }, { "epoch": 1.185540435225706, "grad_norm": 0.65378338098526, "learning_rate": 6.050843392757215e-05, "loss": 1.7794, "step": 4944 }, { "epoch": 1.1860200227804087, "grad_norm": 0.6486876606941223, "learning_rate": 6.049244543928372e-05, "loss": 1.6336, "step": 4946 }, { "epoch": 1.1864996103351118, "grad_norm": 0.7055097222328186, "learning_rate": 6.047645695099529e-05, "loss": 1.6616, "step": 4948 }, { "epoch": 1.1869791978898148, "grad_norm": 0.6846367120742798, "learning_rate": 6.046046846270685e-05, "loss": 1.696, "step": 4950 }, { "epoch": 1.1874587854445178, "grad_norm": 0.7881131768226624, "learning_rate": 6.0444479974418425e-05, "loss": 1.7066, "step": 4952 }, { "epoch": 1.1879383729992208, "grad_norm": 0.696333646774292, "learning_rate": 6.042849148612999e-05, "loss": 1.7676, "step": 4954 }, { "epoch": 1.1884179605539236, "grad_norm": 0.6624189019203186, "learning_rate": 6.0412502997841556e-05, "loss": 1.7011, "step": 4956 }, { "epoch": 1.1888975481086266, "grad_norm": 0.6591887474060059, "learning_rate": 6.039651450955313e-05, "loss": 1.7058, "step": 4958 }, { "epoch": 1.1893771356633296, "grad_norm": 0.6829979419708252, "learning_rate": 6.0380526021264694e-05, "loss": 1.7026, "step": 4960 }, { "epoch": 1.1898567232180324, "grad_norm": 0.7552392482757568, "learning_rate": 6.036453753297626e-05, "loss": 1.6656, "step": 4962 }, { "epoch": 1.1903363107727354, "grad_norm": 0.6782222390174866, "learning_rate": 6.034854904468783e-05, "loss": 1.7232, "step": 4964 }, { "epoch": 1.1908158983274384, "grad_norm": 0.6753552556037903, "learning_rate": 6.03325605563994e-05, "loss": 1.7052, "step": 4966 }, { "epoch": 1.1912954858821414, "grad_norm": 
0.6579861640930176, "learning_rate": 6.0316572068110964e-05, "loss": 1.655, "step": 4968 }, { "epoch": 1.1917750734368444, "grad_norm": 0.6770998239517212, "learning_rate": 6.0300583579822537e-05, "loss": 1.714, "step": 4970 }, { "epoch": 1.1922546609915472, "grad_norm": 0.7375226616859436, "learning_rate": 6.02845950915341e-05, "loss": 1.6575, "step": 4972 }, { "epoch": 1.1927342485462502, "grad_norm": 0.733205258846283, "learning_rate": 6.026860660324567e-05, "loss": 1.773, "step": 4974 }, { "epoch": 1.1932138361009532, "grad_norm": 0.756797194480896, "learning_rate": 6.025261811495724e-05, "loss": 1.7759, "step": 4976 }, { "epoch": 1.1936934236556562, "grad_norm": 0.872608482837677, "learning_rate": 6.0236629626668806e-05, "loss": 1.6813, "step": 4978 }, { "epoch": 1.194173011210359, "grad_norm": 0.6989811062812805, "learning_rate": 6.022064113838037e-05, "loss": 1.7361, "step": 4980 }, { "epoch": 1.194652598765062, "grad_norm": 0.6450750827789307, "learning_rate": 6.0204652650091944e-05, "loss": 1.6926, "step": 4982 }, { "epoch": 1.195132186319765, "grad_norm": 0.6883386969566345, "learning_rate": 6.018866416180351e-05, "loss": 1.6829, "step": 4984 }, { "epoch": 1.195611773874468, "grad_norm": 0.6350464224815369, "learning_rate": 6.017267567351507e-05, "loss": 1.7289, "step": 4986 }, { "epoch": 1.1960913614291708, "grad_norm": 0.7074738144874573, "learning_rate": 6.0156687185226635e-05, "loss": 1.6506, "step": 4988 }, { "epoch": 1.1965709489838738, "grad_norm": 0.7624898552894592, "learning_rate": 6.01406986969382e-05, "loss": 1.7143, "step": 4990 }, { "epoch": 1.1970505365385768, "grad_norm": 0.6538412570953369, "learning_rate": 6.012471020864977e-05, "loss": 1.7435, "step": 4992 }, { "epoch": 1.1975301240932799, "grad_norm": 0.6548634767532349, "learning_rate": 6.010872172036134e-05, "loss": 1.7161, "step": 4994 }, { "epoch": 1.1980097116479826, "grad_norm": 0.7312964200973511, "learning_rate": 6.0092733232072904e-05, "loss": 1.7135, "step": 4996 }, { 
"epoch": 1.1984892992026857, "grad_norm": 0.7165902256965637, "learning_rate": 6.007674474378448e-05, "loss": 1.7034, "step": 4998 }, { "epoch": 1.1989688867573887, "grad_norm": 0.7756792306900024, "learning_rate": 6.006075625549604e-05, "loss": 1.7254, "step": 5000 }, { "epoch": 1.1994484743120917, "grad_norm": 0.9170318245887756, "learning_rate": 6.004476776720761e-05, "loss": 1.731, "step": 5002 }, { "epoch": 1.1999280618667945, "grad_norm": 0.6687180399894714, "learning_rate": 6.002877927891918e-05, "loss": 1.6996, "step": 5004 }, { "epoch": 1.2004076494214975, "grad_norm": 0.6185319423675537, "learning_rate": 6.0012790790630746e-05, "loss": 1.7247, "step": 5006 }, { "epoch": 1.2008872369762005, "grad_norm": 0.692065954208374, "learning_rate": 5.999680230234231e-05, "loss": 1.6951, "step": 5008 }, { "epoch": 1.2013668245309035, "grad_norm": 0.8140860795974731, "learning_rate": 5.9980813814053884e-05, "loss": 1.7209, "step": 5010 }, { "epoch": 1.2018464120856063, "grad_norm": 0.6510762572288513, "learning_rate": 5.996482532576545e-05, "loss": 1.7146, "step": 5012 }, { "epoch": 1.2023259996403093, "grad_norm": 0.7227585315704346, "learning_rate": 5.9948836837477016e-05, "loss": 1.6738, "step": 5014 }, { "epoch": 1.2028055871950123, "grad_norm": 0.6502881646156311, "learning_rate": 5.993284834918859e-05, "loss": 1.6827, "step": 5016 }, { "epoch": 1.2032851747497153, "grad_norm": 0.8377312421798706, "learning_rate": 5.9916859860900154e-05, "loss": 1.778, "step": 5018 }, { "epoch": 1.2037647623044183, "grad_norm": 0.5966493487358093, "learning_rate": 5.990087137261172e-05, "loss": 1.6908, "step": 5020 }, { "epoch": 1.204244349859121, "grad_norm": 0.7361922264099121, "learning_rate": 5.988488288432329e-05, "loss": 1.6863, "step": 5022 }, { "epoch": 1.204723937413824, "grad_norm": 0.7974795699119568, "learning_rate": 5.986889439603486e-05, "loss": 1.7275, "step": 5024 }, { "epoch": 1.2052035249685271, "grad_norm": 0.6648057699203491, "learning_rate": 
5.9852905907746424e-05, "loss": 1.7359, "step": 5026 }, { "epoch": 1.20568311252323, "grad_norm": 0.6651880741119385, "learning_rate": 5.9836917419457996e-05, "loss": 1.7375, "step": 5028 }, { "epoch": 1.206162700077933, "grad_norm": 0.6897217631340027, "learning_rate": 5.982092893116956e-05, "loss": 1.6419, "step": 5030 }, { "epoch": 1.206642287632636, "grad_norm": 0.7468763589859009, "learning_rate": 5.980494044288113e-05, "loss": 1.6746, "step": 5032 }, { "epoch": 1.207121875187339, "grad_norm": 0.6611817479133606, "learning_rate": 5.97889519545927e-05, "loss": 1.7111, "step": 5034 }, { "epoch": 1.207601462742042, "grad_norm": 0.6798948049545288, "learning_rate": 5.9772963466304266e-05, "loss": 1.7251, "step": 5036 }, { "epoch": 1.2080810502967447, "grad_norm": 0.639279842376709, "learning_rate": 5.975697497801583e-05, "loss": 1.7071, "step": 5038 }, { "epoch": 1.2085606378514477, "grad_norm": 0.5957537293434143, "learning_rate": 5.9740986489727404e-05, "loss": 1.6628, "step": 5040 }, { "epoch": 1.2090402254061507, "grad_norm": 0.7079448103904724, "learning_rate": 5.972499800143897e-05, "loss": 1.6871, "step": 5042 }, { "epoch": 1.2095198129608538, "grad_norm": 0.670529305934906, "learning_rate": 5.9709009513150535e-05, "loss": 1.7961, "step": 5044 }, { "epoch": 1.2099994005155565, "grad_norm": 0.6816782355308533, "learning_rate": 5.969302102486211e-05, "loss": 1.7176, "step": 5046 }, { "epoch": 1.2104789880702596, "grad_norm": 0.6747205257415771, "learning_rate": 5.967703253657367e-05, "loss": 1.7521, "step": 5048 }, { "epoch": 1.2109585756249626, "grad_norm": 0.7469954490661621, "learning_rate": 5.966104404828524e-05, "loss": 1.7223, "step": 5050 }, { "epoch": 1.2114381631796656, "grad_norm": 0.6502618789672852, "learning_rate": 5.964505555999681e-05, "loss": 1.6533, "step": 5052 }, { "epoch": 1.2119177507343684, "grad_norm": 0.6655495166778564, "learning_rate": 5.962906707170838e-05, "loss": 1.7093, "step": 5054 }, { "epoch": 1.2123973382890714, "grad_norm": 
0.6593751907348633, "learning_rate": 5.961307858341994e-05, "loss": 1.709, "step": 5056 }, { "epoch": 1.2128769258437744, "grad_norm": 0.6440466642379761, "learning_rate": 5.9597090095131515e-05, "loss": 1.704, "step": 5058 }, { "epoch": 1.2133565133984774, "grad_norm": 0.618308424949646, "learning_rate": 5.958110160684307e-05, "loss": 1.6572, "step": 5060 }, { "epoch": 1.2138361009531802, "grad_norm": 0.6396142244338989, "learning_rate": 5.956511311855464e-05, "loss": 1.6887, "step": 5062 }, { "epoch": 1.2143156885078832, "grad_norm": 0.697784423828125, "learning_rate": 5.9549124630266206e-05, "loss": 1.7481, "step": 5064 }, { "epoch": 1.2147952760625862, "grad_norm": 0.7022085785865784, "learning_rate": 5.953313614197777e-05, "loss": 1.7123, "step": 5066 }, { "epoch": 1.2152748636172892, "grad_norm": 0.7095388770103455, "learning_rate": 5.9517147653689344e-05, "loss": 1.7169, "step": 5068 }, { "epoch": 1.215754451171992, "grad_norm": 0.6718257665634155, "learning_rate": 5.950115916540091e-05, "loss": 1.6718, "step": 5070 }, { "epoch": 1.216234038726695, "grad_norm": 0.7242958545684814, "learning_rate": 5.9485170677112475e-05, "loss": 1.7126, "step": 5072 }, { "epoch": 1.216713626281398, "grad_norm": 0.6809329390525818, "learning_rate": 5.946918218882405e-05, "loss": 1.6834, "step": 5074 }, { "epoch": 1.217193213836101, "grad_norm": 0.8401723504066467, "learning_rate": 5.9453193700535614e-05, "loss": 1.661, "step": 5076 }, { "epoch": 1.2176728013908038, "grad_norm": 0.6373501420021057, "learning_rate": 5.943720521224718e-05, "loss": 1.7229, "step": 5078 }, { "epoch": 1.2181523889455068, "grad_norm": 0.7586556673049927, "learning_rate": 5.942121672395875e-05, "loss": 1.7367, "step": 5080 }, { "epoch": 1.2186319765002098, "grad_norm": 0.8116690516471863, "learning_rate": 5.940522823567032e-05, "loss": 1.6953, "step": 5082 }, { "epoch": 1.2191115640549128, "grad_norm": 0.6888695955276489, "learning_rate": 5.938923974738188e-05, "loss": 1.6545, "step": 5084 }, { 
"epoch": 1.2195911516096158, "grad_norm": 1.0836124420166016, "learning_rate": 5.9373251259093456e-05, "loss": 1.76, "step": 5086 }, { "epoch": 1.2200707391643186, "grad_norm": 0.8375632762908936, "learning_rate": 5.935726277080502e-05, "loss": 1.6463, "step": 5088 }, { "epoch": 1.2205503267190216, "grad_norm": 0.6816129684448242, "learning_rate": 5.934127428251659e-05, "loss": 1.661, "step": 5090 }, { "epoch": 1.2210299142737246, "grad_norm": 0.7888052463531494, "learning_rate": 5.932528579422816e-05, "loss": 1.6976, "step": 5092 }, { "epoch": 1.2215095018284274, "grad_norm": 0.6758246421813965, "learning_rate": 5.9309297305939725e-05, "loss": 1.6843, "step": 5094 }, { "epoch": 1.2219890893831304, "grad_norm": 0.7395145297050476, "learning_rate": 5.929330881765129e-05, "loss": 1.7696, "step": 5096 }, { "epoch": 1.2224686769378335, "grad_norm": 0.7176066040992737, "learning_rate": 5.927732032936286e-05, "loss": 1.7515, "step": 5098 }, { "epoch": 1.2229482644925365, "grad_norm": 0.6571214199066162, "learning_rate": 5.926133184107443e-05, "loss": 1.7266, "step": 5100 }, { "epoch": 1.2234278520472395, "grad_norm": 0.6881331205368042, "learning_rate": 5.9245343352785995e-05, "loss": 1.6952, "step": 5102 }, { "epoch": 1.2239074396019423, "grad_norm": 0.680357813835144, "learning_rate": 5.922935486449757e-05, "loss": 1.6756, "step": 5104 }, { "epoch": 1.2243870271566453, "grad_norm": 0.7051910758018494, "learning_rate": 5.921336637620913e-05, "loss": 1.6577, "step": 5106 }, { "epoch": 1.2248666147113483, "grad_norm": 0.7137699723243713, "learning_rate": 5.91973778879207e-05, "loss": 1.7045, "step": 5108 }, { "epoch": 1.2253462022660513, "grad_norm": 0.6744747161865234, "learning_rate": 5.918138939963227e-05, "loss": 1.749, "step": 5110 }, { "epoch": 1.225825789820754, "grad_norm": 0.6063811182975769, "learning_rate": 5.916540091134384e-05, "loss": 1.6817, "step": 5112 }, { "epoch": 1.226305377375457, "grad_norm": 0.7240449786186218, "learning_rate": 
5.914941242305541e-05, "loss": 1.702, "step": 5114 }, { "epoch": 1.22678496493016, "grad_norm": 0.6603817343711853, "learning_rate": 5.9133423934766975e-05, "loss": 1.7449, "step": 5116 }, { "epoch": 1.227264552484863, "grad_norm": 0.6384834051132202, "learning_rate": 5.911743544647854e-05, "loss": 1.7383, "step": 5118 }, { "epoch": 1.227744140039566, "grad_norm": 0.6385036706924438, "learning_rate": 5.910144695819011e-05, "loss": 1.7433, "step": 5120 }, { "epoch": 1.228223727594269, "grad_norm": 0.6752763390541077, "learning_rate": 5.908545846990168e-05, "loss": 1.7821, "step": 5122 }, { "epoch": 1.228703315148972, "grad_norm": 0.7081639766693115, "learning_rate": 5.9069469981613245e-05, "loss": 1.7289, "step": 5124 }, { "epoch": 1.229182902703675, "grad_norm": 0.7212027907371521, "learning_rate": 5.905348149332482e-05, "loss": 1.721, "step": 5126 }, { "epoch": 1.2296624902583777, "grad_norm": 0.8010708689689636, "learning_rate": 5.903749300503638e-05, "loss": 1.7233, "step": 5128 }, { "epoch": 1.2301420778130807, "grad_norm": 0.7094576358795166, "learning_rate": 5.902150451674795e-05, "loss": 1.7379, "step": 5130 }, { "epoch": 1.2306216653677837, "grad_norm": 0.6785212755203247, "learning_rate": 5.900551602845951e-05, "loss": 1.7031, "step": 5132 }, { "epoch": 1.2311012529224867, "grad_norm": 0.7105900049209595, "learning_rate": 5.898952754017107e-05, "loss": 1.6715, "step": 5134 }, { "epoch": 1.2315808404771895, "grad_norm": 0.8269817233085632, "learning_rate": 5.8973539051882645e-05, "loss": 1.7363, "step": 5136 }, { "epoch": 1.2320604280318925, "grad_norm": 0.705819308757782, "learning_rate": 5.895755056359421e-05, "loss": 1.7366, "step": 5138 }, { "epoch": 1.2325400155865955, "grad_norm": 0.634282648563385, "learning_rate": 5.894156207530578e-05, "loss": 1.7805, "step": 5140 }, { "epoch": 1.2330196031412985, "grad_norm": 0.7048913836479187, "learning_rate": 5.892557358701735e-05, "loss": 1.7019, "step": 5142 }, { "epoch": 1.2334991906960013, "grad_norm": 
0.6323996186256409, "learning_rate": 5.8909585098728915e-05, "loss": 1.6706, "step": 5144 }, { "epoch": 1.2339787782507043, "grad_norm": 0.6945284605026245, "learning_rate": 5.889359661044048e-05, "loss": 1.6313, "step": 5146 }, { "epoch": 1.2344583658054074, "grad_norm": 0.7074249386787415, "learning_rate": 5.887760812215205e-05, "loss": 1.6515, "step": 5148 }, { "epoch": 1.2349379533601104, "grad_norm": 0.7451314926147461, "learning_rate": 5.886161963386362e-05, "loss": 1.7715, "step": 5150 }, { "epoch": 1.2354175409148134, "grad_norm": 0.7063527703285217, "learning_rate": 5.8845631145575185e-05, "loss": 1.6666, "step": 5152 }, { "epoch": 1.2358971284695162, "grad_norm": 0.6776946187019348, "learning_rate": 5.882964265728676e-05, "loss": 1.6854, "step": 5154 }, { "epoch": 1.2363767160242192, "grad_norm": 0.9749411344528198, "learning_rate": 5.881365416899832e-05, "loss": 1.7187, "step": 5156 }, { "epoch": 1.2368563035789222, "grad_norm": 0.7237053513526917, "learning_rate": 5.879766568070989e-05, "loss": 1.7284, "step": 5158 }, { "epoch": 1.237335891133625, "grad_norm": 0.6642870306968689, "learning_rate": 5.878167719242146e-05, "loss": 1.6948, "step": 5160 }, { "epoch": 1.237815478688328, "grad_norm": 0.7438876032829285, "learning_rate": 5.876568870413303e-05, "loss": 1.7356, "step": 5162 }, { "epoch": 1.238295066243031, "grad_norm": 0.8009781837463379, "learning_rate": 5.874970021584459e-05, "loss": 1.725, "step": 5164 }, { "epoch": 1.238774653797734, "grad_norm": 0.829773485660553, "learning_rate": 5.8733711727556165e-05, "loss": 1.7009, "step": 5166 }, { "epoch": 1.239254241352437, "grad_norm": 0.6742973923683167, "learning_rate": 5.871772323926773e-05, "loss": 1.6909, "step": 5168 }, { "epoch": 1.2397338289071398, "grad_norm": 0.7424991726875305, "learning_rate": 5.8701734750979296e-05, "loss": 1.7217, "step": 5170 }, { "epoch": 1.2402134164618428, "grad_norm": 0.7063673734664917, "learning_rate": 5.868574626269087e-05, "loss": 1.6979, "step": 5172 }, { 
"epoch": 1.2406930040165458, "grad_norm": 0.7374402284622192, "learning_rate": 5.8669757774402434e-05, "loss": 1.716, "step": 5174 }, { "epoch": 1.2411725915712488, "grad_norm": 0.740669310092926, "learning_rate": 5.8653769286114e-05, "loss": 1.6662, "step": 5176 }, { "epoch": 1.2416521791259516, "grad_norm": 0.9080861806869507, "learning_rate": 5.863778079782557e-05, "loss": 1.7074, "step": 5178 }, { "epoch": 1.2421317666806546, "grad_norm": 0.7091116309165955, "learning_rate": 5.862179230953714e-05, "loss": 1.7203, "step": 5180 }, { "epoch": 1.2426113542353576, "grad_norm": 0.903561532497406, "learning_rate": 5.8605803821248704e-05, "loss": 1.6442, "step": 5182 }, { "epoch": 1.2430909417900606, "grad_norm": 0.8209563493728638, "learning_rate": 5.8589815332960276e-05, "loss": 1.7233, "step": 5184 }, { "epoch": 1.2435705293447634, "grad_norm": 0.7607261538505554, "learning_rate": 5.857382684467184e-05, "loss": 1.7398, "step": 5186 }, { "epoch": 1.2440501168994664, "grad_norm": 0.6875326633453369, "learning_rate": 5.855783835638341e-05, "loss": 1.7277, "step": 5188 }, { "epoch": 1.2445297044541694, "grad_norm": 0.7261474132537842, "learning_rate": 5.854184986809498e-05, "loss": 1.7102, "step": 5190 }, { "epoch": 1.2450092920088724, "grad_norm": 0.7414340972900391, "learning_rate": 5.8525861379806546e-05, "loss": 1.7031, "step": 5192 }, { "epoch": 1.2454888795635752, "grad_norm": 0.8427855372428894, "learning_rate": 5.850987289151811e-05, "loss": 1.6742, "step": 5194 }, { "epoch": 1.2459684671182782, "grad_norm": 0.7613822221755981, "learning_rate": 5.8493884403229684e-05, "loss": 1.7357, "step": 5196 }, { "epoch": 1.2464480546729813, "grad_norm": 0.6552219986915588, "learning_rate": 5.847789591494125e-05, "loss": 1.7434, "step": 5198 }, { "epoch": 1.2469276422276843, "grad_norm": 0.6826272010803223, "learning_rate": 5.8461907426652816e-05, "loss": 1.7297, "step": 5200 }, { "epoch": 1.2469276422276843, "eval_loss": 1.749930500984192, "eval_runtime": 331.2477, 
"eval_samples_per_second": 402.868, "eval_steps_per_second": 12.592, "step": 5200 }, { "epoch": 1.247407229782387, "grad_norm": 0.7242288589477539, "learning_rate": 5.844591893836439e-05, "loss": 1.7508, "step": 5202 }, { "epoch": 1.24788681733709, "grad_norm": 0.7033302783966064, "learning_rate": 5.842993045007594e-05, "loss": 1.6878, "step": 5204 }, { "epoch": 1.248366404891793, "grad_norm": 0.6475143432617188, "learning_rate": 5.841394196178751e-05, "loss": 1.6941, "step": 5206 }, { "epoch": 1.248845992446496, "grad_norm": 0.6873172521591187, "learning_rate": 5.839795347349908e-05, "loss": 1.7101, "step": 5208 }, { "epoch": 1.2493255800011989, "grad_norm": 0.7949736714363098, "learning_rate": 5.8381964985210644e-05, "loss": 1.7109, "step": 5210 }, { "epoch": 1.2498051675559019, "grad_norm": 0.6987682580947876, "learning_rate": 5.836597649692222e-05, "loss": 1.7477, "step": 5212 }, { "epoch": 1.2502847551106049, "grad_norm": 0.7331650257110596, "learning_rate": 5.834998800863378e-05, "loss": 1.6967, "step": 5214 }, { "epoch": 1.250764342665308, "grad_norm": 0.6589092016220093, "learning_rate": 5.833399952034535e-05, "loss": 1.6353, "step": 5216 }, { "epoch": 1.251243930220011, "grad_norm": 0.6821252107620239, "learning_rate": 5.831801103205692e-05, "loss": 1.7753, "step": 5218 }, { "epoch": 1.2517235177747137, "grad_norm": 0.8143470883369446, "learning_rate": 5.8302022543768486e-05, "loss": 1.6774, "step": 5220 }, { "epoch": 1.2522031053294167, "grad_norm": 0.9054415225982666, "learning_rate": 5.828603405548005e-05, "loss": 1.6801, "step": 5222 }, { "epoch": 1.2526826928841197, "grad_norm": 0.7406411170959473, "learning_rate": 5.8270045567191624e-05, "loss": 1.6717, "step": 5224 }, { "epoch": 1.2531622804388225, "grad_norm": 0.7254564762115479, "learning_rate": 5.825405707890319e-05, "loss": 1.7067, "step": 5226 }, { "epoch": 1.2536418679935255, "grad_norm": 0.8423312902450562, "learning_rate": 5.8238068590614756e-05, "loss": 1.7305, "step": 5228 }, { "epoch": 
1.2541214555482285, "grad_norm": 0.7941121459007263, "learning_rate": 5.822208010232633e-05, "loss": 1.7532, "step": 5230 }, { "epoch": 1.2546010431029315, "grad_norm": 0.6687875986099243, "learning_rate": 5.8206091614037894e-05, "loss": 1.6728, "step": 5232 }, { "epoch": 1.2550806306576345, "grad_norm": 0.648192822933197, "learning_rate": 5.819010312574946e-05, "loss": 1.7569, "step": 5234 }, { "epoch": 1.2555602182123373, "grad_norm": 0.8497558832168579, "learning_rate": 5.817411463746103e-05, "loss": 1.7034, "step": 5236 }, { "epoch": 1.2560398057670403, "grad_norm": 0.6229110360145569, "learning_rate": 5.81581261491726e-05, "loss": 1.6861, "step": 5238 }, { "epoch": 1.2565193933217433, "grad_norm": 0.6329578161239624, "learning_rate": 5.8142137660884164e-05, "loss": 1.7009, "step": 5240 }, { "epoch": 1.2569989808764461, "grad_norm": 0.7674649357795715, "learning_rate": 5.8126149172595736e-05, "loss": 1.713, "step": 5242 }, { "epoch": 1.2574785684311491, "grad_norm": 0.711179256439209, "learning_rate": 5.81101606843073e-05, "loss": 1.729, "step": 5244 }, { "epoch": 1.2579581559858521, "grad_norm": 0.6940897107124329, "learning_rate": 5.809417219601887e-05, "loss": 1.7054, "step": 5246 }, { "epoch": 1.2584377435405552, "grad_norm": 0.7245051264762878, "learning_rate": 5.807818370773044e-05, "loss": 1.7153, "step": 5248 }, { "epoch": 1.2589173310952582, "grad_norm": 0.7326179146766663, "learning_rate": 5.8062195219442006e-05, "loss": 1.7, "step": 5250 }, { "epoch": 1.259396918649961, "grad_norm": 0.6619378328323364, "learning_rate": 5.804620673115357e-05, "loss": 1.6838, "step": 5252 }, { "epoch": 1.259876506204664, "grad_norm": 0.7237507104873657, "learning_rate": 5.8030218242865144e-05, "loss": 1.7544, "step": 5254 }, { "epoch": 1.260356093759367, "grad_norm": 0.6975585222244263, "learning_rate": 5.801422975457671e-05, "loss": 1.72, "step": 5256 }, { "epoch": 1.26083568131407, "grad_norm": 0.7619460225105286, "learning_rate": 5.7998241266288275e-05, "loss": 
1.7078, "step": 5258 }, { "epoch": 1.2613152688687728, "grad_norm": 0.6753650307655334, "learning_rate": 5.798225277799985e-05, "loss": 1.6936, "step": 5260 }, { "epoch": 1.2617948564234758, "grad_norm": 0.813018798828125, "learning_rate": 5.796626428971141e-05, "loss": 1.678, "step": 5262 }, { "epoch": 1.2622744439781788, "grad_norm": 0.6783462762832642, "learning_rate": 5.795027580142298e-05, "loss": 1.714, "step": 5264 }, { "epoch": 1.2627540315328818, "grad_norm": 0.6741435527801514, "learning_rate": 5.793428731313455e-05, "loss": 1.7069, "step": 5266 }, { "epoch": 1.2632336190875848, "grad_norm": 0.7844259738922119, "learning_rate": 5.791829882484612e-05, "loss": 1.6849, "step": 5268 }, { "epoch": 1.2637132066422876, "grad_norm": 0.8545024394989014, "learning_rate": 5.790231033655768e-05, "loss": 1.68, "step": 5270 }, { "epoch": 1.2641927941969906, "grad_norm": 0.7672237157821655, "learning_rate": 5.7886321848269255e-05, "loss": 1.6947, "step": 5272 }, { "epoch": 1.2646723817516936, "grad_norm": 0.8484249711036682, "learning_rate": 5.787033335998082e-05, "loss": 1.7434, "step": 5274 }, { "epoch": 1.2651519693063964, "grad_norm": 0.7379119396209717, "learning_rate": 5.785434487169239e-05, "loss": 1.6707, "step": 5276 }, { "epoch": 1.2656315568610994, "grad_norm": 0.7438102960586548, "learning_rate": 5.7838356383403946e-05, "loss": 1.6723, "step": 5278 }, { "epoch": 1.2661111444158024, "grad_norm": 0.8571270704269409, "learning_rate": 5.782236789511551e-05, "loss": 1.7104, "step": 5280 }, { "epoch": 1.2665907319705054, "grad_norm": 0.788577675819397, "learning_rate": 5.7806379406827084e-05, "loss": 1.7188, "step": 5282 }, { "epoch": 1.2670703195252084, "grad_norm": 0.6847503185272217, "learning_rate": 5.779039091853865e-05, "loss": 1.7277, "step": 5284 }, { "epoch": 1.2675499070799112, "grad_norm": 0.7371469736099243, "learning_rate": 5.7774402430250215e-05, "loss": 1.7251, "step": 5286 }, { "epoch": 1.2680294946346142, "grad_norm": 0.6881584525108337, 
"learning_rate": 5.775841394196179e-05, "loss": 1.7253, "step": 5288 }, { "epoch": 1.2685090821893172, "grad_norm": 0.7141966223716736, "learning_rate": 5.7742425453673353e-05, "loss": 1.6834, "step": 5290 }, { "epoch": 1.26898866974402, "grad_norm": 0.8310843706130981, "learning_rate": 5.772643696538492e-05, "loss": 1.722, "step": 5292 }, { "epoch": 1.269468257298723, "grad_norm": 0.6331903338432312, "learning_rate": 5.771044847709649e-05, "loss": 1.7258, "step": 5294 }, { "epoch": 1.269947844853426, "grad_norm": 0.7928677797317505, "learning_rate": 5.769445998880806e-05, "loss": 1.7054, "step": 5296 }, { "epoch": 1.270427432408129, "grad_norm": 0.6856340765953064, "learning_rate": 5.767847150051962e-05, "loss": 1.7524, "step": 5298 }, { "epoch": 1.270907019962832, "grad_norm": 0.6549466848373413, "learning_rate": 5.7662483012231195e-05, "loss": 1.7159, "step": 5300 }, { "epoch": 1.2713866075175349, "grad_norm": 0.9353899955749512, "learning_rate": 5.764649452394276e-05, "loss": 1.7339, "step": 5302 }, { "epoch": 1.2718661950722379, "grad_norm": 0.6906251311302185, "learning_rate": 5.763050603565433e-05, "loss": 1.6923, "step": 5304 }, { "epoch": 1.2723457826269409, "grad_norm": 0.6559111475944519, "learning_rate": 5.76145175473659e-05, "loss": 1.7025, "step": 5306 }, { "epoch": 1.2728253701816437, "grad_norm": 0.6506053805351257, "learning_rate": 5.7598529059077465e-05, "loss": 1.7077, "step": 5308 }, { "epoch": 1.2733049577363467, "grad_norm": 0.6941146850585938, "learning_rate": 5.758254057078904e-05, "loss": 1.6523, "step": 5310 }, { "epoch": 1.2737845452910497, "grad_norm": 0.7604207992553711, "learning_rate": 5.75665520825006e-05, "loss": 1.7102, "step": 5312 }, { "epoch": 1.2742641328457527, "grad_norm": 0.6427507400512695, "learning_rate": 5.755056359421217e-05, "loss": 1.7265, "step": 5314 }, { "epoch": 1.2747437204004557, "grad_norm": 0.6631563901901245, "learning_rate": 5.753457510592374e-05, "loss": 1.7301, "step": 5316 }, { "epoch": 
1.2752233079551585, "grad_norm": 0.6389387845993042, "learning_rate": 5.751858661763531e-05, "loss": 1.6712, "step": 5318 }, { "epoch": 1.2757028955098615, "grad_norm": 0.7515463829040527, "learning_rate": 5.750259812934687e-05, "loss": 1.7264, "step": 5320 }, { "epoch": 1.2761824830645645, "grad_norm": 0.7415605187416077, "learning_rate": 5.7486609641058445e-05, "loss": 1.7109, "step": 5322 }, { "epoch": 1.2766620706192675, "grad_norm": 0.7004036903381348, "learning_rate": 5.747062115277001e-05, "loss": 1.7343, "step": 5324 }, { "epoch": 1.2771416581739703, "grad_norm": 0.7145670056343079, "learning_rate": 5.745463266448158e-05, "loss": 1.6985, "step": 5326 }, { "epoch": 1.2776212457286733, "grad_norm": 0.6578049063682556, "learning_rate": 5.743864417619315e-05, "loss": 1.6911, "step": 5328 }, { "epoch": 1.2781008332833763, "grad_norm": 0.626899003982544, "learning_rate": 5.7422655687904715e-05, "loss": 1.7193, "step": 5330 }, { "epoch": 1.2785804208380793, "grad_norm": 0.6773748993873596, "learning_rate": 5.740666719961628e-05, "loss": 1.7276, "step": 5332 }, { "epoch": 1.2790600083927823, "grad_norm": 0.6282117962837219, "learning_rate": 5.739067871132785e-05, "loss": 1.7396, "step": 5334 }, { "epoch": 1.2795395959474851, "grad_norm": 0.8055185079574585, "learning_rate": 5.737469022303942e-05, "loss": 1.7327, "step": 5336 }, { "epoch": 1.2800191835021881, "grad_norm": 0.7647718191146851, "learning_rate": 5.7358701734750984e-05, "loss": 1.7308, "step": 5338 }, { "epoch": 1.2804987710568911, "grad_norm": 1.0439715385437012, "learning_rate": 5.734271324646256e-05, "loss": 1.7165, "step": 5340 }, { "epoch": 1.280978358611594, "grad_norm": 0.6954693794250488, "learning_rate": 5.732672475817412e-05, "loss": 1.7143, "step": 5342 }, { "epoch": 1.281457946166297, "grad_norm": 0.7961425185203552, "learning_rate": 5.731073626988569e-05, "loss": 1.7265, "step": 5344 }, { "epoch": 1.281937533721, "grad_norm": 0.633351743221283, "learning_rate": 5.729474778159726e-05, "loss": 
1.7175, "step": 5346 }, { "epoch": 1.282417121275703, "grad_norm": 0.7070804834365845, "learning_rate": 5.7278759293308826e-05, "loss": 1.7152, "step": 5348 }, { "epoch": 1.282896708830406, "grad_norm": 0.7708163857460022, "learning_rate": 5.7262770805020385e-05, "loss": 1.7702, "step": 5350 }, { "epoch": 1.2833762963851088, "grad_norm": 0.6896591186523438, "learning_rate": 5.724678231673195e-05, "loss": 1.6984, "step": 5352 }, { "epoch": 1.2838558839398118, "grad_norm": 0.6568495035171509, "learning_rate": 5.723079382844352e-05, "loss": 1.6519, "step": 5354 }, { "epoch": 1.2843354714945148, "grad_norm": 0.6497288346290588, "learning_rate": 5.721480534015509e-05, "loss": 1.6809, "step": 5356 }, { "epoch": 1.2848150590492176, "grad_norm": 0.7739642858505249, "learning_rate": 5.7198816851866655e-05, "loss": 1.6612, "step": 5358 }, { "epoch": 1.2852946466039206, "grad_norm": 0.6757890582084656, "learning_rate": 5.718282836357822e-05, "loss": 1.6988, "step": 5360 }, { "epoch": 1.2857742341586236, "grad_norm": 0.7526322603225708, "learning_rate": 5.716683987528979e-05, "loss": 1.6709, "step": 5362 }, { "epoch": 1.2862538217133266, "grad_norm": 0.7163819074630737, "learning_rate": 5.715085138700136e-05, "loss": 1.7138, "step": 5364 }, { "epoch": 1.2867334092680296, "grad_norm": 0.7732597589492798, "learning_rate": 5.7134862898712925e-05, "loss": 1.6718, "step": 5366 }, { "epoch": 1.2872129968227324, "grad_norm": 0.8184769749641418, "learning_rate": 5.71188744104245e-05, "loss": 1.6955, "step": 5368 }, { "epoch": 1.2876925843774354, "grad_norm": 0.6769185662269592, "learning_rate": 5.710288592213606e-05, "loss": 1.704, "step": 5370 }, { "epoch": 1.2881721719321384, "grad_norm": 0.7484620809555054, "learning_rate": 5.708689743384763e-05, "loss": 1.6815, "step": 5372 }, { "epoch": 1.2886517594868412, "grad_norm": 0.6492456197738647, "learning_rate": 5.70709089455592e-05, "loss": 1.7448, "step": 5374 }, { "epoch": 1.2891313470415442, "grad_norm": 0.7749621272087097, 
"learning_rate": 5.705492045727077e-05, "loss": 1.7473, "step": 5376 }, { "epoch": 1.2896109345962472, "grad_norm": 0.8541063070297241, "learning_rate": 5.703893196898233e-05, "loss": 1.724, "step": 5378 }, { "epoch": 1.2900905221509502, "grad_norm": 0.7192348837852478, "learning_rate": 5.7022943480693905e-05, "loss": 1.7122, "step": 5380 }, { "epoch": 1.2905701097056532, "grad_norm": 0.6667373180389404, "learning_rate": 5.700695499240547e-05, "loss": 1.7662, "step": 5382 }, { "epoch": 1.291049697260356, "grad_norm": 0.6063985824584961, "learning_rate": 5.6990966504117036e-05, "loss": 1.6926, "step": 5384 }, { "epoch": 1.291529284815059, "grad_norm": 0.7093929052352905, "learning_rate": 5.697497801582861e-05, "loss": 1.6825, "step": 5386 }, { "epoch": 1.292008872369762, "grad_norm": 0.7002310156822205, "learning_rate": 5.6958989527540174e-05, "loss": 1.7561, "step": 5388 }, { "epoch": 1.292488459924465, "grad_norm": 0.6784309148788452, "learning_rate": 5.694300103925174e-05, "loss": 1.6946, "step": 5390 }, { "epoch": 1.2929680474791678, "grad_norm": 0.6558551788330078, "learning_rate": 5.692701255096331e-05, "loss": 1.6752, "step": 5392 }, { "epoch": 1.2934476350338708, "grad_norm": 0.660863995552063, "learning_rate": 5.691102406267488e-05, "loss": 1.6527, "step": 5394 }, { "epoch": 1.2939272225885738, "grad_norm": 0.7443373799324036, "learning_rate": 5.6895035574386444e-05, "loss": 1.756, "step": 5396 }, { "epoch": 1.2944068101432769, "grad_norm": 0.6622648239135742, "learning_rate": 5.6879047086098016e-05, "loss": 1.6823, "step": 5398 }, { "epoch": 1.2948863976979799, "grad_norm": 0.7123906016349792, "learning_rate": 5.686305859780958e-05, "loss": 1.7145, "step": 5400 }, { "epoch": 1.2953659852526827, "grad_norm": 0.6256366968154907, "learning_rate": 5.684707010952115e-05, "loss": 1.6892, "step": 5402 }, { "epoch": 1.2958455728073857, "grad_norm": 0.7290905714035034, "learning_rate": 5.683108162123272e-05, "loss": 1.6582, "step": 5404 }, { "epoch": 
1.2963251603620887, "grad_norm": 0.6816310882568359, "learning_rate": 5.6815093132944286e-05, "loss": 1.748, "step": 5406 }, { "epoch": 1.2968047479167915, "grad_norm": 0.6401040554046631, "learning_rate": 5.679910464465585e-05, "loss": 1.7025, "step": 5408 }, { "epoch": 1.2972843354714945, "grad_norm": 0.7499650120735168, "learning_rate": 5.6783116156367424e-05, "loss": 1.7141, "step": 5410 }, { "epoch": 1.2977639230261975, "grad_norm": 0.589373767375946, "learning_rate": 5.676712766807899e-05, "loss": 1.695, "step": 5412 }, { "epoch": 1.2982435105809005, "grad_norm": 0.8103967308998108, "learning_rate": 5.6751139179790556e-05, "loss": 1.7344, "step": 5414 }, { "epoch": 1.2987230981356035, "grad_norm": 0.7098590731620789, "learning_rate": 5.673515069150213e-05, "loss": 1.7273, "step": 5416 }, { "epoch": 1.2992026856903063, "grad_norm": 0.6404314041137695, "learning_rate": 5.6719162203213694e-05, "loss": 1.7247, "step": 5418 }, { "epoch": 1.2996822732450093, "grad_norm": 0.7916894555091858, "learning_rate": 5.670317371492526e-05, "loss": 1.6993, "step": 5420 }, { "epoch": 1.3001618607997123, "grad_norm": 0.7103358507156372, "learning_rate": 5.668718522663683e-05, "loss": 1.7034, "step": 5422 }, { "epoch": 1.300641448354415, "grad_norm": 0.6620671153068542, "learning_rate": 5.6671196738348384e-05, "loss": 1.7221, "step": 5424 }, { "epoch": 1.301121035909118, "grad_norm": 0.8718481659889221, "learning_rate": 5.6655208250059957e-05, "loss": 1.7265, "step": 5426 }, { "epoch": 1.301600623463821, "grad_norm": 0.7287850379943848, "learning_rate": 5.663921976177152e-05, "loss": 1.6597, "step": 5428 }, { "epoch": 1.3020802110185241, "grad_norm": 0.7617875337600708, "learning_rate": 5.662323127348309e-05, "loss": 1.706, "step": 5430 }, { "epoch": 1.3025597985732271, "grad_norm": 0.7041019797325134, "learning_rate": 5.660724278519466e-05, "loss": 1.7233, "step": 5432 }, { "epoch": 1.30303938612793, "grad_norm": 0.6484505534172058, "learning_rate": 5.6591254296906226e-05, 
"loss": 1.6834, "step": 5434 }, { "epoch": 1.303518973682633, "grad_norm": 0.6359051465988159, "learning_rate": 5.657526580861779e-05, "loss": 1.674, "step": 5436 }, { "epoch": 1.303998561237336, "grad_norm": 0.8573886156082153, "learning_rate": 5.6559277320329364e-05, "loss": 1.6975, "step": 5438 }, { "epoch": 1.3044781487920387, "grad_norm": 0.6884638667106628, "learning_rate": 5.654328883204093e-05, "loss": 1.6947, "step": 5440 }, { "epoch": 1.3049577363467417, "grad_norm": 0.6787795424461365, "learning_rate": 5.6527300343752496e-05, "loss": 1.6791, "step": 5442 }, { "epoch": 1.3054373239014447, "grad_norm": 0.7525657415390015, "learning_rate": 5.651131185546407e-05, "loss": 1.6755, "step": 5444 }, { "epoch": 1.3059169114561477, "grad_norm": 0.6483628153800964, "learning_rate": 5.6495323367175634e-05, "loss": 1.7153, "step": 5446 }, { "epoch": 1.3063964990108508, "grad_norm": 0.7002248764038086, "learning_rate": 5.64793348788872e-05, "loss": 1.6814, "step": 5448 }, { "epoch": 1.3068760865655535, "grad_norm": 0.616722583770752, "learning_rate": 5.646334639059877e-05, "loss": 1.6933, "step": 5450 }, { "epoch": 1.3073556741202566, "grad_norm": 0.6808350086212158, "learning_rate": 5.644735790231034e-05, "loss": 1.7564, "step": 5452 }, { "epoch": 1.3078352616749596, "grad_norm": 0.7167090177536011, "learning_rate": 5.6431369414021903e-05, "loss": 1.6846, "step": 5454 }, { "epoch": 1.3083148492296626, "grad_norm": 0.6688698530197144, "learning_rate": 5.6415380925733476e-05, "loss": 1.7027, "step": 5456 }, { "epoch": 1.3087944367843654, "grad_norm": 0.7007933855056763, "learning_rate": 5.639939243744504e-05, "loss": 1.7639, "step": 5458 }, { "epoch": 1.3092740243390684, "grad_norm": 0.7650499939918518, "learning_rate": 5.638340394915661e-05, "loss": 1.7344, "step": 5460 }, { "epoch": 1.3097536118937714, "grad_norm": 0.7759153842926025, "learning_rate": 5.636741546086818e-05, "loss": 1.6893, "step": 5462 }, { "epoch": 1.3102331994484744, "grad_norm": 0.6743199229240417, 
"learning_rate": 5.6351426972579746e-05, "loss": 1.6923, "step": 5464 }, { "epoch": 1.3107127870031774, "grad_norm": 0.6295130252838135, "learning_rate": 5.633543848429131e-05, "loss": 1.672, "step": 5466 }, { "epoch": 1.3111923745578802, "grad_norm": 0.677501916885376, "learning_rate": 5.6319449996002884e-05, "loss": 1.7028, "step": 5468 }, { "epoch": 1.3116719621125832, "grad_norm": 0.7447498440742493, "learning_rate": 5.630346150771445e-05, "loss": 1.6773, "step": 5470 }, { "epoch": 1.3121515496672862, "grad_norm": 0.6640578508377075, "learning_rate": 5.6287473019426015e-05, "loss": 1.6848, "step": 5472 }, { "epoch": 1.312631137221989, "grad_norm": 0.7492619156837463, "learning_rate": 5.627148453113759e-05, "loss": 1.7509, "step": 5474 }, { "epoch": 1.313110724776692, "grad_norm": 0.7171247601509094, "learning_rate": 5.625549604284915e-05, "loss": 1.6815, "step": 5476 }, { "epoch": 1.313590312331395, "grad_norm": 0.8259782791137695, "learning_rate": 5.623950755456072e-05, "loss": 1.7209, "step": 5478 }, { "epoch": 1.314069899886098, "grad_norm": 0.7121151089668274, "learning_rate": 5.622351906627229e-05, "loss": 1.7146, "step": 5480 }, { "epoch": 1.314549487440801, "grad_norm": 0.6416226625442505, "learning_rate": 5.620753057798386e-05, "loss": 1.6827, "step": 5482 }, { "epoch": 1.3150290749955038, "grad_norm": 0.6146599054336548, "learning_rate": 5.619154208969542e-05, "loss": 1.6812, "step": 5484 }, { "epoch": 1.3155086625502068, "grad_norm": 0.6813895106315613, "learning_rate": 5.6175553601406995e-05, "loss": 1.7091, "step": 5486 }, { "epoch": 1.3159882501049098, "grad_norm": 0.695120632648468, "learning_rate": 5.615956511311856e-05, "loss": 1.6673, "step": 5488 }, { "epoch": 1.3164678376596126, "grad_norm": 0.6431856155395508, "learning_rate": 5.614357662483013e-05, "loss": 1.7186, "step": 5490 }, { "epoch": 1.3169474252143156, "grad_norm": 0.7692582011222839, "learning_rate": 5.61275881365417e-05, "loss": 1.6514, "step": 5492 }, { "epoch": 
1.3174270127690186, "grad_norm": 0.8235185146331787, "learning_rate": 5.6111599648253265e-05, "loss": 1.7447, "step": 5494 }, { "epoch": 1.3179066003237216, "grad_norm": 0.7608405351638794, "learning_rate": 5.6095611159964824e-05, "loss": 1.7121, "step": 5496 }, { "epoch": 1.3183861878784247, "grad_norm": 0.6960509419441223, "learning_rate": 5.607962267167639e-05, "loss": 1.7273, "step": 5498 }, { "epoch": 1.3188657754331274, "grad_norm": 0.691074550151825, "learning_rate": 5.6063634183387955e-05, "loss": 1.6905, "step": 5500 }, { "epoch": 1.3193453629878305, "grad_norm": 0.6612728238105774, "learning_rate": 5.604764569509953e-05, "loss": 1.7259, "step": 5502 }, { "epoch": 1.3198249505425335, "grad_norm": 0.6632093191146851, "learning_rate": 5.603165720681109e-05, "loss": 1.7226, "step": 5504 }, { "epoch": 1.3203045380972362, "grad_norm": 0.6677670478820801, "learning_rate": 5.6015668718522666e-05, "loss": 1.7391, "step": 5506 }, { "epoch": 1.3207841256519393, "grad_norm": 0.6649909019470215, "learning_rate": 5.599968023023423e-05, "loss": 1.7122, "step": 5508 }, { "epoch": 1.3212637132066423, "grad_norm": 0.7215535044670105, "learning_rate": 5.59836917419458e-05, "loss": 1.7218, "step": 5510 }, { "epoch": 1.3217433007613453, "grad_norm": 0.6546183228492737, "learning_rate": 5.596770325365737e-05, "loss": 1.6961, "step": 5512 }, { "epoch": 1.3222228883160483, "grad_norm": 0.6487047672271729, "learning_rate": 5.5951714765368935e-05, "loss": 1.6924, "step": 5514 }, { "epoch": 1.322702475870751, "grad_norm": 0.7427170276641846, "learning_rate": 5.59357262770805e-05, "loss": 1.7104, "step": 5516 }, { "epoch": 1.323182063425454, "grad_norm": 0.659069836139679, "learning_rate": 5.5919737788792074e-05, "loss": 1.743, "step": 5518 }, { "epoch": 1.323661650980157, "grad_norm": 0.6560345888137817, "learning_rate": 5.590374930050364e-05, "loss": 1.7211, "step": 5520 }, { "epoch": 1.32414123853486, "grad_norm": 0.7344533801078796, "learning_rate": 5.5887760812215205e-05, 
"loss": 1.6598, "step": 5522 }, { "epoch": 1.3246208260895629, "grad_norm": 0.6735556125640869, "learning_rate": 5.587177232392678e-05, "loss": 1.6618, "step": 5524 }, { "epoch": 1.325100413644266, "grad_norm": 0.7454924583435059, "learning_rate": 5.585578383563834e-05, "loss": 1.7181, "step": 5526 }, { "epoch": 1.325580001198969, "grad_norm": 1.0502228736877441, "learning_rate": 5.583979534734991e-05, "loss": 1.735, "step": 5528 }, { "epoch": 1.326059588753672, "grad_norm": 0.6497300267219543, "learning_rate": 5.582380685906148e-05, "loss": 1.7027, "step": 5530 }, { "epoch": 1.326539176308375, "grad_norm": 0.6848211288452148, "learning_rate": 5.580781837077305e-05, "loss": 1.709, "step": 5532 }, { "epoch": 1.3270187638630777, "grad_norm": 0.6841749548912048, "learning_rate": 5.579182988248461e-05, "loss": 1.7057, "step": 5534 }, { "epoch": 1.3274983514177807, "grad_norm": 0.6971889734268188, "learning_rate": 5.5775841394196185e-05, "loss": 1.7037, "step": 5536 }, { "epoch": 1.3279779389724837, "grad_norm": 0.6833301186561584, "learning_rate": 5.575985290590775e-05, "loss": 1.7043, "step": 5538 }, { "epoch": 1.3284575265271865, "grad_norm": 0.7922766804695129, "learning_rate": 5.574386441761932e-05, "loss": 1.7398, "step": 5540 }, { "epoch": 1.3289371140818895, "grad_norm": 0.7746238708496094, "learning_rate": 5.572787592933089e-05, "loss": 1.7615, "step": 5542 }, { "epoch": 1.3294167016365925, "grad_norm": 0.676066517829895, "learning_rate": 5.5711887441042455e-05, "loss": 1.6525, "step": 5544 }, { "epoch": 1.3298962891912955, "grad_norm": 0.829502284526825, "learning_rate": 5.569589895275402e-05, "loss": 1.6647, "step": 5546 }, { "epoch": 1.3303758767459986, "grad_norm": 0.6038015484809875, "learning_rate": 5.567991046446559e-05, "loss": 1.7324, "step": 5548 }, { "epoch": 1.3308554643007013, "grad_norm": 0.6349323987960815, "learning_rate": 5.566392197617716e-05, "loss": 1.7679, "step": 5550 }, { "epoch": 1.3313350518554044, "grad_norm": 0.8107993006706238, 
"learning_rate": 5.5647933487888724e-05, "loss": 1.6978, "step": 5552 }, { "epoch": 1.3318146394101074, "grad_norm": 0.7322676181793213, "learning_rate": 5.56319449996003e-05, "loss": 1.719, "step": 5554 }, { "epoch": 1.3322942269648101, "grad_norm": 0.6461386680603027, "learning_rate": 5.561595651131186e-05, "loss": 1.7008, "step": 5556 }, { "epoch": 1.3327738145195132, "grad_norm": 0.656742513179779, "learning_rate": 5.559996802302343e-05, "loss": 1.6824, "step": 5558 }, { "epoch": 1.3332534020742162, "grad_norm": 0.6593782901763916, "learning_rate": 5.5583979534735e-05, "loss": 1.7302, "step": 5560 }, { "epoch": 1.3337329896289192, "grad_norm": 0.6321220993995667, "learning_rate": 5.5567991046446566e-05, "loss": 1.6802, "step": 5562 }, { "epoch": 1.3342125771836222, "grad_norm": 0.6265666484832764, "learning_rate": 5.555200255815813e-05, "loss": 1.6898, "step": 5564 }, { "epoch": 1.334692164738325, "grad_norm": 0.7208665609359741, "learning_rate": 5.5536014069869705e-05, "loss": 1.7874, "step": 5566 }, { "epoch": 1.335171752293028, "grad_norm": 0.7247272729873657, "learning_rate": 5.552002558158126e-05, "loss": 1.6929, "step": 5568 }, { "epoch": 1.335651339847731, "grad_norm": 0.6267654299736023, "learning_rate": 5.550403709329283e-05, "loss": 1.7103, "step": 5570 }, { "epoch": 1.3361309274024338, "grad_norm": 0.6025223135948181, "learning_rate": 5.5488048605004395e-05, "loss": 1.7115, "step": 5572 }, { "epoch": 1.3366105149571368, "grad_norm": 0.6781705617904663, "learning_rate": 5.547206011671596e-05, "loss": 1.7211, "step": 5574 }, { "epoch": 1.3370901025118398, "grad_norm": 0.775626540184021, "learning_rate": 5.545607162842753e-05, "loss": 1.6761, "step": 5576 }, { "epoch": 1.3375696900665428, "grad_norm": 0.7546366453170776, "learning_rate": 5.54400831401391e-05, "loss": 1.7181, "step": 5578 }, { "epoch": 1.3380492776212458, "grad_norm": 0.6521198749542236, "learning_rate": 5.5424094651850665e-05, "loss": 1.7176, "step": 5580 }, { "epoch": 
1.3385288651759486, "grad_norm": 0.658228874206543, "learning_rate": 5.540810616356224e-05, "loss": 1.7422, "step": 5582 }, { "epoch": 1.3390084527306516, "grad_norm": 0.7263331413269043, "learning_rate": 5.53921176752738e-05, "loss": 1.6731, "step": 5584 }, { "epoch": 1.3394880402853546, "grad_norm": 0.6818578839302063, "learning_rate": 5.537612918698537e-05, "loss": 1.7431, "step": 5586 }, { "epoch": 1.3399676278400576, "grad_norm": 0.8738374710083008, "learning_rate": 5.536014069869694e-05, "loss": 1.6925, "step": 5588 }, { "epoch": 1.3404472153947604, "grad_norm": 0.6663306355476379, "learning_rate": 5.5344152210408507e-05, "loss": 1.6888, "step": 5590 }, { "epoch": 1.3409268029494634, "grad_norm": 0.7031589150428772, "learning_rate": 5.532816372212007e-05, "loss": 1.7183, "step": 5592 }, { "epoch": 1.3414063905041664, "grad_norm": 0.7353261113166809, "learning_rate": 5.5312175233831645e-05, "loss": 1.6793, "step": 5594 }, { "epoch": 1.3418859780588694, "grad_norm": 0.688431441783905, "learning_rate": 5.529618674554321e-05, "loss": 1.7204, "step": 5596 }, { "epoch": 1.3423655656135725, "grad_norm": 0.6952008008956909, "learning_rate": 5.5280198257254776e-05, "loss": 1.7092, "step": 5598 }, { "epoch": 1.3428451531682752, "grad_norm": 0.7701739072799683, "learning_rate": 5.526420976896635e-05, "loss": 1.7306, "step": 5600 }, { "epoch": 1.3428451531682752, "eval_loss": 1.7445539236068726, "eval_runtime": 331.2843, "eval_samples_per_second": 402.823, "eval_steps_per_second": 12.59, "step": 5600 }, { "epoch": 1.3433247407229782, "grad_norm": 0.6956355571746826, "learning_rate": 5.5248221280677914e-05, "loss": 1.7019, "step": 5602 }, { "epoch": 1.3438043282776813, "grad_norm": 0.6415538191795349, "learning_rate": 5.523223279238948e-05, "loss": 1.6561, "step": 5604 }, { "epoch": 1.344283915832384, "grad_norm": 0.8332722187042236, "learning_rate": 5.521624430410105e-05, "loss": 1.7076, "step": 5606 }, { "epoch": 1.344763503387087, "grad_norm": 0.7149489521980286, 
"learning_rate": 5.520025581581262e-05, "loss": 1.7279, "step": 5608 }, { "epoch": 1.34524309094179, "grad_norm": 0.7159495949745178, "learning_rate": 5.5184267327524184e-05, "loss": 1.6736, "step": 5610 }, { "epoch": 1.345722678496493, "grad_norm": 0.7556326985359192, "learning_rate": 5.5168278839235756e-05, "loss": 1.7346, "step": 5612 }, { "epoch": 1.346202266051196, "grad_norm": 0.8085745573043823, "learning_rate": 5.515229035094732e-05, "loss": 1.7166, "step": 5614 }, { "epoch": 1.3466818536058989, "grad_norm": 0.643356204032898, "learning_rate": 5.513630186265889e-05, "loss": 1.7158, "step": 5616 }, { "epoch": 1.3471614411606019, "grad_norm": 0.6873928308486938, "learning_rate": 5.512031337437046e-05, "loss": 1.7491, "step": 5618 }, { "epoch": 1.347641028715305, "grad_norm": 0.7706488966941833, "learning_rate": 5.5104324886082026e-05, "loss": 1.7067, "step": 5620 }, { "epoch": 1.3481206162700077, "grad_norm": 0.8299438953399658, "learning_rate": 5.508833639779359e-05, "loss": 1.6969, "step": 5622 }, { "epoch": 1.3486002038247107, "grad_norm": 0.7135815620422363, "learning_rate": 5.5072347909505164e-05, "loss": 1.7203, "step": 5624 }, { "epoch": 1.3490797913794137, "grad_norm": 0.6865275502204895, "learning_rate": 5.505635942121673e-05, "loss": 1.6991, "step": 5626 }, { "epoch": 1.3495593789341167, "grad_norm": 0.7548512816429138, "learning_rate": 5.5040370932928296e-05, "loss": 1.6897, "step": 5628 }, { "epoch": 1.3500389664888197, "grad_norm": 0.7414130568504333, "learning_rate": 5.502438244463987e-05, "loss": 1.6549, "step": 5630 }, { "epoch": 1.3505185540435225, "grad_norm": 0.6571274995803833, "learning_rate": 5.5008393956351434e-05, "loss": 1.729, "step": 5632 }, { "epoch": 1.3509981415982255, "grad_norm": 0.6994803547859192, "learning_rate": 5.4992405468063e-05, "loss": 1.7198, "step": 5634 }, { "epoch": 1.3514777291529285, "grad_norm": 0.6696985363960266, "learning_rate": 5.497641697977457e-05, "loss": 1.6983, "step": 5636 }, { "epoch": 
1.3519573167076313, "grad_norm": 0.7097785472869873, "learning_rate": 5.496042849148614e-05, "loss": 1.7071, "step": 5638 }, { "epoch": 1.3524369042623343, "grad_norm": 0.7205188870429993, "learning_rate": 5.49444400031977e-05, "loss": 1.7173, "step": 5640 }, { "epoch": 1.3529164918170373, "grad_norm": 0.7120492458343506, "learning_rate": 5.492845151490926e-05, "loss": 1.7177, "step": 5642 }, { "epoch": 1.3533960793717403, "grad_norm": 0.6264262199401855, "learning_rate": 5.491246302662083e-05, "loss": 1.7263, "step": 5644 }, { "epoch": 1.3538756669264433, "grad_norm": 0.6340615749359131, "learning_rate": 5.48964745383324e-05, "loss": 1.7274, "step": 5646 }, { "epoch": 1.3543552544811461, "grad_norm": 0.668602466583252, "learning_rate": 5.4880486050043966e-05, "loss": 1.6833, "step": 5648 }, { "epoch": 1.3548348420358491, "grad_norm": 0.8497878313064575, "learning_rate": 5.486449756175553e-05, "loss": 1.7191, "step": 5650 }, { "epoch": 1.3553144295905521, "grad_norm": 0.7200911641120911, "learning_rate": 5.4848509073467104e-05, "loss": 1.7435, "step": 5652 }, { "epoch": 1.3557940171452552, "grad_norm": 0.7372239828109741, "learning_rate": 5.483252058517867e-05, "loss": 1.68, "step": 5654 }, { "epoch": 1.356273604699958, "grad_norm": 0.6215764880180359, "learning_rate": 5.4816532096890236e-05, "loss": 1.6679, "step": 5656 }, { "epoch": 1.356753192254661, "grad_norm": 0.6152515411376953, "learning_rate": 5.480054360860181e-05, "loss": 1.7277, "step": 5658 }, { "epoch": 1.357232779809364, "grad_norm": 0.7310672998428345, "learning_rate": 5.4784555120313374e-05, "loss": 1.7243, "step": 5660 }, { "epoch": 1.357712367364067, "grad_norm": 0.6719784736633301, "learning_rate": 5.476856663202494e-05, "loss": 1.6556, "step": 5662 }, { "epoch": 1.35819195491877, "grad_norm": 0.64484041929245, "learning_rate": 5.475257814373651e-05, "loss": 1.7394, "step": 5664 }, { "epoch": 1.3586715424734728, "grad_norm": 0.6675312519073486, "learning_rate": 5.473658965544808e-05, "loss": 
1.7137, "step": 5666 }, { "epoch": 1.3591511300281758, "grad_norm": 0.6850504875183105, "learning_rate": 5.4720601167159643e-05, "loss": 1.7169, "step": 5668 }, { "epoch": 1.3596307175828788, "grad_norm": 0.6488770246505737, "learning_rate": 5.4704612678871216e-05, "loss": 1.7554, "step": 5670 }, { "epoch": 1.3601103051375816, "grad_norm": 0.6724187731742859, "learning_rate": 5.468862419058278e-05, "loss": 1.7487, "step": 5672 }, { "epoch": 1.3605898926922846, "grad_norm": 0.651566743850708, "learning_rate": 5.467263570229435e-05, "loss": 1.7048, "step": 5674 }, { "epoch": 1.3610694802469876, "grad_norm": 0.6117959022521973, "learning_rate": 5.465664721400592e-05, "loss": 1.7239, "step": 5676 }, { "epoch": 1.3615490678016906, "grad_norm": 0.8765501976013184, "learning_rate": 5.4640658725717485e-05, "loss": 1.6962, "step": 5678 }, { "epoch": 1.3620286553563936, "grad_norm": 0.6645521521568298, "learning_rate": 5.462467023742905e-05, "loss": 1.6565, "step": 5680 }, { "epoch": 1.3625082429110964, "grad_norm": 0.6620674133300781, "learning_rate": 5.4608681749140624e-05, "loss": 1.6956, "step": 5682 }, { "epoch": 1.3629878304657994, "grad_norm": 0.6261199116706848, "learning_rate": 5.459269326085219e-05, "loss": 1.7239, "step": 5684 }, { "epoch": 1.3634674180205024, "grad_norm": 0.7366220951080322, "learning_rate": 5.4576704772563755e-05, "loss": 1.7014, "step": 5686 }, { "epoch": 1.3639470055752052, "grad_norm": 0.7821937799453735, "learning_rate": 5.456071628427533e-05, "loss": 1.7198, "step": 5688 }, { "epoch": 1.3644265931299082, "grad_norm": 0.7429319620132446, "learning_rate": 5.454472779598689e-05, "loss": 1.701, "step": 5690 }, { "epoch": 1.3649061806846112, "grad_norm": 0.6746081113815308, "learning_rate": 5.452873930769846e-05, "loss": 1.7257, "step": 5692 }, { "epoch": 1.3653857682393142, "grad_norm": 0.6605741381645203, "learning_rate": 5.451275081941003e-05, "loss": 1.7199, "step": 5694 }, { "epoch": 1.3658653557940172, "grad_norm": 0.6786696314811707, 
"learning_rate": 5.44967623311216e-05, "loss": 1.7278, "step": 5696 }, { "epoch": 1.36634494334872, "grad_norm": 0.8111580014228821, "learning_rate": 5.448077384283316e-05, "loss": 1.6391, "step": 5698 }, { "epoch": 1.366824530903423, "grad_norm": 0.6570935845375061, "learning_rate": 5.4464785354544735e-05, "loss": 1.6929, "step": 5700 }, { "epoch": 1.367304118458126, "grad_norm": 0.667306661605835, "learning_rate": 5.44487968662563e-05, "loss": 1.6732, "step": 5702 }, { "epoch": 1.3677837060128288, "grad_norm": 0.77003413438797, "learning_rate": 5.443280837796787e-05, "loss": 1.6915, "step": 5704 }, { "epoch": 1.3682632935675318, "grad_norm": 0.7312922477722168, "learning_rate": 5.441681988967944e-05, "loss": 1.6727, "step": 5706 }, { "epoch": 1.3687428811222349, "grad_norm": 0.6322761178016663, "learning_rate": 5.4400831401391005e-05, "loss": 1.6854, "step": 5708 }, { "epoch": 1.3692224686769379, "grad_norm": 0.6372693777084351, "learning_rate": 5.438484291310257e-05, "loss": 1.6745, "step": 5710 }, { "epoch": 1.3697020562316409, "grad_norm": 0.8038871884346008, "learning_rate": 5.436885442481414e-05, "loss": 1.6642, "step": 5712 }, { "epoch": 1.3701816437863437, "grad_norm": 0.7181879281997681, "learning_rate": 5.43528659365257e-05, "loss": 1.6723, "step": 5714 }, { "epoch": 1.3706612313410467, "grad_norm": 0.6927109360694885, "learning_rate": 5.433687744823727e-05, "loss": 1.6325, "step": 5716 }, { "epoch": 1.3711408188957497, "grad_norm": 0.6460704803466797, "learning_rate": 5.432088895994883e-05, "loss": 1.7188, "step": 5718 }, { "epoch": 1.3716204064504527, "grad_norm": 0.6802504658699036, "learning_rate": 5.4304900471660406e-05, "loss": 1.7102, "step": 5720 }, { "epoch": 1.3720999940051555, "grad_norm": 0.6491362452507019, "learning_rate": 5.428891198337197e-05, "loss": 1.6805, "step": 5722 }, { "epoch": 1.3725795815598585, "grad_norm": 0.5917383432388306, "learning_rate": 5.427292349508354e-05, "loss": 1.7021, "step": 5724 }, { "epoch": 1.3730591691145615, 
"grad_norm": 0.7114906907081604, "learning_rate": 5.425693500679511e-05, "loss": 1.671, "step": 5726 }, { "epoch": 1.3735387566692645, "grad_norm": 0.7268342971801758, "learning_rate": 5.4240946518506675e-05, "loss": 1.7294, "step": 5728 }, { "epoch": 1.3740183442239675, "grad_norm": 0.688716471195221, "learning_rate": 5.422495803021824e-05, "loss": 1.7057, "step": 5730 }, { "epoch": 1.3744979317786703, "grad_norm": 0.705938994884491, "learning_rate": 5.4208969541929814e-05, "loss": 1.6574, "step": 5732 }, { "epoch": 1.3749775193333733, "grad_norm": 0.7630473971366882, "learning_rate": 5.419298105364138e-05, "loss": 1.6802, "step": 5734 }, { "epoch": 1.3754571068880763, "grad_norm": 0.6531763076782227, "learning_rate": 5.4176992565352945e-05, "loss": 1.7144, "step": 5736 }, { "epoch": 1.375936694442779, "grad_norm": 0.6296224594116211, "learning_rate": 5.416100407706452e-05, "loss": 1.6998, "step": 5738 }, { "epoch": 1.3764162819974821, "grad_norm": 0.6090771555900574, "learning_rate": 5.414501558877608e-05, "loss": 1.6992, "step": 5740 }, { "epoch": 1.3768958695521851, "grad_norm": 0.682792067527771, "learning_rate": 5.412902710048765e-05, "loss": 1.6957, "step": 5742 }, { "epoch": 1.3773754571068881, "grad_norm": 0.6889716386795044, "learning_rate": 5.411303861219922e-05, "loss": 1.7373, "step": 5744 }, { "epoch": 1.3778550446615911, "grad_norm": 0.7340922355651855, "learning_rate": 5.409705012391079e-05, "loss": 1.6805, "step": 5746 }, { "epoch": 1.378334632216294, "grad_norm": 0.6161440014839172, "learning_rate": 5.408106163562235e-05, "loss": 1.6809, "step": 5748 }, { "epoch": 1.378814219770997, "grad_norm": 0.725288450717926, "learning_rate": 5.4065073147333925e-05, "loss": 1.7068, "step": 5750 }, { "epoch": 1.3792938073257, "grad_norm": 0.7660910487174988, "learning_rate": 5.404908465904549e-05, "loss": 1.7216, "step": 5752 }, { "epoch": 1.3797733948804027, "grad_norm": 0.7012436389923096, "learning_rate": 5.4033096170757057e-05, "loss": 1.6868, "step": 5754 
}, { "epoch": 1.3802529824351057, "grad_norm": 0.8739418983459473, "learning_rate": 5.401710768246863e-05, "loss": 1.7134, "step": 5756 }, { "epoch": 1.3807325699898088, "grad_norm": 0.6966594457626343, "learning_rate": 5.4001119194180195e-05, "loss": 1.7572, "step": 5758 }, { "epoch": 1.3812121575445118, "grad_norm": 0.6691337823867798, "learning_rate": 5.398513070589176e-05, "loss": 1.6787, "step": 5760 }, { "epoch": 1.3816917450992148, "grad_norm": 0.8031095862388611, "learning_rate": 5.396914221760333e-05, "loss": 1.6856, "step": 5762 }, { "epoch": 1.3821713326539176, "grad_norm": 0.7526069283485413, "learning_rate": 5.39531537293149e-05, "loss": 1.7379, "step": 5764 }, { "epoch": 1.3826509202086206, "grad_norm": 0.6763551235198975, "learning_rate": 5.3937165241026464e-05, "loss": 1.7034, "step": 5766 }, { "epoch": 1.3831305077633236, "grad_norm": 0.6651099920272827, "learning_rate": 5.392117675273804e-05, "loss": 1.6611, "step": 5768 }, { "epoch": 1.3836100953180264, "grad_norm": 0.6314768195152283, "learning_rate": 5.39051882644496e-05, "loss": 1.7058, "step": 5770 }, { "epoch": 1.3840896828727294, "grad_norm": 0.6744330525398254, "learning_rate": 5.388919977616117e-05, "loss": 1.7766, "step": 5772 }, { "epoch": 1.3845692704274324, "grad_norm": 0.7510837316513062, "learning_rate": 5.387321128787274e-05, "loss": 1.6862, "step": 5774 }, { "epoch": 1.3850488579821354, "grad_norm": 0.6401622295379639, "learning_rate": 5.3857222799584306e-05, "loss": 1.6634, "step": 5776 }, { "epoch": 1.3855284455368384, "grad_norm": 0.7486141920089722, "learning_rate": 5.384123431129587e-05, "loss": 1.669, "step": 5778 }, { "epoch": 1.3860080330915412, "grad_norm": 0.6893539428710938, "learning_rate": 5.3825245823007445e-05, "loss": 1.6474, "step": 5780 }, { "epoch": 1.3864876206462442, "grad_norm": 0.7211657166481018, "learning_rate": 5.380925733471901e-05, "loss": 1.63, "step": 5782 }, { "epoch": 1.3869672082009472, "grad_norm": 0.719837486743927, "learning_rate": 
5.3793268846430576e-05, "loss": 1.703, "step": 5784 }, { "epoch": 1.3874467957556502, "grad_norm": 0.7009168267250061, "learning_rate": 5.3777280358142135e-05, "loss": 1.7664, "step": 5786 }, { "epoch": 1.387926383310353, "grad_norm": 0.6377356648445129, "learning_rate": 5.37612918698537e-05, "loss": 1.7224, "step": 5788 }, { "epoch": 1.388405970865056, "grad_norm": 0.6347834467887878, "learning_rate": 5.374530338156527e-05, "loss": 1.6974, "step": 5790 }, { "epoch": 1.388885558419759, "grad_norm": 0.667727530002594, "learning_rate": 5.372931489327684e-05, "loss": 1.6837, "step": 5792 }, { "epoch": 1.389365145974462, "grad_norm": 0.629482090473175, "learning_rate": 5.3713326404988404e-05, "loss": 1.6861, "step": 5794 }, { "epoch": 1.389844733529165, "grad_norm": 0.7706561088562012, "learning_rate": 5.369733791669998e-05, "loss": 1.7377, "step": 5796 }, { "epoch": 1.3903243210838678, "grad_norm": 0.7843564748764038, "learning_rate": 5.368134942841154e-05, "loss": 1.7065, "step": 5798 }, { "epoch": 1.3908039086385708, "grad_norm": 0.6392410397529602, "learning_rate": 5.366536094012311e-05, "loss": 1.7335, "step": 5800 }, { "epoch": 1.3912834961932738, "grad_norm": 0.687632143497467, "learning_rate": 5.364937245183468e-05, "loss": 1.715, "step": 5802 }, { "epoch": 1.3917630837479766, "grad_norm": 0.6896960735321045, "learning_rate": 5.3633383963546247e-05, "loss": 1.7068, "step": 5804 }, { "epoch": 1.3922426713026796, "grad_norm": 0.6976484656333923, "learning_rate": 5.361739547525781e-05, "loss": 1.6935, "step": 5806 }, { "epoch": 1.3927222588573827, "grad_norm": 0.6445300579071045, "learning_rate": 5.3601406986969385e-05, "loss": 1.6756, "step": 5808 }, { "epoch": 1.3932018464120857, "grad_norm": 0.696608304977417, "learning_rate": 5.358541849868095e-05, "loss": 1.6931, "step": 5810 }, { "epoch": 1.3936814339667887, "grad_norm": 0.6997944116592407, "learning_rate": 5.3569430010392516e-05, "loss": 1.6691, "step": 5812 }, { "epoch": 1.3941610215214915, "grad_norm": 
0.644656240940094, "learning_rate": 5.355344152210409e-05, "loss": 1.7094, "step": 5814 }, { "epoch": 1.3946406090761945, "grad_norm": 0.7430647015571594, "learning_rate": 5.3537453033815654e-05, "loss": 1.7317, "step": 5816 }, { "epoch": 1.3951201966308975, "grad_norm": 0.7466447949409485, "learning_rate": 5.352146454552722e-05, "loss": 1.7034, "step": 5818 }, { "epoch": 1.3955997841856003, "grad_norm": 0.6447800397872925, "learning_rate": 5.350547605723879e-05, "loss": 1.6615, "step": 5820 }, { "epoch": 1.3960793717403033, "grad_norm": 0.7294922471046448, "learning_rate": 5.348948756895036e-05, "loss": 1.7528, "step": 5822 }, { "epoch": 1.3965589592950063, "grad_norm": 0.7538589835166931, "learning_rate": 5.3473499080661924e-05, "loss": 1.6826, "step": 5824 }, { "epoch": 1.3970385468497093, "grad_norm": 0.9833450317382812, "learning_rate": 5.3457510592373496e-05, "loss": 1.7009, "step": 5826 }, { "epoch": 1.3975181344044123, "grad_norm": 0.8952828645706177, "learning_rate": 5.344152210408506e-05, "loss": 1.7393, "step": 5828 }, { "epoch": 1.397997721959115, "grad_norm": 0.6488960981369019, "learning_rate": 5.342553361579663e-05, "loss": 1.7112, "step": 5830 }, { "epoch": 1.398477309513818, "grad_norm": 0.7115768194198608, "learning_rate": 5.34095451275082e-05, "loss": 1.7068, "step": 5832 }, { "epoch": 1.398956897068521, "grad_norm": 0.6879238486289978, "learning_rate": 5.3393556639219766e-05, "loss": 1.7078, "step": 5834 }, { "epoch": 1.399436484623224, "grad_norm": 0.7371633052825928, "learning_rate": 5.337756815093133e-05, "loss": 1.6866, "step": 5836 }, { "epoch": 1.399916072177927, "grad_norm": 0.7693784236907959, "learning_rate": 5.3361579662642904e-05, "loss": 1.7146, "step": 5838 }, { "epoch": 1.40039565973263, "grad_norm": 0.7850694060325623, "learning_rate": 5.334559117435447e-05, "loss": 1.7065, "step": 5840 }, { "epoch": 1.400875247287333, "grad_norm": 0.783394992351532, "learning_rate": 5.3329602686066035e-05, "loss": 1.6526, "step": 5842 }, { 
"epoch": 1.401354834842036, "grad_norm": 0.6880589127540588, "learning_rate": 5.331361419777761e-05, "loss": 1.6962, "step": 5844 }, { "epoch": 1.4018344223967387, "grad_norm": 0.7596959471702576, "learning_rate": 5.3297625709489174e-05, "loss": 1.6673, "step": 5846 }, { "epoch": 1.4023140099514417, "grad_norm": 0.6052194833755493, "learning_rate": 5.328163722120074e-05, "loss": 1.7014, "step": 5848 }, { "epoch": 1.4027935975061447, "grad_norm": 0.6505351066589355, "learning_rate": 5.326564873291231e-05, "loss": 1.6906, "step": 5850 }, { "epoch": 1.4032731850608477, "grad_norm": 0.9735186100006104, "learning_rate": 5.324966024462388e-05, "loss": 1.6839, "step": 5852 }, { "epoch": 1.4037527726155505, "grad_norm": 0.6779367923736572, "learning_rate": 5.323367175633544e-05, "loss": 1.7741, "step": 5854 }, { "epoch": 1.4042323601702535, "grad_norm": 0.8208292722702026, "learning_rate": 5.3217683268047016e-05, "loss": 1.7026, "step": 5856 }, { "epoch": 1.4047119477249566, "grad_norm": 0.7780153155326843, "learning_rate": 5.320169477975858e-05, "loss": 1.7542, "step": 5858 }, { "epoch": 1.4051915352796596, "grad_norm": 0.6667279601097107, "learning_rate": 5.318570629147014e-05, "loss": 1.7106, "step": 5860 }, { "epoch": 1.4056711228343626, "grad_norm": 0.7300053834915161, "learning_rate": 5.3169717803181706e-05, "loss": 1.701, "step": 5862 }, { "epoch": 1.4061507103890654, "grad_norm": 0.6448196172714233, "learning_rate": 5.315372931489327e-05, "loss": 1.7082, "step": 5864 }, { "epoch": 1.4066302979437684, "grad_norm": 0.6709287166595459, "learning_rate": 5.3137740826604844e-05, "loss": 1.7077, "step": 5866 }, { "epoch": 1.4071098854984714, "grad_norm": 0.7050842642784119, "learning_rate": 5.312175233831641e-05, "loss": 1.6941, "step": 5868 }, { "epoch": 1.4075894730531742, "grad_norm": 0.6617968678474426, "learning_rate": 5.3105763850027976e-05, "loss": 1.7147, "step": 5870 }, { "epoch": 1.4080690606078772, "grad_norm": 0.6279814839363098, "learning_rate": 
5.308977536173955e-05, "loss": 1.7526, "step": 5872 }, { "epoch": 1.4085486481625802, "grad_norm": 0.6334817409515381, "learning_rate": 5.3073786873451114e-05, "loss": 1.6894, "step": 5874 }, { "epoch": 1.4090282357172832, "grad_norm": 0.6115161180496216, "learning_rate": 5.305779838516268e-05, "loss": 1.7149, "step": 5876 }, { "epoch": 1.4095078232719862, "grad_norm": 0.8663807511329651, "learning_rate": 5.304180989687425e-05, "loss": 1.7095, "step": 5878 }, { "epoch": 1.409987410826689, "grad_norm": 0.7211307287216187, "learning_rate": 5.302582140858582e-05, "loss": 1.7535, "step": 5880 }, { "epoch": 1.410466998381392, "grad_norm": 0.6414149403572083, "learning_rate": 5.300983292029738e-05, "loss": 1.67, "step": 5882 }, { "epoch": 1.410946585936095, "grad_norm": 0.6979393362998962, "learning_rate": 5.2993844432008956e-05, "loss": 1.7095, "step": 5884 }, { "epoch": 1.4114261734907978, "grad_norm": 0.5991981625556946, "learning_rate": 5.297785594372052e-05, "loss": 1.7319, "step": 5886 }, { "epoch": 1.4119057610455008, "grad_norm": 0.6525225639343262, "learning_rate": 5.296186745543209e-05, "loss": 1.6814, "step": 5888 }, { "epoch": 1.4123853486002038, "grad_norm": 0.7165660262107849, "learning_rate": 5.294587896714366e-05, "loss": 1.6886, "step": 5890 }, { "epoch": 1.4128649361549068, "grad_norm": 0.6375117301940918, "learning_rate": 5.2929890478855225e-05, "loss": 1.7003, "step": 5892 }, { "epoch": 1.4133445237096098, "grad_norm": 0.6851058602333069, "learning_rate": 5.291390199056679e-05, "loss": 1.6891, "step": 5894 }, { "epoch": 1.4138241112643126, "grad_norm": 0.7483408451080322, "learning_rate": 5.2897913502278364e-05, "loss": 1.7563, "step": 5896 }, { "epoch": 1.4143036988190156, "grad_norm": 0.6616154909133911, "learning_rate": 5.288192501398993e-05, "loss": 1.6725, "step": 5898 }, { "epoch": 1.4147832863737186, "grad_norm": 0.6829070448875427, "learning_rate": 5.2865936525701495e-05, "loss": 1.7263, "step": 5900 }, { "epoch": 1.4152628739284214, 
"grad_norm": 0.6683503985404968, "learning_rate": 5.284994803741307e-05, "loss": 1.739, "step": 5902 }, { "epoch": 1.4157424614831244, "grad_norm": 0.707729160785675, "learning_rate": 5.283395954912463e-05, "loss": 1.7139, "step": 5904 }, { "epoch": 1.4162220490378274, "grad_norm": 0.7207410335540771, "learning_rate": 5.28179710608362e-05, "loss": 1.7023, "step": 5906 }, { "epoch": 1.4167016365925305, "grad_norm": 0.6543484330177307, "learning_rate": 5.280198257254777e-05, "loss": 1.6823, "step": 5908 }, { "epoch": 1.4171812241472335, "grad_norm": 0.7118797302246094, "learning_rate": 5.278599408425934e-05, "loss": 1.6595, "step": 5910 }, { "epoch": 1.4176608117019363, "grad_norm": 0.6772561073303223, "learning_rate": 5.27700055959709e-05, "loss": 1.6616, "step": 5912 }, { "epoch": 1.4181403992566393, "grad_norm": 0.7477948069572449, "learning_rate": 5.2754017107682475e-05, "loss": 1.6504, "step": 5914 }, { "epoch": 1.4186199868113423, "grad_norm": 0.9145217537879944, "learning_rate": 5.273802861939404e-05, "loss": 1.707, "step": 5916 }, { "epoch": 1.4190995743660453, "grad_norm": 0.7669690251350403, "learning_rate": 5.272204013110561e-05, "loss": 1.6985, "step": 5918 }, { "epoch": 1.419579161920748, "grad_norm": 0.6313607096672058, "learning_rate": 5.270605164281718e-05, "loss": 1.7552, "step": 5920 }, { "epoch": 1.420058749475451, "grad_norm": 0.6752296090126038, "learning_rate": 5.2690063154528745e-05, "loss": 1.7222, "step": 5922 }, { "epoch": 1.420538337030154, "grad_norm": 0.6743757128715515, "learning_rate": 5.267407466624032e-05, "loss": 1.6807, "step": 5924 }, { "epoch": 1.421017924584857, "grad_norm": 0.6500996351242065, "learning_rate": 5.265808617795188e-05, "loss": 1.7362, "step": 5926 }, { "epoch": 1.42149751213956, "grad_norm": 0.7132394313812256, "learning_rate": 5.264209768966345e-05, "loss": 1.6503, "step": 5928 }, { "epoch": 1.421977099694263, "grad_norm": 0.6811193823814392, "learning_rate": 5.262610920137502e-05, "loss": 1.7242, "step": 5930 }, 
{ "epoch": 1.422456687248966, "grad_norm": 0.7944670915603638, "learning_rate": 5.261012071308657e-05, "loss": 1.674, "step": 5932 }, { "epoch": 1.422936274803669, "grad_norm": 0.6203374266624451, "learning_rate": 5.2594132224798146e-05, "loss": 1.6825, "step": 5934 }, { "epoch": 1.4234158623583717, "grad_norm": 0.6702468395233154, "learning_rate": 5.257814373650971e-05, "loss": 1.72, "step": 5936 }, { "epoch": 1.4238954499130747, "grad_norm": 0.6399931907653809, "learning_rate": 5.256215524822128e-05, "loss": 1.7464, "step": 5938 }, { "epoch": 1.4243750374677777, "grad_norm": 0.6885970830917358, "learning_rate": 5.254616675993285e-05, "loss": 1.727, "step": 5940 }, { "epoch": 1.4248546250224807, "grad_norm": 0.7142877578735352, "learning_rate": 5.2530178271644415e-05, "loss": 1.7157, "step": 5942 }, { "epoch": 1.4253342125771837, "grad_norm": 0.6559465527534485, "learning_rate": 5.251418978335598e-05, "loss": 1.7338, "step": 5944 }, { "epoch": 1.4258138001318865, "grad_norm": 0.6436615586280823, "learning_rate": 5.2498201295067553e-05, "loss": 1.6916, "step": 5946 }, { "epoch": 1.4262933876865895, "grad_norm": 0.7541715502738953, "learning_rate": 5.248221280677912e-05, "loss": 1.6365, "step": 5948 }, { "epoch": 1.4267729752412925, "grad_norm": 0.7275949120521545, "learning_rate": 5.2466224318490685e-05, "loss": 1.7467, "step": 5950 }, { "epoch": 1.4272525627959953, "grad_norm": 0.7145816683769226, "learning_rate": 5.245023583020226e-05, "loss": 1.6892, "step": 5952 }, { "epoch": 1.4277321503506983, "grad_norm": 0.6733691692352295, "learning_rate": 5.243424734191382e-05, "loss": 1.6859, "step": 5954 }, { "epoch": 1.4282117379054013, "grad_norm": 0.7774860262870789, "learning_rate": 5.241825885362539e-05, "loss": 1.7367, "step": 5956 }, { "epoch": 1.4286913254601044, "grad_norm": 0.7837063074111938, "learning_rate": 5.240227036533696e-05, "loss": 1.7214, "step": 5958 }, { "epoch": 1.4291709130148074, "grad_norm": 0.6708971261978149, "learning_rate": 
5.238628187704853e-05, "loss": 1.6662, "step": 5960 }, { "epoch": 1.4296505005695102, "grad_norm": 0.6841815710067749, "learning_rate": 5.237029338876009e-05, "loss": 1.636, "step": 5962 }, { "epoch": 1.4301300881242132, "grad_norm": 0.7150969505310059, "learning_rate": 5.2354304900471665e-05, "loss": 1.7202, "step": 5964 }, { "epoch": 1.4306096756789162, "grad_norm": 0.651997447013855, "learning_rate": 5.233831641218323e-05, "loss": 1.6957, "step": 5966 }, { "epoch": 1.431089263233619, "grad_norm": 0.6510360240936279, "learning_rate": 5.2322327923894797e-05, "loss": 1.7455, "step": 5968 }, { "epoch": 1.431568850788322, "grad_norm": 0.6956027150154114, "learning_rate": 5.230633943560637e-05, "loss": 1.6979, "step": 5970 }, { "epoch": 1.432048438343025, "grad_norm": 0.7519411444664001, "learning_rate": 5.2290350947317935e-05, "loss": 1.6457, "step": 5972 }, { "epoch": 1.432528025897728, "grad_norm": 0.6770023703575134, "learning_rate": 5.22743624590295e-05, "loss": 1.6718, "step": 5974 }, { "epoch": 1.433007613452431, "grad_norm": 0.6158294677734375, "learning_rate": 5.225837397074107e-05, "loss": 1.6926, "step": 5976 }, { "epoch": 1.4334872010071338, "grad_norm": 0.7860799431800842, "learning_rate": 5.224238548245264e-05, "loss": 1.6942, "step": 5978 }, { "epoch": 1.4339667885618368, "grad_norm": 0.6613467931747437, "learning_rate": 5.2226396994164204e-05, "loss": 1.73, "step": 5980 }, { "epoch": 1.4344463761165398, "grad_norm": 0.6926292181015015, "learning_rate": 5.221040850587578e-05, "loss": 1.7084, "step": 5982 }, { "epoch": 1.4349259636712428, "grad_norm": 0.6734023094177246, "learning_rate": 5.219442001758734e-05, "loss": 1.7253, "step": 5984 }, { "epoch": 1.4354055512259456, "grad_norm": 0.7129262685775757, "learning_rate": 5.217843152929891e-05, "loss": 1.7061, "step": 5986 }, { "epoch": 1.4358851387806486, "grad_norm": 0.6219239234924316, "learning_rate": 5.216244304101048e-05, "loss": 1.7156, "step": 5988 }, { "epoch": 1.4363647263353516, "grad_norm": 
0.6428289413452148, "learning_rate": 5.2146454552722046e-05, "loss": 1.6943, "step": 5990 }, { "epoch": 1.4368443138900546, "grad_norm": 0.7911584973335266, "learning_rate": 5.213046606443361e-05, "loss": 1.7342, "step": 5992 }, { "epoch": 1.4373239014447576, "grad_norm": 0.6956084370613098, "learning_rate": 5.2114477576145184e-05, "loss": 1.7038, "step": 5994 }, { "epoch": 1.4378034889994604, "grad_norm": 0.6390711665153503, "learning_rate": 5.209848908785675e-05, "loss": 1.7636, "step": 5996 }, { "epoch": 1.4382830765541634, "grad_norm": 0.6657728552818298, "learning_rate": 5.2082500599568316e-05, "loss": 1.657, "step": 5998 }, { "epoch": 1.4387626641088664, "grad_norm": 0.738726019859314, "learning_rate": 5.206651211127989e-05, "loss": 1.6929, "step": 6000 }, { "epoch": 1.4387626641088664, "eval_loss": 1.740631341934204, "eval_runtime": 331.4002, "eval_samples_per_second": 402.682, "eval_steps_per_second": 12.586, "step": 6000 }, { "epoch": 1.4392422516635692, "grad_norm": 0.7080428600311279, "learning_rate": 5.2050523622991454e-05, "loss": 1.661, "step": 6002 }, { "epoch": 1.4397218392182722, "grad_norm": 0.6491647958755493, "learning_rate": 5.203453513470301e-05, "loss": 1.745, "step": 6004 }, { "epoch": 1.4402014267729752, "grad_norm": 0.7112669348716736, "learning_rate": 5.201854664641458e-05, "loss": 1.7006, "step": 6006 }, { "epoch": 1.4406810143276783, "grad_norm": 0.7304234504699707, "learning_rate": 5.2002558158126144e-05, "loss": 1.7326, "step": 6008 }, { "epoch": 1.4411606018823813, "grad_norm": 0.7353765964508057, "learning_rate": 5.198656966983772e-05, "loss": 1.723, "step": 6010 }, { "epoch": 1.441640189437084, "grad_norm": 0.6597503423690796, "learning_rate": 5.197058118154928e-05, "loss": 1.7173, "step": 6012 }, { "epoch": 1.442119776991787, "grad_norm": 0.7223091721534729, "learning_rate": 5.195459269326085e-05, "loss": 1.6711, "step": 6014 }, { "epoch": 1.44259936454649, "grad_norm": 0.6557283401489258, "learning_rate": 5.193860420497242e-05, 
"loss": 1.7389, "step": 6016 }, { "epoch": 1.4430789521011929, "grad_norm": 0.7053162455558777, "learning_rate": 5.1922615716683986e-05, "loss": 1.7404, "step": 6018 }, { "epoch": 1.4435585396558959, "grad_norm": 0.6303732395172119, "learning_rate": 5.190662722839555e-05, "loss": 1.6792, "step": 6020 }, { "epoch": 1.4440381272105989, "grad_norm": 0.5672025084495544, "learning_rate": 5.1890638740107125e-05, "loss": 1.6977, "step": 6022 }, { "epoch": 1.4445177147653019, "grad_norm": 0.6709706783294678, "learning_rate": 5.187465025181869e-05, "loss": 1.6685, "step": 6024 }, { "epoch": 1.444997302320005, "grad_norm": 0.7113738059997559, "learning_rate": 5.1858661763530256e-05, "loss": 1.712, "step": 6026 }, { "epoch": 1.4454768898747077, "grad_norm": 0.7590446472167969, "learning_rate": 5.184267327524183e-05, "loss": 1.6907, "step": 6028 }, { "epoch": 1.4459564774294107, "grad_norm": 0.6789416670799255, "learning_rate": 5.1826684786953394e-05, "loss": 1.7453, "step": 6030 }, { "epoch": 1.4464360649841137, "grad_norm": 0.6650261878967285, "learning_rate": 5.181069629866496e-05, "loss": 1.6947, "step": 6032 }, { "epoch": 1.4469156525388165, "grad_norm": 0.6524399518966675, "learning_rate": 5.179470781037653e-05, "loss": 1.6821, "step": 6034 }, { "epoch": 1.4473952400935195, "grad_norm": 0.7487620711326599, "learning_rate": 5.17787193220881e-05, "loss": 1.6783, "step": 6036 }, { "epoch": 1.4478748276482225, "grad_norm": 0.6386328935623169, "learning_rate": 5.1762730833799664e-05, "loss": 1.6921, "step": 6038 }, { "epoch": 1.4483544152029255, "grad_norm": 0.8586521148681641, "learning_rate": 5.1746742345511236e-05, "loss": 1.6694, "step": 6040 }, { "epoch": 1.4488340027576285, "grad_norm": 0.6494764685630798, "learning_rate": 5.17307538572228e-05, "loss": 1.704, "step": 6042 }, { "epoch": 1.4493135903123313, "grad_norm": 0.6365654468536377, "learning_rate": 5.171476536893437e-05, "loss": 1.7164, "step": 6044 }, { "epoch": 1.4497931778670343, "grad_norm": 
0.6781180500984192, "learning_rate": 5.169877688064594e-05, "loss": 1.6947, "step": 6046 }, { "epoch": 1.4502727654217373, "grad_norm": 0.633733868598938, "learning_rate": 5.1682788392357506e-05, "loss": 1.7192, "step": 6048 }, { "epoch": 1.4507523529764403, "grad_norm": 0.8278135061264038, "learning_rate": 5.166679990406907e-05, "loss": 1.666, "step": 6050 }, { "epoch": 1.4512319405311431, "grad_norm": 0.7818396687507629, "learning_rate": 5.1650811415780644e-05, "loss": 1.7346, "step": 6052 }, { "epoch": 1.4517115280858461, "grad_norm": 0.707790732383728, "learning_rate": 5.163482292749221e-05, "loss": 1.7519, "step": 6054 }, { "epoch": 1.4521911156405491, "grad_norm": 0.6576093435287476, "learning_rate": 5.1618834439203775e-05, "loss": 1.6817, "step": 6056 }, { "epoch": 1.4526707031952522, "grad_norm": 0.6540111899375916, "learning_rate": 5.160284595091535e-05, "loss": 1.6716, "step": 6058 }, { "epoch": 1.4531502907499552, "grad_norm": 0.7295790910720825, "learning_rate": 5.1586857462626914e-05, "loss": 1.6547, "step": 6060 }, { "epoch": 1.453629878304658, "grad_norm": 0.6446998119354248, "learning_rate": 5.157086897433848e-05, "loss": 1.7421, "step": 6062 }, { "epoch": 1.454109465859361, "grad_norm": 0.7655063271522522, "learning_rate": 5.155488048605005e-05, "loss": 1.6529, "step": 6064 }, { "epoch": 1.454589053414064, "grad_norm": 0.6819613575935364, "learning_rate": 5.153889199776162e-05, "loss": 1.6708, "step": 6066 }, { "epoch": 1.4550686409687668, "grad_norm": 0.7097482681274414, "learning_rate": 5.152290350947318e-05, "loss": 1.7147, "step": 6068 }, { "epoch": 1.4555482285234698, "grad_norm": 0.7458867430686951, "learning_rate": 5.1506915021184756e-05, "loss": 1.7937, "step": 6070 }, { "epoch": 1.4560278160781728, "grad_norm": 0.649010181427002, "learning_rate": 5.149092653289632e-05, "loss": 1.6705, "step": 6072 }, { "epoch": 1.4565074036328758, "grad_norm": 0.6486867666244507, "learning_rate": 5.147493804460789e-05, "loss": 1.6847, "step": 6074 }, { 
"epoch": 1.4569869911875788, "grad_norm": 0.7491609454154968, "learning_rate": 5.145894955631946e-05, "loss": 1.6489, "step": 6076 }, { "epoch": 1.4574665787422816, "grad_norm": 0.6492285132408142, "learning_rate": 5.144296106803101e-05, "loss": 1.6972, "step": 6078 }, { "epoch": 1.4579461662969846, "grad_norm": 0.759353518486023, "learning_rate": 5.1426972579742584e-05, "loss": 1.7204, "step": 6080 }, { "epoch": 1.4584257538516876, "grad_norm": 0.6879876852035522, "learning_rate": 5.141098409145415e-05, "loss": 1.7086, "step": 6082 }, { "epoch": 1.4589053414063904, "grad_norm": 0.7318446636199951, "learning_rate": 5.1394995603165716e-05, "loss": 1.7063, "step": 6084 }, { "epoch": 1.4593849289610934, "grad_norm": 0.6482135057449341, "learning_rate": 5.137900711487729e-05, "loss": 1.6972, "step": 6086 }, { "epoch": 1.4598645165157964, "grad_norm": 0.7303016185760498, "learning_rate": 5.1363018626588854e-05, "loss": 1.7072, "step": 6088 }, { "epoch": 1.4603441040704994, "grad_norm": 0.6866605281829834, "learning_rate": 5.134703013830042e-05, "loss": 1.7042, "step": 6090 }, { "epoch": 1.4608236916252024, "grad_norm": 0.7493895292282104, "learning_rate": 5.133104165001199e-05, "loss": 1.6704, "step": 6092 }, { "epoch": 1.4613032791799052, "grad_norm": 0.7254617810249329, "learning_rate": 5.131505316172356e-05, "loss": 1.6561, "step": 6094 }, { "epoch": 1.4617828667346082, "grad_norm": 0.7819439768791199, "learning_rate": 5.129906467343512e-05, "loss": 1.6989, "step": 6096 }, { "epoch": 1.4622624542893112, "grad_norm": 0.6846041679382324, "learning_rate": 5.1283076185146696e-05, "loss": 1.7044, "step": 6098 }, { "epoch": 1.462742041844014, "grad_norm": 0.7215938568115234, "learning_rate": 5.126708769685826e-05, "loss": 1.7385, "step": 6100 }, { "epoch": 1.463221629398717, "grad_norm": 0.6960102319717407, "learning_rate": 5.125109920856983e-05, "loss": 1.7022, "step": 6102 }, { "epoch": 1.46370121695342, "grad_norm": 0.6168778538703918, "learning_rate": 
5.12351107202814e-05, "loss": 1.6827, "step": 6104 }, { "epoch": 1.464180804508123, "grad_norm": 0.6535958051681519, "learning_rate": 5.1219122231992965e-05, "loss": 1.7001, "step": 6106 }, { "epoch": 1.464660392062826, "grad_norm": 0.652046263217926, "learning_rate": 5.120313374370454e-05, "loss": 1.6886, "step": 6108 }, { "epoch": 1.4651399796175288, "grad_norm": 0.6588443517684937, "learning_rate": 5.1187145255416103e-05, "loss": 1.6869, "step": 6110 }, { "epoch": 1.4656195671722319, "grad_norm": 0.6154664754867554, "learning_rate": 5.117115676712767e-05, "loss": 1.7221, "step": 6112 }, { "epoch": 1.4660991547269349, "grad_norm": 0.6676748394966125, "learning_rate": 5.115516827883924e-05, "loss": 1.7261, "step": 6114 }, { "epoch": 1.4665787422816379, "grad_norm": 0.6796942353248596, "learning_rate": 5.113917979055081e-05, "loss": 1.6421, "step": 6116 }, { "epoch": 1.4670583298363407, "grad_norm": 0.6635184288024902, "learning_rate": 5.112319130226237e-05, "loss": 1.7161, "step": 6118 }, { "epoch": 1.4675379173910437, "grad_norm": 0.6417514681816101, "learning_rate": 5.1107202813973946e-05, "loss": 1.7392, "step": 6120 }, { "epoch": 1.4680175049457467, "grad_norm": 0.6489534974098206, "learning_rate": 5.109121432568551e-05, "loss": 1.7469, "step": 6122 }, { "epoch": 1.4684970925004497, "grad_norm": 0.7196010947227478, "learning_rate": 5.107522583739708e-05, "loss": 1.7174, "step": 6124 }, { "epoch": 1.4689766800551527, "grad_norm": 0.6162723302841187, "learning_rate": 5.105923734910865e-05, "loss": 1.6699, "step": 6126 }, { "epoch": 1.4694562676098555, "grad_norm": 0.6377822160720825, "learning_rate": 5.1043248860820215e-05, "loss": 1.7219, "step": 6128 }, { "epoch": 1.4699358551645585, "grad_norm": 0.6627887487411499, "learning_rate": 5.102726037253178e-05, "loss": 1.6812, "step": 6130 }, { "epoch": 1.4704154427192615, "grad_norm": 0.6218540668487549, "learning_rate": 5.101127188424335e-05, "loss": 1.7018, "step": 6132 }, { "epoch": 1.4708950302739643, 
"grad_norm": 0.6261554956436157, "learning_rate": 5.099528339595492e-05, "loss": 1.6495, "step": 6134 }, { "epoch": 1.4713746178286673, "grad_norm": 0.6239809989929199, "learning_rate": 5.0979294907666485e-05, "loss": 1.6445, "step": 6136 }, { "epoch": 1.4718542053833703, "grad_norm": 0.8774892091751099, "learning_rate": 5.096330641937806e-05, "loss": 1.6935, "step": 6138 }, { "epoch": 1.4723337929380733, "grad_norm": 0.6521182060241699, "learning_rate": 5.094731793108962e-05, "loss": 1.7236, "step": 6140 }, { "epoch": 1.4728133804927763, "grad_norm": 0.6541429758071899, "learning_rate": 5.093132944280119e-05, "loss": 1.6369, "step": 6142 }, { "epoch": 1.4732929680474791, "grad_norm": 0.7532526254653931, "learning_rate": 5.091534095451276e-05, "loss": 1.694, "step": 6144 }, { "epoch": 1.4737725556021821, "grad_norm": 0.7472359538078308, "learning_rate": 5.089935246622433e-05, "loss": 1.7282, "step": 6146 }, { "epoch": 1.4742521431568851, "grad_norm": 0.7226340174674988, "learning_rate": 5.088336397793589e-05, "loss": 1.6938, "step": 6148 }, { "epoch": 1.474731730711588, "grad_norm": 0.7037482857704163, "learning_rate": 5.086737548964745e-05, "loss": 1.6758, "step": 6150 }, { "epoch": 1.475211318266291, "grad_norm": 0.6353002190589905, "learning_rate": 5.085138700135902e-05, "loss": 1.7365, "step": 6152 }, { "epoch": 1.475690905820994, "grad_norm": 0.6391789317131042, "learning_rate": 5.083539851307059e-05, "loss": 1.7355, "step": 6154 }, { "epoch": 1.476170493375697, "grad_norm": 0.7375152707099915, "learning_rate": 5.0819410024782155e-05, "loss": 1.6818, "step": 6156 }, { "epoch": 1.4766500809304, "grad_norm": 0.7918117046356201, "learning_rate": 5.080342153649372e-05, "loss": 1.7008, "step": 6158 }, { "epoch": 1.4771296684851027, "grad_norm": 0.7262170314788818, "learning_rate": 5.0787433048205293e-05, "loss": 1.7123, "step": 6160 }, { "epoch": 1.4776092560398058, "grad_norm": 0.7246866226196289, "learning_rate": 5.077144455991686e-05, "loss": 1.7237, "step": 
6162 }, { "epoch": 1.4780888435945088, "grad_norm": 0.6752650737762451, "learning_rate": 5.0755456071628425e-05, "loss": 1.6585, "step": 6164 }, { "epoch": 1.4785684311492115, "grad_norm": 1.1318131685256958, "learning_rate": 5.073946758334e-05, "loss": 1.7136, "step": 6166 }, { "epoch": 1.4790480187039146, "grad_norm": 1.0212699174880981, "learning_rate": 5.072347909505156e-05, "loss": 1.7378, "step": 6168 }, { "epoch": 1.4795276062586176, "grad_norm": 0.6944500207901001, "learning_rate": 5.070749060676313e-05, "loss": 1.7223, "step": 6170 }, { "epoch": 1.4800071938133206, "grad_norm": 0.611276388168335, "learning_rate": 5.06915021184747e-05, "loss": 1.6906, "step": 6172 }, { "epoch": 1.4804867813680236, "grad_norm": 0.6746801137924194, "learning_rate": 5.067551363018627e-05, "loss": 1.6877, "step": 6174 }, { "epoch": 1.4809663689227264, "grad_norm": 0.6561198830604553, "learning_rate": 5.065952514189783e-05, "loss": 1.6946, "step": 6176 }, { "epoch": 1.4814459564774294, "grad_norm": 0.6225281357765198, "learning_rate": 5.0643536653609405e-05, "loss": 1.6853, "step": 6178 }, { "epoch": 1.4819255440321324, "grad_norm": 0.8388818502426147, "learning_rate": 5.062754816532097e-05, "loss": 1.7118, "step": 6180 }, { "epoch": 1.4824051315868354, "grad_norm": 0.6866105794906616, "learning_rate": 5.0611559677032536e-05, "loss": 1.7156, "step": 6182 }, { "epoch": 1.4828847191415382, "grad_norm": 0.6315853595733643, "learning_rate": 5.059557118874411e-05, "loss": 1.6947, "step": 6184 }, { "epoch": 1.4833643066962412, "grad_norm": 0.8495597243309021, "learning_rate": 5.0579582700455675e-05, "loss": 1.6937, "step": 6186 }, { "epoch": 1.4838438942509442, "grad_norm": 0.7081652879714966, "learning_rate": 5.056359421216724e-05, "loss": 1.719, "step": 6188 }, { "epoch": 1.4843234818056472, "grad_norm": 0.6546943187713623, "learning_rate": 5.054760572387881e-05, "loss": 1.7139, "step": 6190 }, { "epoch": 1.4848030693603502, "grad_norm": 0.7244045734405518, "learning_rate": 
5.053161723559038e-05, "loss": 1.6967, "step": 6192 }, { "epoch": 1.485282656915053, "grad_norm": 0.6931948065757751, "learning_rate": 5.0515628747301944e-05, "loss": 1.6436, "step": 6194 }, { "epoch": 1.485762244469756, "grad_norm": 0.6979962587356567, "learning_rate": 5.049964025901352e-05, "loss": 1.71, "step": 6196 }, { "epoch": 1.486241832024459, "grad_norm": 0.6872630715370178, "learning_rate": 5.048365177072508e-05, "loss": 1.7013, "step": 6198 }, { "epoch": 1.4867214195791618, "grad_norm": 0.599855899810791, "learning_rate": 5.046766328243665e-05, "loss": 1.6674, "step": 6200 }, { "epoch": 1.4872010071338648, "grad_norm": 0.6584493517875671, "learning_rate": 5.045167479414822e-05, "loss": 1.6887, "step": 6202 }, { "epoch": 1.4876805946885678, "grad_norm": 0.6421840190887451, "learning_rate": 5.0435686305859786e-05, "loss": 1.7075, "step": 6204 }, { "epoch": 1.4881601822432708, "grad_norm": 0.7064083814620972, "learning_rate": 5.041969781757135e-05, "loss": 1.6268, "step": 6206 }, { "epoch": 1.4886397697979739, "grad_norm": 0.7797737717628479, "learning_rate": 5.0403709329282924e-05, "loss": 1.6958, "step": 6208 }, { "epoch": 1.4891193573526766, "grad_norm": 0.7122231721878052, "learning_rate": 5.038772084099449e-05, "loss": 1.7081, "step": 6210 }, { "epoch": 1.4895989449073797, "grad_norm": 0.7281239628791809, "learning_rate": 5.0371732352706056e-05, "loss": 1.73, "step": 6212 }, { "epoch": 1.4900785324620827, "grad_norm": 0.638126790523529, "learning_rate": 5.035574386441763e-05, "loss": 1.7009, "step": 6214 }, { "epoch": 1.4905581200167854, "grad_norm": 0.5996806025505066, "learning_rate": 5.0339755376129194e-05, "loss": 1.6728, "step": 6216 }, { "epoch": 1.4910377075714885, "grad_norm": 0.6503472924232483, "learning_rate": 5.032376688784076e-05, "loss": 1.6618, "step": 6218 }, { "epoch": 1.4915172951261915, "grad_norm": 0.6629751324653625, "learning_rate": 5.030777839955233e-05, "loss": 1.7265, "step": 6220 }, { "epoch": 1.4919968826808945, "grad_norm": 
0.6444504261016846, "learning_rate": 5.02917899112639e-05, "loss": 1.738, "step": 6222 }, { "epoch": 1.4924764702355975, "grad_norm": 0.7671033143997192, "learning_rate": 5.027580142297546e-05, "loss": 1.6884, "step": 6224 }, { "epoch": 1.4929560577903003, "grad_norm": 0.659144937992096, "learning_rate": 5.025981293468702e-05, "loss": 1.7557, "step": 6226 }, { "epoch": 1.4934356453450033, "grad_norm": 0.7086419463157654, "learning_rate": 5.024382444639859e-05, "loss": 1.6889, "step": 6228 }, { "epoch": 1.4939152328997063, "grad_norm": 0.8041189312934875, "learning_rate": 5.022783595811016e-05, "loss": 1.6166, "step": 6230 }, { "epoch": 1.494394820454409, "grad_norm": 0.6373702883720398, "learning_rate": 5.0211847469821726e-05, "loss": 1.6563, "step": 6232 }, { "epoch": 1.494874408009112, "grad_norm": 0.6508566737174988, "learning_rate": 5.019585898153329e-05, "loss": 1.6881, "step": 6234 }, { "epoch": 1.495353995563815, "grad_norm": 0.6834598183631897, "learning_rate": 5.0179870493244865e-05, "loss": 1.6972, "step": 6236 }, { "epoch": 1.495833583118518, "grad_norm": 0.7266842722892761, "learning_rate": 5.016388200495643e-05, "loss": 1.6966, "step": 6238 }, { "epoch": 1.4963131706732211, "grad_norm": 0.6795170307159424, "learning_rate": 5.0147893516667996e-05, "loss": 1.6741, "step": 6240 }, { "epoch": 1.496792758227924, "grad_norm": 0.6552463173866272, "learning_rate": 5.013190502837957e-05, "loss": 1.7096, "step": 6242 }, { "epoch": 1.497272345782627, "grad_norm": 0.6526675224304199, "learning_rate": 5.0115916540091134e-05, "loss": 1.7432, "step": 6244 }, { "epoch": 1.49775193333733, "grad_norm": 0.6907639503479004, "learning_rate": 5.00999280518027e-05, "loss": 1.6902, "step": 6246 }, { "epoch": 1.498231520892033, "grad_norm": 0.6403139233589172, "learning_rate": 5.008393956351427e-05, "loss": 1.7242, "step": 6248 }, { "epoch": 1.4987111084467357, "grad_norm": 0.6100246906280518, "learning_rate": 5.006795107522584e-05, "loss": 1.7168, "step": 6250 }, { "epoch": 
1.4991906960014387, "grad_norm": 0.667578399181366, "learning_rate": 5.0051962586937404e-05, "loss": 1.7257, "step": 6252 }, { "epoch": 1.4996702835561417, "grad_norm": 0.6488122940063477, "learning_rate": 5.0035974098648976e-05, "loss": 1.7152, "step": 6254 }, { "epoch": 1.5001498711108447, "grad_norm": 0.6594210267066956, "learning_rate": 5.001998561036054e-05, "loss": 1.7012, "step": 6256 }, { "epoch": 1.5006294586655478, "grad_norm": 0.7174622416496277, "learning_rate": 5.000399712207211e-05, "loss": 1.7249, "step": 6258 }, { "epoch": 1.5011090462202505, "grad_norm": 0.7273445129394531, "learning_rate": 4.998800863378368e-05, "loss": 1.7301, "step": 6260 }, { "epoch": 1.5015886337749536, "grad_norm": 0.6820775866508484, "learning_rate": 4.9972020145495246e-05, "loss": 1.7123, "step": 6262 }, { "epoch": 1.5020682213296563, "grad_norm": 0.7021302580833435, "learning_rate": 4.995603165720681e-05, "loss": 1.6812, "step": 6264 }, { "epoch": 1.5025478088843593, "grad_norm": 0.5782956480979919, "learning_rate": 4.9940043168918384e-05, "loss": 1.6958, "step": 6266 }, { "epoch": 1.5030273964390624, "grad_norm": 0.6000342965126038, "learning_rate": 4.992405468062995e-05, "loss": 1.7111, "step": 6268 }, { "epoch": 1.5035069839937654, "grad_norm": 0.6487705111503601, "learning_rate": 4.9908066192341515e-05, "loss": 1.6928, "step": 6270 }, { "epoch": 1.5039865715484684, "grad_norm": 0.7139567136764526, "learning_rate": 4.989207770405309e-05, "loss": 1.7105, "step": 6272 }, { "epoch": 1.5044661591031714, "grad_norm": 0.6327992677688599, "learning_rate": 4.9876089215764654e-05, "loss": 1.6848, "step": 6274 }, { "epoch": 1.5049457466578742, "grad_norm": 0.7725253105163574, "learning_rate": 4.986010072747622e-05, "loss": 1.6673, "step": 6276 }, { "epoch": 1.5054253342125772, "grad_norm": 0.6720406413078308, "learning_rate": 4.9844112239187785e-05, "loss": 1.699, "step": 6278 }, { "epoch": 1.5059049217672802, "grad_norm": 0.6832230091094971, "learning_rate": 
4.982812375089935e-05, "loss": 1.7263, "step": 6280 }, { "epoch": 1.506384509321983, "grad_norm": 0.6500194072723389, "learning_rate": 4.981213526261092e-05, "loss": 1.6992, "step": 6282 }, { "epoch": 1.506864096876686, "grad_norm": 0.581327497959137, "learning_rate": 4.979614677432249e-05, "loss": 1.6979, "step": 6284 }, { "epoch": 1.507343684431389, "grad_norm": 0.7356831431388855, "learning_rate": 4.9780158286034054e-05, "loss": 1.7154, "step": 6286 }, { "epoch": 1.507823271986092, "grad_norm": 0.797125518321991, "learning_rate": 4.976416979774563e-05, "loss": 1.7235, "step": 6288 }, { "epoch": 1.508302859540795, "grad_norm": 0.7048808932304382, "learning_rate": 4.974818130945719e-05, "loss": 1.7472, "step": 6290 }, { "epoch": 1.508782447095498, "grad_norm": 0.6416649222373962, "learning_rate": 4.973219282116876e-05, "loss": 1.6827, "step": 6292 }, { "epoch": 1.5092620346502008, "grad_norm": 0.7678630352020264, "learning_rate": 4.971620433288033e-05, "loss": 1.7085, "step": 6294 }, { "epoch": 1.5097416222049038, "grad_norm": 0.6577839851379395, "learning_rate": 4.9700215844591897e-05, "loss": 1.7539, "step": 6296 }, { "epoch": 1.5102212097596066, "grad_norm": 0.634703516960144, "learning_rate": 4.968422735630346e-05, "loss": 1.7165, "step": 6298 }, { "epoch": 1.5107007973143096, "grad_norm": 0.676006019115448, "learning_rate": 4.9668238868015035e-05, "loss": 1.7216, "step": 6300 }, { "epoch": 1.5111803848690126, "grad_norm": 0.6249676942825317, "learning_rate": 4.96522503797266e-05, "loss": 1.7124, "step": 6302 }, { "epoch": 1.5116599724237156, "grad_norm": 0.6103209257125854, "learning_rate": 4.9636261891438166e-05, "loss": 1.6887, "step": 6304 }, { "epoch": 1.5121395599784186, "grad_norm": 0.6254585981369019, "learning_rate": 4.962027340314974e-05, "loss": 1.7044, "step": 6306 }, { "epoch": 1.5126191475331217, "grad_norm": 0.6821017265319824, "learning_rate": 4.9604284914861304e-05, "loss": 1.7163, "step": 6308 }, { "epoch": 1.5130987350878244, "grad_norm": 
0.6916487812995911, "learning_rate": 4.958829642657287e-05, "loss": 1.7305, "step": 6310 }, { "epoch": 1.5135783226425275, "grad_norm": 0.6351715326309204, "learning_rate": 4.957230793828444e-05, "loss": 1.7206, "step": 6312 }, { "epoch": 1.5140579101972302, "grad_norm": 0.6707674264907837, "learning_rate": 4.9556319449996e-05, "loss": 1.7358, "step": 6314 }, { "epoch": 1.5145374977519332, "grad_norm": 0.691756546497345, "learning_rate": 4.9540330961707574e-05, "loss": 1.7119, "step": 6316 }, { "epoch": 1.5150170853066363, "grad_norm": 0.7459651827812195, "learning_rate": 4.952434247341914e-05, "loss": 1.6928, "step": 6318 }, { "epoch": 1.5154966728613393, "grad_norm": 0.6451705098152161, "learning_rate": 4.9508353985130705e-05, "loss": 1.6245, "step": 6320 }, { "epoch": 1.5159762604160423, "grad_norm": 1.024332880973816, "learning_rate": 4.949236549684228e-05, "loss": 1.6955, "step": 6322 }, { "epoch": 1.5164558479707453, "grad_norm": 0.6815436482429504, "learning_rate": 4.9476377008553843e-05, "loss": 1.6957, "step": 6324 }, { "epoch": 1.516935435525448, "grad_norm": 0.6821497082710266, "learning_rate": 4.946038852026541e-05, "loss": 1.6379, "step": 6326 }, { "epoch": 1.517415023080151, "grad_norm": 0.6966855525970459, "learning_rate": 4.944440003197698e-05, "loss": 1.7394, "step": 6328 }, { "epoch": 1.5178946106348539, "grad_norm": 0.7632134556770325, "learning_rate": 4.942841154368855e-05, "loss": 1.6747, "step": 6330 }, { "epoch": 1.5183741981895569, "grad_norm": 0.7023187875747681, "learning_rate": 4.941242305540011e-05, "loss": 1.7708, "step": 6332 }, { "epoch": 1.5188537857442599, "grad_norm": 0.661986768245697, "learning_rate": 4.9396434567111685e-05, "loss": 1.7195, "step": 6334 }, { "epoch": 1.519333373298963, "grad_norm": 0.7255896925926208, "learning_rate": 4.938044607882325e-05, "loss": 1.6833, "step": 6336 }, { "epoch": 1.519812960853666, "grad_norm": 0.6861463189125061, "learning_rate": 4.936445759053482e-05, "loss": 1.7229, "step": 6338 }, { 
"epoch": 1.520292548408369, "grad_norm": 0.7088152766227722, "learning_rate": 4.934846910224639e-05, "loss": 1.6981, "step": 6340 }, { "epoch": 1.5207721359630717, "grad_norm": 0.813969075679779, "learning_rate": 4.9332480613957955e-05, "loss": 1.7051, "step": 6342 }, { "epoch": 1.5212517235177747, "grad_norm": 0.6456663608551025, "learning_rate": 4.931649212566952e-05, "loss": 1.7101, "step": 6344 }, { "epoch": 1.5217313110724777, "grad_norm": 0.7885885238647461, "learning_rate": 4.930050363738109e-05, "loss": 1.6898, "step": 6346 }, { "epoch": 1.5222108986271805, "grad_norm": 0.6879623532295227, "learning_rate": 4.928451514909266e-05, "loss": 1.7001, "step": 6348 }, { "epoch": 1.5226904861818835, "grad_norm": 0.8273037075996399, "learning_rate": 4.926852666080422e-05, "loss": 1.7054, "step": 6350 }, { "epoch": 1.5231700737365865, "grad_norm": 0.7120887041091919, "learning_rate": 4.925253817251579e-05, "loss": 1.7271, "step": 6352 }, { "epoch": 1.5236496612912895, "grad_norm": 0.7280737161636353, "learning_rate": 4.9236549684227356e-05, "loss": 1.6858, "step": 6354 }, { "epoch": 1.5241292488459925, "grad_norm": 0.6970281004905701, "learning_rate": 4.922056119593892e-05, "loss": 1.7446, "step": 6356 }, { "epoch": 1.5246088364006956, "grad_norm": 0.7836964726448059, "learning_rate": 4.9204572707650494e-05, "loss": 1.7049, "step": 6358 }, { "epoch": 1.5250884239553983, "grad_norm": 0.6369091272354126, "learning_rate": 4.918858421936206e-05, "loss": 1.7006, "step": 6360 }, { "epoch": 1.5255680115101014, "grad_norm": 0.6463208198547363, "learning_rate": 4.9172595731073626e-05, "loss": 1.7137, "step": 6362 }, { "epoch": 1.5260475990648041, "grad_norm": 0.6368257999420166, "learning_rate": 4.91566072427852e-05, "loss": 1.665, "step": 6364 }, { "epoch": 1.5265271866195071, "grad_norm": 0.7863854169845581, "learning_rate": 4.9140618754496764e-05, "loss": 1.7116, "step": 6366 }, { "epoch": 1.5270067741742102, "grad_norm": 0.613044261932373, "learning_rate": 
4.912463026620833e-05, "loss": 1.7037, "step": 6368 }, { "epoch": 1.5274863617289132, "grad_norm": 0.6266853213310242, "learning_rate": 4.91086417779199e-05, "loss": 1.7205, "step": 6370 }, { "epoch": 1.5279659492836162, "grad_norm": 0.6989834308624268, "learning_rate": 4.909265328963147e-05, "loss": 1.6523, "step": 6372 }, { "epoch": 1.5284455368383192, "grad_norm": 0.5981797575950623, "learning_rate": 4.907666480134303e-05, "loss": 1.6947, "step": 6374 }, { "epoch": 1.528925124393022, "grad_norm": 0.7093629240989685, "learning_rate": 4.9060676313054606e-05, "loss": 1.6794, "step": 6376 }, { "epoch": 1.529404711947725, "grad_norm": 0.7043320536613464, "learning_rate": 4.904468782476617e-05, "loss": 1.7267, "step": 6378 }, { "epoch": 1.5298842995024278, "grad_norm": 0.6495473384857178, "learning_rate": 4.902869933647774e-05, "loss": 1.6921, "step": 6380 }, { "epoch": 1.5303638870571308, "grad_norm": 0.6098270416259766, "learning_rate": 4.901271084818931e-05, "loss": 1.6509, "step": 6382 }, { "epoch": 1.5308434746118338, "grad_norm": 0.7150903940200806, "learning_rate": 4.8996722359900875e-05, "loss": 1.6996, "step": 6384 }, { "epoch": 1.5313230621665368, "grad_norm": 0.6677735447883606, "learning_rate": 4.898073387161244e-05, "loss": 1.6872, "step": 6386 }, { "epoch": 1.5318026497212398, "grad_norm": 0.6099262833595276, "learning_rate": 4.896474538332401e-05, "loss": 1.7294, "step": 6388 }, { "epoch": 1.5322822372759428, "grad_norm": 0.6133672595024109, "learning_rate": 4.894875689503557e-05, "loss": 1.6644, "step": 6390 }, { "epoch": 1.5327618248306456, "grad_norm": 0.7170303463935852, "learning_rate": 4.8932768406747145e-05, "loss": 1.6848, "step": 6392 }, { "epoch": 1.5332414123853486, "grad_norm": 0.6320705413818359, "learning_rate": 4.891677991845871e-05, "loss": 1.6954, "step": 6394 }, { "epoch": 1.5337209999400514, "grad_norm": 0.6637185215950012, "learning_rate": 4.8900791430170276e-05, "loss": 1.7431, "step": 6396 }, { "epoch": 1.5342005874947544, 
"grad_norm": 0.6412577033042908, "learning_rate": 4.888480294188185e-05, "loss": 1.7197, "step": 6398 }, { "epoch": 1.5346801750494574, "grad_norm": 0.6950353384017944, "learning_rate": 4.8868814453593415e-05, "loss": 1.6857, "step": 6400 }, { "epoch": 1.5346801750494574, "eval_loss": 1.7372572422027588, "eval_runtime": 331.3378, "eval_samples_per_second": 402.758, "eval_steps_per_second": 12.588, "step": 6400 }, { "epoch": 1.5351597626041604, "grad_norm": 0.7477556467056274, "learning_rate": 4.885282596530498e-05, "loss": 1.7214, "step": 6402 }, { "epoch": 1.5356393501588634, "grad_norm": 0.692593514919281, "learning_rate": 4.883683747701655e-05, "loss": 1.7108, "step": 6404 }, { "epoch": 1.5361189377135664, "grad_norm": 0.5856477618217468, "learning_rate": 4.882084898872812e-05, "loss": 1.6827, "step": 6406 }, { "epoch": 1.5365985252682692, "grad_norm": 0.6648089289665222, "learning_rate": 4.8804860500439684e-05, "loss": 1.6784, "step": 6408 }, { "epoch": 1.5370781128229722, "grad_norm": 0.6450070142745972, "learning_rate": 4.878887201215126e-05, "loss": 1.7233, "step": 6410 }, { "epoch": 1.5375577003776753, "grad_norm": 0.5935024619102478, "learning_rate": 4.877288352386282e-05, "loss": 1.7251, "step": 6412 }, { "epoch": 1.538037287932378, "grad_norm": 0.6827055215835571, "learning_rate": 4.875689503557439e-05, "loss": 1.6891, "step": 6414 }, { "epoch": 1.538516875487081, "grad_norm": 0.71940678358078, "learning_rate": 4.874090654728596e-05, "loss": 1.6678, "step": 6416 }, { "epoch": 1.538996463041784, "grad_norm": 0.703602135181427, "learning_rate": 4.8724918058997526e-05, "loss": 1.701, "step": 6418 }, { "epoch": 1.539476050596487, "grad_norm": 0.7244856953620911, "learning_rate": 4.870892957070909e-05, "loss": 1.7106, "step": 6420 }, { "epoch": 1.53995563815119, "grad_norm": 0.7248233556747437, "learning_rate": 4.8692941082420664e-05, "loss": 1.724, "step": 6422 }, { "epoch": 1.540435225705893, "grad_norm": 0.7130035161972046, "learning_rate": 
4.867695259413222e-05, "loss": 1.6802, "step": 6424 }, { "epoch": 1.5409148132605959, "grad_norm": 0.6429159641265869, "learning_rate": 4.8660964105843796e-05, "loss": 1.713, "step": 6426 }, { "epoch": 1.5413944008152989, "grad_norm": 0.6308446526527405, "learning_rate": 4.864497561755536e-05, "loss": 1.6809, "step": 6428 }, { "epoch": 1.5418739883700017, "grad_norm": 0.6378887891769409, "learning_rate": 4.862898712926693e-05, "loss": 1.6875, "step": 6430 }, { "epoch": 1.5423535759247047, "grad_norm": 0.6019787788391113, "learning_rate": 4.86129986409785e-05, "loss": 1.7414, "step": 6432 }, { "epoch": 1.5428331634794077, "grad_norm": 0.680582582950592, "learning_rate": 4.8597010152690065e-05, "loss": 1.7382, "step": 6434 }, { "epoch": 1.5433127510341107, "grad_norm": 0.6616604924201965, "learning_rate": 4.858102166440163e-05, "loss": 1.6452, "step": 6436 }, { "epoch": 1.5437923385888137, "grad_norm": 0.8139570355415344, "learning_rate": 4.8565033176113204e-05, "loss": 1.7107, "step": 6438 }, { "epoch": 1.5442719261435167, "grad_norm": 0.6047822833061218, "learning_rate": 4.854904468782477e-05, "loss": 1.7195, "step": 6440 }, { "epoch": 1.5447515136982195, "grad_norm": 0.6757035255432129, "learning_rate": 4.8533056199536335e-05, "loss": 1.7233, "step": 6442 }, { "epoch": 1.5452311012529225, "grad_norm": 0.6731333136558533, "learning_rate": 4.851706771124791e-05, "loss": 1.6794, "step": 6444 }, { "epoch": 1.5457106888076253, "grad_norm": 0.7895196676254272, "learning_rate": 4.850107922295947e-05, "loss": 1.6728, "step": 6446 }, { "epoch": 1.5461902763623283, "grad_norm": 0.6560156941413879, "learning_rate": 4.848509073467104e-05, "loss": 1.6884, "step": 6448 }, { "epoch": 1.5466698639170313, "grad_norm": 0.7252815961837769, "learning_rate": 4.846910224638261e-05, "loss": 1.6978, "step": 6450 }, { "epoch": 1.5471494514717343, "grad_norm": 0.6407668590545654, "learning_rate": 4.845311375809418e-05, "loss": 1.7412, "step": 6452 }, { "epoch": 1.5476290390264373, 
"grad_norm": 0.7184181809425354, "learning_rate": 4.843712526980574e-05, "loss": 1.7396, "step": 6454 }, { "epoch": 1.5481086265811403, "grad_norm": 0.6420187950134277, "learning_rate": 4.8421136781517315e-05, "loss": 1.7058, "step": 6456 }, { "epoch": 1.5485882141358431, "grad_norm": 0.7586414813995361, "learning_rate": 4.840514829322888e-05, "loss": 1.697, "step": 6458 }, { "epoch": 1.5490678016905461, "grad_norm": 0.6500709652900696, "learning_rate": 4.838915980494044e-05, "loss": 1.7141, "step": 6460 }, { "epoch": 1.549547389245249, "grad_norm": 0.6857673525810242, "learning_rate": 4.837317131665201e-05, "loss": 1.7606, "step": 6462 }, { "epoch": 1.550026976799952, "grad_norm": 0.6049094796180725, "learning_rate": 4.835718282836358e-05, "loss": 1.69, "step": 6464 }, { "epoch": 1.550506564354655, "grad_norm": 0.6231589317321777, "learning_rate": 4.8341194340075144e-05, "loss": 1.6838, "step": 6466 }, { "epoch": 1.550986151909358, "grad_norm": 0.6960512399673462, "learning_rate": 4.8325205851786716e-05, "loss": 1.6517, "step": 6468 }, { "epoch": 1.551465739464061, "grad_norm": 0.7908833026885986, "learning_rate": 4.830921736349828e-05, "loss": 1.7314, "step": 6470 }, { "epoch": 1.551945327018764, "grad_norm": 0.6260631084442139, "learning_rate": 4.829322887520985e-05, "loss": 1.6928, "step": 6472 }, { "epoch": 1.5524249145734668, "grad_norm": 0.6175808310508728, "learning_rate": 4.827724038692142e-05, "loss": 1.7362, "step": 6474 }, { "epoch": 1.5529045021281698, "grad_norm": 0.6466395854949951, "learning_rate": 4.8261251898632986e-05, "loss": 1.6814, "step": 6476 }, { "epoch": 1.5533840896828728, "grad_norm": 0.6083034873008728, "learning_rate": 4.824526341034455e-05, "loss": 1.6686, "step": 6478 }, { "epoch": 1.5538636772375756, "grad_norm": 0.7281681895256042, "learning_rate": 4.8229274922056124e-05, "loss": 1.6823, "step": 6480 }, { "epoch": 1.5543432647922786, "grad_norm": 0.693898618221283, "learning_rate": 4.821328643376769e-05, "loss": 1.7185, "step": 
6482 }, { "epoch": 1.5548228523469816, "grad_norm": 0.6789742708206177, "learning_rate": 4.8197297945479255e-05, "loss": 1.6876, "step": 6484 }, { "epoch": 1.5553024399016846, "grad_norm": 0.7122653722763062, "learning_rate": 4.818130945719083e-05, "loss": 1.6637, "step": 6486 }, { "epoch": 1.5557820274563876, "grad_norm": 0.7129628658294678, "learning_rate": 4.8165320968902393e-05, "loss": 1.7036, "step": 6488 }, { "epoch": 1.5562616150110906, "grad_norm": 0.6617931127548218, "learning_rate": 4.814933248061396e-05, "loss": 1.718, "step": 6490 }, { "epoch": 1.5567412025657934, "grad_norm": 0.6855613589286804, "learning_rate": 4.813334399232553e-05, "loss": 1.7319, "step": 6492 }, { "epoch": 1.5572207901204964, "grad_norm": 0.6400307416915894, "learning_rate": 4.81173555040371e-05, "loss": 1.7136, "step": 6494 }, { "epoch": 1.5577003776751992, "grad_norm": 0.6832963228225708, "learning_rate": 4.810136701574866e-05, "loss": 1.7011, "step": 6496 }, { "epoch": 1.5581799652299022, "grad_norm": 0.6819493174552917, "learning_rate": 4.808537852746023e-05, "loss": 1.7022, "step": 6498 }, { "epoch": 1.5586595527846052, "grad_norm": 0.6384820938110352, "learning_rate": 4.8069390039171794e-05, "loss": 1.6767, "step": 6500 }, { "epoch": 1.5591391403393082, "grad_norm": 0.8360480666160583, "learning_rate": 4.805340155088337e-05, "loss": 1.6513, "step": 6502 }, { "epoch": 1.5596187278940112, "grad_norm": 0.6297588348388672, "learning_rate": 4.803741306259493e-05, "loss": 1.7406, "step": 6504 }, { "epoch": 1.5600983154487142, "grad_norm": 0.6614934802055359, "learning_rate": 4.80214245743065e-05, "loss": 1.6813, "step": 6506 }, { "epoch": 1.560577903003417, "grad_norm": 0.6533039808273315, "learning_rate": 4.800543608601807e-05, "loss": 1.691, "step": 6508 }, { "epoch": 1.56105749055812, "grad_norm": 0.61040860414505, "learning_rate": 4.7989447597729636e-05, "loss": 1.6571, "step": 6510 }, { "epoch": 1.5615370781128228, "grad_norm": 0.6364620327949524, "learning_rate": 
4.79734591094412e-05, "loss": 1.6843, "step": 6512 }, { "epoch": 1.5620166656675258, "grad_norm": 0.6417961120605469, "learning_rate": 4.7957470621152775e-05, "loss": 1.6803, "step": 6514 }, { "epoch": 1.5624962532222288, "grad_norm": 0.6476030349731445, "learning_rate": 4.794148213286434e-05, "loss": 1.7066, "step": 6516 }, { "epoch": 1.5629758407769319, "grad_norm": 0.6539744734764099, "learning_rate": 4.7925493644575906e-05, "loss": 1.7022, "step": 6518 }, { "epoch": 1.5634554283316349, "grad_norm": 0.6674281358718872, "learning_rate": 4.790950515628748e-05, "loss": 1.6712, "step": 6520 }, { "epoch": 1.5639350158863379, "grad_norm": 0.7227086424827576, "learning_rate": 4.7893516667999044e-05, "loss": 1.6934, "step": 6522 }, { "epoch": 1.5644146034410407, "grad_norm": 0.7583106160163879, "learning_rate": 4.787752817971061e-05, "loss": 1.6982, "step": 6524 }, { "epoch": 1.5648941909957437, "grad_norm": 0.7887646555900574, "learning_rate": 4.786153969142218e-05, "loss": 1.6493, "step": 6526 }, { "epoch": 1.5653737785504465, "grad_norm": 0.609953761100769, "learning_rate": 4.784555120313375e-05, "loss": 1.6932, "step": 6528 }, { "epoch": 1.5658533661051495, "grad_norm": 0.620701789855957, "learning_rate": 4.7829562714845314e-05, "loss": 1.6288, "step": 6530 }, { "epoch": 1.5663329536598525, "grad_norm": 0.6460697054862976, "learning_rate": 4.781357422655688e-05, "loss": 1.6869, "step": 6532 }, { "epoch": 1.5668125412145555, "grad_norm": 0.6462131142616272, "learning_rate": 4.7797585738268445e-05, "loss": 1.6712, "step": 6534 }, { "epoch": 1.5672921287692585, "grad_norm": 0.6471810340881348, "learning_rate": 4.778159724998002e-05, "loss": 1.7424, "step": 6536 }, { "epoch": 1.5677717163239615, "grad_norm": 0.5912790298461914, "learning_rate": 4.776560876169158e-05, "loss": 1.6938, "step": 6538 }, { "epoch": 1.5682513038786643, "grad_norm": 0.6933525204658508, "learning_rate": 4.774962027340315e-05, "loss": 1.7387, "step": 6540 }, { "epoch": 1.5687308914333673, 
"grad_norm": 0.6348704099655151, "learning_rate": 4.773363178511472e-05, "loss": 1.7136, "step": 6542 }, { "epoch": 1.5692104789880703, "grad_norm": 0.7876065969467163, "learning_rate": 4.771764329682629e-05, "loss": 1.68, "step": 6544 }, { "epoch": 1.569690066542773, "grad_norm": 0.624118447303772, "learning_rate": 4.770165480853785e-05, "loss": 1.6596, "step": 6546 }, { "epoch": 1.570169654097476, "grad_norm": 0.6703066229820251, "learning_rate": 4.7685666320249425e-05, "loss": 1.6787, "step": 6548 }, { "epoch": 1.5706492416521791, "grad_norm": 0.6896206140518188, "learning_rate": 4.766967783196099e-05, "loss": 1.7392, "step": 6550 }, { "epoch": 1.5711288292068821, "grad_norm": 0.6948111057281494, "learning_rate": 4.765368934367256e-05, "loss": 1.6827, "step": 6552 }, { "epoch": 1.5716084167615851, "grad_norm": 0.5981853604316711, "learning_rate": 4.763770085538413e-05, "loss": 1.7234, "step": 6554 }, { "epoch": 1.5720880043162881, "grad_norm": 0.5800202488899231, "learning_rate": 4.7621712367095695e-05, "loss": 1.7388, "step": 6556 }, { "epoch": 1.572567591870991, "grad_norm": 0.6487054824829102, "learning_rate": 4.760572387880726e-05, "loss": 1.6776, "step": 6558 }, { "epoch": 1.573047179425694, "grad_norm": 0.6429522633552551, "learning_rate": 4.758973539051883e-05, "loss": 1.661, "step": 6560 }, { "epoch": 1.5735267669803967, "grad_norm": 0.6993064284324646, "learning_rate": 4.75737469022304e-05, "loss": 1.6582, "step": 6562 }, { "epoch": 1.5740063545350997, "grad_norm": 0.6343466639518738, "learning_rate": 4.7557758413941965e-05, "loss": 1.7108, "step": 6564 }, { "epoch": 1.5744859420898027, "grad_norm": 0.5981734991073608, "learning_rate": 4.754176992565354e-05, "loss": 1.738, "step": 6566 }, { "epoch": 1.5749655296445058, "grad_norm": 0.7218007445335388, "learning_rate": 4.7525781437365096e-05, "loss": 1.6963, "step": 6568 }, { "epoch": 1.5754451171992088, "grad_norm": 0.6633502244949341, "learning_rate": 4.750979294907666e-05, "loss": 1.7116, "step": 6570 
}, { "epoch": 1.5759247047539118, "grad_norm": 0.6428684592247009, "learning_rate": 4.7493804460788234e-05, "loss": 1.7086, "step": 6572 }, { "epoch": 1.5764042923086146, "grad_norm": 0.792656660079956, "learning_rate": 4.74778159724998e-05, "loss": 1.6937, "step": 6574 }, { "epoch": 1.5768838798633176, "grad_norm": 0.6277286410331726, "learning_rate": 4.7461827484211366e-05, "loss": 1.7233, "step": 6576 }, { "epoch": 1.5773634674180204, "grad_norm": 0.7224653959274292, "learning_rate": 4.744583899592294e-05, "loss": 1.6728, "step": 6578 }, { "epoch": 1.5778430549727234, "grad_norm": 0.6688931584358215, "learning_rate": 4.7429850507634504e-05, "loss": 1.6765, "step": 6580 }, { "epoch": 1.5783226425274264, "grad_norm": 0.6058412194252014, "learning_rate": 4.741386201934607e-05, "loss": 1.7449, "step": 6582 }, { "epoch": 1.5788022300821294, "grad_norm": 0.6067028045654297, "learning_rate": 4.739787353105764e-05, "loss": 1.6708, "step": 6584 }, { "epoch": 1.5792818176368324, "grad_norm": 0.6054325699806213, "learning_rate": 4.738188504276921e-05, "loss": 1.7522, "step": 6586 }, { "epoch": 1.5797614051915354, "grad_norm": 0.6282007694244385, "learning_rate": 4.736589655448077e-05, "loss": 1.7147, "step": 6588 }, { "epoch": 1.5802409927462382, "grad_norm": 0.5876883864402771, "learning_rate": 4.7349908066192346e-05, "loss": 1.7243, "step": 6590 }, { "epoch": 1.5807205803009412, "grad_norm": 0.6249237060546875, "learning_rate": 4.733391957790391e-05, "loss": 1.693, "step": 6592 }, { "epoch": 1.581200167855644, "grad_norm": 0.6246289014816284, "learning_rate": 4.731793108961548e-05, "loss": 1.6846, "step": 6594 }, { "epoch": 1.581679755410347, "grad_norm": 0.6474538445472717, "learning_rate": 4.730194260132705e-05, "loss": 1.7411, "step": 6596 }, { "epoch": 1.58215934296505, "grad_norm": 0.5711390972137451, "learning_rate": 4.7285954113038615e-05, "loss": 1.6738, "step": 6598 }, { "epoch": 1.582638930519753, "grad_norm": 0.7124190926551819, "learning_rate": 
4.726996562475018e-05, "loss": 1.7286, "step": 6600 }, { "epoch": 1.583118518074456, "grad_norm": 0.6866257190704346, "learning_rate": 4.7253977136461754e-05, "loss": 1.7155, "step": 6602 }, { "epoch": 1.583598105629159, "grad_norm": 0.7718676328659058, "learning_rate": 4.723798864817331e-05, "loss": 1.7101, "step": 6604 }, { "epoch": 1.5840776931838618, "grad_norm": 0.6387284398078918, "learning_rate": 4.7222000159884885e-05, "loss": 1.6552, "step": 6606 }, { "epoch": 1.5845572807385648, "grad_norm": 0.6708875298500061, "learning_rate": 4.720601167159645e-05, "loss": 1.6984, "step": 6608 }, { "epoch": 1.5850368682932678, "grad_norm": 0.8117458820343018, "learning_rate": 4.7190023183308016e-05, "loss": 1.7295, "step": 6610 }, { "epoch": 1.5855164558479706, "grad_norm": 0.7523454427719116, "learning_rate": 4.717403469501959e-05, "loss": 1.7534, "step": 6612 }, { "epoch": 1.5859960434026736, "grad_norm": 0.6357442736625671, "learning_rate": 4.7158046206731155e-05, "loss": 1.6651, "step": 6614 }, { "epoch": 1.5864756309573766, "grad_norm": 0.6879259943962097, "learning_rate": 4.714205771844272e-05, "loss": 1.6934, "step": 6616 }, { "epoch": 1.5869552185120797, "grad_norm": 0.6736151576042175, "learning_rate": 4.712606923015429e-05, "loss": 1.7496, "step": 6618 }, { "epoch": 1.5874348060667827, "grad_norm": 0.6563031077384949, "learning_rate": 4.711008074186586e-05, "loss": 1.7209, "step": 6620 }, { "epoch": 1.5879143936214857, "grad_norm": 0.7368848919868469, "learning_rate": 4.7094092253577424e-05, "loss": 1.6781, "step": 6622 }, { "epoch": 1.5883939811761885, "grad_norm": 0.6947146058082581, "learning_rate": 4.7078103765288997e-05, "loss": 1.666, "step": 6624 }, { "epoch": 1.5888735687308915, "grad_norm": 0.5857922434806824, "learning_rate": 4.706211527700056e-05, "loss": 1.6869, "step": 6626 }, { "epoch": 1.5893531562855943, "grad_norm": 0.6422547698020935, "learning_rate": 4.704612678871213e-05, "loss": 1.6958, "step": 6628 }, { "epoch": 1.5898327438402973, 
"grad_norm": 0.5981950163841248, "learning_rate": 4.70301383004237e-05, "loss": 1.6962, "step": 6630 }, { "epoch": 1.5903123313950003, "grad_norm": 0.6150957942008972, "learning_rate": 4.7014149812135266e-05, "loss": 1.6331, "step": 6632 }, { "epoch": 1.5907919189497033, "grad_norm": 0.6236534118652344, "learning_rate": 4.699816132384683e-05, "loss": 1.7221, "step": 6634 }, { "epoch": 1.5912715065044063, "grad_norm": 0.6450435519218445, "learning_rate": 4.6982172835558404e-05, "loss": 1.6989, "step": 6636 }, { "epoch": 1.5917510940591093, "grad_norm": 0.7540398240089417, "learning_rate": 4.696618434726997e-05, "loss": 1.749, "step": 6638 }, { "epoch": 1.592230681613812, "grad_norm": 0.635714054107666, "learning_rate": 4.6950195858981536e-05, "loss": 1.7359, "step": 6640 }, { "epoch": 1.592710269168515, "grad_norm": 0.6030869483947754, "learning_rate": 4.69342073706931e-05, "loss": 1.6551, "step": 6642 }, { "epoch": 1.593189856723218, "grad_norm": 0.6171694993972778, "learning_rate": 4.691821888240467e-05, "loss": 1.641, "step": 6644 }, { "epoch": 1.593669444277921, "grad_norm": 0.6305464506149292, "learning_rate": 4.690223039411624e-05, "loss": 1.6234, "step": 6646 }, { "epoch": 1.594149031832624, "grad_norm": 0.6782022714614868, "learning_rate": 4.6886241905827805e-05, "loss": 1.6806, "step": 6648 }, { "epoch": 1.594628619387327, "grad_norm": 0.649774432182312, "learning_rate": 4.687025341753937e-05, "loss": 1.7174, "step": 6650 }, { "epoch": 1.59510820694203, "grad_norm": 0.7324897646903992, "learning_rate": 4.6854264929250943e-05, "loss": 1.7302, "step": 6652 }, { "epoch": 1.595587794496733, "grad_norm": 0.6627001762390137, "learning_rate": 4.683827644096251e-05, "loss": 1.702, "step": 6654 }, { "epoch": 1.5960673820514357, "grad_norm": 0.6612520813941956, "learning_rate": 4.6822287952674075e-05, "loss": 1.7071, "step": 6656 }, { "epoch": 1.5965469696061387, "grad_norm": 0.6625233292579651, "learning_rate": 4.680629946438565e-05, "loss": 1.6995, "step": 6658 }, 
{ "epoch": 1.5970265571608415, "grad_norm": 0.7970927953720093, "learning_rate": 4.679031097609721e-05, "loss": 1.7165, "step": 6660 }, { "epoch": 1.5975061447155445, "grad_norm": 0.8318336606025696, "learning_rate": 4.677432248780878e-05, "loss": 1.7102, "step": 6662 }, { "epoch": 1.5979857322702475, "grad_norm": 0.6248257756233215, "learning_rate": 4.675833399952035e-05, "loss": 1.7248, "step": 6664 }, { "epoch": 1.5984653198249505, "grad_norm": 0.6677607893943787, "learning_rate": 4.674234551123192e-05, "loss": 1.6787, "step": 6666 }, { "epoch": 1.5989449073796536, "grad_norm": 0.7032372355461121, "learning_rate": 4.672635702294348e-05, "loss": 1.6634, "step": 6668 }, { "epoch": 1.5994244949343566, "grad_norm": 0.6327639222145081, "learning_rate": 4.6710368534655055e-05, "loss": 1.6782, "step": 6670 }, { "epoch": 1.5999040824890594, "grad_norm": 0.7280043959617615, "learning_rate": 4.669438004636662e-05, "loss": 1.6613, "step": 6672 }, { "epoch": 1.6003836700437624, "grad_norm": 0.672728419303894, "learning_rate": 4.6678391558078186e-05, "loss": 1.6951, "step": 6674 }, { "epoch": 1.6008632575984654, "grad_norm": 0.6778761148452759, "learning_rate": 4.666240306978976e-05, "loss": 1.7034, "step": 6676 }, { "epoch": 1.6013428451531682, "grad_norm": 0.7978004217147827, "learning_rate": 4.664641458150132e-05, "loss": 1.7339, "step": 6678 }, { "epoch": 1.6018224327078712, "grad_norm": 0.7329824566841125, "learning_rate": 4.6630426093212884e-05, "loss": 1.6348, "step": 6680 }, { "epoch": 1.6023020202625742, "grad_norm": 0.6763856410980225, "learning_rate": 4.6614437604924456e-05, "loss": 1.728, "step": 6682 }, { "epoch": 1.6027816078172772, "grad_norm": 0.6098474860191345, "learning_rate": 4.659844911663602e-05, "loss": 1.7, "step": 6684 }, { "epoch": 1.6032611953719802, "grad_norm": 0.6308492422103882, "learning_rate": 4.658246062834759e-05, "loss": 1.6764, "step": 6686 }, { "epoch": 1.6037407829266832, "grad_norm": 0.7192627191543579, "learning_rate": 
4.656647214005916e-05, "loss": 1.7418, "step": 6688 }, { "epoch": 1.604220370481386, "grad_norm": 0.6592125296592712, "learning_rate": 4.6550483651770726e-05, "loss": 1.6784, "step": 6690 }, { "epoch": 1.604699958036089, "grad_norm": 0.7162062525749207, "learning_rate": 4.653449516348229e-05, "loss": 1.6474, "step": 6692 }, { "epoch": 1.6051795455907918, "grad_norm": 0.6806288361549377, "learning_rate": 4.6518506675193864e-05, "loss": 1.7128, "step": 6694 }, { "epoch": 1.6056591331454948, "grad_norm": 0.6614459753036499, "learning_rate": 4.650251818690543e-05, "loss": 1.6875, "step": 6696 }, { "epoch": 1.6061387207001978, "grad_norm": 0.6941540837287903, "learning_rate": 4.6486529698616995e-05, "loss": 1.7222, "step": 6698 }, { "epoch": 1.6066183082549008, "grad_norm": 0.6123828291893005, "learning_rate": 4.647054121032857e-05, "loss": 1.6659, "step": 6700 }, { "epoch": 1.6070978958096038, "grad_norm": 0.7388832569122314, "learning_rate": 4.645455272204013e-05, "loss": 1.7324, "step": 6702 }, { "epoch": 1.6075774833643068, "grad_norm": 0.6734049916267395, "learning_rate": 4.64385642337517e-05, "loss": 1.6587, "step": 6704 }, { "epoch": 1.6080570709190096, "grad_norm": 0.6256875991821289, "learning_rate": 4.642257574546327e-05, "loss": 1.6755, "step": 6706 }, { "epoch": 1.6085366584737126, "grad_norm": 0.6533706784248352, "learning_rate": 4.640658725717484e-05, "loss": 1.6947, "step": 6708 }, { "epoch": 1.6090162460284154, "grad_norm": 0.6463721394538879, "learning_rate": 4.63905987688864e-05, "loss": 1.6987, "step": 6710 }, { "epoch": 1.6094958335831184, "grad_norm": 0.6086716651916504, "learning_rate": 4.6374610280597975e-05, "loss": 1.7264, "step": 6712 }, { "epoch": 1.6099754211378214, "grad_norm": 0.6397482752799988, "learning_rate": 4.6358621792309534e-05, "loss": 1.7107, "step": 6714 }, { "epoch": 1.6104550086925244, "grad_norm": 0.7410135865211487, "learning_rate": 4.634263330402111e-05, "loss": 1.667, "step": 6716 }, { "epoch": 1.6109345962472275, 
"grad_norm": 0.8049417734146118, "learning_rate": 4.632664481573267e-05, "loss": 1.6757, "step": 6718 }, { "epoch": 1.6114141838019305, "grad_norm": 0.6531417369842529, "learning_rate": 4.631065632744424e-05, "loss": 1.696, "step": 6720 }, { "epoch": 1.6118937713566333, "grad_norm": 0.6446656584739685, "learning_rate": 4.629466783915581e-05, "loss": 1.6936, "step": 6722 }, { "epoch": 1.6123733589113363, "grad_norm": 0.6181802749633789, "learning_rate": 4.6278679350867376e-05, "loss": 1.706, "step": 6724 }, { "epoch": 1.612852946466039, "grad_norm": 0.7125673890113831, "learning_rate": 4.626269086257894e-05, "loss": 1.7261, "step": 6726 }, { "epoch": 1.613332534020742, "grad_norm": 0.6190324425697327, "learning_rate": 4.6246702374290515e-05, "loss": 1.6626, "step": 6728 }, { "epoch": 1.613812121575445, "grad_norm": 0.8814627528190613, "learning_rate": 4.623071388600208e-05, "loss": 1.6756, "step": 6730 }, { "epoch": 1.614291709130148, "grad_norm": 0.6570208072662354, "learning_rate": 4.6214725397713646e-05, "loss": 1.6973, "step": 6732 }, { "epoch": 1.614771296684851, "grad_norm": 0.7919439673423767, "learning_rate": 4.619873690942522e-05, "loss": 1.7113, "step": 6734 }, { "epoch": 1.615250884239554, "grad_norm": 0.6357349753379822, "learning_rate": 4.6182748421136784e-05, "loss": 1.7131, "step": 6736 }, { "epoch": 1.6157304717942569, "grad_norm": 0.6801686882972717, "learning_rate": 4.616675993284835e-05, "loss": 1.6842, "step": 6738 }, { "epoch": 1.61621005934896, "grad_norm": 0.660700261592865, "learning_rate": 4.615077144455992e-05, "loss": 1.6929, "step": 6740 }, { "epoch": 1.616689646903663, "grad_norm": 0.6734868884086609, "learning_rate": 4.613478295627149e-05, "loss": 1.7345, "step": 6742 }, { "epoch": 1.6171692344583657, "grad_norm": 0.7140079140663147, "learning_rate": 4.6118794467983054e-05, "loss": 1.6933, "step": 6744 }, { "epoch": 1.6176488220130687, "grad_norm": 0.6247152090072632, "learning_rate": 4.6102805979694626e-05, "loss": 1.6899, "step": 6746 
}, { "epoch": 1.6181284095677717, "grad_norm": 0.6128095984458923, "learning_rate": 4.608681749140619e-05, "loss": 1.6486, "step": 6748 }, { "epoch": 1.6186079971224747, "grad_norm": 0.7147334218025208, "learning_rate": 4.607082900311776e-05, "loss": 1.6882, "step": 6750 }, { "epoch": 1.6190875846771777, "grad_norm": 0.7044378519058228, "learning_rate": 4.605484051482932e-05, "loss": 1.719, "step": 6752 }, { "epoch": 1.6195671722318807, "grad_norm": 0.6824830770492554, "learning_rate": 4.603885202654089e-05, "loss": 1.6816, "step": 6754 }, { "epoch": 1.6200467597865835, "grad_norm": 0.6091986894607544, "learning_rate": 4.602286353825246e-05, "loss": 1.7005, "step": 6756 }, { "epoch": 1.6205263473412865, "grad_norm": 0.7244357466697693, "learning_rate": 4.600687504996403e-05, "loss": 1.6163, "step": 6758 }, { "epoch": 1.6210059348959893, "grad_norm": 0.6376845240592957, "learning_rate": 4.599088656167559e-05, "loss": 1.6923, "step": 6760 }, { "epoch": 1.6214855224506923, "grad_norm": 0.669632077217102, "learning_rate": 4.5974898073387165e-05, "loss": 1.6977, "step": 6762 }, { "epoch": 1.6219651100053953, "grad_norm": 0.6529282927513123, "learning_rate": 4.595890958509873e-05, "loss": 1.6766, "step": 6764 }, { "epoch": 1.6224446975600983, "grad_norm": 0.6546927094459534, "learning_rate": 4.59429210968103e-05, "loss": 1.6934, "step": 6766 }, { "epoch": 1.6229242851148014, "grad_norm": 0.621741771697998, "learning_rate": 4.592693260852187e-05, "loss": 1.6223, "step": 6768 }, { "epoch": 1.6234038726695044, "grad_norm": 0.7833709716796875, "learning_rate": 4.5910944120233435e-05, "loss": 1.7567, "step": 6770 }, { "epoch": 1.6238834602242072, "grad_norm": 0.6623146533966064, "learning_rate": 4.5894955631945e-05, "loss": 1.7066, "step": 6772 }, { "epoch": 1.6243630477789102, "grad_norm": 0.6001529693603516, "learning_rate": 4.587896714365657e-05, "loss": 1.6853, "step": 6774 }, { "epoch": 1.624842635333613, "grad_norm": 0.653661847114563, "learning_rate": 
4.586297865536814e-05, "loss": 1.7386, "step": 6776 }, { "epoch": 1.625322222888316, "grad_norm": 0.7108601331710815, "learning_rate": 4.5846990167079705e-05, "loss": 1.6906, "step": 6778 }, { "epoch": 1.625801810443019, "grad_norm": 0.7108763456344604, "learning_rate": 4.583100167879128e-05, "loss": 1.746, "step": 6780 }, { "epoch": 1.626281397997722, "grad_norm": 0.715711236000061, "learning_rate": 4.581501319050284e-05, "loss": 1.7051, "step": 6782 }, { "epoch": 1.626760985552425, "grad_norm": 0.6503947377204895, "learning_rate": 4.579902470221441e-05, "loss": 1.7176, "step": 6784 }, { "epoch": 1.627240573107128, "grad_norm": 0.6434596180915833, "learning_rate": 4.5783036213925974e-05, "loss": 1.656, "step": 6786 }, { "epoch": 1.6277201606618308, "grad_norm": 0.6194823980331421, "learning_rate": 4.576704772563754e-05, "loss": 1.6554, "step": 6788 }, { "epoch": 1.6281997482165338, "grad_norm": 0.7391887307167053, "learning_rate": 4.5751059237349105e-05, "loss": 1.6945, "step": 6790 }, { "epoch": 1.6286793357712366, "grad_norm": 0.7763944268226624, "learning_rate": 4.573507074906068e-05, "loss": 1.7103, "step": 6792 }, { "epoch": 1.6291589233259396, "grad_norm": 0.704348087310791, "learning_rate": 4.5719082260772244e-05, "loss": 1.6787, "step": 6794 }, { "epoch": 1.6296385108806426, "grad_norm": 0.6882117986679077, "learning_rate": 4.570309377248381e-05, "loss": 1.7138, "step": 6796 }, { "epoch": 1.6301180984353456, "grad_norm": 0.661745011806488, "learning_rate": 4.568710528419538e-05, "loss": 1.6744, "step": 6798 }, { "epoch": 1.6305976859900486, "grad_norm": 0.7047156691551208, "learning_rate": 4.567111679590695e-05, "loss": 1.728, "step": 6800 }, { "epoch": 1.6305976859900486, "eval_loss": 1.7341609001159668, "eval_runtime": 331.3317, "eval_samples_per_second": 402.766, "eval_steps_per_second": 12.589, "step": 6800 }, { "epoch": 1.6310772735447516, "grad_norm": 0.7315900921821594, "learning_rate": 4.565512830761851e-05, "loss": 1.6922, "step": 6802 }, { 
"epoch": 1.6315568610994544, "grad_norm": 0.7072718739509583, "learning_rate": 4.5639139819330086e-05, "loss": 1.7015, "step": 6804 }, { "epoch": 1.6320364486541574, "grad_norm": 0.7062069177627563, "learning_rate": 4.562315133104165e-05, "loss": 1.7279, "step": 6806 }, { "epoch": 1.6325160362088604, "grad_norm": 0.7697598338127136, "learning_rate": 4.560716284275322e-05, "loss": 1.7049, "step": 6808 }, { "epoch": 1.6329956237635632, "grad_norm": 0.6092913746833801, "learning_rate": 4.559117435446479e-05, "loss": 1.7036, "step": 6810 }, { "epoch": 1.6334752113182662, "grad_norm": 0.6959515810012817, "learning_rate": 4.5575185866176355e-05, "loss": 1.7064, "step": 6812 }, { "epoch": 1.6339547988729692, "grad_norm": 0.6601964831352234, "learning_rate": 4.555919737788793e-05, "loss": 1.6728, "step": 6814 }, { "epoch": 1.6344343864276722, "grad_norm": 0.6241142153739929, "learning_rate": 4.5543208889599493e-05, "loss": 1.7057, "step": 6816 }, { "epoch": 1.6349139739823753, "grad_norm": 0.7351783514022827, "learning_rate": 4.552722040131106e-05, "loss": 1.7058, "step": 6818 }, { "epoch": 1.6353935615370783, "grad_norm": 0.7060487866401672, "learning_rate": 4.551123191302263e-05, "loss": 1.6979, "step": 6820 }, { "epoch": 1.635873149091781, "grad_norm": 0.6507560610771179, "learning_rate": 4.54952434247342e-05, "loss": 1.6951, "step": 6822 }, { "epoch": 1.636352736646484, "grad_norm": 0.7072592377662659, "learning_rate": 4.5479254936445756e-05, "loss": 1.6979, "step": 6824 }, { "epoch": 1.6368323242011869, "grad_norm": 0.6408288478851318, "learning_rate": 4.546326644815733e-05, "loss": 1.684, "step": 6826 }, { "epoch": 1.6373119117558899, "grad_norm": 0.8819686770439148, "learning_rate": 4.5447277959868894e-05, "loss": 1.7002, "step": 6828 }, { "epoch": 1.6377914993105929, "grad_norm": 0.6794906854629517, "learning_rate": 4.543128947158046e-05, "loss": 1.6951, "step": 6830 }, { "epoch": 1.6382710868652959, "grad_norm": 0.6352238059043884, "learning_rate": 
4.541530098329203e-05, "loss": 1.7239, "step": 6832 }, { "epoch": 1.6387506744199989, "grad_norm": 0.6722749471664429, "learning_rate": 4.53993124950036e-05, "loss": 1.6858, "step": 6834 }, { "epoch": 1.639230261974702, "grad_norm": 0.6379525065422058, "learning_rate": 4.5383324006715164e-05, "loss": 1.7024, "step": 6836 }, { "epoch": 1.6397098495294047, "grad_norm": 0.6779729723930359, "learning_rate": 4.5367335518426736e-05, "loss": 1.692, "step": 6838 }, { "epoch": 1.6401894370841077, "grad_norm": 0.5837069749832153, "learning_rate": 4.53513470301383e-05, "loss": 1.7276, "step": 6840 }, { "epoch": 1.6406690246388105, "grad_norm": 0.6151543259620667, "learning_rate": 4.533535854184987e-05, "loss": 1.6855, "step": 6842 }, { "epoch": 1.6411486121935135, "grad_norm": 0.6811367273330688, "learning_rate": 4.531937005356144e-05, "loss": 1.6925, "step": 6844 }, { "epoch": 1.6416281997482165, "grad_norm": 0.6435073018074036, "learning_rate": 4.5303381565273006e-05, "loss": 1.7037, "step": 6846 }, { "epoch": 1.6421077873029195, "grad_norm": 0.6458755135536194, "learning_rate": 4.528739307698457e-05, "loss": 1.7511, "step": 6848 }, { "epoch": 1.6425873748576225, "grad_norm": 0.6237685084342957, "learning_rate": 4.5271404588696144e-05, "loss": 1.7281, "step": 6850 }, { "epoch": 1.6430669624123255, "grad_norm": 0.6298481225967407, "learning_rate": 4.525541610040771e-05, "loss": 1.7272, "step": 6852 }, { "epoch": 1.6435465499670283, "grad_norm": 0.5935114622116089, "learning_rate": 4.5239427612119276e-05, "loss": 1.6711, "step": 6854 }, { "epoch": 1.6440261375217313, "grad_norm": 0.6632751226425171, "learning_rate": 4.522343912383085e-05, "loss": 1.7022, "step": 6856 }, { "epoch": 1.644505725076434, "grad_norm": 0.6120949983596802, "learning_rate": 4.5207450635542414e-05, "loss": 1.7143, "step": 6858 }, { "epoch": 1.6449853126311371, "grad_norm": 0.6499776840209961, "learning_rate": 4.519146214725398e-05, "loss": 1.682, "step": 6860 }, { "epoch": 1.6454649001858401, 
"grad_norm": 0.6353088617324829, "learning_rate": 4.5175473658965545e-05, "loss": 1.7556, "step": 6862 }, { "epoch": 1.6459444877405431, "grad_norm": 0.5800523161888123, "learning_rate": 4.515948517067711e-05, "loss": 1.6883, "step": 6864 }, { "epoch": 1.6464240752952461, "grad_norm": 0.6485404372215271, "learning_rate": 4.5143496682388683e-05, "loss": 1.6918, "step": 6866 }, { "epoch": 1.6469036628499492, "grad_norm": 0.6066177487373352, "learning_rate": 4.512750819410025e-05, "loss": 1.6871, "step": 6868 }, { "epoch": 1.647383250404652, "grad_norm": 0.697462797164917, "learning_rate": 4.5111519705811815e-05, "loss": 1.7282, "step": 6870 }, { "epoch": 1.647862837959355, "grad_norm": 0.7157881855964661, "learning_rate": 4.509553121752339e-05, "loss": 1.6894, "step": 6872 }, { "epoch": 1.648342425514058, "grad_norm": 0.8047124147415161, "learning_rate": 4.507954272923495e-05, "loss": 1.6935, "step": 6874 }, { "epoch": 1.6488220130687608, "grad_norm": 0.580672025680542, "learning_rate": 4.506355424094652e-05, "loss": 1.6942, "step": 6876 }, { "epoch": 1.6493016006234638, "grad_norm": 0.6460202932357788, "learning_rate": 4.504756575265809e-05, "loss": 1.7506, "step": 6878 }, { "epoch": 1.6497811881781668, "grad_norm": 0.6126521229743958, "learning_rate": 4.503157726436966e-05, "loss": 1.6766, "step": 6880 }, { "epoch": 1.6502607757328698, "grad_norm": 0.6465358734130859, "learning_rate": 4.501558877608122e-05, "loss": 1.6859, "step": 6882 }, { "epoch": 1.6507403632875728, "grad_norm": 0.604941189289093, "learning_rate": 4.4999600287792795e-05, "loss": 1.7044, "step": 6884 }, { "epoch": 1.6512199508422758, "grad_norm": 0.7677505612373352, "learning_rate": 4.498361179950436e-05, "loss": 1.6754, "step": 6886 }, { "epoch": 1.6516995383969786, "grad_norm": 0.8906038999557495, "learning_rate": 4.4967623311215926e-05, "loss": 1.7264, "step": 6888 }, { "epoch": 1.6521791259516816, "grad_norm": 0.6408452987670898, "learning_rate": 4.49516348229275e-05, "loss": 1.6975, "step": 
6890 }, { "epoch": 1.6526587135063844, "grad_norm": 0.6235701441764832, "learning_rate": 4.4935646334639065e-05, "loss": 1.7086, "step": 6892 }, { "epoch": 1.6531383010610874, "grad_norm": 0.6288596391677856, "learning_rate": 4.491965784635063e-05, "loss": 1.653, "step": 6894 }, { "epoch": 1.6536178886157904, "grad_norm": 0.6266556978225708, "learning_rate": 4.4903669358062196e-05, "loss": 1.7252, "step": 6896 }, { "epoch": 1.6540974761704934, "grad_norm": 0.5763782262802124, "learning_rate": 4.488768086977376e-05, "loss": 1.6513, "step": 6898 }, { "epoch": 1.6545770637251964, "grad_norm": 0.7474942803382874, "learning_rate": 4.487169238148533e-05, "loss": 1.6557, "step": 6900 }, { "epoch": 1.6550566512798994, "grad_norm": 0.6486688852310181, "learning_rate": 4.48557038931969e-05, "loss": 1.6907, "step": 6902 }, { "epoch": 1.6555362388346022, "grad_norm": 0.680007815361023, "learning_rate": 4.4839715404908466e-05, "loss": 1.7286, "step": 6904 }, { "epoch": 1.6560158263893052, "grad_norm": 0.6397298574447632, "learning_rate": 4.482372691662003e-05, "loss": 1.6929, "step": 6906 }, { "epoch": 1.656495413944008, "grad_norm": 0.6508957743644714, "learning_rate": 4.4807738428331604e-05, "loss": 1.7383, "step": 6908 }, { "epoch": 1.656975001498711, "grad_norm": 0.7146604061126709, "learning_rate": 4.479174994004317e-05, "loss": 1.7079, "step": 6910 }, { "epoch": 1.657454589053414, "grad_norm": 0.6631509065628052, "learning_rate": 4.477576145175474e-05, "loss": 1.679, "step": 6912 }, { "epoch": 1.657934176608117, "grad_norm": 0.6393337845802307, "learning_rate": 4.475977296346631e-05, "loss": 1.7194, "step": 6914 }, { "epoch": 1.65841376416282, "grad_norm": 0.6216802000999451, "learning_rate": 4.474378447517787e-05, "loss": 1.7128, "step": 6916 }, { "epoch": 1.658893351717523, "grad_norm": 0.6934576630592346, "learning_rate": 4.4727795986889446e-05, "loss": 1.6632, "step": 6918 }, { "epoch": 1.6593729392722258, "grad_norm": 0.6697971224784851, "learning_rate": 
4.471180749860101e-05, "loss": 1.7467, "step": 6920 }, { "epoch": 1.6598525268269289, "grad_norm": 0.6878527402877808, "learning_rate": 4.469581901031258e-05, "loss": 1.7519, "step": 6922 }, { "epoch": 1.6603321143816316, "grad_norm": 0.6446409821510315, "learning_rate": 4.467983052202415e-05, "loss": 1.7782, "step": 6924 }, { "epoch": 1.6608117019363347, "grad_norm": 0.6746791005134583, "learning_rate": 4.4663842033735715e-05, "loss": 1.7072, "step": 6926 }, { "epoch": 1.6612912894910377, "grad_norm": 0.6366303563117981, "learning_rate": 4.464785354544728e-05, "loss": 1.7023, "step": 6928 }, { "epoch": 1.6617708770457407, "grad_norm": 0.6811580061912537, "learning_rate": 4.4631865057158854e-05, "loss": 1.7302, "step": 6930 }, { "epoch": 1.6622504646004437, "grad_norm": 0.6247665882110596, "learning_rate": 4.461587656887041e-05, "loss": 1.6404, "step": 6932 }, { "epoch": 1.6627300521551467, "grad_norm": 0.6238241195678711, "learning_rate": 4.459988808058198e-05, "loss": 1.7155, "step": 6934 }, { "epoch": 1.6632096397098495, "grad_norm": 0.6818514466285706, "learning_rate": 4.458389959229355e-05, "loss": 1.7057, "step": 6936 }, { "epoch": 1.6636892272645525, "grad_norm": 0.6877810955047607, "learning_rate": 4.4567911104005116e-05, "loss": 1.6423, "step": 6938 }, { "epoch": 1.6641688148192555, "grad_norm": 0.6267225742340088, "learning_rate": 4.455192261571668e-05, "loss": 1.7202, "step": 6940 }, { "epoch": 1.6646484023739583, "grad_norm": 0.7052085399627686, "learning_rate": 4.4535934127428255e-05, "loss": 1.6858, "step": 6942 }, { "epoch": 1.6651279899286613, "grad_norm": 0.6312008500099182, "learning_rate": 4.451994563913982e-05, "loss": 1.717, "step": 6944 }, { "epoch": 1.6656075774833643, "grad_norm": 0.5914605259895325, "learning_rate": 4.4503957150851386e-05, "loss": 1.7244, "step": 6946 }, { "epoch": 1.6660871650380673, "grad_norm": 0.6662948727607727, "learning_rate": 4.448796866256296e-05, "loss": 1.6902, "step": 6948 }, { "epoch": 1.6665667525927703, 
"grad_norm": 0.6717657446861267, "learning_rate": 4.4471980174274524e-05, "loss": 1.6812, "step": 6950 }, { "epoch": 1.6670463401474733, "grad_norm": 0.6464540958404541, "learning_rate": 4.445599168598609e-05, "loss": 1.7017, "step": 6952 }, { "epoch": 1.6675259277021761, "grad_norm": 0.7252960801124573, "learning_rate": 4.444000319769766e-05, "loss": 1.6543, "step": 6954 }, { "epoch": 1.6680055152568791, "grad_norm": 0.7172642350196838, "learning_rate": 4.442401470940923e-05, "loss": 1.6937, "step": 6956 }, { "epoch": 1.668485102811582, "grad_norm": 0.613092839717865, "learning_rate": 4.4408026221120794e-05, "loss": 1.7173, "step": 6958 }, { "epoch": 1.668964690366285, "grad_norm": 0.6636039614677429, "learning_rate": 4.4392037732832366e-05, "loss": 1.7122, "step": 6960 }, { "epoch": 1.669444277920988, "grad_norm": 0.7927680015563965, "learning_rate": 4.437604924454393e-05, "loss": 1.6787, "step": 6962 }, { "epoch": 1.669923865475691, "grad_norm": 0.6912435293197632, "learning_rate": 4.43600607562555e-05, "loss": 1.6748, "step": 6964 }, { "epoch": 1.670403453030394, "grad_norm": 0.6833535432815552, "learning_rate": 4.434407226796707e-05, "loss": 1.6616, "step": 6966 }, { "epoch": 1.670883040585097, "grad_norm": 0.7166875004768372, "learning_rate": 4.432808377967863e-05, "loss": 1.6882, "step": 6968 }, { "epoch": 1.6713626281397997, "grad_norm": 0.6804790496826172, "learning_rate": 4.43120952913902e-05, "loss": 1.6665, "step": 6970 }, { "epoch": 1.6718422156945028, "grad_norm": 0.7000980973243713, "learning_rate": 4.429610680310177e-05, "loss": 1.7131, "step": 6972 }, { "epoch": 1.6723218032492055, "grad_norm": 0.6542059779167175, "learning_rate": 4.428011831481333e-05, "loss": 1.7032, "step": 6974 }, { "epoch": 1.6728013908039085, "grad_norm": 0.6720829606056213, "learning_rate": 4.4264129826524905e-05, "loss": 1.6523, "step": 6976 }, { "epoch": 1.6732809783586116, "grad_norm": 0.6646603345870972, "learning_rate": 4.424814133823647e-05, "loss": 1.6614, "step": 
6978 }, { "epoch": 1.6737605659133146, "grad_norm": 0.5984199047088623, "learning_rate": 4.423215284994804e-05, "loss": 1.7149, "step": 6980 }, { "epoch": 1.6742401534680176, "grad_norm": 0.6346538066864014, "learning_rate": 4.421616436165961e-05, "loss": 1.7084, "step": 6982 }, { "epoch": 1.6747197410227206, "grad_norm": 0.5741820335388184, "learning_rate": 4.4200175873371175e-05, "loss": 1.7162, "step": 6984 }, { "epoch": 1.6751993285774234, "grad_norm": 0.724179208278656, "learning_rate": 4.418418738508274e-05, "loss": 1.7617, "step": 6986 }, { "epoch": 1.6756789161321264, "grad_norm": 0.616922914981842, "learning_rate": 4.416819889679431e-05, "loss": 1.7354, "step": 6988 }, { "epoch": 1.6761585036868292, "grad_norm": 0.6289510130882263, "learning_rate": 4.415221040850588e-05, "loss": 1.6882, "step": 6990 }, { "epoch": 1.6766380912415322, "grad_norm": 0.6567245125770569, "learning_rate": 4.4136221920217444e-05, "loss": 1.7438, "step": 6992 }, { "epoch": 1.6771176787962352, "grad_norm": 0.7341806292533875, "learning_rate": 4.412023343192902e-05, "loss": 1.6872, "step": 6994 }, { "epoch": 1.6775972663509382, "grad_norm": 0.7897122502326965, "learning_rate": 4.410424494364058e-05, "loss": 1.7226, "step": 6996 }, { "epoch": 1.6780768539056412, "grad_norm": 0.6688176989555359, "learning_rate": 4.408825645535215e-05, "loss": 1.6861, "step": 6998 }, { "epoch": 1.6785564414603442, "grad_norm": 0.6884894371032715, "learning_rate": 4.407226796706372e-05, "loss": 1.6626, "step": 7000 }, { "epoch": 1.679036029015047, "grad_norm": 0.6715915203094482, "learning_rate": 4.4056279478775287e-05, "loss": 1.7042, "step": 7002 }, { "epoch": 1.67951561656975, "grad_norm": 0.7346648573875427, "learning_rate": 4.4040290990486845e-05, "loss": 1.6732, "step": 7004 }, { "epoch": 1.679995204124453, "grad_norm": 0.6231310367584229, "learning_rate": 4.402430250219842e-05, "loss": 1.6856, "step": 7006 }, { "epoch": 1.6804747916791558, "grad_norm": 0.6867584586143494, "learning_rate": 
4.4008314013909984e-05, "loss": 1.721, "step": 7008 }, { "epoch": 1.6809543792338588, "grad_norm": 0.6889344453811646, "learning_rate": 4.3992325525621556e-05, "loss": 1.6497, "step": 7010 }, { "epoch": 1.6814339667885618, "grad_norm": 0.6558771133422852, "learning_rate": 4.397633703733312e-05, "loss": 1.7116, "step": 7012 }, { "epoch": 1.6819135543432648, "grad_norm": 0.6382499933242798, "learning_rate": 4.396034854904469e-05, "loss": 1.6899, "step": 7014 }, { "epoch": 1.6823931418979678, "grad_norm": 0.6124967336654663, "learning_rate": 4.394436006075626e-05, "loss": 1.7095, "step": 7016 }, { "epoch": 1.6828727294526709, "grad_norm": 0.7072728276252747, "learning_rate": 4.3928371572467826e-05, "loss": 1.7232, "step": 7018 }, { "epoch": 1.6833523170073736, "grad_norm": 0.6392463445663452, "learning_rate": 4.391238308417939e-05, "loss": 1.6677, "step": 7020 }, { "epoch": 1.6838319045620767, "grad_norm": 0.6236127018928528, "learning_rate": 4.3896394595890964e-05, "loss": 1.7298, "step": 7022 }, { "epoch": 1.6843114921167794, "grad_norm": 0.6531131267547607, "learning_rate": 4.388040610760253e-05, "loss": 1.6841, "step": 7024 }, { "epoch": 1.6847910796714824, "grad_norm": 0.6702679395675659, "learning_rate": 4.3864417619314095e-05, "loss": 1.7081, "step": 7026 }, { "epoch": 1.6852706672261855, "grad_norm": 0.8006516695022583, "learning_rate": 4.384842913102567e-05, "loss": 1.6826, "step": 7028 }, { "epoch": 1.6857502547808885, "grad_norm": 0.6451188325881958, "learning_rate": 4.3832440642737233e-05, "loss": 1.7195, "step": 7030 }, { "epoch": 1.6862298423355915, "grad_norm": 0.5949522256851196, "learning_rate": 4.38164521544488e-05, "loss": 1.6929, "step": 7032 }, { "epoch": 1.6867094298902945, "grad_norm": 0.6533100605010986, "learning_rate": 4.380046366616037e-05, "loss": 1.6868, "step": 7034 }, { "epoch": 1.6871890174449973, "grad_norm": 0.7180526256561279, "learning_rate": 4.378447517787194e-05, "loss": 1.6986, "step": 7036 }, { "epoch": 1.6876686049997003, 
"grad_norm": 0.6795576810836792, "learning_rate": 4.37684866895835e-05, "loss": 1.6731, "step": 7038 }, { "epoch": 1.688148192554403, "grad_norm": 0.6845134496688843, "learning_rate": 4.3752498201295075e-05, "loss": 1.6696, "step": 7040 }, { "epoch": 1.688627780109106, "grad_norm": 0.7391487956047058, "learning_rate": 4.3736509713006634e-05, "loss": 1.6749, "step": 7042 }, { "epoch": 1.689107367663809, "grad_norm": 0.6418830156326294, "learning_rate": 4.37205212247182e-05, "loss": 1.6218, "step": 7044 }, { "epoch": 1.689586955218512, "grad_norm": 0.6246388554573059, "learning_rate": 4.370453273642977e-05, "loss": 1.691, "step": 7046 }, { "epoch": 1.690066542773215, "grad_norm": 0.6918032765388489, "learning_rate": 4.368854424814134e-05, "loss": 1.7286, "step": 7048 }, { "epoch": 1.6905461303279181, "grad_norm": 0.6976194381713867, "learning_rate": 4.3672555759852904e-05, "loss": 1.6697, "step": 7050 }, { "epoch": 1.691025717882621, "grad_norm": 0.6900615692138672, "learning_rate": 4.3656567271564476e-05, "loss": 1.6753, "step": 7052 }, { "epoch": 1.691505305437324, "grad_norm": 0.6295838356018066, "learning_rate": 4.364057878327604e-05, "loss": 1.6684, "step": 7054 }, { "epoch": 1.6919848929920267, "grad_norm": 0.7743697762489319, "learning_rate": 4.362459029498761e-05, "loss": 1.7116, "step": 7056 }, { "epoch": 1.6924644805467297, "grad_norm": 0.641784131526947, "learning_rate": 4.360860180669918e-05, "loss": 1.7068, "step": 7058 }, { "epoch": 1.6929440681014327, "grad_norm": 0.7354543209075928, "learning_rate": 4.3592613318410746e-05, "loss": 1.7546, "step": 7060 }, { "epoch": 1.6934236556561357, "grad_norm": 0.6967296600341797, "learning_rate": 4.357662483012231e-05, "loss": 1.691, "step": 7062 }, { "epoch": 1.6939032432108387, "grad_norm": 0.8541903495788574, "learning_rate": 4.3560636341833884e-05, "loss": 1.7287, "step": 7064 }, { "epoch": 1.6943828307655417, "grad_norm": 0.7203862071037292, "learning_rate": 4.354464785354545e-05, "loss": 1.6684, "step": 7066 
}, { "epoch": 1.6948624183202445, "grad_norm": 0.6036088466644287, "learning_rate": 4.3528659365257016e-05, "loss": 1.6956, "step": 7068 }, { "epoch": 1.6953420058749475, "grad_norm": 0.7577828168869019, "learning_rate": 4.351267087696859e-05, "loss": 1.7237, "step": 7070 }, { "epoch": 1.6958215934296506, "grad_norm": 0.7159384489059448, "learning_rate": 4.3496682388680154e-05, "loss": 1.6922, "step": 7072 }, { "epoch": 1.6963011809843533, "grad_norm": 0.6819401979446411, "learning_rate": 4.348069390039172e-05, "loss": 1.6705, "step": 7074 }, { "epoch": 1.6967807685390563, "grad_norm": 0.6039004921913147, "learning_rate": 4.346470541210329e-05, "loss": 1.6147, "step": 7076 }, { "epoch": 1.6972603560937594, "grad_norm": 0.7051602005958557, "learning_rate": 4.344871692381485e-05, "loss": 1.6938, "step": 7078 }, { "epoch": 1.6977399436484624, "grad_norm": 0.6476424336433411, "learning_rate": 4.343272843552642e-05, "loss": 1.6696, "step": 7080 }, { "epoch": 1.6982195312031654, "grad_norm": 0.6418758034706116, "learning_rate": 4.341673994723799e-05, "loss": 1.66, "step": 7082 }, { "epoch": 1.6986991187578684, "grad_norm": 0.6897554993629456, "learning_rate": 4.3400751458949555e-05, "loss": 1.6565, "step": 7084 }, { "epoch": 1.6991787063125712, "grad_norm": 0.6142883896827698, "learning_rate": 4.338476297066113e-05, "loss": 1.6924, "step": 7086 }, { "epoch": 1.6996582938672742, "grad_norm": 0.6533589363098145, "learning_rate": 4.336877448237269e-05, "loss": 1.6973, "step": 7088 }, { "epoch": 1.700137881421977, "grad_norm": 0.6980764269828796, "learning_rate": 4.335278599408426e-05, "loss": 1.6803, "step": 7090 }, { "epoch": 1.70061746897668, "grad_norm": 0.7207129001617432, "learning_rate": 4.333679750579583e-05, "loss": 1.6872, "step": 7092 }, { "epoch": 1.701097056531383, "grad_norm": 0.6944156885147095, "learning_rate": 4.33208090175074e-05, "loss": 1.6974, "step": 7094 }, { "epoch": 1.701576644086086, "grad_norm": 0.6296306848526001, "learning_rate": 
4.330482052921896e-05, "loss": 1.6818, "step": 7096 }, { "epoch": 1.702056231640789, "grad_norm": 0.6723199486732483, "learning_rate": 4.3288832040930535e-05, "loss": 1.6879, "step": 7098 }, { "epoch": 1.702535819195492, "grad_norm": 0.8373998999595642, "learning_rate": 4.32728435526421e-05, "loss": 1.7019, "step": 7100 }, { "epoch": 1.7030154067501948, "grad_norm": 0.6345167756080627, "learning_rate": 4.3256855064353666e-05, "loss": 1.7109, "step": 7102 }, { "epoch": 1.7034949943048978, "grad_norm": 0.6054137349128723, "learning_rate": 4.324086657606524e-05, "loss": 1.7064, "step": 7104 }, { "epoch": 1.7039745818596006, "grad_norm": 0.5981360673904419, "learning_rate": 4.3224878087776805e-05, "loss": 1.7107, "step": 7106 }, { "epoch": 1.7044541694143036, "grad_norm": 0.680203914642334, "learning_rate": 4.320888959948837e-05, "loss": 1.6781, "step": 7108 }, { "epoch": 1.7049337569690066, "grad_norm": 0.6236011385917664, "learning_rate": 4.319290111119994e-05, "loss": 1.7039, "step": 7110 }, { "epoch": 1.7054133445237096, "grad_norm": 0.6980118155479431, "learning_rate": 4.317691262291151e-05, "loss": 1.704, "step": 7112 }, { "epoch": 1.7058929320784126, "grad_norm": 0.6931481957435608, "learning_rate": 4.3160924134623074e-05, "loss": 1.6305, "step": 7114 }, { "epoch": 1.7063725196331156, "grad_norm": 0.6354336142539978, "learning_rate": 4.314493564633464e-05, "loss": 1.6694, "step": 7116 }, { "epoch": 1.7068521071878184, "grad_norm": 0.6119486689567566, "learning_rate": 4.3128947158046206e-05, "loss": 1.7421, "step": 7118 }, { "epoch": 1.7073316947425214, "grad_norm": 0.6262586712837219, "learning_rate": 4.311295866975778e-05, "loss": 1.6317, "step": 7120 }, { "epoch": 1.7078112822972242, "grad_norm": 0.6965233087539673, "learning_rate": 4.3096970181469344e-05, "loss": 1.6299, "step": 7122 }, { "epoch": 1.7082908698519272, "grad_norm": 0.7240157723426819, "learning_rate": 4.308098169318091e-05, "loss": 1.7019, "step": 7124 }, { "epoch": 1.7087704574066302, 
"grad_norm": 0.6135428547859192, "learning_rate": 4.306499320489248e-05, "loss": 1.651, "step": 7126 }, { "epoch": 1.7092500449613333, "grad_norm": 0.7128486633300781, "learning_rate": 4.304900471660405e-05, "loss": 1.6734, "step": 7128 }, { "epoch": 1.7097296325160363, "grad_norm": 0.6704049110412598, "learning_rate": 4.303301622831561e-05, "loss": 1.6785, "step": 7130 }, { "epoch": 1.7102092200707393, "grad_norm": 0.5925652980804443, "learning_rate": 4.3017027740027186e-05, "loss": 1.7754, "step": 7132 }, { "epoch": 1.710688807625442, "grad_norm": 0.6780576705932617, "learning_rate": 4.300103925173875e-05, "loss": 1.7071, "step": 7134 }, { "epoch": 1.711168395180145, "grad_norm": 0.60521399974823, "learning_rate": 4.298505076345032e-05, "loss": 1.6612, "step": 7136 }, { "epoch": 1.711647982734848, "grad_norm": 0.6567942500114441, "learning_rate": 4.296906227516189e-05, "loss": 1.6869, "step": 7138 }, { "epoch": 1.7121275702895509, "grad_norm": 0.62856525182724, "learning_rate": 4.2953073786873455e-05, "loss": 1.6478, "step": 7140 }, { "epoch": 1.7126071578442539, "grad_norm": 0.6049023866653442, "learning_rate": 4.293708529858502e-05, "loss": 1.6825, "step": 7142 }, { "epoch": 1.713086745398957, "grad_norm": 0.6443159580230713, "learning_rate": 4.2921096810296593e-05, "loss": 1.7074, "step": 7144 }, { "epoch": 1.71356633295366, "grad_norm": 0.6977401971817017, "learning_rate": 4.290510832200816e-05, "loss": 1.7246, "step": 7146 }, { "epoch": 1.714045920508363, "grad_norm": 0.6651487946510315, "learning_rate": 4.2889119833719725e-05, "loss": 1.6858, "step": 7148 }, { "epoch": 1.714525508063066, "grad_norm": 0.837122917175293, "learning_rate": 4.287313134543129e-05, "loss": 1.7197, "step": 7150 }, { "epoch": 1.7150050956177687, "grad_norm": 0.6743748188018799, "learning_rate": 4.2857142857142856e-05, "loss": 1.7338, "step": 7152 }, { "epoch": 1.7154846831724717, "grad_norm": 0.6125153303146362, "learning_rate": 4.284115436885442e-05, "loss": 1.6866, "step": 7154 }, 
{ "epoch": 1.7159642707271745, "grad_norm": 0.6750504374504089, "learning_rate": 4.2825165880565994e-05, "loss": 1.7014, "step": 7156 }, { "epoch": 1.7164438582818775, "grad_norm": 0.6676688194274902, "learning_rate": 4.280917739227756e-05, "loss": 1.6827, "step": 7158 }, { "epoch": 1.7169234458365805, "grad_norm": 0.6307616233825684, "learning_rate": 4.2793188903989126e-05, "loss": 1.6647, "step": 7160 }, { "epoch": 1.7174030333912835, "grad_norm": 0.8009878993034363, "learning_rate": 4.27772004157007e-05, "loss": 1.6864, "step": 7162 }, { "epoch": 1.7178826209459865, "grad_norm": 0.6708737015724182, "learning_rate": 4.2761211927412264e-05, "loss": 1.7415, "step": 7164 }, { "epoch": 1.7183622085006895, "grad_norm": 0.6512638330459595, "learning_rate": 4.274522343912383e-05, "loss": 1.6944, "step": 7166 }, { "epoch": 1.7188417960553923, "grad_norm": 0.6971409320831299, "learning_rate": 4.27292349508354e-05, "loss": 1.6914, "step": 7168 }, { "epoch": 1.7193213836100953, "grad_norm": 0.714971125125885, "learning_rate": 4.271324646254697e-05, "loss": 1.7213, "step": 7170 }, { "epoch": 1.7198009711647981, "grad_norm": 0.7179045677185059, "learning_rate": 4.2697257974258534e-05, "loss": 1.75, "step": 7172 }, { "epoch": 1.7202805587195011, "grad_norm": 0.698767900466919, "learning_rate": 4.2681269485970106e-05, "loss": 1.685, "step": 7174 }, { "epoch": 1.7207601462742041, "grad_norm": 0.6682954430580139, "learning_rate": 4.266528099768167e-05, "loss": 1.6733, "step": 7176 }, { "epoch": 1.7212397338289072, "grad_norm": 0.9047183990478516, "learning_rate": 4.264929250939324e-05, "loss": 1.701, "step": 7178 }, { "epoch": 1.7217193213836102, "grad_norm": 0.6772891879081726, "learning_rate": 4.263330402110481e-05, "loss": 1.6543, "step": 7180 }, { "epoch": 1.7221989089383132, "grad_norm": 0.6926412582397461, "learning_rate": 4.2617315532816376e-05, "loss": 1.6887, "step": 7182 }, { "epoch": 1.722678496493016, "grad_norm": 0.6488686203956604, "learning_rate": 
4.260132704452794e-05, "loss": 1.7004, "step": 7184 }, { "epoch": 1.723158084047719, "grad_norm": 0.6761799454689026, "learning_rate": 4.258533855623951e-05, "loss": 1.6966, "step": 7186 }, { "epoch": 1.7236376716024218, "grad_norm": 0.6753386855125427, "learning_rate": 4.256935006795107e-05, "loss": 1.6698, "step": 7188 }, { "epoch": 1.7241172591571248, "grad_norm": 0.6931450963020325, "learning_rate": 4.2553361579662645e-05, "loss": 1.7344, "step": 7190 }, { "epoch": 1.7245968467118278, "grad_norm": 0.6661020517349243, "learning_rate": 4.253737309137421e-05, "loss": 1.6778, "step": 7192 }, { "epoch": 1.7250764342665308, "grad_norm": 0.725106954574585, "learning_rate": 4.252138460308578e-05, "loss": 1.6904, "step": 7194 }, { "epoch": 1.7255560218212338, "grad_norm": 0.5951911807060242, "learning_rate": 4.250539611479735e-05, "loss": 1.6916, "step": 7196 }, { "epoch": 1.7260356093759368, "grad_norm": 0.6392805576324463, "learning_rate": 4.2489407626508915e-05, "loss": 1.7015, "step": 7198 }, { "epoch": 1.7265151969306396, "grad_norm": 0.6875796318054199, "learning_rate": 4.247341913822048e-05, "loss": 1.7467, "step": 7200 }, { "epoch": 1.7265151969306396, "eval_loss": 1.7289694547653198, "eval_runtime": 331.3478, "eval_samples_per_second": 402.746, "eval_steps_per_second": 12.588, "step": 7200 }, { "epoch": 1.7269947844853426, "grad_norm": 0.623810350894928, "learning_rate": 4.245743064993205e-05, "loss": 1.6791, "step": 7202 }, { "epoch": 1.7274743720400456, "grad_norm": 0.6635192632675171, "learning_rate": 4.244144216164362e-05, "loss": 1.7084, "step": 7204 }, { "epoch": 1.7279539595947484, "grad_norm": 0.6127939820289612, "learning_rate": 4.2425453673355184e-05, "loss": 1.7223, "step": 7206 }, { "epoch": 1.7284335471494514, "grad_norm": 0.601517915725708, "learning_rate": 4.240946518506676e-05, "loss": 1.7055, "step": 7208 }, { "epoch": 1.7289131347041544, "grad_norm": 0.9981102347373962, "learning_rate": 4.239347669677832e-05, "loss": 1.717, "step": 7210 }, { 
"epoch": 1.7293927222588574, "grad_norm": 0.7100702524185181, "learning_rate": 4.237748820848989e-05, "loss": 1.7615, "step": 7212 }, { "epoch": 1.7298723098135604, "grad_norm": 0.6509621143341064, "learning_rate": 4.236149972020146e-05, "loss": 1.6928, "step": 7214 }, { "epoch": 1.7303518973682634, "grad_norm": 0.7664061188697815, "learning_rate": 4.2345511231913026e-05, "loss": 1.7118, "step": 7216 }, { "epoch": 1.7308314849229662, "grad_norm": 0.6150618195533752, "learning_rate": 4.232952274362459e-05, "loss": 1.7065, "step": 7218 }, { "epoch": 1.7313110724776692, "grad_norm": 0.6563504934310913, "learning_rate": 4.2313534255336165e-05, "loss": 1.6788, "step": 7220 }, { "epoch": 1.731790660032372, "grad_norm": 0.6438421010971069, "learning_rate": 4.229754576704773e-05, "loss": 1.6876, "step": 7222 }, { "epoch": 1.732270247587075, "grad_norm": 0.6664254665374756, "learning_rate": 4.2281557278759296e-05, "loss": 1.6649, "step": 7224 }, { "epoch": 1.732749835141778, "grad_norm": 0.6643362045288086, "learning_rate": 4.226556879047086e-05, "loss": 1.673, "step": 7226 }, { "epoch": 1.733229422696481, "grad_norm": 0.6426982283592224, "learning_rate": 4.224958030218243e-05, "loss": 1.7238, "step": 7228 }, { "epoch": 1.733709010251184, "grad_norm": 0.6487430334091187, "learning_rate": 4.2233591813894e-05, "loss": 1.6819, "step": 7230 }, { "epoch": 1.734188597805887, "grad_norm": 0.7027775049209595, "learning_rate": 4.2217603325605566e-05, "loss": 1.645, "step": 7232 }, { "epoch": 1.7346681853605899, "grad_norm": 0.8074947595596313, "learning_rate": 4.220161483731713e-05, "loss": 1.6998, "step": 7234 }, { "epoch": 1.7351477729152929, "grad_norm": 0.621760368347168, "learning_rate": 4.2185626349028704e-05, "loss": 1.6968, "step": 7236 }, { "epoch": 1.7356273604699957, "grad_norm": 0.6193776726722717, "learning_rate": 4.216963786074027e-05, "loss": 1.6641, "step": 7238 }, { "epoch": 1.7361069480246987, "grad_norm": 0.6654245257377625, "learning_rate": 
4.2153649372451835e-05, "loss": 1.7097, "step": 7240 }, { "epoch": 1.7365865355794017, "grad_norm": 0.687473475933075, "learning_rate": 4.213766088416341e-05, "loss": 1.6956, "step": 7242 }, { "epoch": 1.7370661231341047, "grad_norm": 0.6474734544754028, "learning_rate": 4.212167239587497e-05, "loss": 1.7053, "step": 7244 }, { "epoch": 1.7375457106888077, "grad_norm": 0.7391834855079651, "learning_rate": 4.210568390758654e-05, "loss": 1.6878, "step": 7246 }, { "epoch": 1.7380252982435107, "grad_norm": 0.6418306827545166, "learning_rate": 4.208969541929811e-05, "loss": 1.6517, "step": 7248 }, { "epoch": 1.7385048857982135, "grad_norm": 0.674423336982727, "learning_rate": 4.207370693100968e-05, "loss": 1.7013, "step": 7250 }, { "epoch": 1.7389844733529165, "grad_norm": 0.6538198590278625, "learning_rate": 4.205771844272124e-05, "loss": 1.7235, "step": 7252 }, { "epoch": 1.7394640609076193, "grad_norm": 0.728451669216156, "learning_rate": 4.2041729954432815e-05, "loss": 1.675, "step": 7254 }, { "epoch": 1.7399436484623223, "grad_norm": 0.8708463907241821, "learning_rate": 4.202574146614438e-05, "loss": 1.6821, "step": 7256 }, { "epoch": 1.7404232360170253, "grad_norm": 0.620997428894043, "learning_rate": 4.200975297785595e-05, "loss": 1.7047, "step": 7258 }, { "epoch": 1.7409028235717283, "grad_norm": 0.7871776819229126, "learning_rate": 4.199376448956751e-05, "loss": 1.6671, "step": 7260 }, { "epoch": 1.7413824111264313, "grad_norm": 0.7603675127029419, "learning_rate": 4.197777600127908e-05, "loss": 1.6913, "step": 7262 }, { "epoch": 1.7418619986811343, "grad_norm": 0.6583966016769409, "learning_rate": 4.1961787512990644e-05, "loss": 1.7037, "step": 7264 }, { "epoch": 1.7423415862358371, "grad_norm": 0.6349726319313049, "learning_rate": 4.1945799024702216e-05, "loss": 1.68, "step": 7266 }, { "epoch": 1.7428211737905401, "grad_norm": 0.6973651051521301, "learning_rate": 4.192981053641378e-05, "loss": 1.6646, "step": 7268 }, { "epoch": 1.7433007613452431, "grad_norm": 
0.8336693644523621, "learning_rate": 4.191382204812535e-05, "loss": 1.6443, "step": 7270 }, { "epoch": 1.743780348899946, "grad_norm": 0.7116827964782715, "learning_rate": 4.189783355983692e-05, "loss": 1.6489, "step": 7272 }, { "epoch": 1.744259936454649, "grad_norm": 0.7868492007255554, "learning_rate": 4.1881845071548486e-05, "loss": 1.6909, "step": 7274 }, { "epoch": 1.744739524009352, "grad_norm": 0.5745306611061096, "learning_rate": 4.186585658326005e-05, "loss": 1.7051, "step": 7276 }, { "epoch": 1.745219111564055, "grad_norm": 0.6426863074302673, "learning_rate": 4.1849868094971624e-05, "loss": 1.6985, "step": 7278 }, { "epoch": 1.745698699118758, "grad_norm": 0.6750835180282593, "learning_rate": 4.183387960668319e-05, "loss": 1.7554, "step": 7280 }, { "epoch": 1.746178286673461, "grad_norm": 0.5926274061203003, "learning_rate": 4.1817891118394756e-05, "loss": 1.6659, "step": 7282 }, { "epoch": 1.7466578742281638, "grad_norm": 0.6095907092094421, "learning_rate": 4.180190263010633e-05, "loss": 1.7847, "step": 7284 }, { "epoch": 1.7471374617828668, "grad_norm": 0.6413607001304626, "learning_rate": 4.1785914141817894e-05, "loss": 1.7385, "step": 7286 }, { "epoch": 1.7476170493375696, "grad_norm": 0.6431804895401001, "learning_rate": 4.176992565352946e-05, "loss": 1.6738, "step": 7288 }, { "epoch": 1.7480966368922726, "grad_norm": 0.6205611228942871, "learning_rate": 4.175393716524103e-05, "loss": 1.7002, "step": 7290 }, { "epoch": 1.7485762244469756, "grad_norm": 0.8033289313316345, "learning_rate": 4.17379486769526e-05, "loss": 1.6657, "step": 7292 }, { "epoch": 1.7490558120016786, "grad_norm": 0.6545907855033875, "learning_rate": 4.172196018866416e-05, "loss": 1.6742, "step": 7294 }, { "epoch": 1.7495353995563816, "grad_norm": 0.7819506525993347, "learning_rate": 4.170597170037573e-05, "loss": 1.712, "step": 7296 }, { "epoch": 1.7500149871110846, "grad_norm": 0.6465640664100647, "learning_rate": 4.1689983212087295e-05, "loss": 1.644, "step": 7298 }, { 
"epoch": 1.7504945746657874, "grad_norm": 0.6537215709686279, "learning_rate": 4.167399472379887e-05, "loss": 1.6772, "step": 7300 }, { "epoch": 1.7509741622204904, "grad_norm": 0.6920420527458191, "learning_rate": 4.165800623551043e-05, "loss": 1.6624, "step": 7302 }, { "epoch": 1.7514537497751932, "grad_norm": 0.8525955080986023, "learning_rate": 4.1642017747222e-05, "loss": 1.6875, "step": 7304 }, { "epoch": 1.7519333373298962, "grad_norm": 0.6894299983978271, "learning_rate": 4.162602925893357e-05, "loss": 1.6721, "step": 7306 }, { "epoch": 1.7524129248845992, "grad_norm": 0.8036620020866394, "learning_rate": 4.161004077064514e-05, "loss": 1.7386, "step": 7308 }, { "epoch": 1.7528925124393022, "grad_norm": 0.6304709315299988, "learning_rate": 4.15940522823567e-05, "loss": 1.6985, "step": 7310 }, { "epoch": 1.7533720999940052, "grad_norm": 0.6685320734977722, "learning_rate": 4.1578063794068275e-05, "loss": 1.6506, "step": 7312 }, { "epoch": 1.7538516875487082, "grad_norm": 0.6701992154121399, "learning_rate": 4.156207530577984e-05, "loss": 1.7016, "step": 7314 }, { "epoch": 1.754331275103411, "grad_norm": 0.6525934934616089, "learning_rate": 4.1546086817491406e-05, "loss": 1.6707, "step": 7316 }, { "epoch": 1.754810862658114, "grad_norm": 0.6253710985183716, "learning_rate": 4.153009832920298e-05, "loss": 1.6347, "step": 7318 }, { "epoch": 1.7552904502128168, "grad_norm": 0.64095538854599, "learning_rate": 4.1514109840914544e-05, "loss": 1.718, "step": 7320 }, { "epoch": 1.7557700377675198, "grad_norm": 0.7210913300514221, "learning_rate": 4.149812135262611e-05, "loss": 1.7472, "step": 7322 }, { "epoch": 1.7562496253222228, "grad_norm": 0.6185953617095947, "learning_rate": 4.148213286433768e-05, "loss": 1.7079, "step": 7324 }, { "epoch": 1.7567292128769258, "grad_norm": 0.6613900065422058, "learning_rate": 4.146614437604925e-05, "loss": 1.6651, "step": 7326 }, { "epoch": 1.7572088004316289, "grad_norm": 0.7278888821601868, "learning_rate": 
4.1450155887760814e-05, "loss": 1.6561, "step": 7328 }, { "epoch": 1.7576883879863319, "grad_norm": 0.7124983072280884, "learning_rate": 4.1434167399472387e-05, "loss": 1.7144, "step": 7330 }, { "epoch": 1.7581679755410347, "grad_norm": 0.7303761839866638, "learning_rate": 4.1418178911183945e-05, "loss": 1.6607, "step": 7332 }, { "epoch": 1.7586475630957377, "grad_norm": 0.6193926930427551, "learning_rate": 4.140219042289552e-05, "loss": 1.7052, "step": 7334 }, { "epoch": 1.7591271506504407, "grad_norm": 0.7161205410957336, "learning_rate": 4.1386201934607084e-05, "loss": 1.646, "step": 7336 }, { "epoch": 1.7596067382051435, "grad_norm": 0.6068868041038513, "learning_rate": 4.137021344631865e-05, "loss": 1.6871, "step": 7338 }, { "epoch": 1.7600863257598465, "grad_norm": 0.6449130177497864, "learning_rate": 4.135422495803022e-05, "loss": 1.7346, "step": 7340 }, { "epoch": 1.7605659133145495, "grad_norm": 0.5763112902641296, "learning_rate": 4.133823646974179e-05, "loss": 1.6663, "step": 7342 }, { "epoch": 1.7610455008692525, "grad_norm": 0.6483626365661621, "learning_rate": 4.132224798145335e-05, "loss": 1.7376, "step": 7344 }, { "epoch": 1.7615250884239555, "grad_norm": 0.5957968235015869, "learning_rate": 4.1306259493164926e-05, "loss": 1.6942, "step": 7346 }, { "epoch": 1.7620046759786585, "grad_norm": 0.7954830527305603, "learning_rate": 4.129027100487649e-05, "loss": 1.7303, "step": 7348 }, { "epoch": 1.7624842635333613, "grad_norm": 0.6529787182807922, "learning_rate": 4.127428251658806e-05, "loss": 1.7013, "step": 7350 }, { "epoch": 1.7629638510880643, "grad_norm": 0.6161306500434875, "learning_rate": 4.125829402829963e-05, "loss": 1.7155, "step": 7352 }, { "epoch": 1.763443438642767, "grad_norm": 0.6698099970817566, "learning_rate": 4.1242305540011195e-05, "loss": 1.7051, "step": 7354 }, { "epoch": 1.76392302619747, "grad_norm": 0.6075140237808228, "learning_rate": 4.122631705172276e-05, "loss": 1.7094, "step": 7356 }, { "epoch": 1.764402613752173, 
"grad_norm": 0.5898030400276184, "learning_rate": 4.1210328563434333e-05, "loss": 1.657, "step": 7358 }, { "epoch": 1.7648822013068761, "grad_norm": 0.5746397972106934, "learning_rate": 4.11943400751459e-05, "loss": 1.6392, "step": 7360 }, { "epoch": 1.7653617888615791, "grad_norm": 0.6481466293334961, "learning_rate": 4.1178351586857465e-05, "loss": 1.682, "step": 7362 }, { "epoch": 1.7658413764162821, "grad_norm": 0.6189270615577698, "learning_rate": 4.116236309856904e-05, "loss": 1.7168, "step": 7364 }, { "epoch": 1.766320963970985, "grad_norm": 0.6216933727264404, "learning_rate": 4.11463746102806e-05, "loss": 1.6797, "step": 7366 }, { "epoch": 1.766800551525688, "grad_norm": 0.6759783625602722, "learning_rate": 4.113038612199216e-05, "loss": 1.7439, "step": 7368 }, { "epoch": 1.7672801390803907, "grad_norm": 0.6507828235626221, "learning_rate": 4.1114397633703734e-05, "loss": 1.6663, "step": 7370 }, { "epoch": 1.7677597266350937, "grad_norm": 0.6520199179649353, "learning_rate": 4.10984091454153e-05, "loss": 1.7082, "step": 7372 }, { "epoch": 1.7682393141897967, "grad_norm": 0.6596447825431824, "learning_rate": 4.1082420657126866e-05, "loss": 1.722, "step": 7374 }, { "epoch": 1.7687189017444997, "grad_norm": 0.6238113045692444, "learning_rate": 4.106643216883844e-05, "loss": 1.7089, "step": 7376 }, { "epoch": 1.7691984892992028, "grad_norm": 0.5930425524711609, "learning_rate": 4.1050443680550004e-05, "loss": 1.6433, "step": 7378 }, { "epoch": 1.7696780768539058, "grad_norm": 0.6631926894187927, "learning_rate": 4.103445519226157e-05, "loss": 1.6177, "step": 7380 }, { "epoch": 1.7701576644086086, "grad_norm": 0.628372311592102, "learning_rate": 4.101846670397314e-05, "loss": 1.7045, "step": 7382 }, { "epoch": 1.7706372519633116, "grad_norm": 0.6875704526901245, "learning_rate": 4.100247821568471e-05, "loss": 1.6815, "step": 7384 }, { "epoch": 1.7711168395180144, "grad_norm": 0.6806082129478455, "learning_rate": 4.0986489727396274e-05, "loss": 1.7362, "step": 
7386 }, { "epoch": 1.7715964270727174, "grad_norm": 0.757533609867096, "learning_rate": 4.0970501239107846e-05, "loss": 1.7137, "step": 7388 }, { "epoch": 1.7720760146274204, "grad_norm": 0.718622624874115, "learning_rate": 4.095451275081941e-05, "loss": 1.6588, "step": 7390 }, { "epoch": 1.7725556021821234, "grad_norm": 0.573077917098999, "learning_rate": 4.093852426253098e-05, "loss": 1.6701, "step": 7392 }, { "epoch": 1.7730351897368264, "grad_norm": 0.6345736384391785, "learning_rate": 4.092253577424255e-05, "loss": 1.7188, "step": 7394 }, { "epoch": 1.7735147772915294, "grad_norm": 0.6742398142814636, "learning_rate": 4.0906547285954116e-05, "loss": 1.7089, "step": 7396 }, { "epoch": 1.7739943648462322, "grad_norm": 0.6583297848701477, "learning_rate": 4.089055879766568e-05, "loss": 1.6681, "step": 7398 }, { "epoch": 1.7744739524009352, "grad_norm": 0.5893363952636719, "learning_rate": 4.0874570309377254e-05, "loss": 1.66, "step": 7400 }, { "epoch": 1.7749535399556382, "grad_norm": 0.652974545955658, "learning_rate": 4.085858182108882e-05, "loss": 1.6644, "step": 7402 }, { "epoch": 1.775433127510341, "grad_norm": 0.6253976821899414, "learning_rate": 4.0842593332800385e-05, "loss": 1.6384, "step": 7404 }, { "epoch": 1.775912715065044, "grad_norm": 0.7039534449577332, "learning_rate": 4.082660484451195e-05, "loss": 1.732, "step": 7406 }, { "epoch": 1.776392302619747, "grad_norm": 0.6185221672058105, "learning_rate": 4.0810616356223517e-05, "loss": 1.727, "step": 7408 }, { "epoch": 1.77687189017445, "grad_norm": 0.6339254975318909, "learning_rate": 4.079462786793509e-05, "loss": 1.7356, "step": 7410 }, { "epoch": 1.777351477729153, "grad_norm": 0.67397141456604, "learning_rate": 4.0778639379646655e-05, "loss": 1.7368, "step": 7412 }, { "epoch": 1.777831065283856, "grad_norm": 0.6931449770927429, "learning_rate": 4.076265089135822e-05, "loss": 1.7397, "step": 7414 }, { "epoch": 1.7783106528385588, "grad_norm": 0.7096007466316223, "learning_rate": 
4.074666240306979e-05, "loss": 1.6785, "step": 7416 }, { "epoch": 1.7787902403932618, "grad_norm": 0.6167078018188477, "learning_rate": 4.073067391478136e-05, "loss": 1.7273, "step": 7418 }, { "epoch": 1.7792698279479646, "grad_norm": 0.6204904317855835, "learning_rate": 4.0714685426492924e-05, "loss": 1.6611, "step": 7420 }, { "epoch": 1.7797494155026676, "grad_norm": 0.7340939044952393, "learning_rate": 4.06986969382045e-05, "loss": 1.6358, "step": 7422 }, { "epoch": 1.7802290030573706, "grad_norm": 0.7309663891792297, "learning_rate": 4.068270844991606e-05, "loss": 1.7454, "step": 7424 }, { "epoch": 1.7807085906120736, "grad_norm": 0.5999955534934998, "learning_rate": 4.066671996162763e-05, "loss": 1.6646, "step": 7426 }, { "epoch": 1.7811881781667767, "grad_norm": 0.6125255227088928, "learning_rate": 4.06507314733392e-05, "loss": 1.6756, "step": 7428 }, { "epoch": 1.7816677657214797, "grad_norm": 0.7244718670845032, "learning_rate": 4.0634742985050766e-05, "loss": 1.6851, "step": 7430 }, { "epoch": 1.7821473532761825, "grad_norm": 0.6417447924613953, "learning_rate": 4.061875449676233e-05, "loss": 1.7079, "step": 7432 }, { "epoch": 1.7826269408308855, "grad_norm": 0.6106614470481873, "learning_rate": 4.0602766008473905e-05, "loss": 1.6849, "step": 7434 }, { "epoch": 1.7831065283855883, "grad_norm": 0.5996801853179932, "learning_rate": 4.058677752018547e-05, "loss": 1.7083, "step": 7436 }, { "epoch": 1.7835861159402913, "grad_norm": 0.6185377836227417, "learning_rate": 4.0570789031897036e-05, "loss": 1.6778, "step": 7438 }, { "epoch": 1.7840657034949943, "grad_norm": 0.8414687514305115, "learning_rate": 4.055480054360861e-05, "loss": 1.7244, "step": 7440 }, { "epoch": 1.7845452910496973, "grad_norm": 0.7387486100196838, "learning_rate": 4.053881205532017e-05, "loss": 1.6701, "step": 7442 }, { "epoch": 1.7850248786044003, "grad_norm": 0.6899896264076233, "learning_rate": 4.052282356703174e-05, "loss": 1.688, "step": 7444 }, { "epoch": 1.7855044661591033, 
"grad_norm": 0.627964198589325, "learning_rate": 4.0506835078743306e-05, "loss": 1.6591, "step": 7446 }, { "epoch": 1.785984053713806, "grad_norm": 0.612082839012146, "learning_rate": 4.049084659045487e-05, "loss": 1.7101, "step": 7448 }, { "epoch": 1.786463641268509, "grad_norm": 0.6534795761108398, "learning_rate": 4.0474858102166444e-05, "loss": 1.6689, "step": 7450 }, { "epoch": 1.7869432288232119, "grad_norm": 0.6220560073852539, "learning_rate": 4.045886961387801e-05, "loss": 1.7109, "step": 7452 }, { "epoch": 1.787422816377915, "grad_norm": 0.612832248210907, "learning_rate": 4.0442881125589575e-05, "loss": 1.6239, "step": 7454 }, { "epoch": 1.787902403932618, "grad_norm": 0.7320275902748108, "learning_rate": 4.042689263730115e-05, "loss": 1.7245, "step": 7456 }, { "epoch": 1.788381991487321, "grad_norm": 0.6549773216247559, "learning_rate": 4.041090414901271e-05, "loss": 1.7097, "step": 7458 }, { "epoch": 1.788861579042024, "grad_norm": 0.6270062923431396, "learning_rate": 4.039491566072428e-05, "loss": 1.6818, "step": 7460 }, { "epoch": 1.789341166596727, "grad_norm": 0.6484335064888, "learning_rate": 4.037892717243585e-05, "loss": 1.6547, "step": 7462 }, { "epoch": 1.7898207541514297, "grad_norm": 0.6511297821998596, "learning_rate": 4.036293868414742e-05, "loss": 1.7109, "step": 7464 }, { "epoch": 1.7903003417061327, "grad_norm": 0.7380300760269165, "learning_rate": 4.034695019585898e-05, "loss": 1.6872, "step": 7466 }, { "epoch": 1.7907799292608357, "grad_norm": 0.767991840839386, "learning_rate": 4.0330961707570555e-05, "loss": 1.6567, "step": 7468 }, { "epoch": 1.7912595168155385, "grad_norm": 0.6468380093574524, "learning_rate": 4.031497321928212e-05, "loss": 1.6803, "step": 7470 }, { "epoch": 1.7917391043702415, "grad_norm": 0.6131623983383179, "learning_rate": 4.029898473099369e-05, "loss": 1.6569, "step": 7472 }, { "epoch": 1.7922186919249445, "grad_norm": 0.6937527656555176, "learning_rate": 4.028299624270526e-05, "loss": 1.6664, "step": 7474 }, 
{ "epoch": 1.7926982794796475, "grad_norm": 0.6432701945304871, "learning_rate": 4.0267007754416825e-05, "loss": 1.6594, "step": 7476 }, { "epoch": 1.7931778670343506, "grad_norm": 0.6733567714691162, "learning_rate": 4.0251019266128384e-05, "loss": 1.7166, "step": 7478 }, { "epoch": 1.7936574545890536, "grad_norm": 0.7920487523078918, "learning_rate": 4.0235030777839956e-05, "loss": 1.6987, "step": 7480 }, { "epoch": 1.7941370421437564, "grad_norm": 0.7047169804573059, "learning_rate": 4.021904228955152e-05, "loss": 1.6471, "step": 7482 }, { "epoch": 1.7946166296984594, "grad_norm": 0.7339998483657837, "learning_rate": 4.020305380126309e-05, "loss": 1.6907, "step": 7484 }, { "epoch": 1.7950962172531622, "grad_norm": 0.6428928971290588, "learning_rate": 4.018706531297466e-05, "loss": 1.686, "step": 7486 }, { "epoch": 1.7955758048078652, "grad_norm": 0.72614985704422, "learning_rate": 4.0171076824686226e-05, "loss": 1.6963, "step": 7488 }, { "epoch": 1.7960553923625682, "grad_norm": 0.6257103681564331, "learning_rate": 4.015508833639779e-05, "loss": 1.6871, "step": 7490 }, { "epoch": 1.7965349799172712, "grad_norm": 0.6519310474395752, "learning_rate": 4.0139099848109364e-05, "loss": 1.6806, "step": 7492 }, { "epoch": 1.7970145674719742, "grad_norm": 0.8307796120643616, "learning_rate": 4.012311135982093e-05, "loss": 1.6963, "step": 7494 }, { "epoch": 1.7974941550266772, "grad_norm": 0.785900354385376, "learning_rate": 4.0107122871532495e-05, "loss": 1.7358, "step": 7496 }, { "epoch": 1.79797374258138, "grad_norm": 0.6603189706802368, "learning_rate": 4.009113438324407e-05, "loss": 1.7137, "step": 7498 }, { "epoch": 1.798453330136083, "grad_norm": 0.6985392570495605, "learning_rate": 4.0075145894955634e-05, "loss": 1.7085, "step": 7500 }, { "epoch": 1.7989329176907858, "grad_norm": 0.6027252078056335, "learning_rate": 4.00591574066672e-05, "loss": 1.7315, "step": 7502 }, { "epoch": 1.7994125052454888, "grad_norm": 0.6263147592544556, "learning_rate": 
4.004316891837877e-05, "loss": 1.7182, "step": 7504 }, { "epoch": 1.7998920928001918, "grad_norm": 0.6991830468177795, "learning_rate": 4.002718043009034e-05, "loss": 1.6916, "step": 7506 }, { "epoch": 1.8003716803548948, "grad_norm": 0.6039568781852722, "learning_rate": 4.00111919418019e-05, "loss": 1.7256, "step": 7508 }, { "epoch": 1.8008512679095978, "grad_norm": 0.653226375579834, "learning_rate": 3.9995203453513476e-05, "loss": 1.7092, "step": 7510 }, { "epoch": 1.8013308554643008, "grad_norm": 0.6624664664268494, "learning_rate": 3.997921496522504e-05, "loss": 1.7414, "step": 7512 }, { "epoch": 1.8018104430190036, "grad_norm": 0.6736733317375183, "learning_rate": 3.996322647693661e-05, "loss": 1.6769, "step": 7514 }, { "epoch": 1.8022900305737066, "grad_norm": 0.6279837489128113, "learning_rate": 3.994723798864817e-05, "loss": 1.6627, "step": 7516 }, { "epoch": 1.8027696181284094, "grad_norm": 0.6112959980964661, "learning_rate": 3.993124950035974e-05, "loss": 1.6927, "step": 7518 }, { "epoch": 1.8032492056831124, "grad_norm": 0.7009296417236328, "learning_rate": 3.991526101207131e-05, "loss": 1.7038, "step": 7520 }, { "epoch": 1.8037287932378154, "grad_norm": 0.6602535843849182, "learning_rate": 3.989927252378288e-05, "loss": 1.6792, "step": 7522 }, { "epoch": 1.8042083807925184, "grad_norm": 0.7578940391540527, "learning_rate": 3.988328403549444e-05, "loss": 1.7013, "step": 7524 }, { "epoch": 1.8046879683472214, "grad_norm": 0.7011135816574097, "learning_rate": 3.9867295547206015e-05, "loss": 1.7325, "step": 7526 }, { "epoch": 1.8051675559019245, "grad_norm": 0.9930646419525146, "learning_rate": 3.985130705891758e-05, "loss": 1.6994, "step": 7528 }, { "epoch": 1.8056471434566272, "grad_norm": 0.7074646949768066, "learning_rate": 3.9835318570629146e-05, "loss": 1.6548, "step": 7530 }, { "epoch": 1.8061267310113303, "grad_norm": 0.6386991739273071, "learning_rate": 3.981933008234072e-05, "loss": 1.6835, "step": 7532 }, { "epoch": 1.8066063185660333, 
"grad_norm": 0.5932888984680176, "learning_rate": 3.9803341594052284e-05, "loss": 1.6424, "step": 7534 }, { "epoch": 1.807085906120736, "grad_norm": 0.605454683303833, "learning_rate": 3.978735310576385e-05, "loss": 1.678, "step": 7536 }, { "epoch": 1.807565493675439, "grad_norm": 0.6764107942581177, "learning_rate": 3.977136461747542e-05, "loss": 1.6644, "step": 7538 }, { "epoch": 1.808045081230142, "grad_norm": 0.6844762563705444, "learning_rate": 3.975537612918699e-05, "loss": 1.6715, "step": 7540 }, { "epoch": 1.808524668784845, "grad_norm": 0.6676619052886963, "learning_rate": 3.9739387640898554e-05, "loss": 1.7318, "step": 7542 }, { "epoch": 1.809004256339548, "grad_norm": 0.7513611316680908, "learning_rate": 3.9723399152610126e-05, "loss": 1.6437, "step": 7544 }, { "epoch": 1.809483843894251, "grad_norm": 0.7095953226089478, "learning_rate": 3.970741066432169e-05, "loss": 1.6539, "step": 7546 }, { "epoch": 1.8099634314489539, "grad_norm": 0.6077327132225037, "learning_rate": 3.969142217603326e-05, "loss": 1.6876, "step": 7548 }, { "epoch": 1.810443019003657, "grad_norm": 0.7327786087989807, "learning_rate": 3.9675433687744824e-05, "loss": 1.6584, "step": 7550 }, { "epoch": 1.8109226065583597, "grad_norm": 0.7483721971511841, "learning_rate": 3.965944519945639e-05, "loss": 1.6915, "step": 7552 }, { "epoch": 1.8114021941130627, "grad_norm": 0.6254407167434692, "learning_rate": 3.964345671116796e-05, "loss": 1.6647, "step": 7554 }, { "epoch": 1.8118817816677657, "grad_norm": 0.7506750822067261, "learning_rate": 3.962746822287953e-05, "loss": 1.7223, "step": 7556 }, { "epoch": 1.8123613692224687, "grad_norm": 0.6346734166145325, "learning_rate": 3.961147973459109e-05, "loss": 1.6288, "step": 7558 }, { "epoch": 1.8128409567771717, "grad_norm": 0.6455095410346985, "learning_rate": 3.9595491246302666e-05, "loss": 1.6652, "step": 7560 }, { "epoch": 1.8133205443318747, "grad_norm": 0.6286899447441101, "learning_rate": 3.957950275801423e-05, "loss": 1.6791, "step": 
7562 }, { "epoch": 1.8138001318865775, "grad_norm": 0.6029579639434814, "learning_rate": 3.95635142697258e-05, "loss": 1.6963, "step": 7564 }, { "epoch": 1.8142797194412805, "grad_norm": 0.7341497540473938, "learning_rate": 3.954752578143737e-05, "loss": 1.6523, "step": 7566 }, { "epoch": 1.8147593069959833, "grad_norm": 0.8122292757034302, "learning_rate": 3.9531537293148935e-05, "loss": 1.7008, "step": 7568 }, { "epoch": 1.8152388945506863, "grad_norm": 0.6709796190261841, "learning_rate": 3.95155488048605e-05, "loss": 1.7329, "step": 7570 }, { "epoch": 1.8157184821053893, "grad_norm": 0.813251793384552, "learning_rate": 3.949956031657207e-05, "loss": 1.681, "step": 7572 }, { "epoch": 1.8161980696600923, "grad_norm": 0.70853590965271, "learning_rate": 3.948357182828364e-05, "loss": 1.6908, "step": 7574 }, { "epoch": 1.8166776572147953, "grad_norm": 0.6333069801330566, "learning_rate": 3.9467583339995205e-05, "loss": 1.6916, "step": 7576 }, { "epoch": 1.8171572447694984, "grad_norm": 0.813346803188324, "learning_rate": 3.945159485170678e-05, "loss": 1.6939, "step": 7578 }, { "epoch": 1.8176368323242011, "grad_norm": 0.7064893841743469, "learning_rate": 3.943560636341834e-05, "loss": 1.7294, "step": 7580 }, { "epoch": 1.8181164198789042, "grad_norm": 0.6683565378189087, "learning_rate": 3.941961787512991e-05, "loss": 1.6782, "step": 7582 }, { "epoch": 1.818596007433607, "grad_norm": 0.6622360944747925, "learning_rate": 3.940362938684148e-05, "loss": 1.6543, "step": 7584 }, { "epoch": 1.81907559498831, "grad_norm": 0.6458684206008911, "learning_rate": 3.938764089855304e-05, "loss": 1.7173, "step": 7586 }, { "epoch": 1.819555182543013, "grad_norm": 0.6564217209815979, "learning_rate": 3.9371652410264606e-05, "loss": 1.7018, "step": 7588 }, { "epoch": 1.820034770097716, "grad_norm": 0.7021989226341248, "learning_rate": 3.935566392197618e-05, "loss": 1.7148, "step": 7590 }, { "epoch": 1.820514357652419, "grad_norm": 0.5832430124282837, "learning_rate": 
3.9339675433687744e-05, "loss": 1.6639, "step": 7592 }, { "epoch": 1.820993945207122, "grad_norm": 0.7046759724617004, "learning_rate": 3.932368694539931e-05, "loss": 1.6851, "step": 7594 }, { "epoch": 1.8214735327618248, "grad_norm": 0.6685476899147034, "learning_rate": 3.930769845711088e-05, "loss": 1.6666, "step": 7596 }, { "epoch": 1.8219531203165278, "grad_norm": 0.7470160722732544, "learning_rate": 3.929170996882245e-05, "loss": 1.7092, "step": 7598 }, { "epoch": 1.8224327078712308, "grad_norm": 0.6556419134140015, "learning_rate": 3.9275721480534013e-05, "loss": 1.6398, "step": 7600 }, { "epoch": 1.8224327078712308, "eval_loss": 1.7246181964874268, "eval_runtime": 331.3779, "eval_samples_per_second": 402.709, "eval_steps_per_second": 12.587, "step": 7600 }, { "epoch": 1.8229122954259336, "grad_norm": 0.6821807026863098, "learning_rate": 3.9259732992245586e-05, "loss": 1.6953, "step": 7602 }, { "epoch": 1.8233918829806366, "grad_norm": 0.6331961154937744, "learning_rate": 3.924374450395715e-05, "loss": 1.716, "step": 7604 }, { "epoch": 1.8238714705353396, "grad_norm": 0.7016385793685913, "learning_rate": 3.922775601566872e-05, "loss": 1.6593, "step": 7606 }, { "epoch": 1.8243510580900426, "grad_norm": 0.6572972536087036, "learning_rate": 3.921176752738029e-05, "loss": 1.6748, "step": 7608 }, { "epoch": 1.8248306456447456, "grad_norm": 0.6619850397109985, "learning_rate": 3.9195779039091856e-05, "loss": 1.735, "step": 7610 }, { "epoch": 1.8253102331994486, "grad_norm": 0.633041501045227, "learning_rate": 3.917979055080343e-05, "loss": 1.721, "step": 7612 }, { "epoch": 1.8257898207541514, "grad_norm": 0.8032136559486389, "learning_rate": 3.9163802062514994e-05, "loss": 1.6835, "step": 7614 }, { "epoch": 1.8262694083088544, "grad_norm": 0.6541498303413391, "learning_rate": 3.914781357422656e-05, "loss": 1.6616, "step": 7616 }, { "epoch": 1.8267489958635572, "grad_norm": 0.6337077617645264, "learning_rate": 3.913182508593813e-05, "loss": 1.7312, "step": 7618 }, { 
"epoch": 1.8272285834182602, "grad_norm": 0.6612256765365601, "learning_rate": 3.91158365976497e-05, "loss": 1.6975, "step": 7620 }, { "epoch": 1.8277081709729632, "grad_norm": 0.5918282866477966, "learning_rate": 3.909984810936126e-05, "loss": 1.6592, "step": 7622 }, { "epoch": 1.8281877585276662, "grad_norm": 0.6824080944061279, "learning_rate": 3.908385962107283e-05, "loss": 1.6376, "step": 7624 }, { "epoch": 1.8286673460823692, "grad_norm": 0.6861978769302368, "learning_rate": 3.9067871132784395e-05, "loss": 1.6669, "step": 7626 }, { "epoch": 1.8291469336370723, "grad_norm": 0.8563194870948792, "learning_rate": 3.905188264449596e-05, "loss": 1.6871, "step": 7628 }, { "epoch": 1.829626521191775, "grad_norm": 0.6002926826477051, "learning_rate": 3.903589415620753e-05, "loss": 1.6695, "step": 7630 }, { "epoch": 1.830106108746478, "grad_norm": 0.6460264325141907, "learning_rate": 3.90199056679191e-05, "loss": 1.6652, "step": 7632 }, { "epoch": 1.8305856963011808, "grad_norm": 0.8216442465782166, "learning_rate": 3.9003917179630664e-05, "loss": 1.6382, "step": 7634 }, { "epoch": 1.8310652838558839, "grad_norm": 0.7218013405799866, "learning_rate": 3.898792869134224e-05, "loss": 1.6628, "step": 7636 }, { "epoch": 1.8315448714105869, "grad_norm": 0.6895619630813599, "learning_rate": 3.89719402030538e-05, "loss": 1.7011, "step": 7638 }, { "epoch": 1.8320244589652899, "grad_norm": 0.675621747970581, "learning_rate": 3.895595171476537e-05, "loss": 1.6689, "step": 7640 }, { "epoch": 1.8325040465199929, "grad_norm": 0.6144310235977173, "learning_rate": 3.893996322647694e-05, "loss": 1.6632, "step": 7642 }, { "epoch": 1.8329836340746959, "grad_norm": 0.61588454246521, "learning_rate": 3.8923974738188506e-05, "loss": 1.7147, "step": 7644 }, { "epoch": 1.8334632216293987, "grad_norm": 0.6885856986045837, "learning_rate": 3.890798624990007e-05, "loss": 1.6631, "step": 7646 }, { "epoch": 1.8339428091841017, "grad_norm": 0.6935003995895386, "learning_rate": 
3.8891997761611645e-05, "loss": 1.7017, "step": 7648 }, { "epoch": 1.8344223967388045, "grad_norm": 0.5981388688087463, "learning_rate": 3.887600927332321e-05, "loss": 1.7284, "step": 7650 }, { "epoch": 1.8349019842935075, "grad_norm": 0.8613856434822083, "learning_rate": 3.8860020785034776e-05, "loss": 1.7438, "step": 7652 }, { "epoch": 1.8353815718482105, "grad_norm": 0.6386662721633911, "learning_rate": 3.884403229674635e-05, "loss": 1.658, "step": 7654 }, { "epoch": 1.8358611594029135, "grad_norm": 0.7437959909439087, "learning_rate": 3.8828043808457914e-05, "loss": 1.7435, "step": 7656 }, { "epoch": 1.8363407469576165, "grad_norm": 0.6165745854377747, "learning_rate": 3.881205532016948e-05, "loss": 1.6359, "step": 7658 }, { "epoch": 1.8368203345123195, "grad_norm": 0.6585517525672913, "learning_rate": 3.8796066831881045e-05, "loss": 1.6649, "step": 7660 }, { "epoch": 1.8372999220670223, "grad_norm": 0.634559154510498, "learning_rate": 3.878007834359261e-05, "loss": 1.6891, "step": 7662 }, { "epoch": 1.8377795096217253, "grad_norm": 0.6811406016349792, "learning_rate": 3.8764089855304184e-05, "loss": 1.6788, "step": 7664 }, { "epoch": 1.8382590971764283, "grad_norm": 0.5813066959381104, "learning_rate": 3.874810136701575e-05, "loss": 1.69, "step": 7666 }, { "epoch": 1.8387386847311311, "grad_norm": 0.6294447779655457, "learning_rate": 3.8732112878727315e-05, "loss": 1.7002, "step": 7668 }, { "epoch": 1.8392182722858341, "grad_norm": 0.671968400478363, "learning_rate": 3.871612439043889e-05, "loss": 1.6359, "step": 7670 }, { "epoch": 1.8396978598405371, "grad_norm": 0.6696687340736389, "learning_rate": 3.870013590215045e-05, "loss": 1.7645, "step": 7672 }, { "epoch": 1.8401774473952401, "grad_norm": 0.6473903059959412, "learning_rate": 3.868414741386202e-05, "loss": 1.6854, "step": 7674 }, { "epoch": 1.8406570349499431, "grad_norm": 0.658176839351654, "learning_rate": 3.866815892557359e-05, "loss": 1.7365, "step": 7676 }, { "epoch": 1.8411366225046462, 
"grad_norm": 0.6221208572387695, "learning_rate": 3.865217043728516e-05, "loss": 1.6386, "step": 7678 }, { "epoch": 1.841616210059349, "grad_norm": 0.7193309664726257, "learning_rate": 3.863618194899672e-05, "loss": 1.6237, "step": 7680 }, { "epoch": 1.842095797614052, "grad_norm": 0.6810340881347656, "learning_rate": 3.8620193460708295e-05, "loss": 1.6569, "step": 7682 }, { "epoch": 1.8425753851687547, "grad_norm": 0.6908073425292969, "learning_rate": 3.860420497241986e-05, "loss": 1.6958, "step": 7684 }, { "epoch": 1.8430549727234578, "grad_norm": 0.6718606352806091, "learning_rate": 3.858821648413143e-05, "loss": 1.6563, "step": 7686 }, { "epoch": 1.8435345602781608, "grad_norm": 0.6639564037322998, "learning_rate": 3.8572227995843e-05, "loss": 1.6667, "step": 7688 }, { "epoch": 1.8440141478328638, "grad_norm": 0.6720911860466003, "learning_rate": 3.8556239507554565e-05, "loss": 1.6954, "step": 7690 }, { "epoch": 1.8444937353875668, "grad_norm": 0.8060373067855835, "learning_rate": 3.854025101926613e-05, "loss": 1.6855, "step": 7692 }, { "epoch": 1.8449733229422698, "grad_norm": 0.7618966102600098, "learning_rate": 3.85242625309777e-05, "loss": 1.7108, "step": 7694 }, { "epoch": 1.8454529104969726, "grad_norm": 0.6822398900985718, "learning_rate": 3.850827404268926e-05, "loss": 1.7103, "step": 7696 }, { "epoch": 1.8459324980516756, "grad_norm": 0.6374587416648865, "learning_rate": 3.849228555440083e-05, "loss": 1.6906, "step": 7698 }, { "epoch": 1.8464120856063784, "grad_norm": 0.6241010427474976, "learning_rate": 3.84762970661124e-05, "loss": 1.6983, "step": 7700 }, { "epoch": 1.8468916731610814, "grad_norm": 0.6899442076683044, "learning_rate": 3.8460308577823966e-05, "loss": 1.6963, "step": 7702 }, { "epoch": 1.8473712607157844, "grad_norm": 0.6559684872627258, "learning_rate": 3.844432008953553e-05, "loss": 1.7379, "step": 7704 }, { "epoch": 1.8478508482704874, "grad_norm": 0.681191623210907, "learning_rate": 3.8428331601247104e-05, "loss": 1.6886, "step": 
7706 }, { "epoch": 1.8483304358251904, "grad_norm": 0.7125858664512634, "learning_rate": 3.841234311295867e-05, "loss": 1.68, "step": 7708 }, { "epoch": 1.8488100233798934, "grad_norm": 0.6839450597763062, "learning_rate": 3.839635462467024e-05, "loss": 1.6989, "step": 7710 }, { "epoch": 1.8492896109345962, "grad_norm": 0.6547253131866455, "learning_rate": 3.838036613638181e-05, "loss": 1.7065, "step": 7712 }, { "epoch": 1.8497691984892992, "grad_norm": 0.5844863653182983, "learning_rate": 3.8364377648093374e-05, "loss": 1.666, "step": 7714 }, { "epoch": 1.850248786044002, "grad_norm": 0.7930759787559509, "learning_rate": 3.8348389159804946e-05, "loss": 1.6622, "step": 7716 }, { "epoch": 1.850728373598705, "grad_norm": 0.6219218969345093, "learning_rate": 3.833240067151651e-05, "loss": 1.6934, "step": 7718 }, { "epoch": 1.851207961153408, "grad_norm": 0.6353334188461304, "learning_rate": 3.831641218322808e-05, "loss": 1.6674, "step": 7720 }, { "epoch": 1.851687548708111, "grad_norm": 0.6381412744522095, "learning_rate": 3.830042369493965e-05, "loss": 1.7273, "step": 7722 }, { "epoch": 1.852167136262814, "grad_norm": 0.6717019081115723, "learning_rate": 3.8284435206651216e-05, "loss": 1.7533, "step": 7724 }, { "epoch": 1.852646723817517, "grad_norm": 0.6328226923942566, "learning_rate": 3.826844671836278e-05, "loss": 1.7094, "step": 7726 }, { "epoch": 1.8531263113722198, "grad_norm": 0.6675305962562561, "learning_rate": 3.8252458230074354e-05, "loss": 1.6722, "step": 7728 }, { "epoch": 1.8536058989269228, "grad_norm": 0.6133986711502075, "learning_rate": 3.823646974178592e-05, "loss": 1.666, "step": 7730 }, { "epoch": 1.8540854864816259, "grad_norm": 0.6370675563812256, "learning_rate": 3.822048125349748e-05, "loss": 1.6994, "step": 7732 }, { "epoch": 1.8545650740363286, "grad_norm": 0.6081402897834778, "learning_rate": 3.820449276520905e-05, "loss": 1.6176, "step": 7734 }, { "epoch": 1.8550446615910317, "grad_norm": 0.6632161736488342, "learning_rate": 
3.8188504276920617e-05, "loss": 1.6879, "step": 7736 }, { "epoch": 1.8555242491457347, "grad_norm": 0.7740578651428223, "learning_rate": 3.817251578863218e-05, "loss": 1.7261, "step": 7738 }, { "epoch": 1.8560038367004377, "grad_norm": 0.6090090870857239, "learning_rate": 3.8156527300343755e-05, "loss": 1.7097, "step": 7740 }, { "epoch": 1.8564834242551407, "grad_norm": 0.6135731339454651, "learning_rate": 3.814053881205532e-05, "loss": 1.6864, "step": 7742 }, { "epoch": 1.8569630118098437, "grad_norm": 0.7921651005744934, "learning_rate": 3.8124550323766886e-05, "loss": 1.7189, "step": 7744 }, { "epoch": 1.8574425993645465, "grad_norm": 0.6666748523712158, "learning_rate": 3.810856183547846e-05, "loss": 1.6994, "step": 7746 }, { "epoch": 1.8579221869192495, "grad_norm": 0.6198654174804688, "learning_rate": 3.8092573347190024e-05, "loss": 1.685, "step": 7748 }, { "epoch": 1.8584017744739523, "grad_norm": 0.6262189745903015, "learning_rate": 3.807658485890159e-05, "loss": 1.6558, "step": 7750 }, { "epoch": 1.8588813620286553, "grad_norm": 0.693250298500061, "learning_rate": 3.806059637061316e-05, "loss": 1.7, "step": 7752 }, { "epoch": 1.8593609495833583, "grad_norm": 0.6223062872886658, "learning_rate": 3.804460788232473e-05, "loss": 1.6582, "step": 7754 }, { "epoch": 1.8598405371380613, "grad_norm": 0.6307729482650757, "learning_rate": 3.8028619394036294e-05, "loss": 1.71, "step": 7756 }, { "epoch": 1.8603201246927643, "grad_norm": 0.6023372411727905, "learning_rate": 3.8012630905747866e-05, "loss": 1.6738, "step": 7758 }, { "epoch": 1.8607997122474673, "grad_norm": 0.5629035234451294, "learning_rate": 3.799664241745943e-05, "loss": 1.6803, "step": 7760 }, { "epoch": 1.86127929980217, "grad_norm": 0.6593649387359619, "learning_rate": 3.7980653929171e-05, "loss": 1.7406, "step": 7762 }, { "epoch": 1.8617588873568731, "grad_norm": 0.686018705368042, "learning_rate": 3.796466544088257e-05, "loss": 1.6914, "step": 7764 }, { "epoch": 1.862238474911576, "grad_norm": 
0.6527137160301208, "learning_rate": 3.7948676952594136e-05, "loss": 1.7178, "step": 7766 }, { "epoch": 1.862718062466279, "grad_norm": 0.6376665234565735, "learning_rate": 3.79326884643057e-05, "loss": 1.6931, "step": 7768 }, { "epoch": 1.863197650020982, "grad_norm": 0.6643142700195312, "learning_rate": 3.791669997601727e-05, "loss": 1.6528, "step": 7770 }, { "epoch": 1.863677237575685, "grad_norm": 0.6276307702064514, "learning_rate": 3.790071148772883e-05, "loss": 1.6827, "step": 7772 }, { "epoch": 1.864156825130388, "grad_norm": 0.6178349256515503, "learning_rate": 3.7884722999440406e-05, "loss": 1.7447, "step": 7774 }, { "epoch": 1.864636412685091, "grad_norm": 0.6427262425422668, "learning_rate": 3.786873451115197e-05, "loss": 1.6848, "step": 7776 }, { "epoch": 1.8651160002397937, "grad_norm": 0.6224375367164612, "learning_rate": 3.785274602286354e-05, "loss": 1.6921, "step": 7778 }, { "epoch": 1.8655955877944967, "grad_norm": 0.6770567297935486, "learning_rate": 3.783675753457511e-05, "loss": 1.6915, "step": 7780 }, { "epoch": 1.8660751753491995, "grad_norm": 0.712465226650238, "learning_rate": 3.7820769046286675e-05, "loss": 1.6884, "step": 7782 }, { "epoch": 1.8665547629039025, "grad_norm": 0.7128692865371704, "learning_rate": 3.780478055799824e-05, "loss": 1.7425, "step": 7784 }, { "epoch": 1.8670343504586056, "grad_norm": 0.6720327138900757, "learning_rate": 3.778879206970981e-05, "loss": 1.6481, "step": 7786 }, { "epoch": 1.8675139380133086, "grad_norm": 0.6193423271179199, "learning_rate": 3.777280358142138e-05, "loss": 1.7282, "step": 7788 }, { "epoch": 1.8679935255680116, "grad_norm": 0.6236129403114319, "learning_rate": 3.7756815093132945e-05, "loss": 1.6606, "step": 7790 }, { "epoch": 1.8684731131227146, "grad_norm": 0.5959348678588867, "learning_rate": 3.774082660484452e-05, "loss": 1.683, "step": 7792 }, { "epoch": 1.8689527006774174, "grad_norm": 0.6995235681533813, "learning_rate": 3.772483811655608e-05, "loss": 1.6918, "step": 7794 }, { 
"epoch": 1.8694322882321204, "grad_norm": 0.6525107622146606, "learning_rate": 3.770884962826765e-05, "loss": 1.687, "step": 7796 }, { "epoch": 1.8699118757868234, "grad_norm": 0.7019533514976501, "learning_rate": 3.769286113997922e-05, "loss": 1.7386, "step": 7798 }, { "epoch": 1.8703914633415262, "grad_norm": 0.7124558091163635, "learning_rate": 3.767687265169079e-05, "loss": 1.7026, "step": 7800 }, { "epoch": 1.8708710508962292, "grad_norm": 0.6296766996383667, "learning_rate": 3.766088416340235e-05, "loss": 1.6795, "step": 7802 }, { "epoch": 1.8713506384509322, "grad_norm": 0.8553352355957031, "learning_rate": 3.764489567511392e-05, "loss": 1.641, "step": 7804 }, { "epoch": 1.8718302260056352, "grad_norm": 0.6361808180809021, "learning_rate": 3.7628907186825484e-05, "loss": 1.6883, "step": 7806 }, { "epoch": 1.8723098135603382, "grad_norm": 0.6784217953681946, "learning_rate": 3.7612918698537056e-05, "loss": 1.6895, "step": 7808 }, { "epoch": 1.8727894011150412, "grad_norm": 0.7343073487281799, "learning_rate": 3.759693021024862e-05, "loss": 1.6523, "step": 7810 }, { "epoch": 1.873268988669744, "grad_norm": 0.6178284287452698, "learning_rate": 3.758094172196019e-05, "loss": 1.6223, "step": 7812 }, { "epoch": 1.873748576224447, "grad_norm": 0.6494504809379578, "learning_rate": 3.756495323367176e-05, "loss": 1.6833, "step": 7814 }, { "epoch": 1.8742281637791498, "grad_norm": 0.6655352711677551, "learning_rate": 3.7548964745383326e-05, "loss": 1.6791, "step": 7816 }, { "epoch": 1.8747077513338528, "grad_norm": 0.6164079904556274, "learning_rate": 3.753297625709489e-05, "loss": 1.6712, "step": 7818 }, { "epoch": 1.8751873388885558, "grad_norm": 0.7991706728935242, "learning_rate": 3.7516987768806464e-05, "loss": 1.6956, "step": 7820 }, { "epoch": 1.8756669264432588, "grad_norm": 0.6664561629295349, "learning_rate": 3.750099928051803e-05, "loss": 1.7061, "step": 7822 }, { "epoch": 1.8761465139979618, "grad_norm": 0.6334608793258667, "learning_rate": 
3.7485010792229595e-05, "loss": 1.638, "step": 7824 }, { "epoch": 1.8766261015526648, "grad_norm": 0.6536276340484619, "learning_rate": 3.746902230394117e-05, "loss": 1.7057, "step": 7826 }, { "epoch": 1.8771056891073676, "grad_norm": 0.6436629295349121, "learning_rate": 3.7453033815652734e-05, "loss": 1.7307, "step": 7828 }, { "epoch": 1.8775852766620706, "grad_norm": 0.6583948135375977, "learning_rate": 3.74370453273643e-05, "loss": 1.6855, "step": 7830 }, { "epoch": 1.8780648642167734, "grad_norm": 0.6534018516540527, "learning_rate": 3.742105683907587e-05, "loss": 1.7136, "step": 7832 }, { "epoch": 1.8785444517714764, "grad_norm": 0.6205143332481384, "learning_rate": 3.740506835078744e-05, "loss": 1.6751, "step": 7834 }, { "epoch": 1.8790240393261795, "grad_norm": 0.658017635345459, "learning_rate": 3.7389079862499e-05, "loss": 1.6781, "step": 7836 }, { "epoch": 1.8795036268808825, "grad_norm": 0.6296694874763489, "learning_rate": 3.7373091374210576e-05, "loss": 1.6923, "step": 7838 }, { "epoch": 1.8799832144355855, "grad_norm": 0.6294623613357544, "learning_rate": 3.735710288592214e-05, "loss": 1.6803, "step": 7840 }, { "epoch": 1.8804628019902885, "grad_norm": 0.6568511128425598, "learning_rate": 3.73411143976337e-05, "loss": 1.751, "step": 7842 }, { "epoch": 1.8809423895449913, "grad_norm": 0.7246510982513428, "learning_rate": 3.732512590934527e-05, "loss": 1.6763, "step": 7844 }, { "epoch": 1.8814219770996943, "grad_norm": 0.636059045791626, "learning_rate": 3.730913742105684e-05, "loss": 1.6964, "step": 7846 }, { "epoch": 1.881901564654397, "grad_norm": 0.6544449329376221, "learning_rate": 3.7293148932768404e-05, "loss": 1.6827, "step": 7848 }, { "epoch": 1.8823811522091, "grad_norm": 0.7059678435325623, "learning_rate": 3.727716044447998e-05, "loss": 1.7522, "step": 7850 }, { "epoch": 1.882860739763803, "grad_norm": 0.7859704494476318, "learning_rate": 3.726117195619154e-05, "loss": 1.721, "step": 7852 }, { "epoch": 1.883340327318506, "grad_norm": 
0.6012389659881592, "learning_rate": 3.724518346790311e-05, "loss": 1.7223, "step": 7854 }, { "epoch": 1.883819914873209, "grad_norm": 0.6737827658653259, "learning_rate": 3.722919497961468e-05, "loss": 1.6855, "step": 7856 }, { "epoch": 1.884299502427912, "grad_norm": 0.7209442853927612, "learning_rate": 3.7213206491326246e-05, "loss": 1.688, "step": 7858 }, { "epoch": 1.884779089982615, "grad_norm": 0.5970027446746826, "learning_rate": 3.719721800303781e-05, "loss": 1.6749, "step": 7860 }, { "epoch": 1.885258677537318, "grad_norm": 0.6501736044883728, "learning_rate": 3.7181229514749384e-05, "loss": 1.6582, "step": 7862 }, { "epoch": 1.885738265092021, "grad_norm": 0.780483603477478, "learning_rate": 3.716524102646095e-05, "loss": 1.7086, "step": 7864 }, { "epoch": 1.8862178526467237, "grad_norm": 0.6884295344352722, "learning_rate": 3.7149252538172516e-05, "loss": 1.668, "step": 7866 }, { "epoch": 1.8866974402014267, "grad_norm": 0.6804516911506653, "learning_rate": 3.713326404988409e-05, "loss": 1.7339, "step": 7868 }, { "epoch": 1.8871770277561297, "grad_norm": 0.5857244729995728, "learning_rate": 3.7117275561595654e-05, "loss": 1.7002, "step": 7870 }, { "epoch": 1.8876566153108327, "grad_norm": 0.6181894540786743, "learning_rate": 3.710128707330722e-05, "loss": 1.6503, "step": 7872 }, { "epoch": 1.8881362028655357, "grad_norm": 0.675798237323761, "learning_rate": 3.708529858501879e-05, "loss": 1.703, "step": 7874 }, { "epoch": 1.8886157904202387, "grad_norm": 0.7005626559257507, "learning_rate": 3.706931009673036e-05, "loss": 1.6794, "step": 7876 }, { "epoch": 1.8890953779749415, "grad_norm": 0.6162826418876648, "learning_rate": 3.7053321608441924e-05, "loss": 1.719, "step": 7878 }, { "epoch": 1.8895749655296445, "grad_norm": 0.6978678703308105, "learning_rate": 3.703733312015349e-05, "loss": 1.675, "step": 7880 }, { "epoch": 1.8900545530843473, "grad_norm": 0.6772316098213196, "learning_rate": 3.7021344631865055e-05, "loss": 1.6911, "step": 7882 }, { 
"epoch": 1.8905341406390503, "grad_norm": 0.6893572807312012, "learning_rate": 3.700535614357663e-05, "loss": 1.7424, "step": 7884 }, { "epoch": 1.8910137281937534, "grad_norm": 0.6172594428062439, "learning_rate": 3.698936765528819e-05, "loss": 1.7018, "step": 7886 }, { "epoch": 1.8914933157484564, "grad_norm": 0.6880853176116943, "learning_rate": 3.697337916699976e-05, "loss": 1.667, "step": 7888 }, { "epoch": 1.8919729033031594, "grad_norm": 0.6870656609535217, "learning_rate": 3.695739067871133e-05, "loss": 1.6991, "step": 7890 }, { "epoch": 1.8924524908578624, "grad_norm": 0.6388475298881531, "learning_rate": 3.69414021904229e-05, "loss": 1.7117, "step": 7892 }, { "epoch": 1.8929320784125652, "grad_norm": 0.6648794412612915, "learning_rate": 3.692541370213446e-05, "loss": 1.6787, "step": 7894 }, { "epoch": 1.8934116659672682, "grad_norm": 0.6270703673362732, "learning_rate": 3.6909425213846035e-05, "loss": 1.6581, "step": 7896 }, { "epoch": 1.893891253521971, "grad_norm": 0.6547582745552063, "learning_rate": 3.68934367255576e-05, "loss": 1.7031, "step": 7898 }, { "epoch": 1.894370841076674, "grad_norm": 0.62434321641922, "learning_rate": 3.687744823726917e-05, "loss": 1.7222, "step": 7900 }, { "epoch": 1.894850428631377, "grad_norm": 0.6520553827285767, "learning_rate": 3.686145974898074e-05, "loss": 1.6805, "step": 7902 }, { "epoch": 1.89533001618608, "grad_norm": 0.6491270661354065, "learning_rate": 3.6845471260692305e-05, "loss": 1.6957, "step": 7904 }, { "epoch": 1.895809603740783, "grad_norm": 0.7117295265197754, "learning_rate": 3.682948277240387e-05, "loss": 1.6653, "step": 7906 }, { "epoch": 1.896289191295486, "grad_norm": 0.6893346905708313, "learning_rate": 3.681349428411544e-05, "loss": 1.6833, "step": 7908 }, { "epoch": 1.8967687788501888, "grad_norm": 0.6687785983085632, "learning_rate": 3.679750579582701e-05, "loss": 1.6819, "step": 7910 }, { "epoch": 1.8972483664048918, "grad_norm": 0.6477510929107666, "learning_rate": 3.6781517307538574e-05, 
"loss": 1.7214, "step": 7912 }, { "epoch": 1.8977279539595946, "grad_norm": 0.6078613996505737, "learning_rate": 3.676552881925014e-05, "loss": 1.6782, "step": 7914 }, { "epoch": 1.8982075415142976, "grad_norm": 0.6656264066696167, "learning_rate": 3.6749540330961706e-05, "loss": 1.6648, "step": 7916 }, { "epoch": 1.8986871290690006, "grad_norm": 0.6325451135635376, "learning_rate": 3.673355184267328e-05, "loss": 1.7091, "step": 7918 }, { "epoch": 1.8991667166237036, "grad_norm": 0.6995405554771423, "learning_rate": 3.6717563354384844e-05, "loss": 1.6624, "step": 7920 }, { "epoch": 1.8996463041784066, "grad_norm": 0.6328856348991394, "learning_rate": 3.670157486609641e-05, "loss": 1.5878, "step": 7922 }, { "epoch": 1.9001258917331096, "grad_norm": 0.68600994348526, "learning_rate": 3.668558637780798e-05, "loss": 1.7242, "step": 7924 }, { "epoch": 1.9006054792878124, "grad_norm": 0.6199191808700562, "learning_rate": 3.666959788951955e-05, "loss": 1.7017, "step": 7926 }, { "epoch": 1.9010850668425154, "grad_norm": 0.7052134871482849, "learning_rate": 3.6653609401231114e-05, "loss": 1.6786, "step": 7928 }, { "epoch": 1.9015646543972184, "grad_norm": 0.6393725872039795, "learning_rate": 3.6637620912942686e-05, "loss": 1.6433, "step": 7930 }, { "epoch": 1.9020442419519212, "grad_norm": 0.7825372815132141, "learning_rate": 3.662163242465425e-05, "loss": 1.7049, "step": 7932 }, { "epoch": 1.9025238295066242, "grad_norm": 0.6826351284980774, "learning_rate": 3.660564393636582e-05, "loss": 1.693, "step": 7934 }, { "epoch": 1.9030034170613273, "grad_norm": 0.636398434638977, "learning_rate": 3.658965544807739e-05, "loss": 1.6931, "step": 7936 }, { "epoch": 1.9034830046160303, "grad_norm": 0.6009083986282349, "learning_rate": 3.6573666959788956e-05, "loss": 1.6287, "step": 7938 }, { "epoch": 1.9039625921707333, "grad_norm": 0.6228242516517639, "learning_rate": 3.655767847150052e-05, "loss": 1.6531, "step": 7940 }, { "epoch": 1.9044421797254363, "grad_norm": 
0.7411357760429382, "learning_rate": 3.6541689983212094e-05, "loss": 1.6805, "step": 7942 }, { "epoch": 1.904921767280139, "grad_norm": 0.6157864332199097, "learning_rate": 3.652570149492366e-05, "loss": 1.6925, "step": 7944 }, { "epoch": 1.905401354834842, "grad_norm": 0.776102602481842, "learning_rate": 3.6509713006635225e-05, "loss": 1.6969, "step": 7946 }, { "epoch": 1.9058809423895449, "grad_norm": 0.6356221437454224, "learning_rate": 3.64937245183468e-05, "loss": 1.6487, "step": 7948 }, { "epoch": 1.9063605299442479, "grad_norm": 0.5954484939575195, "learning_rate": 3.6477736030058357e-05, "loss": 1.6952, "step": 7950 }, { "epoch": 1.9068401174989509, "grad_norm": 0.641162633895874, "learning_rate": 3.646174754176992e-05, "loss": 1.6365, "step": 7952 }, { "epoch": 1.907319705053654, "grad_norm": 0.6433380246162415, "learning_rate": 3.6445759053481495e-05, "loss": 1.7307, "step": 7954 }, { "epoch": 1.907799292608357, "grad_norm": 0.7403784394264221, "learning_rate": 3.642977056519306e-05, "loss": 1.6651, "step": 7956 }, { "epoch": 1.90827888016306, "grad_norm": 0.7046185731887817, "learning_rate": 3.6413782076904626e-05, "loss": 1.7274, "step": 7958 }, { "epoch": 1.9087584677177627, "grad_norm": 0.6623694896697998, "learning_rate": 3.63977935886162e-05, "loss": 1.6782, "step": 7960 }, { "epoch": 1.9092380552724657, "grad_norm": 0.8405992388725281, "learning_rate": 3.6381805100327764e-05, "loss": 1.7213, "step": 7962 }, { "epoch": 1.9097176428271685, "grad_norm": 0.687022864818573, "learning_rate": 3.636581661203933e-05, "loss": 1.7102, "step": 7964 }, { "epoch": 1.9101972303818715, "grad_norm": 0.6518090963363647, "learning_rate": 3.63498281237509e-05, "loss": 1.7367, "step": 7966 }, { "epoch": 1.9106768179365745, "grad_norm": 0.6767498254776001, "learning_rate": 3.633383963546247e-05, "loss": 1.6712, "step": 7968 }, { "epoch": 1.9111564054912775, "grad_norm": 0.6482460498809814, "learning_rate": 3.6317851147174034e-05, "loss": 1.7282, "step": 7970 }, { 
"epoch": 1.9116359930459805, "grad_norm": 0.62384432554245, "learning_rate": 3.6301862658885606e-05, "loss": 1.6805, "step": 7972 }, { "epoch": 1.9121155806006835, "grad_norm": 0.6279187202453613, "learning_rate": 3.628587417059717e-05, "loss": 1.7384, "step": 7974 }, { "epoch": 1.9125951681553863, "grad_norm": 0.6743167638778687, "learning_rate": 3.626988568230874e-05, "loss": 1.6727, "step": 7976 }, { "epoch": 1.9130747557100893, "grad_norm": 0.7173577547073364, "learning_rate": 3.625389719402031e-05, "loss": 1.6708, "step": 7978 }, { "epoch": 1.9135543432647921, "grad_norm": 0.6131308078765869, "learning_rate": 3.6237908705731876e-05, "loss": 1.6679, "step": 7980 }, { "epoch": 1.9140339308194951, "grad_norm": 0.596360981464386, "learning_rate": 3.622192021744344e-05, "loss": 1.6389, "step": 7982 }, { "epoch": 1.9145135183741981, "grad_norm": 0.8192497491836548, "learning_rate": 3.6205931729155014e-05, "loss": 1.7024, "step": 7984 }, { "epoch": 1.9149931059289012, "grad_norm": 0.6745701432228088, "learning_rate": 3.618994324086657e-05, "loss": 1.6889, "step": 7986 }, { "epoch": 1.9154726934836042, "grad_norm": 0.7094804644584656, "learning_rate": 3.6173954752578145e-05, "loss": 1.6809, "step": 7988 }, { "epoch": 1.9159522810383072, "grad_norm": 0.7106441855430603, "learning_rate": 3.615796626428971e-05, "loss": 1.6898, "step": 7990 }, { "epoch": 1.91643186859301, "grad_norm": 0.7201178073883057, "learning_rate": 3.614197777600128e-05, "loss": 1.6858, "step": 7992 }, { "epoch": 1.916911456147713, "grad_norm": 0.6626922488212585, "learning_rate": 3.612598928771285e-05, "loss": 1.6787, "step": 7994 }, { "epoch": 1.917391043702416, "grad_norm": 0.7760750651359558, "learning_rate": 3.6110000799424415e-05, "loss": 1.6984, "step": 7996 }, { "epoch": 1.9178706312571188, "grad_norm": 0.7291256189346313, "learning_rate": 3.609401231113598e-05, "loss": 1.677, "step": 7998 }, { "epoch": 1.9183502188118218, "grad_norm": 0.6530420780181885, "learning_rate": 
3.607802382284755e-05, "loss": 1.7378, "step": 8000 }, { "epoch": 1.9183502188118218, "eval_loss": 1.7204610109329224, "eval_runtime": 331.2882, "eval_samples_per_second": 402.819, "eval_steps_per_second": 12.59, "step": 8000 }, { "epoch": 1.9188298063665248, "grad_norm": 0.6754840016365051, "learning_rate": 3.606203533455912e-05, "loss": 1.6886, "step": 8002 }, { "epoch": 1.9193093939212278, "grad_norm": 0.6694512963294983, "learning_rate": 3.6046046846270685e-05, "loss": 1.6419, "step": 8004 }, { "epoch": 1.9197889814759308, "grad_norm": 0.6946858167648315, "learning_rate": 3.603005835798226e-05, "loss": 1.6731, "step": 8006 }, { "epoch": 1.9202685690306338, "grad_norm": 0.6807816624641418, "learning_rate": 3.601406986969382e-05, "loss": 1.6917, "step": 8008 }, { "epoch": 1.9207481565853366, "grad_norm": 0.7169034481048584, "learning_rate": 3.599808138140539e-05, "loss": 1.6884, "step": 8010 }, { "epoch": 1.9212277441400396, "grad_norm": 0.5878745913505554, "learning_rate": 3.598209289311696e-05, "loss": 1.6738, "step": 8012 }, { "epoch": 1.9217073316947424, "grad_norm": 0.6517378091812134, "learning_rate": 3.596610440482853e-05, "loss": 1.7286, "step": 8014 }, { "epoch": 1.9221869192494454, "grad_norm": 0.6781705617904663, "learning_rate": 3.595011591654009e-05, "loss": 1.6905, "step": 8016 }, { "epoch": 1.9226665068041484, "grad_norm": 0.6143770813941956, "learning_rate": 3.5934127428251665e-05, "loss": 1.6929, "step": 8018 }, { "epoch": 1.9231460943588514, "grad_norm": 0.5912351012229919, "learning_rate": 3.591813893996323e-05, "loss": 1.6873, "step": 8020 }, { "epoch": 1.9236256819135544, "grad_norm": 0.6328504085540771, "learning_rate": 3.5902150451674796e-05, "loss": 1.6931, "step": 8022 }, { "epoch": 1.9241052694682574, "grad_norm": 0.6527055501937866, "learning_rate": 3.588616196338636e-05, "loss": 1.683, "step": 8024 }, { "epoch": 1.9245848570229602, "grad_norm": 0.8026780486106873, "learning_rate": 3.587017347509793e-05, "loss": 1.6792, "step": 8026 }, 
{ "epoch": 1.9250644445776632, "grad_norm": 0.7319624423980713, "learning_rate": 3.58541849868095e-05, "loss": 1.7038, "step": 8028 }, { "epoch": 1.925544032132366, "grad_norm": 0.8049572706222534, "learning_rate": 3.5838196498521066e-05, "loss": 1.6947, "step": 8030 }, { "epoch": 1.926023619687069, "grad_norm": 0.7694524526596069, "learning_rate": 3.582220801023263e-05, "loss": 1.709, "step": 8032 }, { "epoch": 1.926503207241772, "grad_norm": 0.6278097629547119, "learning_rate": 3.5806219521944204e-05, "loss": 1.6969, "step": 8034 }, { "epoch": 1.926982794796475, "grad_norm": 0.6401321291923523, "learning_rate": 3.579023103365577e-05, "loss": 1.6658, "step": 8036 }, { "epoch": 1.927462382351178, "grad_norm": 0.8009715676307678, "learning_rate": 3.5774242545367335e-05, "loss": 1.6864, "step": 8038 }, { "epoch": 1.927941969905881, "grad_norm": 0.7349458336830139, "learning_rate": 3.575825405707891e-05, "loss": 1.7279, "step": 8040 }, { "epoch": 1.9284215574605839, "grad_norm": 0.682866632938385, "learning_rate": 3.5742265568790474e-05, "loss": 1.6445, "step": 8042 }, { "epoch": 1.9289011450152869, "grad_norm": 0.6572544574737549, "learning_rate": 3.572627708050204e-05, "loss": 1.684, "step": 8044 }, { "epoch": 1.9293807325699897, "grad_norm": 0.6916688680648804, "learning_rate": 3.571028859221361e-05, "loss": 1.7401, "step": 8046 }, { "epoch": 1.9298603201246927, "grad_norm": 0.7322476506233215, "learning_rate": 3.569430010392518e-05, "loss": 1.614, "step": 8048 }, { "epoch": 1.9303399076793957, "grad_norm": 0.6285569667816162, "learning_rate": 3.567831161563674e-05, "loss": 1.6847, "step": 8050 }, { "epoch": 1.9308194952340987, "grad_norm": 0.6544349789619446, "learning_rate": 3.5662323127348316e-05, "loss": 1.6288, "step": 8052 }, { "epoch": 1.9312990827888017, "grad_norm": 0.7195642590522766, "learning_rate": 3.564633463905988e-05, "loss": 1.6762, "step": 8054 }, { "epoch": 1.9317786703435047, "grad_norm": 0.6513542532920837, "learning_rate": 
3.563034615077145e-05, "loss": 1.6838, "step": 8056 }, { "epoch": 1.9322582578982075, "grad_norm": 0.703486979007721, "learning_rate": 3.561435766248302e-05, "loss": 1.6855, "step": 8058 }, { "epoch": 1.9327378454529105, "grad_norm": 0.690883219242096, "learning_rate": 3.559836917419458e-05, "loss": 1.6918, "step": 8060 }, { "epoch": 1.9332174330076135, "grad_norm": 0.7755768299102783, "learning_rate": 3.5582380685906144e-05, "loss": 1.6799, "step": 8062 }, { "epoch": 1.9336970205623163, "grad_norm": 0.614804208278656, "learning_rate": 3.556639219761772e-05, "loss": 1.628, "step": 8064 }, { "epoch": 1.9341766081170193, "grad_norm": 0.6535418629646301, "learning_rate": 3.555040370932928e-05, "loss": 1.6516, "step": 8066 }, { "epoch": 1.9346561956717223, "grad_norm": 0.6598456501960754, "learning_rate": 3.553441522104085e-05, "loss": 1.7042, "step": 8068 }, { "epoch": 1.9351357832264253, "grad_norm": 0.6914973855018616, "learning_rate": 3.551842673275242e-05, "loss": 1.6766, "step": 8070 }, { "epoch": 1.9356153707811283, "grad_norm": 0.7190582752227783, "learning_rate": 3.5502438244463986e-05, "loss": 1.6677, "step": 8072 }, { "epoch": 1.9360949583358313, "grad_norm": 0.8434534072875977, "learning_rate": 3.548644975617555e-05, "loss": 1.6641, "step": 8074 }, { "epoch": 1.9365745458905341, "grad_norm": 0.7144259810447693, "learning_rate": 3.5470461267887124e-05, "loss": 1.671, "step": 8076 }, { "epoch": 1.9370541334452371, "grad_norm": 0.6364271640777588, "learning_rate": 3.545447277959869e-05, "loss": 1.6889, "step": 8078 }, { "epoch": 1.93753372099994, "grad_norm": 0.681068480014801, "learning_rate": 3.5438484291310256e-05, "loss": 1.7025, "step": 8080 }, { "epoch": 1.938013308554643, "grad_norm": 0.7080742120742798, "learning_rate": 3.542249580302183e-05, "loss": 1.6768, "step": 8082 }, { "epoch": 1.938492896109346, "grad_norm": 0.6795032620429993, "learning_rate": 3.5406507314733394e-05, "loss": 1.7316, "step": 8084 }, { "epoch": 1.938972483664049, "grad_norm": 
0.6975846290588379, "learning_rate": 3.539051882644496e-05, "loss": 1.6819, "step": 8086 }, { "epoch": 1.939452071218752, "grad_norm": 0.6703097224235535, "learning_rate": 3.537453033815653e-05, "loss": 1.7112, "step": 8088 }, { "epoch": 1.939931658773455, "grad_norm": 0.682579517364502, "learning_rate": 3.53585418498681e-05, "loss": 1.6622, "step": 8090 }, { "epoch": 1.9404112463281578, "grad_norm": 0.7008426189422607, "learning_rate": 3.5342553361579664e-05, "loss": 1.6829, "step": 8092 }, { "epoch": 1.9408908338828608, "grad_norm": 0.6050434708595276, "learning_rate": 3.5326564873291236e-05, "loss": 1.7148, "step": 8094 }, { "epoch": 1.9413704214375636, "grad_norm": 0.762579619884491, "learning_rate": 3.5310576385002795e-05, "loss": 1.6679, "step": 8096 }, { "epoch": 1.9418500089922666, "grad_norm": 0.5831575989723206, "learning_rate": 3.529458789671437e-05, "loss": 1.6939, "step": 8098 }, { "epoch": 1.9423295965469696, "grad_norm": 0.7762715816497803, "learning_rate": 3.527859940842593e-05, "loss": 1.6944, "step": 8100 }, { "epoch": 1.9428091841016726, "grad_norm": 0.8016173839569092, "learning_rate": 3.52626109201375e-05, "loss": 1.7096, "step": 8102 }, { "epoch": 1.9432887716563756, "grad_norm": 0.7325262427330017, "learning_rate": 3.524662243184907e-05, "loss": 1.6909, "step": 8104 }, { "epoch": 1.9437683592110786, "grad_norm": 0.6838918328285217, "learning_rate": 3.523063394356064e-05, "loss": 1.6885, "step": 8106 }, { "epoch": 1.9442479467657814, "grad_norm": 0.7334467768669128, "learning_rate": 3.52146454552722e-05, "loss": 1.672, "step": 8108 }, { "epoch": 1.9447275343204844, "grad_norm": 0.6359182000160217, "learning_rate": 3.5198656966983775e-05, "loss": 1.6939, "step": 8110 }, { "epoch": 1.9452071218751872, "grad_norm": 0.5935043096542358, "learning_rate": 3.518266847869534e-05, "loss": 1.7214, "step": 8112 }, { "epoch": 1.9456867094298902, "grad_norm": 0.6933405995368958, "learning_rate": 3.5166679990406907e-05, "loss": 1.6837, "step": 8114 }, { 
"epoch": 1.9461662969845932, "grad_norm": 0.6530511379241943, "learning_rate": 3.515069150211848e-05, "loss": 1.6379, "step": 8116 }, { "epoch": 1.9466458845392962, "grad_norm": 0.7067379951477051, "learning_rate": 3.5134703013830045e-05, "loss": 1.7018, "step": 8118 }, { "epoch": 1.9471254720939992, "grad_norm": 0.6429928541183472, "learning_rate": 3.511871452554161e-05, "loss": 1.7218, "step": 8120 }, { "epoch": 1.9476050596487022, "grad_norm": 0.6636118292808533, "learning_rate": 3.510272603725318e-05, "loss": 1.7067, "step": 8122 }, { "epoch": 1.948084647203405, "grad_norm": 0.6756719350814819, "learning_rate": 3.508673754896475e-05, "loss": 1.6488, "step": 8124 }, { "epoch": 1.948564234758108, "grad_norm": 0.759248673915863, "learning_rate": 3.5070749060676314e-05, "loss": 1.6606, "step": 8126 }, { "epoch": 1.949043822312811, "grad_norm": 0.6664876341819763, "learning_rate": 3.505476057238789e-05, "loss": 1.751, "step": 8128 }, { "epoch": 1.9495234098675138, "grad_norm": 0.5946485996246338, "learning_rate": 3.503877208409945e-05, "loss": 1.7108, "step": 8130 }, { "epoch": 1.9500029974222168, "grad_norm": 0.587777316570282, "learning_rate": 3.502278359581102e-05, "loss": 1.689, "step": 8132 }, { "epoch": 1.9504825849769198, "grad_norm": 0.6218248009681702, "learning_rate": 3.5006795107522584e-05, "loss": 1.6906, "step": 8134 }, { "epoch": 1.9509621725316229, "grad_norm": 0.6350857615470886, "learning_rate": 3.499080661923415e-05, "loss": 1.7269, "step": 8136 }, { "epoch": 1.9514417600863259, "grad_norm": 0.7609436511993408, "learning_rate": 3.497481813094572e-05, "loss": 1.6722, "step": 8138 }, { "epoch": 1.9519213476410289, "grad_norm": 0.6885733604431152, "learning_rate": 3.495882964265729e-05, "loss": 1.6284, "step": 8140 }, { "epoch": 1.9524009351957317, "grad_norm": 0.616371750831604, "learning_rate": 3.4942841154368853e-05, "loss": 1.6757, "step": 8142 }, { "epoch": 1.9528805227504347, "grad_norm": 0.6487219929695129, "learning_rate": 
3.4926852666080426e-05, "loss": 1.6815, "step": 8144 }, { "epoch": 1.9533601103051375, "grad_norm": 0.6586840748786926, "learning_rate": 3.491086417779199e-05, "loss": 1.7298, "step": 8146 }, { "epoch": 1.9538396978598405, "grad_norm": 0.6951699256896973, "learning_rate": 3.489487568950356e-05, "loss": 1.6886, "step": 8148 }, { "epoch": 1.9543192854145435, "grad_norm": 0.7510191202163696, "learning_rate": 3.487888720121513e-05, "loss": 1.619, "step": 8150 }, { "epoch": 1.9547988729692465, "grad_norm": 0.668768048286438, "learning_rate": 3.4862898712926696e-05, "loss": 1.6942, "step": 8152 }, { "epoch": 1.9552784605239495, "grad_norm": 0.6523802280426025, "learning_rate": 3.484691022463826e-05, "loss": 1.6658, "step": 8154 }, { "epoch": 1.9557580480786525, "grad_norm": 0.6350815892219543, "learning_rate": 3.4830921736349834e-05, "loss": 1.681, "step": 8156 }, { "epoch": 1.9562376356333553, "grad_norm": 0.755064845085144, "learning_rate": 3.48149332480614e-05, "loss": 1.7065, "step": 8158 }, { "epoch": 1.9567172231880583, "grad_norm": 0.6800840497016907, "learning_rate": 3.4798944759772965e-05, "loss": 1.6859, "step": 8160 }, { "epoch": 1.957196810742761, "grad_norm": 0.6668593287467957, "learning_rate": 3.478295627148454e-05, "loss": 1.7021, "step": 8162 }, { "epoch": 1.957676398297464, "grad_norm": 0.6988581418991089, "learning_rate": 3.47669677831961e-05, "loss": 1.6596, "step": 8164 }, { "epoch": 1.958155985852167, "grad_norm": 0.6272971630096436, "learning_rate": 3.475097929490767e-05, "loss": 1.6656, "step": 8166 }, { "epoch": 1.9586355734068701, "grad_norm": 0.7040666937828064, "learning_rate": 3.4734990806619235e-05, "loss": 1.7129, "step": 8168 }, { "epoch": 1.9591151609615731, "grad_norm": 0.6378180980682373, "learning_rate": 3.47190023183308e-05, "loss": 1.693, "step": 8170 }, { "epoch": 1.9595947485162761, "grad_norm": 0.6254209280014038, "learning_rate": 3.4703013830042366e-05, "loss": 1.6338, "step": 8172 }, { "epoch": 1.960074336070979, "grad_norm": 
0.6469828486442566, "learning_rate": 3.468702534175394e-05, "loss": 1.6671, "step": 8174 }, { "epoch": 1.960553923625682, "grad_norm": 0.6638965606689453, "learning_rate": 3.4671036853465504e-05, "loss": 1.6875, "step": 8176 }, { "epoch": 1.9610335111803847, "grad_norm": 0.5948504209518433, "learning_rate": 3.465504836517707e-05, "loss": 1.7122, "step": 8178 }, { "epoch": 1.9615130987350877, "grad_norm": 0.6644191145896912, "learning_rate": 3.463905987688864e-05, "loss": 1.7187, "step": 8180 }, { "epoch": 1.9619926862897907, "grad_norm": 0.6729462146759033, "learning_rate": 3.462307138860021e-05, "loss": 1.6906, "step": 8182 }, { "epoch": 1.9624722738444937, "grad_norm": 0.7013154029846191, "learning_rate": 3.4607082900311774e-05, "loss": 1.7077, "step": 8184 }, { "epoch": 1.9629518613991968, "grad_norm": 0.6777079701423645, "learning_rate": 3.4591094412023346e-05, "loss": 1.6795, "step": 8186 }, { "epoch": 1.9634314489538998, "grad_norm": 0.6560238003730774, "learning_rate": 3.457510592373491e-05, "loss": 1.7047, "step": 8188 }, { "epoch": 1.9639110365086025, "grad_norm": 0.6658952236175537, "learning_rate": 3.455911743544648e-05, "loss": 1.7281, "step": 8190 }, { "epoch": 1.9643906240633056, "grad_norm": 0.6482221484184265, "learning_rate": 3.454312894715805e-05, "loss": 1.6872, "step": 8192 }, { "epoch": 1.9648702116180086, "grad_norm": 0.6419835686683655, "learning_rate": 3.4527140458869616e-05, "loss": 1.6776, "step": 8194 }, { "epoch": 1.9653497991727114, "grad_norm": 0.6510510444641113, "learning_rate": 3.451115197058118e-05, "loss": 1.7229, "step": 8196 }, { "epoch": 1.9658293867274144, "grad_norm": 0.6738373041152954, "learning_rate": 3.4495163482292754e-05, "loss": 1.6512, "step": 8198 }, { "epoch": 1.9663089742821174, "grad_norm": 0.617775022983551, "learning_rate": 3.447917499400432e-05, "loss": 1.7278, "step": 8200 }, { "epoch": 1.9667885618368204, "grad_norm": 0.5950390696525574, "learning_rate": 3.4463186505715885e-05, "loss": 1.7082, "step": 8202 }, 
{ "epoch": 1.9672681493915234, "grad_norm": 0.7142738103866577, "learning_rate": 3.444719801742745e-05, "loss": 1.7079, "step": 8204 }, { "epoch": 1.9677477369462264, "grad_norm": 0.7217665910720825, "learning_rate": 3.443120952913902e-05, "loss": 1.6744, "step": 8206 }, { "epoch": 1.9682273245009292, "grad_norm": 0.7044743299484253, "learning_rate": 3.441522104085059e-05, "loss": 1.7237, "step": 8208 }, { "epoch": 1.9687069120556322, "grad_norm": 0.7353547811508179, "learning_rate": 3.4399232552562155e-05, "loss": 1.68, "step": 8210 }, { "epoch": 1.969186499610335, "grad_norm": 0.6988115310668945, "learning_rate": 3.438324406427372e-05, "loss": 1.6693, "step": 8212 }, { "epoch": 1.969666087165038, "grad_norm": 0.5977243781089783, "learning_rate": 3.436725557598529e-05, "loss": 1.6858, "step": 8214 }, { "epoch": 1.970145674719741, "grad_norm": 0.6153491735458374, "learning_rate": 3.435126708769686e-05, "loss": 1.71, "step": 8216 }, { "epoch": 1.970625262274444, "grad_norm": 0.7497369647026062, "learning_rate": 3.4335278599408425e-05, "loss": 1.6902, "step": 8218 }, { "epoch": 1.971104849829147, "grad_norm": 0.6422404050827026, "learning_rate": 3.431929011112e-05, "loss": 1.6865, "step": 8220 }, { "epoch": 1.97158443738385, "grad_norm": 0.6009935736656189, "learning_rate": 3.430330162283156e-05, "loss": 1.6921, "step": 8222 }, { "epoch": 1.9720640249385528, "grad_norm": 0.8237195611000061, "learning_rate": 3.428731313454313e-05, "loss": 1.7234, "step": 8224 }, { "epoch": 1.9725436124932558, "grad_norm": 0.7374351024627686, "learning_rate": 3.42713246462547e-05, "loss": 1.6662, "step": 8226 }, { "epoch": 1.9730232000479586, "grad_norm": 0.6869112849235535, "learning_rate": 3.425533615796627e-05, "loss": 1.6321, "step": 8228 }, { "epoch": 1.9735027876026616, "grad_norm": 0.6668219566345215, "learning_rate": 3.423934766967783e-05, "loss": 1.6662, "step": 8230 }, { "epoch": 1.9739823751573646, "grad_norm": 0.6028409600257874, "learning_rate": 3.4223359181389405e-05, 
"loss": 1.6773, "step": 8232 }, { "epoch": 1.9744619627120676, "grad_norm": 0.6400965452194214, "learning_rate": 3.420737069310097e-05, "loss": 1.6757, "step": 8234 }, { "epoch": 1.9749415502667707, "grad_norm": 0.6823108196258545, "learning_rate": 3.4191382204812536e-05, "loss": 1.6868, "step": 8236 }, { "epoch": 1.9754211378214737, "grad_norm": 0.7072123289108276, "learning_rate": 3.417539371652411e-05, "loss": 1.7163, "step": 8238 }, { "epoch": 1.9759007253761764, "grad_norm": 0.6482272148132324, "learning_rate": 3.4159405228235674e-05, "loss": 1.6815, "step": 8240 }, { "epoch": 1.9763803129308795, "grad_norm": 0.6473912596702576, "learning_rate": 3.414341673994724e-05, "loss": 1.72, "step": 8242 }, { "epoch": 1.9768599004855822, "grad_norm": 0.6724404692649841, "learning_rate": 3.4127428251658806e-05, "loss": 1.6509, "step": 8244 }, { "epoch": 1.9773394880402853, "grad_norm": 0.6437061429023743, "learning_rate": 3.411143976337037e-05, "loss": 1.714, "step": 8246 }, { "epoch": 1.9778190755949883, "grad_norm": 0.6276998519897461, "learning_rate": 3.4095451275081944e-05, "loss": 1.6392, "step": 8248 }, { "epoch": 1.9782986631496913, "grad_norm": 0.6805800199508667, "learning_rate": 3.407946278679351e-05, "loss": 1.6705, "step": 8250 }, { "epoch": 1.9787782507043943, "grad_norm": 0.6835786700248718, "learning_rate": 3.4063474298505075e-05, "loss": 1.6818, "step": 8252 }, { "epoch": 1.9792578382590973, "grad_norm": 0.5904322862625122, "learning_rate": 3.404748581021665e-05, "loss": 1.6679, "step": 8254 }, { "epoch": 1.9797374258138, "grad_norm": 0.6012238264083862, "learning_rate": 3.4031497321928214e-05, "loss": 1.6859, "step": 8256 }, { "epoch": 1.980217013368503, "grad_norm": 0.6818436980247498, "learning_rate": 3.401550883363978e-05, "loss": 1.685, "step": 8258 }, { "epoch": 1.980696600923206, "grad_norm": 0.6303591132164001, "learning_rate": 3.399952034535135e-05, "loss": 1.6788, "step": 8260 }, { "epoch": 1.9811761884779089, "grad_norm": 0.6164663434028625, 
"learning_rate": 3.398353185706292e-05, "loss": 1.6931, "step": 8262 }, { "epoch": 1.981655776032612, "grad_norm": 0.6059918999671936, "learning_rate": 3.396754336877448e-05, "loss": 1.6487, "step": 8264 }, { "epoch": 1.982135363587315, "grad_norm": 0.61863112449646, "learning_rate": 3.3951554880486056e-05, "loss": 1.6152, "step": 8266 }, { "epoch": 1.982614951142018, "grad_norm": 0.6355100870132446, "learning_rate": 3.393556639219762e-05, "loss": 1.7013, "step": 8268 }, { "epoch": 1.983094538696721, "grad_norm": 0.7149233222007751, "learning_rate": 3.391957790390919e-05, "loss": 1.7212, "step": 8270 }, { "epoch": 1.983574126251424, "grad_norm": 0.7889514565467834, "learning_rate": 3.390358941562076e-05, "loss": 1.7118, "step": 8272 }, { "epoch": 1.9840537138061267, "grad_norm": 0.5803468227386475, "learning_rate": 3.3887600927332325e-05, "loss": 1.6715, "step": 8274 }, { "epoch": 1.9845333013608297, "grad_norm": 0.6576654314994812, "learning_rate": 3.387161243904389e-05, "loss": 1.6842, "step": 8276 }, { "epoch": 1.9850128889155325, "grad_norm": 0.6169103384017944, "learning_rate": 3.3855623950755457e-05, "loss": 1.706, "step": 8278 }, { "epoch": 1.9854924764702355, "grad_norm": 0.7423117160797119, "learning_rate": 3.383963546246702e-05, "loss": 1.639, "step": 8280 }, { "epoch": 1.9859720640249385, "grad_norm": 0.6922323107719421, "learning_rate": 3.382364697417859e-05, "loss": 1.7366, "step": 8282 }, { "epoch": 1.9864516515796415, "grad_norm": 0.633983850479126, "learning_rate": 3.380765848589016e-05, "loss": 1.6964, "step": 8284 }, { "epoch": 1.9869312391343446, "grad_norm": 0.7189815044403076, "learning_rate": 3.3791669997601726e-05, "loss": 1.6996, "step": 8286 }, { "epoch": 1.9874108266890476, "grad_norm": 0.610712468624115, "learning_rate": 3.377568150931329e-05, "loss": 1.6872, "step": 8288 }, { "epoch": 1.9878904142437503, "grad_norm": 0.6426445841789246, "learning_rate": 3.3759693021024864e-05, "loss": 1.6391, "step": 8290 }, { "epoch": 
1.9883700017984534, "grad_norm": 0.6091791391372681, "learning_rate": 3.374370453273643e-05, "loss": 1.6299, "step": 8292 }, { "epoch": 1.9888495893531561, "grad_norm": 0.6356334090232849, "learning_rate": 3.3727716044447996e-05, "loss": 1.6919, "step": 8294 }, { "epoch": 1.9893291769078592, "grad_norm": 0.7127795219421387, "learning_rate": 3.371172755615957e-05, "loss": 1.6616, "step": 8296 }, { "epoch": 1.9898087644625622, "grad_norm": 0.6863290667533875, "learning_rate": 3.3695739067871134e-05, "loss": 1.7337, "step": 8298 }, { "epoch": 1.9902883520172652, "grad_norm": 0.7100715637207031, "learning_rate": 3.36797505795827e-05, "loss": 1.6804, "step": 8300 }, { "epoch": 1.9907679395719682, "grad_norm": 0.6072589755058289, "learning_rate": 3.366376209129427e-05, "loss": 1.6749, "step": 8302 }, { "epoch": 1.9912475271266712, "grad_norm": 0.6852173805236816, "learning_rate": 3.364777360300584e-05, "loss": 1.6855, "step": 8304 }, { "epoch": 1.991727114681374, "grad_norm": 0.5785421133041382, "learning_rate": 3.3631785114717403e-05, "loss": 1.7007, "step": 8306 }, { "epoch": 1.992206702236077, "grad_norm": 0.6479393243789673, "learning_rate": 3.3615796626428976e-05, "loss": 1.6714, "step": 8308 }, { "epoch": 1.9926862897907798, "grad_norm": 0.6300817131996155, "learning_rate": 3.359980813814054e-05, "loss": 1.6909, "step": 8310 }, { "epoch": 1.9931658773454828, "grad_norm": 0.602825403213501, "learning_rate": 3.358381964985211e-05, "loss": 1.7273, "step": 8312 }, { "epoch": 1.9936454649001858, "grad_norm": 0.6529088020324707, "learning_rate": 3.356783116156367e-05, "loss": 1.6716, "step": 8314 }, { "epoch": 1.9941250524548888, "grad_norm": 0.6528719067573547, "learning_rate": 3.355184267327524e-05, "loss": 1.7167, "step": 8316 }, { "epoch": 1.9946046400095918, "grad_norm": 0.5806543827056885, "learning_rate": 3.353585418498681e-05, "loss": 1.7081, "step": 8318 }, { "epoch": 1.9950842275642948, "grad_norm": 0.6980097889900208, "learning_rate": 3.351986569669838e-05, 
"loss": 1.652, "step": 8320 }, { "epoch": 1.9955638151189976, "grad_norm": 0.6478246450424194, "learning_rate": 3.350387720840994e-05, "loss": 1.6824, "step": 8322 }, { "epoch": 1.9960434026737006, "grad_norm": 0.678794801235199, "learning_rate": 3.3487888720121515e-05, "loss": 1.704, "step": 8324 }, { "epoch": 1.9965229902284036, "grad_norm": 0.6494038105010986, "learning_rate": 3.347190023183308e-05, "loss": 1.6752, "step": 8326 }, { "epoch": 1.9970025777831064, "grad_norm": 0.6091940402984619, "learning_rate": 3.3455911743544646e-05, "loss": 1.7035, "step": 8328 }, { "epoch": 1.9974821653378094, "grad_norm": 0.6413626670837402, "learning_rate": 3.343992325525622e-05, "loss": 1.6105, "step": 8330 }, { "epoch": 1.9979617528925124, "grad_norm": 0.603643000125885, "learning_rate": 3.3423934766967785e-05, "loss": 1.6528, "step": 8332 }, { "epoch": 1.9984413404472154, "grad_norm": 0.6080652475357056, "learning_rate": 3.340794627867935e-05, "loss": 1.7285, "step": 8334 }, { "epoch": 1.9989209280019185, "grad_norm": 0.7511417269706726, "learning_rate": 3.339195779039092e-05, "loss": 1.6375, "step": 8336 }, { "epoch": 1.9994005155566215, "grad_norm": 0.6166892051696777, "learning_rate": 3.337596930210249e-05, "loss": 1.7239, "step": 8338 }, { "epoch": 1.9998801031113242, "grad_norm": 0.6734156608581543, "learning_rate": 3.3359980813814054e-05, "loss": 1.6822, "step": 8340 }, { "epoch": 2.000359690666027, "grad_norm": 0.6356856226921082, "learning_rate": 3.334399232552563e-05, "loss": 1.6524, "step": 8342 }, { "epoch": 2.00083927822073, "grad_norm": 0.6470574140548706, "learning_rate": 3.332800383723719e-05, "loss": 1.6124, "step": 8344 }, { "epoch": 2.001318865775433, "grad_norm": 0.6235235333442688, "learning_rate": 3.331201534894876e-05, "loss": 1.6456, "step": 8346 }, { "epoch": 2.001798453330136, "grad_norm": 0.6055468320846558, "learning_rate": 3.329602686066033e-05, "loss": 1.6256, "step": 8348 }, { "epoch": 2.002278040884839, "grad_norm": 0.6345258355140686, 
"learning_rate": 3.328003837237189e-05, "loss": 1.634, "step": 8350 }, { "epoch": 2.002757628439542, "grad_norm": 0.6603770852088928, "learning_rate": 3.326404988408346e-05, "loss": 1.6313, "step": 8352 }, { "epoch": 2.003237215994245, "grad_norm": 0.5793284177780151, "learning_rate": 3.324806139579503e-05, "loss": 1.5971, "step": 8354 }, { "epoch": 2.003716803548948, "grad_norm": 0.6029469966888428, "learning_rate": 3.3232072907506593e-05, "loss": 1.6134, "step": 8356 }, { "epoch": 2.0041963911036507, "grad_norm": 0.655534029006958, "learning_rate": 3.3216084419218166e-05, "loss": 1.6184, "step": 8358 }, { "epoch": 2.0046759786583537, "grad_norm": 0.6816288828849792, "learning_rate": 3.320009593092973e-05, "loss": 1.6519, "step": 8360 }, { "epoch": 2.0051555662130567, "grad_norm": 0.6359346508979797, "learning_rate": 3.31841074426413e-05, "loss": 1.5757, "step": 8362 }, { "epoch": 2.0056351537677597, "grad_norm": 0.6460583209991455, "learning_rate": 3.316811895435287e-05, "loss": 1.625, "step": 8364 }, { "epoch": 2.0061147413224627, "grad_norm": 0.7845428586006165, "learning_rate": 3.3152130466064435e-05, "loss": 1.5965, "step": 8366 }, { "epoch": 2.0065943288771657, "grad_norm": 0.7819381952285767, "learning_rate": 3.3136141977776e-05, "loss": 1.5778, "step": 8368 }, { "epoch": 2.0070739164318687, "grad_norm": 0.7114874124526978, "learning_rate": 3.3120153489487574e-05, "loss": 1.7373, "step": 8370 }, { "epoch": 2.0075535039865717, "grad_norm": 0.7182443141937256, "learning_rate": 3.310416500119914e-05, "loss": 1.6272, "step": 8372 }, { "epoch": 2.0080330915412743, "grad_norm": 0.6407554745674133, "learning_rate": 3.3088176512910705e-05, "loss": 1.6097, "step": 8374 }, { "epoch": 2.0085126790959773, "grad_norm": 0.6343185901641846, "learning_rate": 3.307218802462228e-05, "loss": 1.6208, "step": 8376 }, { "epoch": 2.0089922666506803, "grad_norm": 0.6605022549629211, "learning_rate": 3.305619953633384e-05, "loss": 1.5839, "step": 8378 }, { "epoch": 
2.0094718542053833, "grad_norm": 0.6257832050323486, "learning_rate": 3.304021104804541e-05, "loss": 1.6308, "step": 8380 }, { "epoch": 2.0099514417600863, "grad_norm": 0.7217289805412292, "learning_rate": 3.302422255975698e-05, "loss": 1.6454, "step": 8382 }, { "epoch": 2.0104310293147893, "grad_norm": 0.7345327138900757, "learning_rate": 3.300823407146855e-05, "loss": 1.5907, "step": 8384 }, { "epoch": 2.0109106168694924, "grad_norm": 0.6301185488700867, "learning_rate": 3.2992245583180106e-05, "loss": 1.5913, "step": 8386 }, { "epoch": 2.0113902044241954, "grad_norm": 0.6275911927223206, "learning_rate": 3.297625709489168e-05, "loss": 1.6285, "step": 8388 }, { "epoch": 2.0118697919788984, "grad_norm": 0.6783732771873474, "learning_rate": 3.2960268606603244e-05, "loss": 1.6522, "step": 8390 }, { "epoch": 2.012349379533601, "grad_norm": 0.7404776811599731, "learning_rate": 3.294428011831481e-05, "loss": 1.6504, "step": 8392 }, { "epoch": 2.012828967088304, "grad_norm": 0.6788515448570251, "learning_rate": 3.292829163002638e-05, "loss": 1.6187, "step": 8394 }, { "epoch": 2.013308554643007, "grad_norm": 0.6237536072731018, "learning_rate": 3.291230314173795e-05, "loss": 1.6019, "step": 8396 }, { "epoch": 2.01378814219771, "grad_norm": 0.6788496971130371, "learning_rate": 3.2896314653449514e-05, "loss": 1.6384, "step": 8398 }, { "epoch": 2.014267729752413, "grad_norm": 0.7238354086875916, "learning_rate": 3.2880326165161086e-05, "loss": 1.6363, "step": 8400 }, { "epoch": 2.014267729752413, "eval_loss": 1.7224833965301514, "eval_runtime": 331.26, "eval_samples_per_second": 402.853, "eval_steps_per_second": 12.591, "step": 8400 }, { "epoch": 2.014747317307116, "grad_norm": 0.6747950315475464, "learning_rate": 3.286433767687265e-05, "loss": 1.6554, "step": 8402 }, { "epoch": 2.015226904861819, "grad_norm": 0.693892240524292, "learning_rate": 3.284834918858422e-05, "loss": 1.6307, "step": 8404 }, { "epoch": 2.015706492416522, "grad_norm": 0.6237190365791321, 
"learning_rate": 3.283236070029579e-05, "loss": 1.6609, "step": 8406 }, { "epoch": 2.0161860799712246, "grad_norm": 0.6356117129325867, "learning_rate": 3.2816372212007356e-05, "loss": 1.6376, "step": 8408 }, { "epoch": 2.0166656675259276, "grad_norm": 0.6834249496459961, "learning_rate": 3.280038372371892e-05, "loss": 1.6322, "step": 8410 }, { "epoch": 2.0171452550806306, "grad_norm": 0.6311863660812378, "learning_rate": 3.2784395235430494e-05, "loss": 1.607, "step": 8412 }, { "epoch": 2.0176248426353336, "grad_norm": 0.6442950367927551, "learning_rate": 3.276840674714206e-05, "loss": 1.6588, "step": 8414 }, { "epoch": 2.0181044301900366, "grad_norm": 0.6762610673904419, "learning_rate": 3.275241825885363e-05, "loss": 1.6567, "step": 8416 }, { "epoch": 2.0185840177447396, "grad_norm": 0.667732298374176, "learning_rate": 3.27364297705652e-05, "loss": 1.5859, "step": 8418 }, { "epoch": 2.0190636052994426, "grad_norm": 0.7198083996772766, "learning_rate": 3.2720441282276764e-05, "loss": 1.6189, "step": 8420 }, { "epoch": 2.0195431928541456, "grad_norm": 0.6373675465583801, "learning_rate": 3.2704452793988336e-05, "loss": 1.5932, "step": 8422 }, { "epoch": 2.020022780408848, "grad_norm": 0.7628896236419678, "learning_rate": 3.2688464305699895e-05, "loss": 1.6681, "step": 8424 }, { "epoch": 2.020502367963551, "grad_norm": 0.6613326072692871, "learning_rate": 3.267247581741146e-05, "loss": 1.6147, "step": 8426 }, { "epoch": 2.020981955518254, "grad_norm": 0.7206804752349854, "learning_rate": 3.265648732912303e-05, "loss": 1.6569, "step": 8428 }, { "epoch": 2.0214615430729572, "grad_norm": 0.719906210899353, "learning_rate": 3.26404988408346e-05, "loss": 1.5763, "step": 8430 }, { "epoch": 2.0219411306276602, "grad_norm": 0.7119305729866028, "learning_rate": 3.2624510352546165e-05, "loss": 1.6742, "step": 8432 }, { "epoch": 2.0224207181823632, "grad_norm": 0.6747273206710815, "learning_rate": 3.260852186425774e-05, "loss": 1.6046, "step": 8434 }, { "epoch": 
2.0229003057370663, "grad_norm": 0.7311089038848877, "learning_rate": 3.25925333759693e-05, "loss": 1.6232, "step": 8436 }, { "epoch": 2.0233798932917693, "grad_norm": 0.6838259100914001, "learning_rate": 3.257654488768087e-05, "loss": 1.6512, "step": 8438 }, { "epoch": 2.023859480846472, "grad_norm": 0.634032666683197, "learning_rate": 3.256055639939244e-05, "loss": 1.6704, "step": 8440 }, { "epoch": 2.024339068401175, "grad_norm": 0.607285737991333, "learning_rate": 3.2544567911104007e-05, "loss": 1.6648, "step": 8442 }, { "epoch": 2.024818655955878, "grad_norm": 0.6178430318832397, "learning_rate": 3.252857942281557e-05, "loss": 1.5822, "step": 8444 }, { "epoch": 2.025298243510581, "grad_norm": 0.6503200531005859, "learning_rate": 3.2512590934527145e-05, "loss": 1.6007, "step": 8446 }, { "epoch": 2.025777831065284, "grad_norm": 0.7454342842102051, "learning_rate": 3.249660244623871e-05, "loss": 1.5997, "step": 8448 }, { "epoch": 2.026257418619987, "grad_norm": 0.6107275485992432, "learning_rate": 3.2480613957950276e-05, "loss": 1.5777, "step": 8450 }, { "epoch": 2.02673700617469, "grad_norm": 0.7101795673370361, "learning_rate": 3.246462546966185e-05, "loss": 1.6197, "step": 8452 }, { "epoch": 2.027216593729393, "grad_norm": 0.6344289183616638, "learning_rate": 3.2448636981373414e-05, "loss": 1.6138, "step": 8454 }, { "epoch": 2.027696181284096, "grad_norm": 0.842406690120697, "learning_rate": 3.243264849308498e-05, "loss": 1.6398, "step": 8456 }, { "epoch": 2.0281757688387985, "grad_norm": 0.850006103515625, "learning_rate": 3.241666000479655e-05, "loss": 1.6097, "step": 8458 }, { "epoch": 2.0286553563935015, "grad_norm": 0.690979540348053, "learning_rate": 3.240067151650811e-05, "loss": 1.628, "step": 8460 }, { "epoch": 2.0291349439482045, "grad_norm": 0.6292360424995422, "learning_rate": 3.2384683028219684e-05, "loss": 1.5619, "step": 8462 }, { "epoch": 2.0296145315029075, "grad_norm": 0.6163510084152222, "learning_rate": 3.236869453993125e-05, "loss": 
1.6497, "step": 8464 }, { "epoch": 2.0300941190576105, "grad_norm": 0.6381913423538208, "learning_rate": 3.2352706051642815e-05, "loss": 1.6328, "step": 8466 }, { "epoch": 2.0305737066123135, "grad_norm": 0.6958069205284119, "learning_rate": 3.233671756335439e-05, "loss": 1.6317, "step": 8468 }, { "epoch": 2.0310532941670165, "grad_norm": 0.6959384679794312, "learning_rate": 3.2320729075065953e-05, "loss": 1.6205, "step": 8470 }, { "epoch": 2.0315328817217195, "grad_norm": 0.6045403480529785, "learning_rate": 3.230474058677752e-05, "loss": 1.6661, "step": 8472 }, { "epoch": 2.032012469276422, "grad_norm": 0.6634867191314697, "learning_rate": 3.228875209848909e-05, "loss": 1.6045, "step": 8474 }, { "epoch": 2.032492056831125, "grad_norm": 0.6565666198730469, "learning_rate": 3.227276361020066e-05, "loss": 1.6571, "step": 8476 }, { "epoch": 2.032971644385828, "grad_norm": 0.657392680644989, "learning_rate": 3.225677512191222e-05, "loss": 1.6547, "step": 8478 }, { "epoch": 2.033451231940531, "grad_norm": 0.7220017910003662, "learning_rate": 3.2240786633623796e-05, "loss": 1.6701, "step": 8480 }, { "epoch": 2.033930819495234, "grad_norm": 0.7025959491729736, "learning_rate": 3.222479814533536e-05, "loss": 1.6156, "step": 8482 }, { "epoch": 2.034410407049937, "grad_norm": 0.6680828332901001, "learning_rate": 3.220880965704693e-05, "loss": 1.6091, "step": 8484 }, { "epoch": 2.03488999460464, "grad_norm": 0.7016916275024414, "learning_rate": 3.21928211687585e-05, "loss": 1.6002, "step": 8486 }, { "epoch": 2.035369582159343, "grad_norm": 0.7392259836196899, "learning_rate": 3.2176832680470065e-05, "loss": 1.6409, "step": 8488 }, { "epoch": 2.0358491697140457, "grad_norm": 0.6520053744316101, "learning_rate": 3.216084419218163e-05, "loss": 1.6043, "step": 8490 }, { "epoch": 2.0363287572687487, "grad_norm": 0.6984710097312927, "learning_rate": 3.21448557038932e-05, "loss": 1.6413, "step": 8492 }, { "epoch": 2.0368083448234517, "grad_norm": 0.6394338011741638, 
"learning_rate": 3.212886721560477e-05, "loss": 1.6207, "step": 8494 }, { "epoch": 2.0372879323781548, "grad_norm": 0.591716468334198, "learning_rate": 3.211287872731633e-05, "loss": 1.6259, "step": 8496 }, { "epoch": 2.0377675199328578, "grad_norm": 0.6892250180244446, "learning_rate": 3.20968902390279e-05, "loss": 1.6019, "step": 8498 }, { "epoch": 2.0382471074875608, "grad_norm": 0.6660720705986023, "learning_rate": 3.2080901750739466e-05, "loss": 1.6578, "step": 8500 }, { "epoch": 2.038726695042264, "grad_norm": 0.6990212202072144, "learning_rate": 3.206491326245103e-05, "loss": 1.6206, "step": 8502 }, { "epoch": 2.039206282596967, "grad_norm": 0.6718714833259583, "learning_rate": 3.2048924774162604e-05, "loss": 1.6529, "step": 8504 }, { "epoch": 2.0396858701516694, "grad_norm": 0.6585991978645325, "learning_rate": 3.203293628587417e-05, "loss": 1.6157, "step": 8506 }, { "epoch": 2.0401654577063724, "grad_norm": 0.7937438488006592, "learning_rate": 3.2016947797585736e-05, "loss": 1.6734, "step": 8508 }, { "epoch": 2.0406450452610754, "grad_norm": 0.6878540515899658, "learning_rate": 3.200095930929731e-05, "loss": 1.6094, "step": 8510 }, { "epoch": 2.0411246328157784, "grad_norm": 0.7133707404136658, "learning_rate": 3.1984970821008874e-05, "loss": 1.6366, "step": 8512 }, { "epoch": 2.0416042203704814, "grad_norm": 0.6923553347587585, "learning_rate": 3.1968982332720446e-05, "loss": 1.6342, "step": 8514 }, { "epoch": 2.0420838079251844, "grad_norm": 0.6779884696006775, "learning_rate": 3.195299384443201e-05, "loss": 1.6021, "step": 8516 }, { "epoch": 2.0425633954798874, "grad_norm": 0.6370491981506348, "learning_rate": 3.193700535614358e-05, "loss": 1.6459, "step": 8518 }, { "epoch": 2.0430429830345904, "grad_norm": 0.7318246960639954, "learning_rate": 3.192101686785515e-05, "loss": 1.6656, "step": 8520 }, { "epoch": 2.0435225705892934, "grad_norm": 0.6919170022010803, "learning_rate": 3.1905028379566716e-05, "loss": 1.5955, "step": 8522 }, { "epoch": 
2.044002158143996, "grad_norm": 0.7273973822593689, "learning_rate": 3.188903989127828e-05, "loss": 1.6511, "step": 8524 }, { "epoch": 2.044481745698699, "grad_norm": 0.6742769479751587, "learning_rate": 3.1873051402989854e-05, "loss": 1.6233, "step": 8526 }, { "epoch": 2.044961333253402, "grad_norm": 0.6500266790390015, "learning_rate": 3.185706291470142e-05, "loss": 1.629, "step": 8528 }, { "epoch": 2.045440920808105, "grad_norm": 0.6640816926956177, "learning_rate": 3.1841074426412985e-05, "loss": 1.6315, "step": 8530 }, { "epoch": 2.045920508362808, "grad_norm": 0.6381326913833618, "learning_rate": 3.182508593812455e-05, "loss": 1.6348, "step": 8532 }, { "epoch": 2.046400095917511, "grad_norm": 0.6434223651885986, "learning_rate": 3.180909744983612e-05, "loss": 1.6497, "step": 8534 }, { "epoch": 2.046879683472214, "grad_norm": 0.6389319896697998, "learning_rate": 3.179310896154768e-05, "loss": 1.6459, "step": 8536 }, { "epoch": 2.047359271026917, "grad_norm": 0.6300839781761169, "learning_rate": 3.1777120473259255e-05, "loss": 1.5954, "step": 8538 }, { "epoch": 2.0478388585816196, "grad_norm": 0.6813657879829407, "learning_rate": 3.176113198497082e-05, "loss": 1.6365, "step": 8540 }, { "epoch": 2.0483184461363226, "grad_norm": 0.6575913429260254, "learning_rate": 3.1745143496682386e-05, "loss": 1.6395, "step": 8542 }, { "epoch": 2.0487980336910256, "grad_norm": 0.6250491738319397, "learning_rate": 3.172915500839396e-05, "loss": 1.6385, "step": 8544 }, { "epoch": 2.0492776212457287, "grad_norm": 0.6425729990005493, "learning_rate": 3.1713166520105525e-05, "loss": 1.5828, "step": 8546 }, { "epoch": 2.0497572088004317, "grad_norm": 0.627362847328186, "learning_rate": 3.169717803181709e-05, "loss": 1.6386, "step": 8548 }, { "epoch": 2.0502367963551347, "grad_norm": 0.6873817443847656, "learning_rate": 3.168118954352866e-05, "loss": 1.6456, "step": 8550 }, { "epoch": 2.0507163839098377, "grad_norm": 0.6946566700935364, "learning_rate": 3.166520105524023e-05, "loss": 
1.5884, "step": 8552 }, { "epoch": 2.0511959714645407, "grad_norm": 0.7440459132194519, "learning_rate": 3.1649212566951794e-05, "loss": 1.6328, "step": 8554 }, { "epoch": 2.0516755590192433, "grad_norm": 0.6813302636146545, "learning_rate": 3.163322407866337e-05, "loss": 1.6437, "step": 8556 }, { "epoch": 2.0521551465739463, "grad_norm": 0.8327045440673828, "learning_rate": 3.161723559037493e-05, "loss": 1.7013, "step": 8558 }, { "epoch": 2.0526347341286493, "grad_norm": 0.7364752888679504, "learning_rate": 3.16012471020865e-05, "loss": 1.6416, "step": 8560 }, { "epoch": 2.0531143216833523, "grad_norm": 0.7136257290840149, "learning_rate": 3.158525861379807e-05, "loss": 1.6957, "step": 8562 }, { "epoch": 2.0535939092380553, "grad_norm": 0.6204575300216675, "learning_rate": 3.1569270125509636e-05, "loss": 1.6101, "step": 8564 }, { "epoch": 2.0540734967927583, "grad_norm": 0.6880372166633606, "learning_rate": 3.15532816372212e-05, "loss": 1.622, "step": 8566 }, { "epoch": 2.0545530843474613, "grad_norm": 0.6913808584213257, "learning_rate": 3.153729314893277e-05, "loss": 1.634, "step": 8568 }, { "epoch": 2.0550326719021643, "grad_norm": 0.7482178211212158, "learning_rate": 3.152130466064433e-05, "loss": 1.5582, "step": 8570 }, { "epoch": 2.055512259456867, "grad_norm": 0.7060802578926086, "learning_rate": 3.1505316172355906e-05, "loss": 1.6768, "step": 8572 }, { "epoch": 2.05599184701157, "grad_norm": 0.6938905119895935, "learning_rate": 3.148932768406747e-05, "loss": 1.6225, "step": 8574 }, { "epoch": 2.056471434566273, "grad_norm": 0.6982652544975281, "learning_rate": 3.147333919577904e-05, "loss": 1.5817, "step": 8576 }, { "epoch": 2.056951022120976, "grad_norm": 0.7727763056755066, "learning_rate": 3.145735070749061e-05, "loss": 1.6254, "step": 8578 }, { "epoch": 2.057430609675679, "grad_norm": 0.7219879627227783, "learning_rate": 3.1441362219202175e-05, "loss": 1.6125, "step": 8580 }, { "epoch": 2.057910197230382, "grad_norm": 0.7010926604270935, 
"learning_rate": 3.142537373091374e-05, "loss": 1.6364, "step": 8582 }, { "epoch": 2.058389784785085, "grad_norm": 0.6482648253440857, "learning_rate": 3.1409385242625314e-05, "loss": 1.5911, "step": 8584 }, { "epoch": 2.058869372339788, "grad_norm": 0.8062965869903564, "learning_rate": 3.139339675433688e-05, "loss": 1.6421, "step": 8586 }, { "epoch": 2.059348959894491, "grad_norm": 0.6413891911506653, "learning_rate": 3.1377408266048445e-05, "loss": 1.6036, "step": 8588 }, { "epoch": 2.0598285474491935, "grad_norm": 0.6845553517341614, "learning_rate": 3.136141977776002e-05, "loss": 1.6054, "step": 8590 }, { "epoch": 2.0603081350038965, "grad_norm": 0.7680586576461792, "learning_rate": 3.134543128947158e-05, "loss": 1.5782, "step": 8592 }, { "epoch": 2.0607877225585995, "grad_norm": 0.712758481502533, "learning_rate": 3.132944280118315e-05, "loss": 1.5887, "step": 8594 }, { "epoch": 2.0612673101133026, "grad_norm": 0.692294180393219, "learning_rate": 3.131345431289472e-05, "loss": 1.6502, "step": 8596 }, { "epoch": 2.0617468976680056, "grad_norm": 0.6461578607559204, "learning_rate": 3.129746582460629e-05, "loss": 1.6946, "step": 8598 }, { "epoch": 2.0622264852227086, "grad_norm": 0.6732186079025269, "learning_rate": 3.128147733631785e-05, "loss": 1.5946, "step": 8600 }, { "epoch": 2.0627060727774116, "grad_norm": 0.6543331146240234, "learning_rate": 3.1265488848029425e-05, "loss": 1.6214, "step": 8602 }, { "epoch": 2.0631856603321146, "grad_norm": 0.6882637143135071, "learning_rate": 3.1249500359740984e-05, "loss": 1.6345, "step": 8604 }, { "epoch": 2.063665247886817, "grad_norm": 0.7584676742553711, "learning_rate": 3.123351187145255e-05, "loss": 1.6655, "step": 8606 }, { "epoch": 2.06414483544152, "grad_norm": 0.6601169109344482, "learning_rate": 3.121752338316412e-05, "loss": 1.6805, "step": 8608 }, { "epoch": 2.064624422996223, "grad_norm": 0.8022003173828125, "learning_rate": 3.120153489487569e-05, "loss": 1.6351, "step": 8610 }, { "epoch": 
2.065104010550926, "grad_norm": 0.7425168752670288, "learning_rate": 3.118554640658726e-05, "loss": 1.6496, "step": 8612 }, { "epoch": 2.065583598105629, "grad_norm": 0.6576455235481262, "learning_rate": 3.1169557918298826e-05, "loss": 1.6304, "step": 8614 }, { "epoch": 2.066063185660332, "grad_norm": 0.724745512008667, "learning_rate": 3.115356943001039e-05, "loss": 1.6173, "step": 8616 }, { "epoch": 2.066542773215035, "grad_norm": 0.6949250102043152, "learning_rate": 3.1137580941721964e-05, "loss": 1.6857, "step": 8618 }, { "epoch": 2.067022360769738, "grad_norm": 0.7420800924301147, "learning_rate": 3.112159245343353e-05, "loss": 1.6772, "step": 8620 }, { "epoch": 2.067501948324441, "grad_norm": 0.6721562147140503, "learning_rate": 3.1105603965145096e-05, "loss": 1.6544, "step": 8622 }, { "epoch": 2.067981535879144, "grad_norm": 0.7579545974731445, "learning_rate": 3.108961547685667e-05, "loss": 1.6184, "step": 8624 }, { "epoch": 2.068461123433847, "grad_norm": 0.686438798904419, "learning_rate": 3.1073626988568234e-05, "loss": 1.662, "step": 8626 }, { "epoch": 2.06894071098855, "grad_norm": 0.7794975638389587, "learning_rate": 3.10576385002798e-05, "loss": 1.6385, "step": 8628 }, { "epoch": 2.069420298543253, "grad_norm": 0.6378806829452515, "learning_rate": 3.104165001199137e-05, "loss": 1.6568, "step": 8630 }, { "epoch": 2.069899886097956, "grad_norm": 0.6344314217567444, "learning_rate": 3.102566152370294e-05, "loss": 1.5909, "step": 8632 }, { "epoch": 2.070379473652659, "grad_norm": 0.7636588215827942, "learning_rate": 3.1009673035414503e-05, "loss": 1.6262, "step": 8634 }, { "epoch": 2.070859061207362, "grad_norm": 0.7170425057411194, "learning_rate": 3.0993684547126076e-05, "loss": 1.6553, "step": 8636 }, { "epoch": 2.0713386487620644, "grad_norm": 0.6777974963188171, "learning_rate": 3.097769605883764e-05, "loss": 1.6329, "step": 8638 }, { "epoch": 2.0718182363167674, "grad_norm": 0.7179539203643799, "learning_rate": 3.096170757054921e-05, "loss": 
1.6128, "step": 8640 }, { "epoch": 2.0722978238714704, "grad_norm": 0.7758874297142029, "learning_rate": 3.094571908226077e-05, "loss": 1.6436, "step": 8642 }, { "epoch": 2.0727774114261734, "grad_norm": 0.739707350730896, "learning_rate": 3.092973059397234e-05, "loss": 1.6655, "step": 8644 }, { "epoch": 2.0732569989808765, "grad_norm": 0.7610236406326294, "learning_rate": 3.0913742105683904e-05, "loss": 1.6239, "step": 8646 }, { "epoch": 2.0737365865355795, "grad_norm": 0.7092705965042114, "learning_rate": 3.089775361739548e-05, "loss": 1.6731, "step": 8648 }, { "epoch": 2.0742161740902825, "grad_norm": 0.8838898539543152, "learning_rate": 3.088176512910704e-05, "loss": 1.6795, "step": 8650 }, { "epoch": 2.0746957616449855, "grad_norm": 0.6336094737052917, "learning_rate": 3.086577664081861e-05, "loss": 1.5953, "step": 8652 }, { "epoch": 2.0751753491996885, "grad_norm": 0.6866628527641296, "learning_rate": 3.084978815253018e-05, "loss": 1.6556, "step": 8654 }, { "epoch": 2.075654936754391, "grad_norm": 0.6781659126281738, "learning_rate": 3.0833799664241747e-05, "loss": 1.6427, "step": 8656 }, { "epoch": 2.076134524309094, "grad_norm": 0.7626724243164062, "learning_rate": 3.081781117595331e-05, "loss": 1.6228, "step": 8658 }, { "epoch": 2.076614111863797, "grad_norm": 0.7414382100105286, "learning_rate": 3.0801822687664885e-05, "loss": 1.6616, "step": 8660 }, { "epoch": 2.0770936994185, "grad_norm": 0.6978985667228699, "learning_rate": 3.078583419937645e-05, "loss": 1.5926, "step": 8662 }, { "epoch": 2.077573286973203, "grad_norm": 0.7235999703407288, "learning_rate": 3.0769845711088016e-05, "loss": 1.6695, "step": 8664 }, { "epoch": 2.078052874527906, "grad_norm": 0.6834054589271545, "learning_rate": 3.075385722279959e-05, "loss": 1.5692, "step": 8666 }, { "epoch": 2.078532462082609, "grad_norm": 0.6974983811378479, "learning_rate": 3.0737868734511154e-05, "loss": 1.6819, "step": 8668 }, { "epoch": 2.079012049637312, "grad_norm": 0.6775282025337219, 
"learning_rate": 3.072188024622272e-05, "loss": 1.6469, "step": 8670 }, { "epoch": 2.0794916371920147, "grad_norm": 0.6748122572898865, "learning_rate": 3.070589175793429e-05, "loss": 1.6202, "step": 8672 }, { "epoch": 2.0799712247467177, "grad_norm": 0.6931487917900085, "learning_rate": 3.068990326964586e-05, "loss": 1.5907, "step": 8674 }, { "epoch": 2.0804508123014207, "grad_norm": 0.8337322473526001, "learning_rate": 3.0673914781357424e-05, "loss": 1.6534, "step": 8676 }, { "epoch": 2.0809303998561237, "grad_norm": 0.7394777536392212, "learning_rate": 3.065792629306899e-05, "loss": 1.6644, "step": 8678 }, { "epoch": 2.0814099874108267, "grad_norm": 0.7791523933410645, "learning_rate": 3.0641937804780555e-05, "loss": 1.6567, "step": 8680 }, { "epoch": 2.0818895749655297, "grad_norm": 0.6905451416969299, "learning_rate": 3.062594931649213e-05, "loss": 1.6189, "step": 8682 }, { "epoch": 2.0823691625202327, "grad_norm": 0.6629452705383301, "learning_rate": 3.0609960828203693e-05, "loss": 1.6442, "step": 8684 }, { "epoch": 2.0828487500749358, "grad_norm": 0.787812352180481, "learning_rate": 3.059397233991526e-05, "loss": 1.6411, "step": 8686 }, { "epoch": 2.0833283376296383, "grad_norm": 0.7998948693275452, "learning_rate": 3.057798385162683e-05, "loss": 1.6467, "step": 8688 }, { "epoch": 2.0838079251843413, "grad_norm": 0.821109414100647, "learning_rate": 3.05619953633384e-05, "loss": 1.6386, "step": 8690 }, { "epoch": 2.0842875127390443, "grad_norm": 0.68824702501297, "learning_rate": 3.054600687504996e-05, "loss": 1.6645, "step": 8692 }, { "epoch": 2.0847671002937473, "grad_norm": 0.6521971225738525, "learning_rate": 3.0530018386761535e-05, "loss": 1.6197, "step": 8694 }, { "epoch": 2.0852466878484504, "grad_norm": 0.7034232020378113, "learning_rate": 3.05140298984731e-05, "loss": 1.5866, "step": 8696 }, { "epoch": 2.0857262754031534, "grad_norm": 0.7577939629554749, "learning_rate": 3.049804141018467e-05, "loss": 1.6299, "step": 8698 }, { "epoch": 
2.0862058629578564, "grad_norm": 0.7220171689987183, "learning_rate": 3.0482052921896236e-05, "loss": 1.647, "step": 8700 }, { "epoch": 2.0866854505125594, "grad_norm": 0.7028330564498901, "learning_rate": 3.0466064433607805e-05, "loss": 1.5679, "step": 8702 }, { "epoch": 2.087165038067262, "grad_norm": 0.6414615511894226, "learning_rate": 3.0450075945319374e-05, "loss": 1.6442, "step": 8704 }, { "epoch": 2.087644625621965, "grad_norm": 0.6404299736022949, "learning_rate": 3.043408745703094e-05, "loss": 1.6228, "step": 8706 }, { "epoch": 2.088124213176668, "grad_norm": 0.7222985625267029, "learning_rate": 3.041809896874251e-05, "loss": 1.5966, "step": 8708 }, { "epoch": 2.088603800731371, "grad_norm": 0.6767625212669373, "learning_rate": 3.0402110480454078e-05, "loss": 1.6091, "step": 8710 }, { "epoch": 2.089083388286074, "grad_norm": 0.8501393795013428, "learning_rate": 3.0386121992165644e-05, "loss": 1.6474, "step": 8712 }, { "epoch": 2.089562975840777, "grad_norm": 0.7627061009407043, "learning_rate": 3.0370133503877206e-05, "loss": 1.6481, "step": 8714 }, { "epoch": 2.09004256339548, "grad_norm": 0.6440218687057495, "learning_rate": 3.0354145015588775e-05, "loss": 1.6082, "step": 8716 }, { "epoch": 2.090522150950183, "grad_norm": 0.7294249534606934, "learning_rate": 3.0338156527300344e-05, "loss": 1.6152, "step": 8718 }, { "epoch": 2.091001738504886, "grad_norm": 0.6421859860420227, "learning_rate": 3.032216803901191e-05, "loss": 1.641, "step": 8720 }, { "epoch": 2.0914813260595886, "grad_norm": 0.6858601570129395, "learning_rate": 3.030617955072348e-05, "loss": 1.6291, "step": 8722 }, { "epoch": 2.0919609136142916, "grad_norm": 0.6566706299781799, "learning_rate": 3.0290191062435048e-05, "loss": 1.6138, "step": 8724 }, { "epoch": 2.0924405011689946, "grad_norm": 0.6850074529647827, "learning_rate": 3.0274202574146614e-05, "loss": 1.5967, "step": 8726 }, { "epoch": 2.0929200887236976, "grad_norm": 0.6837908625602722, "learning_rate": 3.0258214085858183e-05, 
"loss": 1.6189, "step": 8728 }, { "epoch": 2.0933996762784006, "grad_norm": 0.6635924577713013, "learning_rate": 3.0242225597569752e-05, "loss": 1.5908, "step": 8730 }, { "epoch": 2.0938792638331036, "grad_norm": 0.6995803713798523, "learning_rate": 3.0226237109281318e-05, "loss": 1.6326, "step": 8732 }, { "epoch": 2.0943588513878066, "grad_norm": 0.7533659338951111, "learning_rate": 3.0210248620992887e-05, "loss": 1.654, "step": 8734 }, { "epoch": 2.0948384389425097, "grad_norm": 0.6907867193222046, "learning_rate": 3.0194260132704456e-05, "loss": 1.6268, "step": 8736 }, { "epoch": 2.095318026497212, "grad_norm": 0.6241247057914734, "learning_rate": 3.017827164441602e-05, "loss": 1.6345, "step": 8738 }, { "epoch": 2.0957976140519152, "grad_norm": 0.6138958930969238, "learning_rate": 3.016228315612759e-05, "loss": 1.5961, "step": 8740 }, { "epoch": 2.0962772016066182, "grad_norm": 0.6095771193504333, "learning_rate": 3.014629466783916e-05, "loss": 1.6089, "step": 8742 }, { "epoch": 2.0967567891613212, "grad_norm": 0.767591655254364, "learning_rate": 3.0130306179550725e-05, "loss": 1.6349, "step": 8744 }, { "epoch": 2.0972363767160243, "grad_norm": 0.7064860463142395, "learning_rate": 3.0114317691262294e-05, "loss": 1.6111, "step": 8746 }, { "epoch": 2.0977159642707273, "grad_norm": 0.7334032654762268, "learning_rate": 3.0098329202973864e-05, "loss": 1.6128, "step": 8748 }, { "epoch": 2.0981955518254303, "grad_norm": 0.6694831252098083, "learning_rate": 3.0082340714685426e-05, "loss": 1.6499, "step": 8750 }, { "epoch": 2.0986751393801333, "grad_norm": 0.6933753490447998, "learning_rate": 3.006635222639699e-05, "loss": 1.6832, "step": 8752 }, { "epoch": 2.099154726934836, "grad_norm": 0.6322463750839233, "learning_rate": 3.005036373810856e-05, "loss": 1.6161, "step": 8754 }, { "epoch": 2.099634314489539, "grad_norm": 0.7164376974105835, "learning_rate": 3.003437524982013e-05, "loss": 1.598, "step": 8756 }, { "epoch": 2.100113902044242, "grad_norm": 
0.7027553915977478, "learning_rate": 3.0018386761531695e-05, "loss": 1.663, "step": 8758 }, { "epoch": 2.100593489598945, "grad_norm": 0.7490584254264832, "learning_rate": 3.0002398273243265e-05, "loss": 1.6603, "step": 8760 }, { "epoch": 2.101073077153648, "grad_norm": 0.6807793378829956, "learning_rate": 2.9986409784954834e-05, "loss": 1.6168, "step": 8762 }, { "epoch": 2.101552664708351, "grad_norm": 0.880546510219574, "learning_rate": 2.9970421296666403e-05, "loss": 1.6328, "step": 8764 }, { "epoch": 2.102032252263054, "grad_norm": 0.6586583256721497, "learning_rate": 2.995443280837797e-05, "loss": 1.6084, "step": 8766 }, { "epoch": 2.102511839817757, "grad_norm": 0.6477830410003662, "learning_rate": 2.9938444320089538e-05, "loss": 1.6103, "step": 8768 }, { "epoch": 2.1029914273724595, "grad_norm": 0.7026844024658203, "learning_rate": 2.9922455831801107e-05, "loss": 1.6319, "step": 8770 }, { "epoch": 2.1034710149271625, "grad_norm": 0.6944605708122253, "learning_rate": 2.9906467343512672e-05, "loss": 1.6175, "step": 8772 }, { "epoch": 2.1039506024818655, "grad_norm": 0.6567557454109192, "learning_rate": 2.989047885522424e-05, "loss": 1.6618, "step": 8774 }, { "epoch": 2.1044301900365685, "grad_norm": 0.8537805676460266, "learning_rate": 2.987449036693581e-05, "loss": 1.6229, "step": 8776 }, { "epoch": 2.1049097775912715, "grad_norm": 0.7029289603233337, "learning_rate": 2.9858501878647376e-05, "loss": 1.6993, "step": 8778 }, { "epoch": 2.1053893651459745, "grad_norm": 0.7655268907546997, "learning_rate": 2.9842513390358945e-05, "loss": 1.6264, "step": 8780 }, { "epoch": 2.1058689527006775, "grad_norm": 0.7049762010574341, "learning_rate": 2.9826524902070514e-05, "loss": 1.7013, "step": 8782 }, { "epoch": 2.1063485402553805, "grad_norm": 0.6669335961341858, "learning_rate": 2.981053641378208e-05, "loss": 1.6152, "step": 8784 }, { "epoch": 2.1068281278100836, "grad_norm": 0.741811215877533, "learning_rate": 2.9794547925493642e-05, "loss": 1.6421, "step": 8786 }, 
{ "epoch": 2.107307715364786, "grad_norm": 0.6564712524414062, "learning_rate": 2.977855943720521e-05, "loss": 1.6647, "step": 8788 }, { "epoch": 2.107787302919489, "grad_norm": 0.6709362864494324, "learning_rate": 2.976257094891678e-05, "loss": 1.6699, "step": 8790 }, { "epoch": 2.108266890474192, "grad_norm": 0.7547887563705444, "learning_rate": 2.9746582460628346e-05, "loss": 1.6166, "step": 8792 }, { "epoch": 2.108746478028895, "grad_norm": 0.7485690712928772, "learning_rate": 2.9730593972339915e-05, "loss": 1.622, "step": 8794 }, { "epoch": 2.109226065583598, "grad_norm": 0.6937956809997559, "learning_rate": 2.9714605484051484e-05, "loss": 1.6015, "step": 8796 }, { "epoch": 2.109705653138301, "grad_norm": 0.6297216415405273, "learning_rate": 2.969861699576305e-05, "loss": 1.6512, "step": 8798 }, { "epoch": 2.110185240693004, "grad_norm": 0.6668024063110352, "learning_rate": 2.968262850747462e-05, "loss": 1.7108, "step": 8800 }, { "epoch": 2.110185240693004, "eval_loss": 1.7212488651275635, "eval_runtime": 331.2122, "eval_samples_per_second": 402.911, "eval_steps_per_second": 12.593, "step": 8800 }, { "epoch": 2.110664828247707, "grad_norm": 0.6306362748146057, "learning_rate": 2.9666640019186188e-05, "loss": 1.6423, "step": 8802 }, { "epoch": 2.1111444158024097, "grad_norm": 0.6544023156166077, "learning_rate": 2.9650651530897754e-05, "loss": 1.6547, "step": 8804 }, { "epoch": 2.1116240033571128, "grad_norm": 0.7508514523506165, "learning_rate": 2.9634663042609323e-05, "loss": 1.6655, "step": 8806 }, { "epoch": 2.1121035909118158, "grad_norm": 0.7126561999320984, "learning_rate": 2.9618674554320892e-05, "loss": 1.6173, "step": 8808 }, { "epoch": 2.1125831784665188, "grad_norm": 0.7066534161567688, "learning_rate": 2.9602686066032458e-05, "loss": 1.6518, "step": 8810 }, { "epoch": 2.113062766021222, "grad_norm": 0.6905492544174194, "learning_rate": 2.9586697577744027e-05, "loss": 1.6287, "step": 8812 }, { "epoch": 2.113542353575925, "grad_norm": 
0.6950437426567078, "learning_rate": 2.9570709089455596e-05, "loss": 1.6331, "step": 8814 }, { "epoch": 2.114021941130628, "grad_norm": 0.6898583769798279, "learning_rate": 2.9554720601167162e-05, "loss": 1.6303, "step": 8816 }, { "epoch": 2.114501528685331, "grad_norm": 0.7534643411636353, "learning_rate": 2.953873211287873e-05, "loss": 1.6694, "step": 8818 }, { "epoch": 2.1149811162400334, "grad_norm": 0.6369642019271851, "learning_rate": 2.95227436245903e-05, "loss": 1.6119, "step": 8820 }, { "epoch": 2.1154607037947364, "grad_norm": 0.6520496606826782, "learning_rate": 2.9506755136301866e-05, "loss": 1.5863, "step": 8822 }, { "epoch": 2.1159402913494394, "grad_norm": 0.7066394090652466, "learning_rate": 2.9490766648013428e-05, "loss": 1.6548, "step": 8824 }, { "epoch": 2.1164198789041424, "grad_norm": 0.6576610803604126, "learning_rate": 2.9474778159724997e-05, "loss": 1.6218, "step": 8826 }, { "epoch": 2.1168994664588454, "grad_norm": 0.6859420537948608, "learning_rate": 2.9458789671436566e-05, "loss": 1.6258, "step": 8828 }, { "epoch": 2.1173790540135484, "grad_norm": 0.7458869218826294, "learning_rate": 2.9442801183148132e-05, "loss": 1.6477, "step": 8830 }, { "epoch": 2.1178586415682514, "grad_norm": 0.648341953754425, "learning_rate": 2.94268126948597e-05, "loss": 1.6535, "step": 8832 }, { "epoch": 2.1183382291229544, "grad_norm": 0.6887162923812866, "learning_rate": 2.941082420657127e-05, "loss": 1.6487, "step": 8834 }, { "epoch": 2.118817816677657, "grad_norm": 0.8393876552581787, "learning_rate": 2.9394835718282836e-05, "loss": 1.663, "step": 8836 }, { "epoch": 2.11929740423236, "grad_norm": 0.7926290035247803, "learning_rate": 2.9378847229994405e-05, "loss": 1.5981, "step": 8838 }, { "epoch": 2.119776991787063, "grad_norm": 0.6534532308578491, "learning_rate": 2.9362858741705974e-05, "loss": 1.6361, "step": 8840 }, { "epoch": 2.120256579341766, "grad_norm": 0.6824303865432739, "learning_rate": 2.934687025341754e-05, "loss": 1.5663, "step": 8842 }, { 
"epoch": 2.120736166896469, "grad_norm": 0.6527374982833862, "learning_rate": 2.933088176512911e-05, "loss": 1.6185, "step": 8844 }, { "epoch": 2.121215754451172, "grad_norm": 0.7059105038642883, "learning_rate": 2.9314893276840678e-05, "loss": 1.6131, "step": 8846 }, { "epoch": 2.121695342005875, "grad_norm": 0.6857210993766785, "learning_rate": 2.9298904788552243e-05, "loss": 1.5983, "step": 8848 }, { "epoch": 2.122174929560578, "grad_norm": 0.7569195032119751, "learning_rate": 2.9282916300263813e-05, "loss": 1.6612, "step": 8850 }, { "epoch": 2.122654517115281, "grad_norm": 0.8181119561195374, "learning_rate": 2.926692781197538e-05, "loss": 1.6578, "step": 8852 }, { "epoch": 2.1231341046699836, "grad_norm": 0.6958023309707642, "learning_rate": 2.9250939323686947e-05, "loss": 1.667, "step": 8854 }, { "epoch": 2.1236136922246867, "grad_norm": 0.7089328765869141, "learning_rate": 2.9234950835398516e-05, "loss": 1.6232, "step": 8856 }, { "epoch": 2.1240932797793897, "grad_norm": 0.7830196022987366, "learning_rate": 2.9218962347110085e-05, "loss": 1.643, "step": 8858 }, { "epoch": 2.1245728673340927, "grad_norm": 0.8320754766464233, "learning_rate": 2.9202973858821648e-05, "loss": 1.6179, "step": 8860 }, { "epoch": 2.1250524548887957, "grad_norm": 0.7113444209098816, "learning_rate": 2.9186985370533217e-05, "loss": 1.6249, "step": 8862 }, { "epoch": 2.1255320424434987, "grad_norm": 0.7754170894622803, "learning_rate": 2.9170996882244783e-05, "loss": 1.6336, "step": 8864 }, { "epoch": 2.1260116299982017, "grad_norm": 0.7529258728027344, "learning_rate": 2.915500839395635e-05, "loss": 1.6245, "step": 8866 }, { "epoch": 2.1264912175529043, "grad_norm": 0.6919487118721008, "learning_rate": 2.913901990566792e-05, "loss": 1.6597, "step": 8868 }, { "epoch": 2.1269708051076073, "grad_norm": 0.8736829161643982, "learning_rate": 2.9123031417379486e-05, "loss": 1.6183, "step": 8870 }, { "epoch": 2.1274503926623103, "grad_norm": 0.7144657373428345, "learning_rate": 
2.9107042929091056e-05, "loss": 1.6059, "step": 8872 }, { "epoch": 2.1279299802170133, "grad_norm": 0.6471815705299377, "learning_rate": 2.9091054440802625e-05, "loss": 1.6117, "step": 8874 }, { "epoch": 2.1284095677717163, "grad_norm": 0.6766116619110107, "learning_rate": 2.907506595251419e-05, "loss": 1.634, "step": 8876 }, { "epoch": 2.1288891553264193, "grad_norm": 0.6373894810676575, "learning_rate": 2.905907746422576e-05, "loss": 1.6767, "step": 8878 }, { "epoch": 2.1293687428811223, "grad_norm": 0.7054986357688904, "learning_rate": 2.904308897593733e-05, "loss": 1.6443, "step": 8880 }, { "epoch": 2.1298483304358253, "grad_norm": 1.0024844408035278, "learning_rate": 2.9027100487648894e-05, "loss": 1.6574, "step": 8882 }, { "epoch": 2.1303279179905283, "grad_norm": 0.6584547758102417, "learning_rate": 2.9011111999360463e-05, "loss": 1.6694, "step": 8884 }, { "epoch": 2.130807505545231, "grad_norm": 0.8405470848083496, "learning_rate": 2.8995123511072032e-05, "loss": 1.6475, "step": 8886 }, { "epoch": 2.131287093099934, "grad_norm": 0.7340532541275024, "learning_rate": 2.8979135022783598e-05, "loss": 1.6373, "step": 8888 }, { "epoch": 2.131766680654637, "grad_norm": 0.6881219148635864, "learning_rate": 2.8963146534495167e-05, "loss": 1.5809, "step": 8890 }, { "epoch": 2.13224626820934, "grad_norm": 0.6866159439086914, "learning_rate": 2.8947158046206736e-05, "loss": 1.5968, "step": 8892 }, { "epoch": 2.132725855764043, "grad_norm": 0.6321470141410828, "learning_rate": 2.8931169557918302e-05, "loss": 1.6417, "step": 8894 }, { "epoch": 2.133205443318746, "grad_norm": 0.7353967428207397, "learning_rate": 2.8915181069629864e-05, "loss": 1.5896, "step": 8896 }, { "epoch": 2.133685030873449, "grad_norm": 0.702441930770874, "learning_rate": 2.8899192581341433e-05, "loss": 1.6192, "step": 8898 }, { "epoch": 2.134164618428152, "grad_norm": 0.7744559645652771, "learning_rate": 2.8883204093053002e-05, "loss": 1.6239, "step": 8900 }, { "epoch": 2.1346442059828545, 
"grad_norm": 0.6607762575149536, "learning_rate": 2.8867215604764568e-05, "loss": 1.6131, "step": 8902 }, { "epoch": 2.1351237935375575, "grad_norm": 0.6817500591278076, "learning_rate": 2.8851227116476137e-05, "loss": 1.6583, "step": 8904 }, { "epoch": 2.1356033810922606, "grad_norm": 0.7846953868865967, "learning_rate": 2.8835238628187706e-05, "loss": 1.6129, "step": 8906 }, { "epoch": 2.1360829686469636, "grad_norm": 0.7758301496505737, "learning_rate": 2.8819250139899272e-05, "loss": 1.5806, "step": 8908 }, { "epoch": 2.1365625562016666, "grad_norm": 0.6571009755134583, "learning_rate": 2.880326165161084e-05, "loss": 1.6064, "step": 8910 }, { "epoch": 2.1370421437563696, "grad_norm": 0.7634652256965637, "learning_rate": 2.878727316332241e-05, "loss": 1.6245, "step": 8912 }, { "epoch": 2.1375217313110726, "grad_norm": 0.6424011588096619, "learning_rate": 2.8771284675033976e-05, "loss": 1.6443, "step": 8914 }, { "epoch": 2.1380013188657756, "grad_norm": 0.714451789855957, "learning_rate": 2.8755296186745545e-05, "loss": 1.6315, "step": 8916 }, { "epoch": 2.1384809064204786, "grad_norm": 0.712914228439331, "learning_rate": 2.8739307698457114e-05, "loss": 1.634, "step": 8918 }, { "epoch": 2.138960493975181, "grad_norm": 0.6706340909004211, "learning_rate": 2.872331921016868e-05, "loss": 1.6296, "step": 8920 }, { "epoch": 2.139440081529884, "grad_norm": 0.6786482930183411, "learning_rate": 2.870733072188025e-05, "loss": 1.6334, "step": 8922 }, { "epoch": 2.139919669084587, "grad_norm": 0.7078786492347717, "learning_rate": 2.8691342233591818e-05, "loss": 1.6177, "step": 8924 }, { "epoch": 2.14039925663929, "grad_norm": 0.720043420791626, "learning_rate": 2.8675353745303384e-05, "loss": 1.6431, "step": 8926 }, { "epoch": 2.140878844193993, "grad_norm": 0.748433530330658, "learning_rate": 2.8659365257014953e-05, "loss": 1.6642, "step": 8928 }, { "epoch": 2.1413584317486962, "grad_norm": 0.7626001834869385, "learning_rate": 2.8643376768726522e-05, "loss": 1.6274, 
"step": 8930 }, { "epoch": 2.1418380193033992, "grad_norm": 0.8298491835594177, "learning_rate": 2.8627388280438084e-05, "loss": 1.6786, "step": 8932 }, { "epoch": 2.142317606858102, "grad_norm": 0.8023671507835388, "learning_rate": 2.861139979214965e-05, "loss": 1.63, "step": 8934 }, { "epoch": 2.142797194412805, "grad_norm": 0.7077812552452087, "learning_rate": 2.859541130386122e-05, "loss": 1.5792, "step": 8936 }, { "epoch": 2.143276781967508, "grad_norm": 0.7233709692955017, "learning_rate": 2.8579422815572788e-05, "loss": 1.6662, "step": 8938 }, { "epoch": 2.143756369522211, "grad_norm": 0.7756668329238892, "learning_rate": 2.8563434327284354e-05, "loss": 1.6149, "step": 8940 }, { "epoch": 2.144235957076914, "grad_norm": 0.7155622243881226, "learning_rate": 2.8547445838995923e-05, "loss": 1.6321, "step": 8942 }, { "epoch": 2.144715544631617, "grad_norm": 0.7327544093132019, "learning_rate": 2.8531457350707492e-05, "loss": 1.6368, "step": 8944 }, { "epoch": 2.14519513218632, "grad_norm": 0.6435972452163696, "learning_rate": 2.8515468862419058e-05, "loss": 1.6152, "step": 8946 }, { "epoch": 2.145674719741023, "grad_norm": 0.7117206454277039, "learning_rate": 2.8499480374130627e-05, "loss": 1.6536, "step": 8948 }, { "epoch": 2.146154307295726, "grad_norm": 0.7318780422210693, "learning_rate": 2.8483491885842196e-05, "loss": 1.6734, "step": 8950 }, { "epoch": 2.1466338948504284, "grad_norm": 0.6644673347473145, "learning_rate": 2.846750339755376e-05, "loss": 1.6088, "step": 8952 }, { "epoch": 2.1471134824051314, "grad_norm": 0.7147232294082642, "learning_rate": 2.845151490926533e-05, "loss": 1.6035, "step": 8954 }, { "epoch": 2.1475930699598345, "grad_norm": 0.6679558157920837, "learning_rate": 2.84355264209769e-05, "loss": 1.6145, "step": 8956 }, { "epoch": 2.1480726575145375, "grad_norm": 0.6594291925430298, "learning_rate": 2.8419537932688465e-05, "loss": 1.6428, "step": 8958 }, { "epoch": 2.1485522450692405, "grad_norm": 0.6370211839675903, "learning_rate": 
2.8403549444400034e-05, "loss": 1.6472, "step": 8960 }, { "epoch": 2.1490318326239435, "grad_norm": 0.675334632396698, "learning_rate": 2.8387560956111604e-05, "loss": 1.6181, "step": 8962 }, { "epoch": 2.1495114201786465, "grad_norm": 0.6235164999961853, "learning_rate": 2.8371572467823173e-05, "loss": 1.65, "step": 8964 }, { "epoch": 2.1499910077333495, "grad_norm": 0.6821826696395874, "learning_rate": 2.8355583979534738e-05, "loss": 1.6102, "step": 8966 }, { "epoch": 2.150470595288052, "grad_norm": 0.6683946251869202, "learning_rate": 2.83395954912463e-05, "loss": 1.6131, "step": 8968 }, { "epoch": 2.150950182842755, "grad_norm": 0.7403035759925842, "learning_rate": 2.832360700295787e-05, "loss": 1.6304, "step": 8970 }, { "epoch": 2.151429770397458, "grad_norm": 0.6642279028892517, "learning_rate": 2.830761851466944e-05, "loss": 1.6315, "step": 8972 }, { "epoch": 2.151909357952161, "grad_norm": 0.6527605056762695, "learning_rate": 2.8291630026381004e-05, "loss": 1.6437, "step": 8974 }, { "epoch": 2.152388945506864, "grad_norm": 0.7109906673431396, "learning_rate": 2.8275641538092574e-05, "loss": 1.6757, "step": 8976 }, { "epoch": 2.152868533061567, "grad_norm": 0.7334847450256348, "learning_rate": 2.8259653049804143e-05, "loss": 1.6035, "step": 8978 }, { "epoch": 2.15334812061627, "grad_norm": 0.697538435459137, "learning_rate": 2.824366456151571e-05, "loss": 1.6052, "step": 8980 }, { "epoch": 2.153827708170973, "grad_norm": 0.6585859656333923, "learning_rate": 2.8227676073227277e-05, "loss": 1.6408, "step": 8982 }, { "epoch": 2.154307295725676, "grad_norm": 0.6527297496795654, "learning_rate": 2.8211687584938847e-05, "loss": 1.6889, "step": 8984 }, { "epoch": 2.1547868832803787, "grad_norm": 0.715987503528595, "learning_rate": 2.8195699096650412e-05, "loss": 1.612, "step": 8986 }, { "epoch": 2.1552664708350817, "grad_norm": 0.6542458534240723, "learning_rate": 2.817971060836198e-05, "loss": 1.5999, "step": 8988 }, { "epoch": 2.1557460583897847, "grad_norm": 
0.6293676495552063, "learning_rate": 2.816372212007355e-05, "loss": 1.6582, "step": 8990 }, { "epoch": 2.1562256459444877, "grad_norm": 0.7031814455986023, "learning_rate": 2.8147733631785116e-05, "loss": 1.5757, "step": 8992 }, { "epoch": 2.1567052334991907, "grad_norm": 0.6364571452140808, "learning_rate": 2.8131745143496685e-05, "loss": 1.6189, "step": 8994 }, { "epoch": 2.1571848210538938, "grad_norm": 0.6757885217666626, "learning_rate": 2.8115756655208254e-05, "loss": 1.6543, "step": 8996 }, { "epoch": 2.1576644086085968, "grad_norm": 0.6828928589820862, "learning_rate": 2.809976816691982e-05, "loss": 1.5956, "step": 8998 }, { "epoch": 2.1581439961632993, "grad_norm": 0.6485259532928467, "learning_rate": 2.808377967863139e-05, "loss": 1.6396, "step": 9000 }, { "epoch": 2.1586235837180023, "grad_norm": 0.7166759371757507, "learning_rate": 2.8067791190342958e-05, "loss": 1.6974, "step": 9002 }, { "epoch": 2.1591031712727053, "grad_norm": 0.7027713060379028, "learning_rate": 2.805180270205452e-05, "loss": 1.6602, "step": 9004 }, { "epoch": 2.1595827588274084, "grad_norm": 0.6531209945678711, "learning_rate": 2.8035814213766086e-05, "loss": 1.6461, "step": 9006 }, { "epoch": 2.1600623463821114, "grad_norm": 0.6804800033569336, "learning_rate": 2.8019825725477655e-05, "loss": 1.613, "step": 9008 }, { "epoch": 2.1605419339368144, "grad_norm": 0.6789934039115906, "learning_rate": 2.8003837237189224e-05, "loss": 1.605, "step": 9010 }, { "epoch": 2.1610215214915174, "grad_norm": 0.7969672679901123, "learning_rate": 2.798784874890079e-05, "loss": 1.6219, "step": 9012 }, { "epoch": 2.1615011090462204, "grad_norm": 0.6392354965209961, "learning_rate": 2.797186026061236e-05, "loss": 1.6181, "step": 9014 }, { "epoch": 2.1619806966009234, "grad_norm": 0.665256142616272, "learning_rate": 2.7955871772323928e-05, "loss": 1.6652, "step": 9016 }, { "epoch": 2.162460284155626, "grad_norm": 0.7600290775299072, "learning_rate": 2.7939883284035494e-05, "loss": 1.6284, "step": 9018 
}, { "epoch": 2.162939871710329, "grad_norm": 0.6941183805465698, "learning_rate": 2.7923894795747063e-05, "loss": 1.6448, "step": 9020 }, { "epoch": 2.163419459265032, "grad_norm": 0.6377661228179932, "learning_rate": 2.7907906307458632e-05, "loss": 1.6273, "step": 9022 }, { "epoch": 2.163899046819735, "grad_norm": 0.6449486017227173, "learning_rate": 2.7891917819170198e-05, "loss": 1.6157, "step": 9024 }, { "epoch": 2.164378634374438, "grad_norm": 0.6982719898223877, "learning_rate": 2.7875929330881767e-05, "loss": 1.6774, "step": 9026 }, { "epoch": 2.164858221929141, "grad_norm": 0.6991705894470215, "learning_rate": 2.7859940842593336e-05, "loss": 1.5971, "step": 9028 }, { "epoch": 2.165337809483844, "grad_norm": 0.6566417217254639, "learning_rate": 2.78439523543049e-05, "loss": 1.7145, "step": 9030 }, { "epoch": 2.165817397038547, "grad_norm": 0.6836729645729065, "learning_rate": 2.782796386601647e-05, "loss": 1.6585, "step": 9032 }, { "epoch": 2.1662969845932496, "grad_norm": 0.7036912441253662, "learning_rate": 2.781197537772804e-05, "loss": 1.6229, "step": 9034 }, { "epoch": 2.1667765721479526, "grad_norm": 0.6601892709732056, "learning_rate": 2.7795986889439606e-05, "loss": 1.6422, "step": 9036 }, { "epoch": 2.1672561597026556, "grad_norm": 0.6660856604576111, "learning_rate": 2.7779998401151175e-05, "loss": 1.6286, "step": 9038 }, { "epoch": 2.1677357472573586, "grad_norm": 0.6990690231323242, "learning_rate": 2.7764009912862744e-05, "loss": 1.6408, "step": 9040 }, { "epoch": 2.1682153348120616, "grad_norm": 0.6932114958763123, "learning_rate": 2.7748021424574306e-05, "loss": 1.6027, "step": 9042 }, { "epoch": 2.1686949223667646, "grad_norm": 0.6517059206962585, "learning_rate": 2.7732032936285872e-05, "loss": 1.6624, "step": 9044 }, { "epoch": 2.1691745099214677, "grad_norm": 0.7394084334373474, "learning_rate": 2.771604444799744e-05, "loss": 1.6696, "step": 9046 }, { "epoch": 2.1696540974761707, "grad_norm": 0.6802868843078613, "learning_rate": 
2.770005595970901e-05, "loss": 1.6435, "step": 9048 }, { "epoch": 2.1701336850308737, "grad_norm": 0.7229099273681641, "learning_rate": 2.7684067471420576e-05, "loss": 1.6189, "step": 9050 }, { "epoch": 2.1706132725855762, "grad_norm": 0.6491344571113586, "learning_rate": 2.7668078983132145e-05, "loss": 1.5748, "step": 9052 }, { "epoch": 2.1710928601402792, "grad_norm": 0.691628098487854, "learning_rate": 2.7652090494843714e-05, "loss": 1.6688, "step": 9054 }, { "epoch": 2.1715724476949823, "grad_norm": 0.7196014523506165, "learning_rate": 2.763610200655528e-05, "loss": 1.6219, "step": 9056 }, { "epoch": 2.1720520352496853, "grad_norm": 0.8515409231185913, "learning_rate": 2.762011351826685e-05, "loss": 1.6434, "step": 9058 }, { "epoch": 2.1725316228043883, "grad_norm": 0.6792849898338318, "learning_rate": 2.7604125029978418e-05, "loss": 1.6199, "step": 9060 }, { "epoch": 2.1730112103590913, "grad_norm": 0.7396702170372009, "learning_rate": 2.7588136541689987e-05, "loss": 1.6253, "step": 9062 }, { "epoch": 2.1734907979137943, "grad_norm": 0.7363607883453369, "learning_rate": 2.7572148053401552e-05, "loss": 1.5907, "step": 9064 }, { "epoch": 2.173970385468497, "grad_norm": 0.6437717080116272, "learning_rate": 2.755615956511312e-05, "loss": 1.594, "step": 9066 }, { "epoch": 2.1744499730232, "grad_norm": 0.7199034690856934, "learning_rate": 2.754017107682469e-05, "loss": 1.6886, "step": 9068 }, { "epoch": 2.174929560577903, "grad_norm": 0.7835302352905273, "learning_rate": 2.7524182588536256e-05, "loss": 1.6786, "step": 9070 }, { "epoch": 2.175409148132606, "grad_norm": 0.9354981184005737, "learning_rate": 2.7508194100247825e-05, "loss": 1.6949, "step": 9072 }, { "epoch": 2.175888735687309, "grad_norm": 0.6790528297424316, "learning_rate": 2.7492205611959395e-05, "loss": 1.602, "step": 9074 }, { "epoch": 2.176368323242012, "grad_norm": 0.5821729898452759, "learning_rate": 2.747621712367096e-05, "loss": 1.5313, "step": 9076 }, { "epoch": 2.176847910796715, "grad_norm": 
0.683730959892273, "learning_rate": 2.7460228635382523e-05, "loss": 1.6134, "step": 9078 }, { "epoch": 2.177327498351418, "grad_norm": 0.6463903784751892, "learning_rate": 2.744424014709409e-05, "loss": 1.6695, "step": 9080 }, { "epoch": 2.177807085906121, "grad_norm": 0.629951536655426, "learning_rate": 2.742825165880566e-05, "loss": 1.619, "step": 9082 }, { "epoch": 2.1782866734608235, "grad_norm": 0.64698725938797, "learning_rate": 2.7412263170517226e-05, "loss": 1.6398, "step": 9084 }, { "epoch": 2.1787662610155265, "grad_norm": 0.7165349125862122, "learning_rate": 2.7396274682228795e-05, "loss": 1.601, "step": 9086 }, { "epoch": 2.1792458485702295, "grad_norm": 0.6371572017669678, "learning_rate": 2.7380286193940365e-05, "loss": 1.5972, "step": 9088 }, { "epoch": 2.1797254361249325, "grad_norm": 0.6671940088272095, "learning_rate": 2.736429770565193e-05, "loss": 1.6459, "step": 9090 }, { "epoch": 2.1802050236796355, "grad_norm": 0.6451492309570312, "learning_rate": 2.73483092173635e-05, "loss": 1.6065, "step": 9092 }, { "epoch": 2.1806846112343385, "grad_norm": 0.7914867997169495, "learning_rate": 2.733232072907507e-05, "loss": 1.6605, "step": 9094 }, { "epoch": 2.1811641987890416, "grad_norm": 0.743280827999115, "learning_rate": 2.7316332240786634e-05, "loss": 1.5812, "step": 9096 }, { "epoch": 2.1816437863437446, "grad_norm": 0.689128577709198, "learning_rate": 2.7300343752498203e-05, "loss": 1.6527, "step": 9098 }, { "epoch": 2.182123373898447, "grad_norm": 0.7470855116844177, "learning_rate": 2.7284355264209772e-05, "loss": 1.6309, "step": 9100 }, { "epoch": 2.18260296145315, "grad_norm": 0.7245185375213623, "learning_rate": 2.7268366775921338e-05, "loss": 1.6891, "step": 9102 }, { "epoch": 2.183082549007853, "grad_norm": 0.6563366651535034, "learning_rate": 2.7252378287632907e-05, "loss": 1.6295, "step": 9104 }, { "epoch": 2.183562136562556, "grad_norm": 0.7487242221832275, "learning_rate": 2.7236389799344476e-05, "loss": 1.678, "step": 9106 }, { "epoch": 
2.184041724117259, "grad_norm": 0.775061309337616, "learning_rate": 2.7220401311056042e-05, "loss": 1.6459, "step": 9108 }, { "epoch": 2.184521311671962, "grad_norm": 0.6986782550811768, "learning_rate": 2.720441282276761e-05, "loss": 1.6462, "step": 9110 }, { "epoch": 2.185000899226665, "grad_norm": 0.6937562227249146, "learning_rate": 2.718842433447918e-05, "loss": 1.6306, "step": 9112 }, { "epoch": 2.185480486781368, "grad_norm": 0.632322371006012, "learning_rate": 2.7172435846190742e-05, "loss": 1.5863, "step": 9114 }, { "epoch": 2.185960074336071, "grad_norm": 0.9080226421356201, "learning_rate": 2.7156447357902308e-05, "loss": 1.6481, "step": 9116 }, { "epoch": 2.1864396618907738, "grad_norm": 0.6730765104293823, "learning_rate": 2.7140458869613877e-05, "loss": 1.6304, "step": 9118 }, { "epoch": 2.1869192494454768, "grad_norm": 0.6057719588279724, "learning_rate": 2.7124470381325446e-05, "loss": 1.6208, "step": 9120 }, { "epoch": 2.18739883700018, "grad_norm": 0.8225935697555542, "learning_rate": 2.7108481893037012e-05, "loss": 1.6124, "step": 9122 }, { "epoch": 2.187878424554883, "grad_norm": 0.7415140271186829, "learning_rate": 2.709249340474858e-05, "loss": 1.5922, "step": 9124 }, { "epoch": 2.188358012109586, "grad_norm": 0.7465587258338928, "learning_rate": 2.707650491646015e-05, "loss": 1.5985, "step": 9126 }, { "epoch": 2.188837599664289, "grad_norm": 0.6725730895996094, "learning_rate": 2.7060516428171716e-05, "loss": 1.5751, "step": 9128 }, { "epoch": 2.189317187218992, "grad_norm": 0.8501695394515991, "learning_rate": 2.7044527939883285e-05, "loss": 1.57, "step": 9130 }, { "epoch": 2.1897967747736944, "grad_norm": 0.6890982389450073, "learning_rate": 2.7028539451594854e-05, "loss": 1.6477, "step": 9132 }, { "epoch": 2.1902763623283974, "grad_norm": 0.7261202931404114, "learning_rate": 2.701255096330642e-05, "loss": 1.6439, "step": 9134 }, { "epoch": 2.1907559498831004, "grad_norm": 0.6996576189994812, "learning_rate": 2.699656247501799e-05, "loss": 
1.5825, "step": 9136 }, { "epoch": 2.1912355374378034, "grad_norm": 0.6822003722190857, "learning_rate": 2.6980573986729558e-05, "loss": 1.5999, "step": 9138 }, { "epoch": 2.1917151249925064, "grad_norm": 0.9286832213401794, "learning_rate": 2.6964585498441124e-05, "loss": 1.6498, "step": 9140 }, { "epoch": 2.1921947125472094, "grad_norm": 0.6920325756072998, "learning_rate": 2.6948597010152693e-05, "loss": 1.6338, "step": 9142 }, { "epoch": 2.1926743001019124, "grad_norm": 0.6925249099731445, "learning_rate": 2.6932608521864262e-05, "loss": 1.5825, "step": 9144 }, { "epoch": 2.1931538876566155, "grad_norm": 0.7720644474029541, "learning_rate": 2.6916620033575827e-05, "loss": 1.6404, "step": 9146 }, { "epoch": 2.1936334752113185, "grad_norm": 0.6891867518424988, "learning_rate": 2.6900631545287397e-05, "loss": 1.6227, "step": 9148 }, { "epoch": 2.194113062766021, "grad_norm": 0.6766827702522278, "learning_rate": 2.688464305699896e-05, "loss": 1.6544, "step": 9150 }, { "epoch": 2.194592650320724, "grad_norm": 0.6722220182418823, "learning_rate": 2.6868654568710528e-05, "loss": 1.64, "step": 9152 }, { "epoch": 2.195072237875427, "grad_norm": 0.7264376282691956, "learning_rate": 2.6852666080422094e-05, "loss": 1.654, "step": 9154 }, { "epoch": 2.19555182543013, "grad_norm": 0.7249670028686523, "learning_rate": 2.6836677592133663e-05, "loss": 1.6267, "step": 9156 }, { "epoch": 2.196031412984833, "grad_norm": 0.7708137035369873, "learning_rate": 2.6820689103845232e-05, "loss": 1.5873, "step": 9158 }, { "epoch": 2.196511000539536, "grad_norm": 0.7035956978797913, "learning_rate": 2.68047006155568e-05, "loss": 1.6, "step": 9160 }, { "epoch": 2.196990588094239, "grad_norm": 0.6927496790885925, "learning_rate": 2.6788712127268367e-05, "loss": 1.6261, "step": 9162 }, { "epoch": 2.197470175648942, "grad_norm": 0.7938705086708069, "learning_rate": 2.6772723638979936e-05, "loss": 1.6285, "step": 9164 }, { "epoch": 2.1979497632036447, "grad_norm": 0.7801268100738525, 
"learning_rate": 2.6756735150691505e-05, "loss": 1.6409, "step": 9166 }, { "epoch": 2.1984293507583477, "grad_norm": 0.8329654335975647, "learning_rate": 2.674074666240307e-05, "loss": 1.6342, "step": 9168 }, { "epoch": 2.1989089383130507, "grad_norm": 0.6771716475486755, "learning_rate": 2.672475817411464e-05, "loss": 1.6181, "step": 9170 }, { "epoch": 2.1993885258677537, "grad_norm": 0.6838635802268982, "learning_rate": 2.670876968582621e-05, "loss": 1.6077, "step": 9172 }, { "epoch": 2.1998681134224567, "grad_norm": 0.7246561050415039, "learning_rate": 2.6692781197537774e-05, "loss": 1.6502, "step": 9174 }, { "epoch": 2.2003477009771597, "grad_norm": 0.6747000217437744, "learning_rate": 2.6676792709249343e-05, "loss": 1.6196, "step": 9176 }, { "epoch": 2.2008272885318627, "grad_norm": 0.6765057444572449, "learning_rate": 2.6660804220960913e-05, "loss": 1.6201, "step": 9178 }, { "epoch": 2.2013068760865657, "grad_norm": 0.7167196273803711, "learning_rate": 2.6644815732672478e-05, "loss": 1.6351, "step": 9180 }, { "epoch": 2.2017864636412687, "grad_norm": 0.7519485354423523, "learning_rate": 2.6628827244384047e-05, "loss": 1.6195, "step": 9182 }, { "epoch": 2.2022660511959713, "grad_norm": 0.7311533689498901, "learning_rate": 2.6612838756095616e-05, "loss": 1.6351, "step": 9184 }, { "epoch": 2.2027456387506743, "grad_norm": 0.7979393005371094, "learning_rate": 2.659685026780718e-05, "loss": 1.6263, "step": 9186 }, { "epoch": 2.2032252263053773, "grad_norm": 0.719285249710083, "learning_rate": 2.6580861779518744e-05, "loss": 1.6933, "step": 9188 }, { "epoch": 2.2037048138600803, "grad_norm": 0.7492539286613464, "learning_rate": 2.6564873291230314e-05, "loss": 1.6547, "step": 9190 }, { "epoch": 2.2041844014147833, "grad_norm": 0.6598458290100098, "learning_rate": 2.6548884802941883e-05, "loss": 1.671, "step": 9192 }, { "epoch": 2.2046639889694863, "grad_norm": 0.7084299921989441, "learning_rate": 2.6532896314653448e-05, "loss": 1.6571, "step": 9194 }, { "epoch": 
2.2051435765241894, "grad_norm": 0.7248772382736206, "learning_rate": 2.6516907826365017e-05, "loss": 1.6006, "step": 9196 }, { "epoch": 2.205623164078892, "grad_norm": 0.6799157857894897, "learning_rate": 2.6500919338076586e-05, "loss": 1.6465, "step": 9198 }, { "epoch": 2.206102751633595, "grad_norm": 0.7880686521530151, "learning_rate": 2.6484930849788152e-05, "loss": 1.6735, "step": 9200 }, { "epoch": 2.206102751633595, "eval_loss": 1.7178289890289307, "eval_runtime": 331.5819, "eval_samples_per_second": 402.462, "eval_steps_per_second": 12.579, "step": 9200 }, { "epoch": 2.206582339188298, "grad_norm": 0.7518048286437988, "learning_rate": 2.646894236149972e-05, "loss": 1.6607, "step": 9202 }, { "epoch": 2.207061926743001, "grad_norm": 0.6991863250732422, "learning_rate": 2.645295387321129e-05, "loss": 1.6435, "step": 9204 }, { "epoch": 2.207541514297704, "grad_norm": 0.6786771416664124, "learning_rate": 2.6436965384922856e-05, "loss": 1.7269, "step": 9206 }, { "epoch": 2.208021101852407, "grad_norm": 0.626617431640625, "learning_rate": 2.6420976896634425e-05, "loss": 1.6883, "step": 9208 }, { "epoch": 2.20850068940711, "grad_norm": 0.7266605496406555, "learning_rate": 2.6404988408345994e-05, "loss": 1.6411, "step": 9210 }, { "epoch": 2.208980276961813, "grad_norm": 0.6482115387916565, "learning_rate": 2.638899992005756e-05, "loss": 1.6297, "step": 9212 }, { "epoch": 2.209459864516516, "grad_norm": 0.6855117678642273, "learning_rate": 2.637301143176913e-05, "loss": 1.6522, "step": 9214 }, { "epoch": 2.2099394520712186, "grad_norm": 0.7340042591094971, "learning_rate": 2.6357022943480698e-05, "loss": 1.6753, "step": 9216 }, { "epoch": 2.2104190396259216, "grad_norm": 0.853279709815979, "learning_rate": 2.6341034455192264e-05, "loss": 1.6339, "step": 9218 }, { "epoch": 2.2108986271806246, "grad_norm": 0.6707942485809326, "learning_rate": 2.6325045966903833e-05, "loss": 1.6213, "step": 9220 }, { "epoch": 2.2113782147353276, "grad_norm": 0.8469206094741821, 
"learning_rate": 2.6309057478615402e-05, "loss": 1.6331, "step": 9222 }, { "epoch": 2.2118578022900306, "grad_norm": 0.6547406911849976, "learning_rate": 2.6293068990326964e-05, "loss": 1.6214, "step": 9224 }, { "epoch": 2.2123373898447336, "grad_norm": 0.7086814641952515, "learning_rate": 2.627708050203853e-05, "loss": 1.6161, "step": 9226 }, { "epoch": 2.2128169773994366, "grad_norm": 0.7388503551483154, "learning_rate": 2.62610920137501e-05, "loss": 1.6588, "step": 9228 }, { "epoch": 2.2132965649541396, "grad_norm": 0.7070127129554749, "learning_rate": 2.6245103525461668e-05, "loss": 1.5206, "step": 9230 }, { "epoch": 2.213776152508842, "grad_norm": 0.6763485670089722, "learning_rate": 2.6229115037173234e-05, "loss": 1.5988, "step": 9232 }, { "epoch": 2.214255740063545, "grad_norm": 0.8416496515274048, "learning_rate": 2.6213126548884803e-05, "loss": 1.6539, "step": 9234 }, { "epoch": 2.214735327618248, "grad_norm": 0.6692326664924622, "learning_rate": 2.6197138060596372e-05, "loss": 1.627, "step": 9236 }, { "epoch": 2.215214915172951, "grad_norm": 0.6739577651023865, "learning_rate": 2.6181149572307938e-05, "loss": 1.6119, "step": 9238 }, { "epoch": 2.2156945027276542, "grad_norm": 0.7641106843948364, "learning_rate": 2.6165161084019507e-05, "loss": 1.6379, "step": 9240 }, { "epoch": 2.2161740902823572, "grad_norm": 0.696032702922821, "learning_rate": 2.6149172595731076e-05, "loss": 1.6082, "step": 9242 }, { "epoch": 2.2166536778370602, "grad_norm": 0.6675939559936523, "learning_rate": 2.613318410744264e-05, "loss": 1.6286, "step": 9244 }, { "epoch": 2.2171332653917633, "grad_norm": 0.7577967047691345, "learning_rate": 2.611719561915421e-05, "loss": 1.6252, "step": 9246 }, { "epoch": 2.2176128529464663, "grad_norm": 0.6601229906082153, "learning_rate": 2.610120713086578e-05, "loss": 1.6583, "step": 9248 }, { "epoch": 2.218092440501169, "grad_norm": 0.722792387008667, "learning_rate": 2.6085218642577345e-05, "loss": 1.6333, "step": 9250 }, { "epoch": 
2.218572028055872, "grad_norm": 0.6817644834518433, "learning_rate": 2.6069230154288915e-05, "loss": 1.6172, "step": 9252 }, { "epoch": 2.219051615610575, "grad_norm": 0.756897509098053, "learning_rate": 2.6053241666000484e-05, "loss": 1.6927, "step": 9254 }, { "epoch": 2.219531203165278, "grad_norm": 0.7609176635742188, "learning_rate": 2.603725317771205e-05, "loss": 1.6284, "step": 9256 }, { "epoch": 2.220010790719981, "grad_norm": 0.7349625825881958, "learning_rate": 2.602126468942362e-05, "loss": 1.6148, "step": 9258 }, { "epoch": 2.220490378274684, "grad_norm": 0.6394014954566956, "learning_rate": 2.600527620113518e-05, "loss": 1.6682, "step": 9260 }, { "epoch": 2.220969965829387, "grad_norm": 0.9255886077880859, "learning_rate": 2.598928771284675e-05, "loss": 1.5819, "step": 9262 }, { "epoch": 2.2214495533840894, "grad_norm": 0.6687517166137695, "learning_rate": 2.597329922455832e-05, "loss": 1.6233, "step": 9264 }, { "epoch": 2.2219291409387925, "grad_norm": 0.8049976825714111, "learning_rate": 2.5957310736269885e-05, "loss": 1.6594, "step": 9266 }, { "epoch": 2.2224087284934955, "grad_norm": 0.8025593757629395, "learning_rate": 2.5941322247981454e-05, "loss": 1.6512, "step": 9268 }, { "epoch": 2.2228883160481985, "grad_norm": 0.873997688293457, "learning_rate": 2.5925333759693023e-05, "loss": 1.6064, "step": 9270 }, { "epoch": 2.2233679036029015, "grad_norm": 0.7398845553398132, "learning_rate": 2.590934527140459e-05, "loss": 1.6407, "step": 9272 }, { "epoch": 2.2238474911576045, "grad_norm": 0.7185213565826416, "learning_rate": 2.5893356783116158e-05, "loss": 1.627, "step": 9274 }, { "epoch": 2.2243270787123075, "grad_norm": 0.6642566919326782, "learning_rate": 2.5877368294827727e-05, "loss": 1.5773, "step": 9276 }, { "epoch": 2.2248066662670105, "grad_norm": 0.6387593746185303, "learning_rate": 2.5861379806539292e-05, "loss": 1.5832, "step": 9278 }, { "epoch": 2.2252862538217135, "grad_norm": 0.7631940245628357, "learning_rate": 2.584539131825086e-05, 
"loss": 1.5953, "step": 9280 }, { "epoch": 2.225765841376416, "grad_norm": 0.7972085475921631, "learning_rate": 2.582940282996243e-05, "loss": 1.5513, "step": 9282 }, { "epoch": 2.226245428931119, "grad_norm": 0.7077127695083618, "learning_rate": 2.5813414341673996e-05, "loss": 1.663, "step": 9284 }, { "epoch": 2.226725016485822, "grad_norm": 0.7418619990348816, "learning_rate": 2.5797425853385565e-05, "loss": 1.6005, "step": 9286 }, { "epoch": 2.227204604040525, "grad_norm": 0.7545381784439087, "learning_rate": 2.5781437365097134e-05, "loss": 1.643, "step": 9288 }, { "epoch": 2.227684191595228, "grad_norm": 0.7143659591674805, "learning_rate": 2.57654488768087e-05, "loss": 1.661, "step": 9290 }, { "epoch": 2.228163779149931, "grad_norm": 0.619979202747345, "learning_rate": 2.574946038852027e-05, "loss": 1.6378, "step": 9292 }, { "epoch": 2.228643366704634, "grad_norm": 0.668156087398529, "learning_rate": 2.573347190023184e-05, "loss": 1.6672, "step": 9294 }, { "epoch": 2.229122954259337, "grad_norm": 0.6936262249946594, "learning_rate": 2.57174834119434e-05, "loss": 1.6109, "step": 9296 }, { "epoch": 2.2296025418140397, "grad_norm": 0.6305476427078247, "learning_rate": 2.5701494923654966e-05, "loss": 1.6063, "step": 9298 }, { "epoch": 2.2300821293687427, "grad_norm": 0.6941704154014587, "learning_rate": 2.5685506435366535e-05, "loss": 1.6586, "step": 9300 }, { "epoch": 2.2305617169234457, "grad_norm": 0.7569704055786133, "learning_rate": 2.5669517947078105e-05, "loss": 1.5946, "step": 9302 }, { "epoch": 2.2310413044781487, "grad_norm": 0.6701012253761292, "learning_rate": 2.565352945878967e-05, "loss": 1.6214, "step": 9304 }, { "epoch": 2.2315208920328518, "grad_norm": 0.6272004246711731, "learning_rate": 2.563754097050124e-05, "loss": 1.6043, "step": 9306 }, { "epoch": 2.2320004795875548, "grad_norm": 0.6632606983184814, "learning_rate": 2.562155248221281e-05, "loss": 1.6261, "step": 9308 }, { "epoch": 2.2324800671422578, "grad_norm": 0.778589129447937, 
"learning_rate": 2.5605563993924374e-05, "loss": 1.6424, "step": 9310 }, { "epoch": 2.232959654696961, "grad_norm": 0.6905750036239624, "learning_rate": 2.5589575505635943e-05, "loss": 1.6282, "step": 9312 }, { "epoch": 2.233439242251664, "grad_norm": 0.6717818975448608, "learning_rate": 2.5573587017347512e-05, "loss": 1.609, "step": 9314 }, { "epoch": 2.2339188298063664, "grad_norm": 0.6934249401092529, "learning_rate": 2.5557598529059078e-05, "loss": 1.6661, "step": 9316 }, { "epoch": 2.2343984173610694, "grad_norm": 0.7122979164123535, "learning_rate": 2.5541610040770647e-05, "loss": 1.6219, "step": 9318 }, { "epoch": 2.2348780049157724, "grad_norm": 0.764451265335083, "learning_rate": 2.5525621552482216e-05, "loss": 1.5767, "step": 9320 }, { "epoch": 2.2353575924704754, "grad_norm": 0.8461470007896423, "learning_rate": 2.5509633064193782e-05, "loss": 1.629, "step": 9322 }, { "epoch": 2.2358371800251784, "grad_norm": 0.7207307815551758, "learning_rate": 2.549364457590535e-05, "loss": 1.6287, "step": 9324 }, { "epoch": 2.2363167675798814, "grad_norm": 0.6622203588485718, "learning_rate": 2.547765608761692e-05, "loss": 1.6187, "step": 9326 }, { "epoch": 2.2367963551345844, "grad_norm": 0.7211847305297852, "learning_rate": 2.5461667599328486e-05, "loss": 1.6426, "step": 9328 }, { "epoch": 2.237275942689287, "grad_norm": 0.647935152053833, "learning_rate": 2.5445679111040055e-05, "loss": 1.5754, "step": 9330 }, { "epoch": 2.23775553024399, "grad_norm": 0.6781213879585266, "learning_rate": 2.5429690622751617e-05, "loss": 1.6189, "step": 9332 }, { "epoch": 2.238235117798693, "grad_norm": 0.6535171270370483, "learning_rate": 2.5413702134463186e-05, "loss": 1.6616, "step": 9334 }, { "epoch": 2.238714705353396, "grad_norm": 0.6865279674530029, "learning_rate": 2.5397713646174752e-05, "loss": 1.6617, "step": 9336 }, { "epoch": 2.239194292908099, "grad_norm": 0.6477672457695007, "learning_rate": 2.538172515788632e-05, "loss": 1.6605, "step": 9338 }, { "epoch": 
2.239673880462802, "grad_norm": 0.730222225189209, "learning_rate": 2.536573666959789e-05, "loss": 1.641, "step": 9340 }, { "epoch": 2.240153468017505, "grad_norm": 0.7612428665161133, "learning_rate": 2.5349748181309456e-05, "loss": 1.6251, "step": 9342 }, { "epoch": 2.240633055572208, "grad_norm": 0.6982625722885132, "learning_rate": 2.5333759693021025e-05, "loss": 1.5761, "step": 9344 }, { "epoch": 2.241112643126911, "grad_norm": 0.6798668503761292, "learning_rate": 2.5317771204732594e-05, "loss": 1.6371, "step": 9346 }, { "epoch": 2.2415922306816136, "grad_norm": 0.6320176720619202, "learning_rate": 2.530178271644416e-05, "loss": 1.6341, "step": 9348 }, { "epoch": 2.2420718182363166, "grad_norm": 0.6732601523399353, "learning_rate": 2.528579422815573e-05, "loss": 1.648, "step": 9350 }, { "epoch": 2.2425514057910196, "grad_norm": 0.7074276804924011, "learning_rate": 2.5269805739867298e-05, "loss": 1.6288, "step": 9352 }, { "epoch": 2.2430309933457226, "grad_norm": 0.7960969805717468, "learning_rate": 2.5253817251578864e-05, "loss": 1.6544, "step": 9354 }, { "epoch": 2.2435105809004257, "grad_norm": 0.6650604009628296, "learning_rate": 2.5237828763290433e-05, "loss": 1.6222, "step": 9356 }, { "epoch": 2.2439901684551287, "grad_norm": 0.7578086256980896, "learning_rate": 2.5221840275002002e-05, "loss": 1.6137, "step": 9358 }, { "epoch": 2.2444697560098317, "grad_norm": 0.6769105792045593, "learning_rate": 2.5205851786713567e-05, "loss": 1.6883, "step": 9360 }, { "epoch": 2.2449493435645347, "grad_norm": 0.6492621898651123, "learning_rate": 2.5189863298425136e-05, "loss": 1.5919, "step": 9362 }, { "epoch": 2.2454289311192372, "grad_norm": 0.6837701797485352, "learning_rate": 2.5173874810136706e-05, "loss": 1.6055, "step": 9364 }, { "epoch": 2.2459085186739403, "grad_norm": 0.7614205479621887, "learning_rate": 2.5157886321848275e-05, "loss": 1.659, "step": 9366 }, { "epoch": 2.2463881062286433, "grad_norm": 0.6801870465278625, "learning_rate": 
2.5141897833559837e-05, "loss": 1.6667, "step": 9368 }, { "epoch": 2.2468676937833463, "grad_norm": 0.6615434885025024, "learning_rate": 2.5125909345271403e-05, "loss": 1.5877, "step": 9370 }, { "epoch": 2.2473472813380493, "grad_norm": 0.6556148529052734, "learning_rate": 2.5109920856982972e-05, "loss": 1.6686, "step": 9372 }, { "epoch": 2.2478268688927523, "grad_norm": 0.775557279586792, "learning_rate": 2.509393236869454e-05, "loss": 1.6028, "step": 9374 }, { "epoch": 2.2483064564474553, "grad_norm": 0.652387797832489, "learning_rate": 2.5077943880406107e-05, "loss": 1.637, "step": 9376 }, { "epoch": 2.2487860440021583, "grad_norm": 0.8699994087219238, "learning_rate": 2.5061955392117676e-05, "loss": 1.5892, "step": 9378 }, { "epoch": 2.2492656315568613, "grad_norm": 0.6533414721488953, "learning_rate": 2.5045966903829245e-05, "loss": 1.6435, "step": 9380 }, { "epoch": 2.249745219111564, "grad_norm": 0.6973972916603088, "learning_rate": 2.502997841554081e-05, "loss": 1.6187, "step": 9382 }, { "epoch": 2.250224806666267, "grad_norm": 0.6499430537223816, "learning_rate": 2.501398992725238e-05, "loss": 1.6142, "step": 9384 }, { "epoch": 2.25070439422097, "grad_norm": 0.6700563430786133, "learning_rate": 2.499800143896395e-05, "loss": 1.6343, "step": 9386 }, { "epoch": 2.251183981775673, "grad_norm": 0.6450253129005432, "learning_rate": 2.4982012950675514e-05, "loss": 1.6432, "step": 9388 }, { "epoch": 2.251663569330376, "grad_norm": 0.7263568043708801, "learning_rate": 2.4966024462387083e-05, "loss": 1.5857, "step": 9390 }, { "epoch": 2.252143156885079, "grad_norm": 0.7335007786750793, "learning_rate": 2.4950035974098652e-05, "loss": 1.6222, "step": 9392 }, { "epoch": 2.252622744439782, "grad_norm": 0.6426674723625183, "learning_rate": 2.4934047485810218e-05, "loss": 1.6282, "step": 9394 }, { "epoch": 2.2531023319944845, "grad_norm": 0.7712120413780212, "learning_rate": 2.4918058997521784e-05, "loss": 1.6551, "step": 9396 }, { "epoch": 2.2535819195491875, 
"grad_norm": 0.718134880065918, "learning_rate": 2.4902070509233353e-05, "loss": 1.6628, "step": 9398 }, { "epoch": 2.2540615071038905, "grad_norm": 0.756875216960907, "learning_rate": 2.488608202094492e-05, "loss": 1.5808, "step": 9400 }, { "epoch": 2.2545410946585935, "grad_norm": 0.6713234782218933, "learning_rate": 2.4870093532656488e-05, "loss": 1.656, "step": 9402 }, { "epoch": 2.2550206822132965, "grad_norm": 0.7061495780944824, "learning_rate": 2.4854105044368057e-05, "loss": 1.6376, "step": 9404 }, { "epoch": 2.2555002697679996, "grad_norm": 0.6613569855690002, "learning_rate": 2.4838116556079623e-05, "loss": 1.6605, "step": 9406 }, { "epoch": 2.2559798573227026, "grad_norm": 0.6611689329147339, "learning_rate": 2.482212806779119e-05, "loss": 1.6026, "step": 9408 }, { "epoch": 2.2564594448774056, "grad_norm": 0.7253855466842651, "learning_rate": 2.480613957950276e-05, "loss": 1.6754, "step": 9410 }, { "epoch": 2.2569390324321086, "grad_norm": 0.6802476644515991, "learning_rate": 2.4790151091214326e-05, "loss": 1.6344, "step": 9412 }, { "epoch": 2.257418619986811, "grad_norm": 0.70644611120224, "learning_rate": 2.4774162602925892e-05, "loss": 1.6221, "step": 9414 }, { "epoch": 2.257898207541514, "grad_norm": 0.7162779569625854, "learning_rate": 2.475817411463746e-05, "loss": 1.6652, "step": 9416 }, { "epoch": 2.258377795096217, "grad_norm": 0.643575131893158, "learning_rate": 2.474218562634903e-05, "loss": 1.6419, "step": 9418 }, { "epoch": 2.25885738265092, "grad_norm": 0.6788632273674011, "learning_rate": 2.4726197138060596e-05, "loss": 1.5901, "step": 9420 }, { "epoch": 2.259336970205623, "grad_norm": 0.6290987133979797, "learning_rate": 2.4710208649772165e-05, "loss": 1.6338, "step": 9422 }, { "epoch": 2.259816557760326, "grad_norm": 0.6799466013908386, "learning_rate": 2.4694220161483734e-05, "loss": 1.6679, "step": 9424 }, { "epoch": 2.260296145315029, "grad_norm": 0.6972357630729675, "learning_rate": 2.46782316731953e-05, "loss": 1.6654, "step": 9426 
}, { "epoch": 2.260775732869732, "grad_norm": 0.7219063639640808, "learning_rate": 2.466224318490687e-05, "loss": 1.6047, "step": 9428 }, { "epoch": 2.261255320424435, "grad_norm": 0.6871181130409241, "learning_rate": 2.4646254696618438e-05, "loss": 1.6253, "step": 9430 }, { "epoch": 2.261734907979138, "grad_norm": 0.8106650710105896, "learning_rate": 2.4630266208330004e-05, "loss": 1.625, "step": 9432 }, { "epoch": 2.262214495533841, "grad_norm": 0.6391503214836121, "learning_rate": 2.461427772004157e-05, "loss": 1.6394, "step": 9434 }, { "epoch": 2.262694083088544, "grad_norm": 0.6522243022918701, "learning_rate": 2.459828923175314e-05, "loss": 1.6103, "step": 9436 }, { "epoch": 2.263173670643247, "grad_norm": 0.6940624713897705, "learning_rate": 2.4582300743464708e-05, "loss": 1.6268, "step": 9438 }, { "epoch": 2.26365325819795, "grad_norm": 0.7826984524726868, "learning_rate": 2.4566312255176273e-05, "loss": 1.6873, "step": 9440 }, { "epoch": 2.264132845752653, "grad_norm": 0.7200073599815369, "learning_rate": 2.4550323766887842e-05, "loss": 1.5902, "step": 9442 }, { "epoch": 2.264612433307356, "grad_norm": 0.6728625297546387, "learning_rate": 2.453433527859941e-05, "loss": 1.5932, "step": 9444 }, { "epoch": 2.265092020862059, "grad_norm": 0.7987384796142578, "learning_rate": 2.4518346790310977e-05, "loss": 1.5896, "step": 9446 }, { "epoch": 2.2655716084167614, "grad_norm": 0.7584971189498901, "learning_rate": 2.4502358302022546e-05, "loss": 1.6189, "step": 9448 }, { "epoch": 2.2660511959714644, "grad_norm": 0.9870505928993225, "learning_rate": 2.4486369813734112e-05, "loss": 1.6202, "step": 9450 }, { "epoch": 2.2665307835261674, "grad_norm": 0.7287760972976685, "learning_rate": 2.4470381325445678e-05, "loss": 1.6282, "step": 9452 }, { "epoch": 2.2670103710808704, "grad_norm": 0.7689207792282104, "learning_rate": 2.4454392837157247e-05, "loss": 1.6594, "step": 9454 }, { "epoch": 2.2674899586355735, "grad_norm": 0.694817066192627, "learning_rate": 
2.4438404348868816e-05, "loss": 1.5821, "step": 9456 }, { "epoch": 2.2679695461902765, "grad_norm": 0.710868239402771, "learning_rate": 2.442241586058038e-05, "loss": 1.6148, "step": 9458 }, { "epoch": 2.2684491337449795, "grad_norm": 0.685296893119812, "learning_rate": 2.440642737229195e-05, "loss": 1.6458, "step": 9460 }, { "epoch": 2.268928721299682, "grad_norm": 0.6788715124130249, "learning_rate": 2.439043888400352e-05, "loss": 1.6375, "step": 9462 }, { "epoch": 2.269408308854385, "grad_norm": 0.6098292469978333, "learning_rate": 2.437445039571509e-05, "loss": 1.5576, "step": 9464 }, { "epoch": 2.269887896409088, "grad_norm": 0.6630106568336487, "learning_rate": 2.4358461907426655e-05, "loss": 1.6038, "step": 9466 }, { "epoch": 2.270367483963791, "grad_norm": 0.7169865965843201, "learning_rate": 2.434247341913822e-05, "loss": 1.637, "step": 9468 }, { "epoch": 2.270847071518494, "grad_norm": 0.7615345120429993, "learning_rate": 2.432648493084979e-05, "loss": 1.5773, "step": 9470 }, { "epoch": 2.271326659073197, "grad_norm": 0.695539653301239, "learning_rate": 2.4310496442561355e-05, "loss": 1.6147, "step": 9472 }, { "epoch": 2.2718062466279, "grad_norm": 0.71500563621521, "learning_rate": 2.4294507954272924e-05, "loss": 1.63, "step": 9474 }, { "epoch": 2.272285834182603, "grad_norm": 0.7455164790153503, "learning_rate": 2.4278519465984493e-05, "loss": 1.6348, "step": 9476 }, { "epoch": 2.272765421737306, "grad_norm": 0.7168025374412537, "learning_rate": 2.426253097769606e-05, "loss": 1.6501, "step": 9478 }, { "epoch": 2.2732450092920087, "grad_norm": 0.8510520458221436, "learning_rate": 2.4246542489407628e-05, "loss": 1.6065, "step": 9480 }, { "epoch": 2.2737245968467117, "grad_norm": 0.755222737789154, "learning_rate": 2.4230554001119197e-05, "loss": 1.5942, "step": 9482 }, { "epoch": 2.2742041844014147, "grad_norm": 0.7755987644195557, "learning_rate": 2.4214565512830763e-05, "loss": 1.6363, "step": 9484 }, { "epoch": 2.2746837719561177, "grad_norm": 
0.7332563996315002, "learning_rate": 2.419857702454233e-05, "loss": 1.6444, "step": 9486 }, { "epoch": 2.2751633595108207, "grad_norm": 0.8362389206886292, "learning_rate": 2.4182588536253898e-05, "loss": 1.6298, "step": 9488 }, { "epoch": 2.2756429470655237, "grad_norm": 0.6630945205688477, "learning_rate": 2.4166600047965467e-05, "loss": 1.5769, "step": 9490 }, { "epoch": 2.2761225346202267, "grad_norm": 0.7929099202156067, "learning_rate": 2.4150611559677032e-05, "loss": 1.6917, "step": 9492 }, { "epoch": 2.2766021221749297, "grad_norm": 0.8471056818962097, "learning_rate": 2.41346230713886e-05, "loss": 1.636, "step": 9494 }, { "epoch": 2.2770817097296323, "grad_norm": 0.7016795873641968, "learning_rate": 2.411863458310017e-05, "loss": 1.6667, "step": 9496 }, { "epoch": 2.2775612972843353, "grad_norm": 0.6910960078239441, "learning_rate": 2.4102646094811736e-05, "loss": 1.6289, "step": 9498 }, { "epoch": 2.2780408848390383, "grad_norm": 0.6698476076126099, "learning_rate": 2.4086657606523305e-05, "loss": 1.6291, "step": 9500 }, { "epoch": 2.2785204723937413, "grad_norm": 0.7256120443344116, "learning_rate": 2.4070669118234874e-05, "loss": 1.6629, "step": 9502 }, { "epoch": 2.2790000599484443, "grad_norm": 0.6930549740791321, "learning_rate": 2.4054680629946437e-05, "loss": 1.5816, "step": 9504 }, { "epoch": 2.2794796475031474, "grad_norm": 0.6666162014007568, "learning_rate": 2.4038692141658006e-05, "loss": 1.5949, "step": 9506 }, { "epoch": 2.2799592350578504, "grad_norm": 0.8139579892158508, "learning_rate": 2.4022703653369575e-05, "loss": 1.7094, "step": 9508 }, { "epoch": 2.2804388226125534, "grad_norm": 0.72346031665802, "learning_rate": 2.400671516508114e-05, "loss": 1.6168, "step": 9510 }, { "epoch": 2.2809184101672564, "grad_norm": 0.6481231451034546, "learning_rate": 2.399072667679271e-05, "loss": 1.5692, "step": 9512 }, { "epoch": 2.281397997721959, "grad_norm": 0.6271021962165833, "learning_rate": 2.397473818850428e-05, "loss": 1.6227, "step": 9514 }, 
{ "epoch": 2.281877585276662, "grad_norm": 0.6546706557273865, "learning_rate": 2.3958749700215848e-05, "loss": 1.5991, "step": 9516 }, { "epoch": 2.282357172831365, "grad_norm": 0.7063210010528564, "learning_rate": 2.3942761211927414e-05, "loss": 1.6298, "step": 9518 }, { "epoch": 2.282836760386068, "grad_norm": 0.7275537848472595, "learning_rate": 2.3926772723638983e-05, "loss": 1.6652, "step": 9520 }, { "epoch": 2.283316347940771, "grad_norm": 0.6423574090003967, "learning_rate": 2.3910784235350552e-05, "loss": 1.6296, "step": 9522 }, { "epoch": 2.283795935495474, "grad_norm": 0.7377068996429443, "learning_rate": 2.3894795747062114e-05, "loss": 1.6884, "step": 9524 }, { "epoch": 2.284275523050177, "grad_norm": 0.6371185183525085, "learning_rate": 2.3878807258773683e-05, "loss": 1.6152, "step": 9526 }, { "epoch": 2.2847551106048796, "grad_norm": 0.7300588488578796, "learning_rate": 2.3862818770485252e-05, "loss": 1.6649, "step": 9528 }, { "epoch": 2.2852346981595826, "grad_norm": 0.794248640537262, "learning_rate": 2.3846830282196818e-05, "loss": 1.5813, "step": 9530 }, { "epoch": 2.2857142857142856, "grad_norm": 0.6460303068161011, "learning_rate": 2.3830841793908387e-05, "loss": 1.6561, "step": 9532 }, { "epoch": 2.2861938732689886, "grad_norm": 0.7299615740776062, "learning_rate": 2.3814853305619956e-05, "loss": 1.6487, "step": 9534 }, { "epoch": 2.2866734608236916, "grad_norm": 0.7057012319564819, "learning_rate": 2.3798864817331522e-05, "loss": 1.5783, "step": 9536 }, { "epoch": 2.2871530483783946, "grad_norm": 0.6611147522926331, "learning_rate": 2.378287632904309e-05, "loss": 1.6629, "step": 9538 }, { "epoch": 2.2876326359330976, "grad_norm": 0.6348873972892761, "learning_rate": 2.376688784075466e-05, "loss": 1.6483, "step": 9540 }, { "epoch": 2.2881122234878006, "grad_norm": 0.6168524622917175, "learning_rate": 2.3750899352466226e-05, "loss": 1.6544, "step": 9542 }, { "epoch": 2.2885918110425036, "grad_norm": 0.6141296029090881, "learning_rate": 
2.373491086417779e-05, "loss": 1.657, "step": 9544 }, { "epoch": 2.289071398597206, "grad_norm": 0.6878377795219421, "learning_rate": 2.371892237588936e-05, "loss": 1.6256, "step": 9546 }, { "epoch": 2.289550986151909, "grad_norm": 0.685857355594635, "learning_rate": 2.370293388760093e-05, "loss": 1.6574, "step": 9548 }, { "epoch": 2.2900305737066122, "grad_norm": 0.6514169573783875, "learning_rate": 2.3686945399312495e-05, "loss": 1.6395, "step": 9550 }, { "epoch": 2.2905101612613152, "grad_norm": 0.6574902534484863, "learning_rate": 2.3670956911024064e-05, "loss": 1.6292, "step": 9552 }, { "epoch": 2.2909897488160182, "grad_norm": 0.6240106821060181, "learning_rate": 2.3654968422735633e-05, "loss": 1.6352, "step": 9554 }, { "epoch": 2.2914693363707213, "grad_norm": 0.7069869637489319, "learning_rate": 2.36389799344472e-05, "loss": 1.6657, "step": 9556 }, { "epoch": 2.2919489239254243, "grad_norm": 0.7971845269203186, "learning_rate": 2.3622991446158768e-05, "loss": 1.6548, "step": 9558 }, { "epoch": 2.2924285114801273, "grad_norm": 0.7524431347846985, "learning_rate": 2.3607002957870334e-05, "loss": 1.6485, "step": 9560 }, { "epoch": 2.29290809903483, "grad_norm": 0.6824484467506409, "learning_rate": 2.3591014469581903e-05, "loss": 1.6072, "step": 9562 }, { "epoch": 2.293387686589533, "grad_norm": 0.6568175554275513, "learning_rate": 2.357502598129347e-05, "loss": 1.6522, "step": 9564 }, { "epoch": 2.293867274144236, "grad_norm": 0.6325098872184753, "learning_rate": 2.3559037493005038e-05, "loss": 1.5799, "step": 9566 }, { "epoch": 2.294346861698939, "grad_norm": 0.6666774749755859, "learning_rate": 2.3543049004716607e-05, "loss": 1.6381, "step": 9568 }, { "epoch": 2.294826449253642, "grad_norm": 0.627038836479187, "learning_rate": 2.3527060516428173e-05, "loss": 1.5655, "step": 9570 }, { "epoch": 2.295306036808345, "grad_norm": 0.6103514432907104, "learning_rate": 2.351107202813974e-05, "loss": 1.5872, "step": 9572 }, { "epoch": 2.295785624363048, "grad_norm": 
0.7044161558151245, "learning_rate": 2.349508353985131e-05, "loss": 1.6057, "step": 9574 }, { "epoch": 2.296265211917751, "grad_norm": 0.7753270268440247, "learning_rate": 2.3479095051562876e-05, "loss": 1.6261, "step": 9576 }, { "epoch": 2.296744799472454, "grad_norm": 0.8279408812522888, "learning_rate": 2.3463106563274442e-05, "loss": 1.665, "step": 9578 }, { "epoch": 2.2972243870271565, "grad_norm": 0.7380911111831665, "learning_rate": 2.344711807498601e-05, "loss": 1.6352, "step": 9580 }, { "epoch": 2.2977039745818595, "grad_norm": 0.7176647186279297, "learning_rate": 2.3431129586697577e-05, "loss": 1.5993, "step": 9582 }, { "epoch": 2.2981835621365625, "grad_norm": 0.6332418322563171, "learning_rate": 2.3415141098409146e-05, "loss": 1.605, "step": 9584 }, { "epoch": 2.2986631496912655, "grad_norm": 0.6474382877349854, "learning_rate": 2.3399152610120715e-05, "loss": 1.6413, "step": 9586 }, { "epoch": 2.2991427372459685, "grad_norm": 0.7949414253234863, "learning_rate": 2.338316412183228e-05, "loss": 1.6154, "step": 9588 }, { "epoch": 2.2996223248006715, "grad_norm": 0.6867544651031494, "learning_rate": 2.336717563354385e-05, "loss": 1.6115, "step": 9590 }, { "epoch": 2.3001019123553745, "grad_norm": 0.7031636834144592, "learning_rate": 2.335118714525542e-05, "loss": 1.5937, "step": 9592 }, { "epoch": 2.300581499910077, "grad_norm": 0.7849412560462952, "learning_rate": 2.3335198656966985e-05, "loss": 1.6374, "step": 9594 }, { "epoch": 2.30106108746478, "grad_norm": 0.6580243110656738, "learning_rate": 2.331921016867855e-05, "loss": 1.622, "step": 9596 }, { "epoch": 2.301540675019483, "grad_norm": 0.6962557435035706, "learning_rate": 2.330322168039012e-05, "loss": 1.6501, "step": 9598 }, { "epoch": 2.302020262574186, "grad_norm": 0.7158783674240112, "learning_rate": 2.328723319210169e-05, "loss": 1.6242, "step": 9600 }, { "epoch": 2.302020262574186, "eval_loss": 1.7165648937225342, "eval_runtime": 331.3682, "eval_samples_per_second": 402.721, 
"eval_steps_per_second": 12.587, "step": 9600 }, { "epoch": 2.302499850128889, "grad_norm": 0.7004644870758057, "learning_rate": 2.3271244703813254e-05, "loss": 1.6609, "step": 9602 }, { "epoch": 2.302979437683592, "grad_norm": 0.7392162680625916, "learning_rate": 2.3255256215524823e-05, "loss": 1.598, "step": 9604 }, { "epoch": 2.303459025238295, "grad_norm": 0.7368196249008179, "learning_rate": 2.3239267727236392e-05, "loss": 1.5842, "step": 9606 }, { "epoch": 2.303938612792998, "grad_norm": 0.709476113319397, "learning_rate": 2.3223279238947958e-05, "loss": 1.6289, "step": 9608 }, { "epoch": 2.304418200347701, "grad_norm": 0.7549755573272705, "learning_rate": 2.3207290750659527e-05, "loss": 1.659, "step": 9610 }, { "epoch": 2.3048977879024037, "grad_norm": 0.7339287400245667, "learning_rate": 2.3191302262371096e-05, "loss": 1.6376, "step": 9612 }, { "epoch": 2.3053773754571067, "grad_norm": 0.6647840738296509, "learning_rate": 2.3175313774082662e-05, "loss": 1.6367, "step": 9614 }, { "epoch": 2.3058569630118098, "grad_norm": 0.7596381902694702, "learning_rate": 2.3159325285794228e-05, "loss": 1.6467, "step": 9616 }, { "epoch": 2.3063365505665128, "grad_norm": 0.6871278285980225, "learning_rate": 2.3143336797505797e-05, "loss": 1.6447, "step": 9618 }, { "epoch": 2.3068161381212158, "grad_norm": 0.6877886056900024, "learning_rate": 2.3127348309217366e-05, "loss": 1.6273, "step": 9620 }, { "epoch": 2.307295725675919, "grad_norm": 0.6756399869918823, "learning_rate": 2.311135982092893e-05, "loss": 1.6023, "step": 9622 }, { "epoch": 2.307775313230622, "grad_norm": 0.7024552822113037, "learning_rate": 2.30953713326405e-05, "loss": 1.6194, "step": 9624 }, { "epoch": 2.308254900785325, "grad_norm": 0.6601521372795105, "learning_rate": 2.307938284435207e-05, "loss": 1.6311, "step": 9626 }, { "epoch": 2.3087344883400274, "grad_norm": 0.8026699423789978, "learning_rate": 2.3063394356063635e-05, "loss": 1.5951, "step": 9628 }, { "epoch": 2.3092140758947304, "grad_norm": 
0.6511561274528503, "learning_rate": 2.3047405867775205e-05, "loss": 1.6138, "step": 9630 }, { "epoch": 2.3096936634494334, "grad_norm": 0.7698425054550171, "learning_rate": 2.303141737948677e-05, "loss": 1.6171, "step": 9632 }, { "epoch": 2.3101732510041364, "grad_norm": 0.7333431243896484, "learning_rate": 2.3015428891198336e-05, "loss": 1.638, "step": 9634 }, { "epoch": 2.3106528385588394, "grad_norm": 0.7092655897140503, "learning_rate": 2.2999440402909905e-05, "loss": 1.6328, "step": 9636 }, { "epoch": 2.3111324261135424, "grad_norm": 0.6570863723754883, "learning_rate": 2.2983451914621474e-05, "loss": 1.5903, "step": 9638 }, { "epoch": 2.3116120136682454, "grad_norm": 0.7172690629959106, "learning_rate": 2.296746342633304e-05, "loss": 1.6297, "step": 9640 }, { "epoch": 2.3120916012229484, "grad_norm": 0.6720386743545532, "learning_rate": 2.295147493804461e-05, "loss": 1.6423, "step": 9642 }, { "epoch": 2.3125711887776514, "grad_norm": 0.6785076856613159, "learning_rate": 2.2935486449756178e-05, "loss": 1.5977, "step": 9644 }, { "epoch": 2.313050776332354, "grad_norm": 0.7413010001182556, "learning_rate": 2.2919497961467744e-05, "loss": 1.6463, "step": 9646 }, { "epoch": 2.313530363887057, "grad_norm": 0.687010645866394, "learning_rate": 2.2903509473179313e-05, "loss": 1.6191, "step": 9648 }, { "epoch": 2.31400995144176, "grad_norm": 0.6582628488540649, "learning_rate": 2.288752098489088e-05, "loss": 1.6311, "step": 9650 }, { "epoch": 2.314489538996463, "grad_norm": 0.7040482759475708, "learning_rate": 2.2871532496602448e-05, "loss": 1.5958, "step": 9652 }, { "epoch": 2.314969126551166, "grad_norm": 0.731720507144928, "learning_rate": 2.2855544008314013e-05, "loss": 1.6232, "step": 9654 }, { "epoch": 2.315448714105869, "grad_norm": 0.7167035937309265, "learning_rate": 2.2839555520025582e-05, "loss": 1.6669, "step": 9656 }, { "epoch": 2.315928301660572, "grad_norm": 0.7809245586395264, "learning_rate": 2.282356703173715e-05, "loss": 1.6107, "step": 9658 }, { 
"epoch": 2.3164078892152746, "grad_norm": 0.6475433707237244, "learning_rate": 2.2807578543448717e-05, "loss": 1.592, "step": 9660 }, { "epoch": 2.3168874767699776, "grad_norm": 0.8259770274162292, "learning_rate": 2.2791590055160286e-05, "loss": 1.6307, "step": 9662 }, { "epoch": 2.3173670643246806, "grad_norm": 0.7055097222328186, "learning_rate": 2.2775601566871855e-05, "loss": 1.6738, "step": 9664 }, { "epoch": 2.3178466518793837, "grad_norm": 0.7036665081977844, "learning_rate": 2.275961307858342e-05, "loss": 1.6312, "step": 9666 }, { "epoch": 2.3183262394340867, "grad_norm": 0.7568461298942566, "learning_rate": 2.2743624590294987e-05, "loss": 1.5894, "step": 9668 }, { "epoch": 2.3188058269887897, "grad_norm": 0.7047367095947266, "learning_rate": 2.2727636102006556e-05, "loss": 1.5906, "step": 9670 }, { "epoch": 2.3192854145434927, "grad_norm": 0.6948134303092957, "learning_rate": 2.2711647613718125e-05, "loss": 1.6465, "step": 9672 }, { "epoch": 2.3197650020981957, "grad_norm": 0.7200517654418945, "learning_rate": 2.269565912542969e-05, "loss": 1.6771, "step": 9674 }, { "epoch": 2.3202445896528987, "grad_norm": 0.6763641238212585, "learning_rate": 2.267967063714126e-05, "loss": 1.6033, "step": 9676 }, { "epoch": 2.3207241772076013, "grad_norm": 0.6334316730499268, "learning_rate": 2.266368214885283e-05, "loss": 1.6108, "step": 9678 }, { "epoch": 2.3212037647623043, "grad_norm": 0.6529021263122559, "learning_rate": 2.2647693660564394e-05, "loss": 1.5912, "step": 9680 }, { "epoch": 2.3216833523170073, "grad_norm": 0.7829843759536743, "learning_rate": 2.2631705172275964e-05, "loss": 1.644, "step": 9682 }, { "epoch": 2.3221629398717103, "grad_norm": 0.694158673286438, "learning_rate": 2.2615716683987533e-05, "loss": 1.6265, "step": 9684 }, { "epoch": 2.3226425274264133, "grad_norm": 0.7620572447776794, "learning_rate": 2.2599728195699095e-05, "loss": 1.638, "step": 9686 }, { "epoch": 2.3231221149811163, "grad_norm": 0.6815117597579956, "learning_rate": 
2.2583739707410664e-05, "loss": 1.6384, "step": 9688 }, { "epoch": 2.3236017025358193, "grad_norm": 0.6927556991577148, "learning_rate": 2.2567751219122233e-05, "loss": 1.6658, "step": 9690 }, { "epoch": 2.3240812900905223, "grad_norm": 0.7416291832923889, "learning_rate": 2.25517627308338e-05, "loss": 1.5899, "step": 9692 }, { "epoch": 2.324560877645225, "grad_norm": 0.7578151822090149, "learning_rate": 2.2535774242545368e-05, "loss": 1.6924, "step": 9694 }, { "epoch": 2.325040465199928, "grad_norm": 0.6926432251930237, "learning_rate": 2.2519785754256937e-05, "loss": 1.6555, "step": 9696 }, { "epoch": 2.325520052754631, "grad_norm": 0.7182091474533081, "learning_rate": 2.2503797265968503e-05, "loss": 1.6535, "step": 9698 }, { "epoch": 2.325999640309334, "grad_norm": 0.6011888384819031, "learning_rate": 2.2487808777680072e-05, "loss": 1.6024, "step": 9700 }, { "epoch": 2.326479227864037, "grad_norm": 0.6835609078407288, "learning_rate": 2.247182028939164e-05, "loss": 1.6348, "step": 9702 }, { "epoch": 2.32695881541874, "grad_norm": 0.6775954961776733, "learning_rate": 2.2455831801103207e-05, "loss": 1.6031, "step": 9704 }, { "epoch": 2.327438402973443, "grad_norm": 0.7147383689880371, "learning_rate": 2.2439843312814772e-05, "loss": 1.6345, "step": 9706 }, { "epoch": 2.327917990528146, "grad_norm": 0.7112233638763428, "learning_rate": 2.242385482452634e-05, "loss": 1.697, "step": 9708 }, { "epoch": 2.328397578082849, "grad_norm": 0.7590519189834595, "learning_rate": 2.240786633623791e-05, "loss": 1.6428, "step": 9710 }, { "epoch": 2.3288771656375515, "grad_norm": 0.7449796795845032, "learning_rate": 2.2391877847949476e-05, "loss": 1.5871, "step": 9712 }, { "epoch": 2.3293567531922545, "grad_norm": 0.728531539440155, "learning_rate": 2.2375889359661045e-05, "loss": 1.6603, "step": 9714 }, { "epoch": 2.3298363407469576, "grad_norm": 0.6635847687721252, "learning_rate": 2.2359900871372614e-05, "loss": 1.6397, "step": 9716 }, { "epoch": 2.3303159283016606, 
"grad_norm": 0.754403293132782, "learning_rate": 2.234391238308418e-05, "loss": 1.6576, "step": 9718 }, { "epoch": 2.3307955158563636, "grad_norm": 0.7614685297012329, "learning_rate": 2.232792389479575e-05, "loss": 1.6207, "step": 9720 }, { "epoch": 2.3312751034110666, "grad_norm": 0.7222563028335571, "learning_rate": 2.2311935406507318e-05, "loss": 1.6186, "step": 9722 }, { "epoch": 2.3317546909657696, "grad_norm": 0.7185641527175903, "learning_rate": 2.2295946918218884e-05, "loss": 1.6027, "step": 9724 }, { "epoch": 2.332234278520472, "grad_norm": 0.6481488347053528, "learning_rate": 2.227995842993045e-05, "loss": 1.6419, "step": 9726 }, { "epoch": 2.332713866075175, "grad_norm": 0.7641459703445435, "learning_rate": 2.226396994164202e-05, "loss": 1.6231, "step": 9728 }, { "epoch": 2.333193453629878, "grad_norm": 0.6749909520149231, "learning_rate": 2.2247981453353588e-05, "loss": 1.6423, "step": 9730 }, { "epoch": 2.333673041184581, "grad_norm": 0.7829117178916931, "learning_rate": 2.2231992965065153e-05, "loss": 1.6292, "step": 9732 }, { "epoch": 2.334152628739284, "grad_norm": 0.6413714289665222, "learning_rate": 2.2216004476776723e-05, "loss": 1.6226, "step": 9734 }, { "epoch": 2.334632216293987, "grad_norm": 0.7547104358673096, "learning_rate": 2.220001598848829e-05, "loss": 1.637, "step": 9736 }, { "epoch": 2.33511180384869, "grad_norm": 0.6482092142105103, "learning_rate": 2.2184027500199857e-05, "loss": 1.5835, "step": 9738 }, { "epoch": 2.3355913914033932, "grad_norm": 0.6429774165153503, "learning_rate": 2.2168039011911426e-05, "loss": 1.6225, "step": 9740 }, { "epoch": 2.3360709789580962, "grad_norm": 0.7161058783531189, "learning_rate": 2.2152050523622992e-05, "loss": 1.6555, "step": 9742 }, { "epoch": 2.336550566512799, "grad_norm": 0.6819443702697754, "learning_rate": 2.2136062035334558e-05, "loss": 1.6172, "step": 9744 }, { "epoch": 2.337030154067502, "grad_norm": 0.8020004630088806, "learning_rate": 2.2120073547046127e-05, "loss": 1.6284, "step": 
9746 }, { "epoch": 2.337509741622205, "grad_norm": 0.7330119013786316, "learning_rate": 2.2104085058757696e-05, "loss": 1.6496, "step": 9748 }, { "epoch": 2.337989329176908, "grad_norm": 0.679638147354126, "learning_rate": 2.2088096570469262e-05, "loss": 1.5932, "step": 9750 }, { "epoch": 2.338468916731611, "grad_norm": 0.7314726710319519, "learning_rate": 2.207210808218083e-05, "loss": 1.6713, "step": 9752 }, { "epoch": 2.338948504286314, "grad_norm": 0.7635046243667603, "learning_rate": 2.20561195938924e-05, "loss": 1.6117, "step": 9754 }, { "epoch": 2.339428091841017, "grad_norm": 0.7697387337684631, "learning_rate": 2.2040131105603966e-05, "loss": 1.6274, "step": 9756 }, { "epoch": 2.33990767939572, "grad_norm": 0.6913846731185913, "learning_rate": 2.2024142617315535e-05, "loss": 1.6508, "step": 9758 }, { "epoch": 2.3403872669504224, "grad_norm": 0.7647649645805359, "learning_rate": 2.20081541290271e-05, "loss": 1.6445, "step": 9760 }, { "epoch": 2.3408668545051254, "grad_norm": 0.6780459880828857, "learning_rate": 2.199216564073867e-05, "loss": 1.6735, "step": 9762 }, { "epoch": 2.3413464420598284, "grad_norm": 0.7263216972351074, "learning_rate": 2.1976177152450235e-05, "loss": 1.6884, "step": 9764 }, { "epoch": 2.3418260296145315, "grad_norm": 0.7125828862190247, "learning_rate": 2.1960188664161804e-05, "loss": 1.5979, "step": 9766 }, { "epoch": 2.3423056171692345, "grad_norm": 0.6628342270851135, "learning_rate": 2.1944200175873373e-05, "loss": 1.5843, "step": 9768 }, { "epoch": 2.3427852047239375, "grad_norm": 0.7311340570449829, "learning_rate": 2.192821168758494e-05, "loss": 1.6819, "step": 9770 }, { "epoch": 2.3432647922786405, "grad_norm": 0.6758164167404175, "learning_rate": 2.1912223199296508e-05, "loss": 1.6204, "step": 9772 }, { "epoch": 2.3437443798333435, "grad_norm": 0.7619339227676392, "learning_rate": 2.1896234711008077e-05, "loss": 1.5958, "step": 9774 }, { "epoch": 2.3442239673880465, "grad_norm": 0.8500030636787415, "learning_rate": 
2.1880246222719643e-05, "loss": 1.6134, "step": 9776 }, { "epoch": 2.344703554942749, "grad_norm": 0.692648708820343, "learning_rate": 2.186425773443121e-05, "loss": 1.6236, "step": 9778 }, { "epoch": 2.345183142497452, "grad_norm": 0.7490618824958801, "learning_rate": 2.1848269246142778e-05, "loss": 1.5873, "step": 9780 }, { "epoch": 2.345662730052155, "grad_norm": 0.7710291147232056, "learning_rate": 2.1832280757854347e-05, "loss": 1.6574, "step": 9782 }, { "epoch": 2.346142317606858, "grad_norm": 0.7601223587989807, "learning_rate": 2.1816292269565912e-05, "loss": 1.6259, "step": 9784 }, { "epoch": 2.346621905161561, "grad_norm": 0.7255996465682983, "learning_rate": 2.180030378127748e-05, "loss": 1.6333, "step": 9786 }, { "epoch": 2.347101492716264, "grad_norm": 0.777911901473999, "learning_rate": 2.178431529298905e-05, "loss": 1.6449, "step": 9788 }, { "epoch": 2.347581080270967, "grad_norm": 0.821625828742981, "learning_rate": 2.1768326804700616e-05, "loss": 1.5956, "step": 9790 }, { "epoch": 2.3480606678256697, "grad_norm": 0.7034921050071716, "learning_rate": 2.1752338316412185e-05, "loss": 1.6066, "step": 9792 }, { "epoch": 2.3485402553803727, "grad_norm": 0.6973909139633179, "learning_rate": 2.1736349828123755e-05, "loss": 1.6152, "step": 9794 }, { "epoch": 2.3490198429350757, "grad_norm": 0.6861059069633484, "learning_rate": 2.1720361339835317e-05, "loss": 1.6134, "step": 9796 }, { "epoch": 2.3494994304897787, "grad_norm": 0.6648637652397156, "learning_rate": 2.1704372851546886e-05, "loss": 1.6329, "step": 9798 }, { "epoch": 2.3499790180444817, "grad_norm": 0.743447482585907, "learning_rate": 2.1688384363258455e-05, "loss": 1.6636, "step": 9800 }, { "epoch": 2.3504586055991847, "grad_norm": 0.6701830625534058, "learning_rate": 2.167239587497002e-05, "loss": 1.6664, "step": 9802 }, { "epoch": 2.3509381931538877, "grad_norm": 0.7464039921760559, "learning_rate": 2.165640738668159e-05, "loss": 1.5805, "step": 9804 }, { "epoch": 2.3514177807085908, 
"grad_norm": 0.635672926902771, "learning_rate": 2.164041889839316e-05, "loss": 1.5551, "step": 9806 }, { "epoch": 2.3518973682632938, "grad_norm": 0.7225595712661743, "learning_rate": 2.1624430410104725e-05, "loss": 1.6358, "step": 9808 }, { "epoch": 2.3523769558179963, "grad_norm": 0.6773654222488403, "learning_rate": 2.1608441921816294e-05, "loss": 1.6526, "step": 9810 }, { "epoch": 2.3528565433726993, "grad_norm": 0.7089298367500305, "learning_rate": 2.1592453433527863e-05, "loss": 1.6185, "step": 9812 }, { "epoch": 2.3533361309274023, "grad_norm": 0.7489103674888611, "learning_rate": 2.157646494523943e-05, "loss": 1.6338, "step": 9814 }, { "epoch": 2.3538157184821054, "grad_norm": 0.7660675048828125, "learning_rate": 2.1560476456950994e-05, "loss": 1.5911, "step": 9816 }, { "epoch": 2.3542953060368084, "grad_norm": 0.71012943983078, "learning_rate": 2.1544487968662563e-05, "loss": 1.6796, "step": 9818 }, { "epoch": 2.3547748935915114, "grad_norm": 0.8298568725585938, "learning_rate": 2.1528499480374132e-05, "loss": 1.5985, "step": 9820 }, { "epoch": 2.3552544811462144, "grad_norm": 0.7168521881103516, "learning_rate": 2.1512510992085698e-05, "loss": 1.639, "step": 9822 }, { "epoch": 2.3557340687009174, "grad_norm": 0.8218011260032654, "learning_rate": 2.1496522503797267e-05, "loss": 1.6373, "step": 9824 }, { "epoch": 2.35621365625562, "grad_norm": 0.7021591663360596, "learning_rate": 2.1480534015508836e-05, "loss": 1.6248, "step": 9826 }, { "epoch": 2.356693243810323, "grad_norm": 0.673957109451294, "learning_rate": 2.1464545527220402e-05, "loss": 1.6284, "step": 9828 }, { "epoch": 2.357172831365026, "grad_norm": 0.7464858889579773, "learning_rate": 2.144855703893197e-05, "loss": 1.6635, "step": 9830 }, { "epoch": 2.357652418919729, "grad_norm": 0.6657732129096985, "learning_rate": 2.1432568550643537e-05, "loss": 1.6288, "step": 9832 }, { "epoch": 2.358132006474432, "grad_norm": 0.734073281288147, "learning_rate": 2.1416580062355106e-05, "loss": 1.634, "step": 
9834 }, { "epoch": 2.358611594029135, "grad_norm": 0.7025119066238403, "learning_rate": 2.140059157406667e-05, "loss": 1.6298, "step": 9836 }, { "epoch": 2.359091181583838, "grad_norm": 0.657155454158783, "learning_rate": 2.138460308577824e-05, "loss": 1.6482, "step": 9838 }, { "epoch": 2.359570769138541, "grad_norm": 0.6819760203361511, "learning_rate": 2.136861459748981e-05, "loss": 1.6301, "step": 9840 }, { "epoch": 2.360050356693244, "grad_norm": 0.6323127150535583, "learning_rate": 2.1352626109201375e-05, "loss": 1.6039, "step": 9842 }, { "epoch": 2.3605299442479466, "grad_norm": 0.6292808055877686, "learning_rate": 2.1336637620912944e-05, "loss": 1.6362, "step": 9844 }, { "epoch": 2.3610095318026496, "grad_norm": 0.675114095211029, "learning_rate": 2.1320649132624514e-05, "loss": 1.6762, "step": 9846 }, { "epoch": 2.3614891193573526, "grad_norm": 0.8160675168037415, "learning_rate": 2.130466064433608e-05, "loss": 1.6304, "step": 9848 }, { "epoch": 2.3619687069120556, "grad_norm": 0.7248559594154358, "learning_rate": 2.1288672156047645e-05, "loss": 1.6042, "step": 9850 }, { "epoch": 2.3624482944667586, "grad_norm": 0.8044885396957397, "learning_rate": 2.1272683667759214e-05, "loss": 1.626, "step": 9852 }, { "epoch": 2.3629278820214616, "grad_norm": 0.693099319934845, "learning_rate": 2.125669517947078e-05, "loss": 1.6438, "step": 9854 }, { "epoch": 2.3634074695761647, "grad_norm": 0.6903206706047058, "learning_rate": 2.124070669118235e-05, "loss": 1.6411, "step": 9856 }, { "epoch": 2.363887057130867, "grad_norm": 0.7063393592834473, "learning_rate": 2.1224718202893918e-05, "loss": 1.6338, "step": 9858 }, { "epoch": 2.3643666446855702, "grad_norm": 0.822384774684906, "learning_rate": 2.1208729714605484e-05, "loss": 1.6292, "step": 9860 }, { "epoch": 2.3648462322402732, "grad_norm": 0.8553763628005981, "learning_rate": 2.1192741226317053e-05, "loss": 1.6282, "step": 9862 }, { "epoch": 2.3653258197949762, "grad_norm": 0.7248478531837463, "learning_rate": 
2.1176752738028622e-05, "loss": 1.6299, "step": 9864 }, { "epoch": 2.3658054073496793, "grad_norm": 0.7415565848350525, "learning_rate": 2.116076424974019e-05, "loss": 1.574, "step": 9866 }, { "epoch": 2.3662849949043823, "grad_norm": 0.7068228125572205, "learning_rate": 2.1144775761451753e-05, "loss": 1.6499, "step": 9868 }, { "epoch": 2.3667645824590853, "grad_norm": 0.621998131275177, "learning_rate": 2.1128787273163322e-05, "loss": 1.5854, "step": 9870 }, { "epoch": 2.3672441700137883, "grad_norm": 0.7757390737533569, "learning_rate": 2.111279878487489e-05, "loss": 1.6043, "step": 9872 }, { "epoch": 2.3677237575684913, "grad_norm": 0.7877200841903687, "learning_rate": 2.1096810296586457e-05, "loss": 1.6193, "step": 9874 }, { "epoch": 2.368203345123194, "grad_norm": 0.8090939521789551, "learning_rate": 2.1080821808298026e-05, "loss": 1.6104, "step": 9876 }, { "epoch": 2.368682932677897, "grad_norm": 0.7006146907806396, "learning_rate": 2.1064833320009595e-05, "loss": 1.6695, "step": 9878 }, { "epoch": 2.3691625202326, "grad_norm": 0.711831271648407, "learning_rate": 2.104884483172116e-05, "loss": 1.6105, "step": 9880 }, { "epoch": 2.369642107787303, "grad_norm": 0.670779287815094, "learning_rate": 2.103285634343273e-05, "loss": 1.6303, "step": 9882 }, { "epoch": 2.370121695342006, "grad_norm": 0.6674257516860962, "learning_rate": 2.10168678551443e-05, "loss": 1.6022, "step": 9884 }, { "epoch": 2.370601282896709, "grad_norm": 0.7408220171928406, "learning_rate": 2.1000879366855865e-05, "loss": 1.6578, "step": 9886 }, { "epoch": 2.371080870451412, "grad_norm": 0.8723045587539673, "learning_rate": 2.098489087856743e-05, "loss": 1.5677, "step": 9888 }, { "epoch": 2.371560458006115, "grad_norm": 0.6483781337738037, "learning_rate": 2.0968902390279e-05, "loss": 1.5826, "step": 9890 }, { "epoch": 2.3720400455608175, "grad_norm": 0.7538403868675232, "learning_rate": 2.095291390199057e-05, "loss": 1.6379, "step": 9892 }, { "epoch": 2.3725196331155205, "grad_norm": 
0.6815696954727173, "learning_rate": 2.0936925413702134e-05, "loss": 1.6136, "step": 9894 }, { "epoch": 2.3729992206702235, "grad_norm": 0.749081552028656, "learning_rate": 2.0920936925413703e-05, "loss": 1.6103, "step": 9896 }, { "epoch": 2.3734788082249265, "grad_norm": 0.7316029071807861, "learning_rate": 2.0904948437125273e-05, "loss": 1.611, "step": 9898 }, { "epoch": 2.3739583957796295, "grad_norm": 0.7820071578025818, "learning_rate": 2.0888959948836838e-05, "loss": 1.611, "step": 9900 }, { "epoch": 2.3744379833343325, "grad_norm": 0.6730666756629944, "learning_rate": 2.0872971460548407e-05, "loss": 1.6557, "step": 9902 }, { "epoch": 2.3749175708890355, "grad_norm": 0.7028372883796692, "learning_rate": 2.0856982972259973e-05, "loss": 1.6535, "step": 9904 }, { "epoch": 2.3753971584437386, "grad_norm": 0.7340073585510254, "learning_rate": 2.084099448397154e-05, "loss": 1.6341, "step": 9906 }, { "epoch": 2.3758767459984416, "grad_norm": 0.8210899829864502, "learning_rate": 2.0825005995683108e-05, "loss": 1.6612, "step": 9908 }, { "epoch": 2.376356333553144, "grad_norm": 0.6747004985809326, "learning_rate": 2.0809017507394677e-05, "loss": 1.5703, "step": 9910 }, { "epoch": 2.376835921107847, "grad_norm": 0.70277339220047, "learning_rate": 2.0793029019106246e-05, "loss": 1.6459, "step": 9912 }, { "epoch": 2.37731550866255, "grad_norm": 0.7040407657623291, "learning_rate": 2.0777040530817812e-05, "loss": 1.6312, "step": 9914 }, { "epoch": 2.377795096217253, "grad_norm": 0.7331043481826782, "learning_rate": 2.076105204252938e-05, "loss": 1.6374, "step": 9916 }, { "epoch": 2.378274683771956, "grad_norm": 0.6797980070114136, "learning_rate": 2.074506355424095e-05, "loss": 1.6126, "step": 9918 }, { "epoch": 2.378754271326659, "grad_norm": 0.6885566115379333, "learning_rate": 2.0729075065952516e-05, "loss": 1.6424, "step": 9920 }, { "epoch": 2.379233858881362, "grad_norm": 0.6361182928085327, "learning_rate": 2.0713086577664085e-05, "loss": 1.7005, "step": 9922 }, { 
"epoch": 2.3797134464360647, "grad_norm": 0.6785575151443481, "learning_rate": 2.069709808937565e-05, "loss": 1.5902, "step": 9924 }, { "epoch": 2.3801930339907678, "grad_norm": 0.721722424030304, "learning_rate": 2.0681109601087216e-05, "loss": 1.5826, "step": 9926 }, { "epoch": 2.3806726215454708, "grad_norm": 0.689137876033783, "learning_rate": 2.0665121112798785e-05, "loss": 1.6478, "step": 9928 }, { "epoch": 2.3811522091001738, "grad_norm": 0.7494526505470276, "learning_rate": 2.0649132624510354e-05, "loss": 1.6408, "step": 9930 }, { "epoch": 2.381631796654877, "grad_norm": 0.7581151723861694, "learning_rate": 2.063314413622192e-05, "loss": 1.64, "step": 9932 }, { "epoch": 2.38211138420958, "grad_norm": 1.0768314599990845, "learning_rate": 2.061715564793349e-05, "loss": 1.5993, "step": 9934 }, { "epoch": 2.382590971764283, "grad_norm": 0.701602578163147, "learning_rate": 2.0601167159645058e-05, "loss": 1.6389, "step": 9936 }, { "epoch": 2.383070559318986, "grad_norm": 0.7022370100021362, "learning_rate": 2.0585178671356624e-05, "loss": 1.6469, "step": 9938 }, { "epoch": 2.383550146873689, "grad_norm": 0.7417654991149902, "learning_rate": 2.0569190183068193e-05, "loss": 1.6399, "step": 9940 }, { "epoch": 2.3840297344283914, "grad_norm": 0.6619227528572083, "learning_rate": 2.055320169477976e-05, "loss": 1.598, "step": 9942 }, { "epoch": 2.3845093219830944, "grad_norm": 0.7332239151000977, "learning_rate": 2.0537213206491328e-05, "loss": 1.6305, "step": 9944 }, { "epoch": 2.3849889095377974, "grad_norm": 0.6894728541374207, "learning_rate": 2.0521224718202893e-05, "loss": 1.6499, "step": 9946 }, { "epoch": 2.3854684970925004, "grad_norm": 0.7377683520317078, "learning_rate": 2.0505236229914463e-05, "loss": 1.656, "step": 9948 }, { "epoch": 2.3859480846472034, "grad_norm": 0.6960068345069885, "learning_rate": 2.048924774162603e-05, "loss": 1.6159, "step": 9950 }, { "epoch": 2.3864276722019064, "grad_norm": 0.6670417189598083, "learning_rate": 
2.0473259253337597e-05, "loss": 1.6733, "step": 9952 }, { "epoch": 2.3869072597566094, "grad_norm": 0.8475309610366821, "learning_rate": 2.0457270765049166e-05, "loss": 1.6076, "step": 9954 }, { "epoch": 2.3873868473113125, "grad_norm": 0.7376111149787903, "learning_rate": 2.0441282276760735e-05, "loss": 1.658, "step": 9956 }, { "epoch": 2.387866434866015, "grad_norm": 0.6852994561195374, "learning_rate": 2.04252937884723e-05, "loss": 1.6415, "step": 9958 }, { "epoch": 2.388346022420718, "grad_norm": 0.7571813464164734, "learning_rate": 2.0409305300183867e-05, "loss": 1.6307, "step": 9960 }, { "epoch": 2.388825609975421, "grad_norm": 0.666217565536499, "learning_rate": 2.0393316811895436e-05, "loss": 1.5935, "step": 9962 }, { "epoch": 2.389305197530124, "grad_norm": 0.6363504528999329, "learning_rate": 2.0377328323607005e-05, "loss": 1.6543, "step": 9964 }, { "epoch": 2.389784785084827, "grad_norm": 0.7608767151832581, "learning_rate": 2.036133983531857e-05, "loss": 1.5747, "step": 9966 }, { "epoch": 2.39026437263953, "grad_norm": 0.7081926465034485, "learning_rate": 2.034535134703014e-05, "loss": 1.6543, "step": 9968 }, { "epoch": 2.390743960194233, "grad_norm": 0.6585273742675781, "learning_rate": 2.032936285874171e-05, "loss": 1.6333, "step": 9970 }, { "epoch": 2.391223547748936, "grad_norm": 0.6887211203575134, "learning_rate": 2.0313374370453275e-05, "loss": 1.625, "step": 9972 }, { "epoch": 2.391703135303639, "grad_norm": 0.6671034693717957, "learning_rate": 2.0297385882164844e-05, "loss": 1.6535, "step": 9974 }, { "epoch": 2.3921827228583417, "grad_norm": 0.7101598381996155, "learning_rate": 2.0281397393876413e-05, "loss": 1.6599, "step": 9976 }, { "epoch": 2.3926623104130447, "grad_norm": 0.7036350965499878, "learning_rate": 2.0265408905587975e-05, "loss": 1.6209, "step": 9978 }, { "epoch": 2.3931418979677477, "grad_norm": 0.7524555921554565, "learning_rate": 2.0249420417299544e-05, "loss": 1.6307, "step": 9980 }, { "epoch": 2.3936214855224507, "grad_norm": 
0.6529361605644226, "learning_rate": 2.0233431929011113e-05, "loss": 1.6336, "step": 9982 }, { "epoch": 2.3941010730771537, "grad_norm": 0.6971542835235596, "learning_rate": 2.021744344072268e-05, "loss": 1.6473, "step": 9984 }, { "epoch": 2.3945806606318567, "grad_norm": 0.9262648224830627, "learning_rate": 2.0201454952434248e-05, "loss": 1.6445, "step": 9986 }, { "epoch": 2.3950602481865597, "grad_norm": 0.7492663264274597, "learning_rate": 2.0185466464145817e-05, "loss": 1.6505, "step": 9988 }, { "epoch": 2.3955398357412623, "grad_norm": 0.6855824589729309, "learning_rate": 2.0169477975857383e-05, "loss": 1.6442, "step": 9990 }, { "epoch": 2.3960194232959653, "grad_norm": 0.6969702243804932, "learning_rate": 2.0153489487568952e-05, "loss": 1.63, "step": 9992 }, { "epoch": 2.3964990108506683, "grad_norm": 0.6565621495246887, "learning_rate": 2.013750099928052e-05, "loss": 1.5818, "step": 9994 }, { "epoch": 2.3969785984053713, "grad_norm": 0.7069346904754639, "learning_rate": 2.0121512510992087e-05, "loss": 1.668, "step": 9996 }, { "epoch": 2.3974581859600743, "grad_norm": 0.7000877857208252, "learning_rate": 2.0105524022703652e-05, "loss": 1.576, "step": 9998 }, { "epoch": 2.3979377735147773, "grad_norm": 0.742354691028595, "learning_rate": 2.008953553441522e-05, "loss": 1.6453, "step": 10000 }, { "epoch": 2.3979377735147773, "eval_loss": 1.713421106338501, "eval_runtime": 331.4032, "eval_samples_per_second": 402.679, "eval_steps_per_second": 12.586, "step": 10000 }, { "epoch": 2.3984173610694803, "grad_norm": 0.7282195687294006, "learning_rate": 2.007354704612679e-05, "loss": 1.595, "step": 10002 }, { "epoch": 2.3988969486241833, "grad_norm": 0.6856352090835571, "learning_rate": 2.0057558557838356e-05, "loss": 1.61, "step": 10004 }, { "epoch": 2.3993765361788864, "grad_norm": 0.6639589667320251, "learning_rate": 2.0041570069549925e-05, "loss": 1.62, "step": 10006 }, { "epoch": 2.399856123733589, "grad_norm": 0.7210366129875183, "learning_rate": 
2.0025581581261494e-05, "loss": 1.6085, "step": 10008 }, { "epoch": 2.400335711288292, "grad_norm": 0.855900228023529, "learning_rate": 2.000959309297306e-05, "loss": 1.5762, "step": 10010 }, { "epoch": 2.400815298842995, "grad_norm": 0.75304114818573, "learning_rate": 1.999360460468463e-05, "loss": 1.6497, "step": 10012 }, { "epoch": 2.401294886397698, "grad_norm": 0.777572751045227, "learning_rate": 1.9977616116396195e-05, "loss": 1.5801, "step": 10014 }, { "epoch": 2.401774473952401, "grad_norm": 0.6700795292854309, "learning_rate": 1.9961627628107764e-05, "loss": 1.661, "step": 10016 }, { "epoch": 2.402254061507104, "grad_norm": 0.8382949233055115, "learning_rate": 1.994563913981933e-05, "loss": 1.664, "step": 10018 }, { "epoch": 2.402733649061807, "grad_norm": 0.7825652360916138, "learning_rate": 1.99296506515309e-05, "loss": 1.5528, "step": 10020 }, { "epoch": 2.40321323661651, "grad_norm": 0.7795958518981934, "learning_rate": 1.9913662163242468e-05, "loss": 1.6015, "step": 10022 }, { "epoch": 2.4036928241712125, "grad_norm": 0.6726167798042297, "learning_rate": 1.9897673674954034e-05, "loss": 1.5513, "step": 10024 }, { "epoch": 2.4041724117259156, "grad_norm": 0.7037211060523987, "learning_rate": 1.9881685186665603e-05, "loss": 1.5997, "step": 10026 }, { "epoch": 2.4046519992806186, "grad_norm": 0.7124084234237671, "learning_rate": 1.9865696698377172e-05, "loss": 1.6113, "step": 10028 }, { "epoch": 2.4051315868353216, "grad_norm": 0.8444749116897583, "learning_rate": 1.9849708210088738e-05, "loss": 1.6452, "step": 10030 }, { "epoch": 2.4056111743900246, "grad_norm": 0.7555323243141174, "learning_rate": 1.9833719721800303e-05, "loss": 1.6493, "step": 10032 }, { "epoch": 2.4060907619447276, "grad_norm": 0.6924343109130859, "learning_rate": 1.9817731233511872e-05, "loss": 1.5778, "step": 10034 }, { "epoch": 2.4065703494994306, "grad_norm": 0.7322763800621033, "learning_rate": 1.9801742745223438e-05, "loss": 1.647, "step": 10036 }, { "epoch": 2.4070499370541336, 
"grad_norm": 0.8544310927391052, "learning_rate": 1.9785754256935007e-05, "loss": 1.6187, "step": 10038 }, { "epoch": 2.4075295246088366, "grad_norm": 0.8069093227386475, "learning_rate": 1.9769765768646576e-05, "loss": 1.6159, "step": 10040 }, { "epoch": 2.408009112163539, "grad_norm": 0.6574259400367737, "learning_rate": 1.9753777280358142e-05, "loss": 1.6267, "step": 10042 }, { "epoch": 2.408488699718242, "grad_norm": 0.6890607476234436, "learning_rate": 1.973778879206971e-05, "loss": 1.717, "step": 10044 }, { "epoch": 2.408968287272945, "grad_norm": 0.6937676668167114, "learning_rate": 1.972180030378128e-05, "loss": 1.5766, "step": 10046 }, { "epoch": 2.409447874827648, "grad_norm": 0.6399776339530945, "learning_rate": 1.9705811815492846e-05, "loss": 1.6087, "step": 10048 }, { "epoch": 2.4099274623823512, "grad_norm": 0.8311952948570251, "learning_rate": 1.968982332720441e-05, "loss": 1.6564, "step": 10050 }, { "epoch": 2.4104070499370542, "grad_norm": 0.6645456552505493, "learning_rate": 1.967383483891598e-05, "loss": 1.6443, "step": 10052 }, { "epoch": 2.4108866374917572, "grad_norm": 0.8162228465080261, "learning_rate": 1.965784635062755e-05, "loss": 1.6767, "step": 10054 }, { "epoch": 2.41136622504646, "grad_norm": 0.7223405838012695, "learning_rate": 1.9641857862339115e-05, "loss": 1.6394, "step": 10056 }, { "epoch": 2.411845812601163, "grad_norm": 0.6644138693809509, "learning_rate": 1.9625869374050684e-05, "loss": 1.6783, "step": 10058 }, { "epoch": 2.412325400155866, "grad_norm": 0.7197304964065552, "learning_rate": 1.9609880885762253e-05, "loss": 1.6022, "step": 10060 }, { "epoch": 2.412804987710569, "grad_norm": 0.8191043734550476, "learning_rate": 1.959389239747382e-05, "loss": 1.5986, "step": 10062 }, { "epoch": 2.413284575265272, "grad_norm": 0.9143446683883667, "learning_rate": 1.9577903909185388e-05, "loss": 1.6243, "step": 10064 }, { "epoch": 2.413764162819975, "grad_norm": 0.7895171642303467, "learning_rate": 1.9561915420896957e-05, "loss": 
1.6211, "step": 10066 }, { "epoch": 2.414243750374678, "grad_norm": 0.6652787327766418, "learning_rate": 1.9545926932608523e-05, "loss": 1.6124, "step": 10068 }, { "epoch": 2.414723337929381, "grad_norm": 0.705093502998352, "learning_rate": 1.952993844432009e-05, "loss": 1.6621, "step": 10070 }, { "epoch": 2.415202925484084, "grad_norm": 0.6766807436943054, "learning_rate": 1.9513949956031658e-05, "loss": 1.6303, "step": 10072 }, { "epoch": 2.4156825130387864, "grad_norm": 0.6830898523330688, "learning_rate": 1.9497961467743227e-05, "loss": 1.5754, "step": 10074 }, { "epoch": 2.4161621005934895, "grad_norm": 0.7548022866249084, "learning_rate": 1.9481972979454793e-05, "loss": 1.6043, "step": 10076 }, { "epoch": 2.4166416881481925, "grad_norm": 0.725665271282196, "learning_rate": 1.9465984491166362e-05, "loss": 1.5792, "step": 10078 }, { "epoch": 2.4171212757028955, "grad_norm": 0.7187552452087402, "learning_rate": 1.944999600287793e-05, "loss": 1.6235, "step": 10080 }, { "epoch": 2.4176008632575985, "grad_norm": 0.689163863658905, "learning_rate": 1.9434007514589497e-05, "loss": 1.6037, "step": 10082 }, { "epoch": 2.4180804508123015, "grad_norm": 0.6783608794212341, "learning_rate": 1.9418019026301066e-05, "loss": 1.6195, "step": 10084 }, { "epoch": 2.4185600383670045, "grad_norm": 0.6913689374923706, "learning_rate": 1.940203053801263e-05, "loss": 1.5625, "step": 10086 }, { "epoch": 2.4190396259217075, "grad_norm": 0.7205849885940552, "learning_rate": 1.9386042049724197e-05, "loss": 1.6455, "step": 10088 }, { "epoch": 2.41951921347641, "grad_norm": 0.6999826431274414, "learning_rate": 1.9370053561435766e-05, "loss": 1.6175, "step": 10090 }, { "epoch": 2.419998801031113, "grad_norm": 0.7657488584518433, "learning_rate": 1.9354065073147335e-05, "loss": 1.6309, "step": 10092 }, { "epoch": 2.420478388585816, "grad_norm": 0.7497066259384155, "learning_rate": 1.93380765848589e-05, "loss": 1.5985, "step": 10094 }, { "epoch": 2.420957976140519, "grad_norm": 
0.6708978414535522, "learning_rate": 1.932208809657047e-05, "loss": 1.6192, "step": 10096 }, { "epoch": 2.421437563695222, "grad_norm": 0.7782462239265442, "learning_rate": 1.930609960828204e-05, "loss": 1.608, "step": 10098 }, { "epoch": 2.421917151249925, "grad_norm": 0.6439203023910522, "learning_rate": 1.9290111119993605e-05, "loss": 1.5991, "step": 10100 }, { "epoch": 2.422396738804628, "grad_norm": 0.767932116985321, "learning_rate": 1.9274122631705174e-05, "loss": 1.6582, "step": 10102 }, { "epoch": 2.422876326359331, "grad_norm": 0.8013466000556946, "learning_rate": 1.925813414341674e-05, "loss": 1.6076, "step": 10104 }, { "epoch": 2.423355913914034, "grad_norm": 0.7107015252113342, "learning_rate": 1.924214565512831e-05, "loss": 1.6077, "step": 10106 }, { "epoch": 2.4238355014687367, "grad_norm": 0.7360036969184875, "learning_rate": 1.9226157166839874e-05, "loss": 1.6484, "step": 10108 }, { "epoch": 2.4243150890234397, "grad_norm": 0.7030726671218872, "learning_rate": 1.9210168678551443e-05, "loss": 1.5833, "step": 10110 }, { "epoch": 2.4247946765781427, "grad_norm": 0.7110872864723206, "learning_rate": 1.9194180190263013e-05, "loss": 1.5992, "step": 10112 }, { "epoch": 2.4252742641328457, "grad_norm": 0.6825920343399048, "learning_rate": 1.9178191701974578e-05, "loss": 1.6819, "step": 10114 }, { "epoch": 2.4257538516875488, "grad_norm": 0.6703671813011169, "learning_rate": 1.9162203213686147e-05, "loss": 1.5977, "step": 10116 }, { "epoch": 2.4262334392422518, "grad_norm": 0.6802082657814026, "learning_rate": 1.9146214725397716e-05, "loss": 1.6973, "step": 10118 }, { "epoch": 2.4267130267969548, "grad_norm": 0.6876943111419678, "learning_rate": 1.9130226237109282e-05, "loss": 1.6708, "step": 10120 }, { "epoch": 2.4271926143516573, "grad_norm": 0.687616229057312, "learning_rate": 1.911423774882085e-05, "loss": 1.6481, "step": 10122 }, { "epoch": 2.4276722019063603, "grad_norm": 0.6933209896087646, "learning_rate": 1.9098249260532417e-05, "loss": 1.5661, 
"step": 10124 }, { "epoch": 2.4281517894610634, "grad_norm": 0.716002881526947, "learning_rate": 1.9082260772243986e-05, "loss": 1.6739, "step": 10126 }, { "epoch": 2.4286313770157664, "grad_norm": 0.6988885402679443, "learning_rate": 1.906627228395555e-05, "loss": 1.6061, "step": 10128 }, { "epoch": 2.4291109645704694, "grad_norm": 0.6603298783302307, "learning_rate": 1.905028379566712e-05, "loss": 1.6028, "step": 10130 }, { "epoch": 2.4295905521251724, "grad_norm": 0.6621716618537903, "learning_rate": 1.903429530737869e-05, "loss": 1.5815, "step": 10132 }, { "epoch": 2.4300701396798754, "grad_norm": 0.8320167064666748, "learning_rate": 1.9018306819090256e-05, "loss": 1.6802, "step": 10134 }, { "epoch": 2.4305497272345784, "grad_norm": 0.7443535327911377, "learning_rate": 1.9002318330801825e-05, "loss": 1.6307, "step": 10136 }, { "epoch": 2.4310293147892814, "grad_norm": 0.7232717275619507, "learning_rate": 1.8986329842513394e-05, "loss": 1.6005, "step": 10138 }, { "epoch": 2.431508902343984, "grad_norm": 0.7039580345153809, "learning_rate": 1.897034135422496e-05, "loss": 1.621, "step": 10140 }, { "epoch": 2.431988489898687, "grad_norm": 0.6015660166740417, "learning_rate": 1.8954352865936525e-05, "loss": 1.58, "step": 10142 }, { "epoch": 2.43246807745339, "grad_norm": 0.7512909173965454, "learning_rate": 1.8938364377648094e-05, "loss": 1.65, "step": 10144 }, { "epoch": 2.432947665008093, "grad_norm": 0.7209957242012024, "learning_rate": 1.892237588935966e-05, "loss": 1.6172, "step": 10146 }, { "epoch": 2.433427252562796, "grad_norm": 0.6962937116622925, "learning_rate": 1.890638740107123e-05, "loss": 1.697, "step": 10148 }, { "epoch": 2.433906840117499, "grad_norm": 0.7499356269836426, "learning_rate": 1.8890398912782798e-05, "loss": 1.6537, "step": 10150 }, { "epoch": 2.434386427672202, "grad_norm": 0.7028437852859497, "learning_rate": 1.8874410424494364e-05, "loss": 1.6522, "step": 10152 }, { "epoch": 2.434866015226905, "grad_norm": 0.7398279309272766, 
"learning_rate": 1.8858421936205933e-05, "loss": 1.642, "step": 10154 }, { "epoch": 2.4353456027816076, "grad_norm": 0.6843492984771729, "learning_rate": 1.8842433447917502e-05, "loss": 1.6099, "step": 10156 }, { "epoch": 2.4358251903363106, "grad_norm": 0.6681973934173584, "learning_rate": 1.8826444959629068e-05, "loss": 1.6133, "step": 10158 }, { "epoch": 2.4363047778910136, "grad_norm": 0.7720243334770203, "learning_rate": 1.8810456471340633e-05, "loss": 1.6615, "step": 10160 }, { "epoch": 2.4367843654457166, "grad_norm": 0.8125911951065063, "learning_rate": 1.8794467983052202e-05, "loss": 1.6461, "step": 10162 }, { "epoch": 2.4372639530004196, "grad_norm": 0.7540487051010132, "learning_rate": 1.877847949476377e-05, "loss": 1.6611, "step": 10164 }, { "epoch": 2.4377435405551227, "grad_norm": 0.7431527376174927, "learning_rate": 1.8762491006475337e-05, "loss": 1.6556, "step": 10166 }, { "epoch": 2.4382231281098257, "grad_norm": 0.6848000884056091, "learning_rate": 1.8746502518186906e-05, "loss": 1.641, "step": 10168 }, { "epoch": 2.4387027156645287, "grad_norm": 0.7415996789932251, "learning_rate": 1.8730514029898475e-05, "loss": 1.6273, "step": 10170 }, { "epoch": 2.4391823032192317, "grad_norm": 0.6761967539787292, "learning_rate": 1.871452554161004e-05, "loss": 1.6235, "step": 10172 }, { "epoch": 2.4396618907739342, "grad_norm": 0.669214129447937, "learning_rate": 1.869853705332161e-05, "loss": 1.6098, "step": 10174 }, { "epoch": 2.4401414783286373, "grad_norm": 0.7148521542549133, "learning_rate": 1.868254856503318e-05, "loss": 1.679, "step": 10176 }, { "epoch": 2.4406210658833403, "grad_norm": 0.7657209038734436, "learning_rate": 1.8666560076744745e-05, "loss": 1.6045, "step": 10178 }, { "epoch": 2.4411006534380433, "grad_norm": 0.6553277373313904, "learning_rate": 1.865057158845631e-05, "loss": 1.6177, "step": 10180 }, { "epoch": 2.4415802409927463, "grad_norm": 0.7078243494033813, "learning_rate": 1.863458310016788e-05, "loss": 1.631, "step": 10182 }, { 
"epoch": 2.4420598285474493, "grad_norm": 0.7868657112121582, "learning_rate": 1.861859461187945e-05, "loss": 1.663, "step": 10184 }, { "epoch": 2.4425394161021523, "grad_norm": 0.77935391664505, "learning_rate": 1.8602606123591015e-05, "loss": 1.6276, "step": 10186 }, { "epoch": 2.443019003656855, "grad_norm": 0.699848473072052, "learning_rate": 1.8586617635302584e-05, "loss": 1.6282, "step": 10188 }, { "epoch": 2.443498591211558, "grad_norm": 0.7164567112922668, "learning_rate": 1.8570629147014153e-05, "loss": 1.623, "step": 10190 }, { "epoch": 2.443978178766261, "grad_norm": 0.687366247177124, "learning_rate": 1.855464065872572e-05, "loss": 1.6263, "step": 10192 }, { "epoch": 2.444457766320964, "grad_norm": 0.6828987002372742, "learning_rate": 1.8538652170437288e-05, "loss": 1.5989, "step": 10194 }, { "epoch": 2.444937353875667, "grad_norm": 0.7038889527320862, "learning_rate": 1.8522663682148853e-05, "loss": 1.6024, "step": 10196 }, { "epoch": 2.44541694143037, "grad_norm": 0.7293007373809814, "learning_rate": 1.850667519386042e-05, "loss": 1.6137, "step": 10198 }, { "epoch": 2.445896528985073, "grad_norm": 0.707152247428894, "learning_rate": 1.8490686705571988e-05, "loss": 1.6282, "step": 10200 }, { "epoch": 2.446376116539776, "grad_norm": 0.7371562123298645, "learning_rate": 1.8474698217283557e-05, "loss": 1.6446, "step": 10202 }, { "epoch": 2.446855704094479, "grad_norm": 0.6097273230552673, "learning_rate": 1.8458709728995123e-05, "loss": 1.5912, "step": 10204 }, { "epoch": 2.4473352916491815, "grad_norm": 0.6862480640411377, "learning_rate": 1.8442721240706692e-05, "loss": 1.6221, "step": 10206 }, { "epoch": 2.4478148792038845, "grad_norm": 0.6841017007827759, "learning_rate": 1.842673275241826e-05, "loss": 1.6176, "step": 10208 }, { "epoch": 2.4482944667585875, "grad_norm": 0.6960110664367676, "learning_rate": 1.8410744264129827e-05, "loss": 1.6478, "step": 10210 }, { "epoch": 2.4487740543132905, "grad_norm": 0.7142976522445679, "learning_rate": 
1.8394755775841396e-05, "loss": 1.6073, "step": 10212 }, { "epoch": 2.4492536418679935, "grad_norm": 0.7776036262512207, "learning_rate": 1.837876728755296e-05, "loss": 1.6496, "step": 10214 }, { "epoch": 2.4497332294226966, "grad_norm": 0.7235303521156311, "learning_rate": 1.836277879926453e-05, "loss": 1.5794, "step": 10216 }, { "epoch": 2.4502128169773996, "grad_norm": 0.6849141716957092, "learning_rate": 1.8346790310976096e-05, "loss": 1.662, "step": 10218 }, { "epoch": 2.4506924045321026, "grad_norm": 0.7029483318328857, "learning_rate": 1.8330801822687665e-05, "loss": 1.6166, "step": 10220 }, { "epoch": 2.451171992086805, "grad_norm": 0.7189714908599854, "learning_rate": 1.8314813334399234e-05, "loss": 1.5827, "step": 10222 }, { "epoch": 2.451651579641508, "grad_norm": 0.7296513915061951, "learning_rate": 1.82988248461108e-05, "loss": 1.6036, "step": 10224 }, { "epoch": 2.452131167196211, "grad_norm": 0.8722501993179321, "learning_rate": 1.828283635782237e-05, "loss": 1.6436, "step": 10226 }, { "epoch": 2.452610754750914, "grad_norm": 0.7512632608413696, "learning_rate": 1.8266847869533938e-05, "loss": 1.6202, "step": 10228 }, { "epoch": 2.453090342305617, "grad_norm": 0.7861328721046448, "learning_rate": 1.8250859381245504e-05, "loss": 1.6155, "step": 10230 }, { "epoch": 2.45356992986032, "grad_norm": 0.6684183478355408, "learning_rate": 1.823487089295707e-05, "loss": 1.6206, "step": 10232 }, { "epoch": 2.454049517415023, "grad_norm": 0.7889269590377808, "learning_rate": 1.821888240466864e-05, "loss": 1.61, "step": 10234 }, { "epoch": 2.454529104969726, "grad_norm": 0.7539697885513306, "learning_rate": 1.8202893916380208e-05, "loss": 1.686, "step": 10236 }, { "epoch": 2.455008692524429, "grad_norm": 0.6598774790763855, "learning_rate": 1.8186905428091774e-05, "loss": 1.6545, "step": 10238 }, { "epoch": 2.455488280079132, "grad_norm": 0.7152436375617981, "learning_rate": 1.8170916939803343e-05, "loss": 1.6697, "step": 10240 }, { "epoch": 2.455967867633835, 
"grad_norm": 0.7236979603767395, "learning_rate": 1.8154928451514912e-05, "loss": 1.6099, "step": 10242 }, { "epoch": 2.456447455188538, "grad_norm": 0.8968623876571655, "learning_rate": 1.8138939963226477e-05, "loss": 1.6815, "step": 10244 }, { "epoch": 2.456927042743241, "grad_norm": 0.7234359383583069, "learning_rate": 1.8122951474938047e-05, "loss": 1.5929, "step": 10246 }, { "epoch": 2.457406630297944, "grad_norm": 0.6794857978820801, "learning_rate": 1.8106962986649616e-05, "loss": 1.615, "step": 10248 }, { "epoch": 2.457886217852647, "grad_norm": 0.8003257513046265, "learning_rate": 1.8090974498361178e-05, "loss": 1.6248, "step": 10250 }, { "epoch": 2.45836580540735, "grad_norm": 0.7420939207077026, "learning_rate": 1.8074986010072747e-05, "loss": 1.6168, "step": 10252 }, { "epoch": 2.4588453929620524, "grad_norm": 0.8500974178314209, "learning_rate": 1.8058997521784316e-05, "loss": 1.6187, "step": 10254 }, { "epoch": 2.4593249805167554, "grad_norm": 0.6936556696891785, "learning_rate": 1.8043009033495882e-05, "loss": 1.6489, "step": 10256 }, { "epoch": 2.4598045680714584, "grad_norm": 0.7204890847206116, "learning_rate": 1.802702054520745e-05, "loss": 1.6607, "step": 10258 }, { "epoch": 2.4602841556261614, "grad_norm": 0.7154961824417114, "learning_rate": 1.801103205691902e-05, "loss": 1.5593, "step": 10260 }, { "epoch": 2.4607637431808644, "grad_norm": 0.7585080862045288, "learning_rate": 1.7995043568630586e-05, "loss": 1.6212, "step": 10262 }, { "epoch": 2.4612433307355674, "grad_norm": 0.646954357624054, "learning_rate": 1.7979055080342155e-05, "loss": 1.6377, "step": 10264 }, { "epoch": 2.4617229182902705, "grad_norm": 0.6921256184577942, "learning_rate": 1.7963066592053724e-05, "loss": 1.5884, "step": 10266 }, { "epoch": 2.4622025058449735, "grad_norm": 0.687671959400177, "learning_rate": 1.794707810376529e-05, "loss": 1.6717, "step": 10268 }, { "epoch": 2.4626820933996765, "grad_norm": 0.6681802272796631, "learning_rate": 1.7931089615476855e-05, 
"loss": 1.6098, "step": 10270 }, { "epoch": 2.463161680954379, "grad_norm": 0.6449736952781677, "learning_rate": 1.7915101127188424e-05, "loss": 1.6018, "step": 10272 }, { "epoch": 2.463641268509082, "grad_norm": 0.6819206476211548, "learning_rate": 1.7899112638899993e-05, "loss": 1.6521, "step": 10274 }, { "epoch": 2.464120856063785, "grad_norm": 0.7684497833251953, "learning_rate": 1.788312415061156e-05, "loss": 1.6314, "step": 10276 }, { "epoch": 2.464600443618488, "grad_norm": 0.6975690126419067, "learning_rate": 1.7867135662323128e-05, "loss": 1.6514, "step": 10278 }, { "epoch": 2.465080031173191, "grad_norm": 0.6819930672645569, "learning_rate": 1.7851147174034697e-05, "loss": 1.6193, "step": 10280 }, { "epoch": 2.465559618727894, "grad_norm": 0.6868979930877686, "learning_rate": 1.7835158685746263e-05, "loss": 1.6216, "step": 10282 }, { "epoch": 2.466039206282597, "grad_norm": 0.6618260145187378, "learning_rate": 1.7819170197457832e-05, "loss": 1.6536, "step": 10284 }, { "epoch": 2.4665187938373, "grad_norm": 0.6494329571723938, "learning_rate": 1.7803181709169398e-05, "loss": 1.6711, "step": 10286 }, { "epoch": 2.4669983813920027, "grad_norm": 0.7406972646713257, "learning_rate": 1.7787193220880967e-05, "loss": 1.5915, "step": 10288 }, { "epoch": 2.4674779689467057, "grad_norm": 0.7356812357902527, "learning_rate": 1.7771204732592533e-05, "loss": 1.6779, "step": 10290 }, { "epoch": 2.4679575565014087, "grad_norm": 0.6308363676071167, "learning_rate": 1.77552162443041e-05, "loss": 1.601, "step": 10292 }, { "epoch": 2.4684371440561117, "grad_norm": 1.0128909349441528, "learning_rate": 1.773922775601567e-05, "loss": 1.6401, "step": 10294 }, { "epoch": 2.4689167316108147, "grad_norm": 0.7012378573417664, "learning_rate": 1.7723239267727236e-05, "loss": 1.6282, "step": 10296 }, { "epoch": 2.4693963191655177, "grad_norm": 0.8659380674362183, "learning_rate": 1.7707250779438806e-05, "loss": 1.6047, "step": 10298 }, { "epoch": 2.4698759067202207, "grad_norm": 
0.6887945532798767, "learning_rate": 1.7691262291150375e-05, "loss": 1.6195, "step": 10300 }, { "epoch": 2.4703554942749237, "grad_norm": 0.65821373462677, "learning_rate": 1.767527380286194e-05, "loss": 1.6107, "step": 10302 }, { "epoch": 2.4708350818296267, "grad_norm": 0.6978738307952881, "learning_rate": 1.7659285314573506e-05, "loss": 1.5992, "step": 10304 }, { "epoch": 2.4713146693843293, "grad_norm": 0.7425180673599243, "learning_rate": 1.7643296826285075e-05, "loss": 1.5978, "step": 10306 }, { "epoch": 2.4717942569390323, "grad_norm": 0.6615174412727356, "learning_rate": 1.762730833799664e-05, "loss": 1.6545, "step": 10308 }, { "epoch": 2.4722738444937353, "grad_norm": 0.6568105816841125, "learning_rate": 1.761131984970821e-05, "loss": 1.5527, "step": 10310 }, { "epoch": 2.4727534320484383, "grad_norm": 0.7383237481117249, "learning_rate": 1.759533136141978e-05, "loss": 1.6152, "step": 10312 }, { "epoch": 2.4732330196031413, "grad_norm": 0.779808759689331, "learning_rate": 1.7579342873131348e-05, "loss": 1.6204, "step": 10314 }, { "epoch": 2.4737126071578444, "grad_norm": 0.7262237071990967, "learning_rate": 1.7563354384842914e-05, "loss": 1.633, "step": 10316 }, { "epoch": 2.4741921947125474, "grad_norm": 0.7388796806335449, "learning_rate": 1.7547365896554483e-05, "loss": 1.6706, "step": 10318 }, { "epoch": 2.47467178226725, "grad_norm": 0.6678906083106995, "learning_rate": 1.7531377408266052e-05, "loss": 1.6449, "step": 10320 }, { "epoch": 2.475151369821953, "grad_norm": 0.7015342712402344, "learning_rate": 1.7515388919977618e-05, "loss": 1.5879, "step": 10322 }, { "epoch": 2.475630957376656, "grad_norm": 0.7578606009483337, "learning_rate": 1.7499400431689183e-05, "loss": 1.6154, "step": 10324 }, { "epoch": 2.476110544931359, "grad_norm": 0.7399972677230835, "learning_rate": 1.7483411943400752e-05, "loss": 1.6476, "step": 10326 }, { "epoch": 2.476590132486062, "grad_norm": 0.7841532230377197, "learning_rate": 1.7467423455112318e-05, "loss": 1.7027, 
"step": 10328 }, { "epoch": 2.477069720040765, "grad_norm": 0.6904253959655762, "learning_rate": 1.7451434966823887e-05, "loss": 1.6208, "step": 10330 }, { "epoch": 2.477549307595468, "grad_norm": 0.6918362379074097, "learning_rate": 1.7435446478535456e-05, "loss": 1.6646, "step": 10332 }, { "epoch": 2.478028895150171, "grad_norm": 0.7743179202079773, "learning_rate": 1.7419457990247022e-05, "loss": 1.6595, "step": 10334 }, { "epoch": 2.478508482704874, "grad_norm": 0.741704523563385, "learning_rate": 1.740346950195859e-05, "loss": 1.6048, "step": 10336 }, { "epoch": 2.4789880702595766, "grad_norm": 0.7492694854736328, "learning_rate": 1.738748101367016e-05, "loss": 1.6172, "step": 10338 }, { "epoch": 2.4794676578142796, "grad_norm": 0.7377253770828247, "learning_rate": 1.7371492525381726e-05, "loss": 1.6084, "step": 10340 }, { "epoch": 2.4799472453689826, "grad_norm": 0.6942775845527649, "learning_rate": 1.735550403709329e-05, "loss": 1.644, "step": 10342 }, { "epoch": 2.4804268329236856, "grad_norm": 0.7435987591743469, "learning_rate": 1.733951554880486e-05, "loss": 1.5684, "step": 10344 }, { "epoch": 2.4809064204783886, "grad_norm": 0.6465569138526917, "learning_rate": 1.732352706051643e-05, "loss": 1.6042, "step": 10346 }, { "epoch": 2.4813860080330916, "grad_norm": 0.7489793300628662, "learning_rate": 1.7307538572227995e-05, "loss": 1.6231, "step": 10348 }, { "epoch": 2.4818655955877946, "grad_norm": 0.6946049332618713, "learning_rate": 1.7291550083939565e-05, "loss": 1.6653, "step": 10350 }, { "epoch": 2.4823451831424976, "grad_norm": 0.7004793882369995, "learning_rate": 1.7275561595651134e-05, "loss": 1.6413, "step": 10352 }, { "epoch": 2.4828247706972, "grad_norm": 0.6343263387680054, "learning_rate": 1.72595731073627e-05, "loss": 1.6069, "step": 10354 }, { "epoch": 2.483304358251903, "grad_norm": 0.7325859069824219, "learning_rate": 1.724358461907427e-05, "loss": 1.6338, "step": 10356 }, { "epoch": 2.483783945806606, "grad_norm": 0.6846243143081665, 
"learning_rate": 1.7227596130785838e-05, "loss": 1.6137, "step": 10358 }, { "epoch": 2.4842635333613092, "grad_norm": 0.6565819382667542, "learning_rate": 1.7211607642497403e-05, "loss": 1.6188, "step": 10360 }, { "epoch": 2.4847431209160122, "grad_norm": 0.666898250579834, "learning_rate": 1.719561915420897e-05, "loss": 1.6769, "step": 10362 }, { "epoch": 2.4852227084707152, "grad_norm": 0.9010687470436096, "learning_rate": 1.7179630665920538e-05, "loss": 1.6956, "step": 10364 }, { "epoch": 2.4857022960254183, "grad_norm": 0.7270331382751465, "learning_rate": 1.7163642177632107e-05, "loss": 1.6965, "step": 10366 }, { "epoch": 2.4861818835801213, "grad_norm": 0.813008725643158, "learning_rate": 1.7147653689343673e-05, "loss": 1.6114, "step": 10368 }, { "epoch": 2.4866614711348243, "grad_norm": 0.6649904847145081, "learning_rate": 1.7131665201055242e-05, "loss": 1.6335, "step": 10370 }, { "epoch": 2.487141058689527, "grad_norm": 0.686902642250061, "learning_rate": 1.711567671276681e-05, "loss": 1.6139, "step": 10372 }, { "epoch": 2.48762064624423, "grad_norm": 0.6594638824462891, "learning_rate": 1.7099688224478377e-05, "loss": 1.6116, "step": 10374 }, { "epoch": 2.488100233798933, "grad_norm": 0.7281873226165771, "learning_rate": 1.7083699736189946e-05, "loss": 1.6437, "step": 10376 }, { "epoch": 2.488579821353636, "grad_norm": 0.6972200274467468, "learning_rate": 1.706771124790151e-05, "loss": 1.6715, "step": 10378 }, { "epoch": 2.489059408908339, "grad_norm": 0.6996859312057495, "learning_rate": 1.7051722759613077e-05, "loss": 1.6331, "step": 10380 }, { "epoch": 2.489538996463042, "grad_norm": 0.8116429448127747, "learning_rate": 1.7035734271324646e-05, "loss": 1.6362, "step": 10382 }, { "epoch": 2.490018584017745, "grad_norm": 0.6958587765693665, "learning_rate": 1.7019745783036215e-05, "loss": 1.6417, "step": 10384 }, { "epoch": 2.4904981715724475, "grad_norm": 0.6709808111190796, "learning_rate": 1.700375729474778e-05, "loss": 1.6093, "step": 10386 }, { 
"epoch": 2.4909777591271505, "grad_norm": 0.7017360925674438, "learning_rate": 1.698776880645935e-05, "loss": 1.5948, "step": 10388 }, { "epoch": 2.4914573466818535, "grad_norm": 0.6611506342887878, "learning_rate": 1.697178031817092e-05, "loss": 1.6215, "step": 10390 }, { "epoch": 2.4919369342365565, "grad_norm": 0.7342482805252075, "learning_rate": 1.6955791829882485e-05, "loss": 1.6693, "step": 10392 }, { "epoch": 2.4924165217912595, "grad_norm": 0.7931073307991028, "learning_rate": 1.6939803341594054e-05, "loss": 1.6492, "step": 10394 }, { "epoch": 2.4928961093459625, "grad_norm": 0.6853740811347961, "learning_rate": 1.692381485330562e-05, "loss": 1.6228, "step": 10396 }, { "epoch": 2.4933756969006655, "grad_norm": 0.7028765082359314, "learning_rate": 1.690782636501719e-05, "loss": 1.6744, "step": 10398 }, { "epoch": 2.4938552844553685, "grad_norm": 0.7493379712104797, "learning_rate": 1.6891837876728754e-05, "loss": 1.646, "step": 10400 }, { "epoch": 2.4938552844553685, "eval_loss": 1.7094917297363281, "eval_runtime": 331.2229, "eval_samples_per_second": 402.898, "eval_steps_per_second": 12.593, "step": 10400 }, { "epoch": 2.4943348720100715, "grad_norm": 0.7642424702644348, "learning_rate": 1.6875849388440324e-05, "loss": 1.6471, "step": 10402 }, { "epoch": 2.494814459564774, "grad_norm": 0.681695818901062, "learning_rate": 1.6859860900151893e-05, "loss": 1.6385, "step": 10404 }, { "epoch": 2.495294047119477, "grad_norm": 0.6757962703704834, "learning_rate": 1.684387241186346e-05, "loss": 1.6686, "step": 10406 }, { "epoch": 2.49577363467418, "grad_norm": 0.7887467741966248, "learning_rate": 1.6827883923575027e-05, "loss": 1.624, "step": 10408 }, { "epoch": 2.496253222228883, "grad_norm": 0.6515763401985168, "learning_rate": 1.6811895435286597e-05, "loss": 1.6014, "step": 10410 }, { "epoch": 2.496732809783586, "grad_norm": 0.709968090057373, "learning_rate": 1.6795906946998162e-05, "loss": 1.637, "step": 10412 }, { "epoch": 2.497212397338289, "grad_norm": 
0.7650299668312073, "learning_rate": 1.6779918458709728e-05, "loss": 1.6512, "step": 10414 }, { "epoch": 2.497691984892992, "grad_norm": 0.6798341274261475, "learning_rate": 1.6763929970421297e-05, "loss": 1.5951, "step": 10416 }, { "epoch": 2.498171572447695, "grad_norm": 0.684188187122345, "learning_rate": 1.6747941482132866e-05, "loss": 1.662, "step": 10418 }, { "epoch": 2.4986511600023977, "grad_norm": 0.6669833660125732, "learning_rate": 1.6731952993844432e-05, "loss": 1.6049, "step": 10420 }, { "epoch": 2.4991307475571007, "grad_norm": 0.7581161856651306, "learning_rate": 1.6715964505556e-05, "loss": 1.6419, "step": 10422 }, { "epoch": 2.4996103351118037, "grad_norm": 0.6783333420753479, "learning_rate": 1.669997601726757e-05, "loss": 1.5974, "step": 10424 }, { "epoch": 2.5000899226665068, "grad_norm": 0.7101329565048218, "learning_rate": 1.6683987528979136e-05, "loss": 1.6297, "step": 10426 }, { "epoch": 2.5005695102212098, "grad_norm": 0.7206289768218994, "learning_rate": 1.6667999040690705e-05, "loss": 1.6483, "step": 10428 }, { "epoch": 2.5010490977759128, "grad_norm": 0.7380495667457581, "learning_rate": 1.6652010552402274e-05, "loss": 1.6321, "step": 10430 }, { "epoch": 2.501528685330616, "grad_norm": 0.7887142300605774, "learning_rate": 1.6636022064113836e-05, "loss": 1.5851, "step": 10432 }, { "epoch": 2.502008272885319, "grad_norm": 0.7704177498817444, "learning_rate": 1.6620033575825405e-05, "loss": 1.6709, "step": 10434 }, { "epoch": 2.502487860440022, "grad_norm": 0.8107625842094421, "learning_rate": 1.6604045087536974e-05, "loss": 1.625, "step": 10436 }, { "epoch": 2.5029674479947244, "grad_norm": 0.9530971050262451, "learning_rate": 1.658805659924854e-05, "loss": 1.6627, "step": 10438 }, { "epoch": 2.5034470355494274, "grad_norm": 0.6847907304763794, "learning_rate": 1.657206811096011e-05, "loss": 1.6164, "step": 10440 }, { "epoch": 2.5039266231041304, "grad_norm": 0.7104029655456543, "learning_rate": 1.6556079622671678e-05, "loss": 1.6507, 
"step": 10442 }, { "epoch": 2.5044062106588334, "grad_norm": 0.7155182361602783, "learning_rate": 1.6540091134383244e-05, "loss": 1.6361, "step": 10444 }, { "epoch": 2.5048857982135364, "grad_norm": 0.6593635082244873, "learning_rate": 1.6524102646094813e-05, "loss": 1.5553, "step": 10446 }, { "epoch": 2.5053653857682394, "grad_norm": 0.699564516544342, "learning_rate": 1.6508114157806382e-05, "loss": 1.5971, "step": 10448 }, { "epoch": 2.5058449733229424, "grad_norm": 0.6777533292770386, "learning_rate": 1.6492125669517948e-05, "loss": 1.6119, "step": 10450 }, { "epoch": 2.506324560877645, "grad_norm": 0.6627251505851746, "learning_rate": 1.6476137181229514e-05, "loss": 1.6683, "step": 10452 }, { "epoch": 2.506804148432348, "grad_norm": 0.6995949745178223, "learning_rate": 1.6460148692941083e-05, "loss": 1.5886, "step": 10454 }, { "epoch": 2.507283735987051, "grad_norm": 0.6837794780731201, "learning_rate": 1.644416020465265e-05, "loss": 1.6474, "step": 10456 }, { "epoch": 2.507763323541754, "grad_norm": 0.6937560439109802, "learning_rate": 1.6428171716364217e-05, "loss": 1.6761, "step": 10458 }, { "epoch": 2.508242911096457, "grad_norm": 0.7022885084152222, "learning_rate": 1.6412183228075786e-05, "loss": 1.6311, "step": 10460 }, { "epoch": 2.50872249865116, "grad_norm": 0.7195507287979126, "learning_rate": 1.6396194739787356e-05, "loss": 1.679, "step": 10462 }, { "epoch": 2.509202086205863, "grad_norm": 0.7003393173217773, "learning_rate": 1.638020625149892e-05, "loss": 1.6991, "step": 10464 }, { "epoch": 2.509681673760566, "grad_norm": 0.7060467004776001, "learning_rate": 1.636421776321049e-05, "loss": 1.6492, "step": 10466 }, { "epoch": 2.510161261315269, "grad_norm": 0.6866103410720825, "learning_rate": 1.6348229274922056e-05, "loss": 1.6059, "step": 10468 }, { "epoch": 2.510640848869972, "grad_norm": 0.6959758400917053, "learning_rate": 1.6332240786633625e-05, "loss": 1.6531, "step": 10470 }, { "epoch": 2.5111204364246746, "grad_norm": 0.6902159452438354, 
"learning_rate": 1.631625229834519e-05, "loss": 1.6501, "step": 10472 }, { "epoch": 2.5116000239793776, "grad_norm": 0.6559665203094482, "learning_rate": 1.630026381005676e-05, "loss": 1.632, "step": 10474 }, { "epoch": 2.5120796115340807, "grad_norm": 0.7222556471824646, "learning_rate": 1.628427532176833e-05, "loss": 1.648, "step": 10476 }, { "epoch": 2.5125591990887837, "grad_norm": 0.7382713556289673, "learning_rate": 1.6268286833479895e-05, "loss": 1.6061, "step": 10478 }, { "epoch": 2.5130387866434867, "grad_norm": 0.6578150391578674, "learning_rate": 1.6252298345191464e-05, "loss": 1.6978, "step": 10480 }, { "epoch": 2.5135183741981897, "grad_norm": 0.6337912082672119, "learning_rate": 1.6236309856903033e-05, "loss": 1.6713, "step": 10482 }, { "epoch": 2.5139979617528923, "grad_norm": 0.6896392703056335, "learning_rate": 1.62203213686146e-05, "loss": 1.6051, "step": 10484 }, { "epoch": 2.5144775493075953, "grad_norm": 0.670844554901123, "learning_rate": 1.6204332880326164e-05, "loss": 1.5969, "step": 10486 }, { "epoch": 2.5149571368622983, "grad_norm": 0.6698813438415527, "learning_rate": 1.6188344392037733e-05, "loss": 1.6278, "step": 10488 }, { "epoch": 2.5154367244170013, "grad_norm": 0.7080579996109009, "learning_rate": 1.61723559037493e-05, "loss": 1.6426, "step": 10490 }, { "epoch": 2.5159163119717043, "grad_norm": 0.6959465742111206, "learning_rate": 1.6156367415460868e-05, "loss": 1.6268, "step": 10492 }, { "epoch": 2.5163958995264073, "grad_norm": 0.6538605093955994, "learning_rate": 1.6140378927172437e-05, "loss": 1.5904, "step": 10494 }, { "epoch": 2.5168754870811103, "grad_norm": 0.6546627283096313, "learning_rate": 1.6124390438884003e-05, "loss": 1.6175, "step": 10496 }, { "epoch": 2.5173550746358133, "grad_norm": 0.6581306457519531, "learning_rate": 1.6108401950595572e-05, "loss": 1.6509, "step": 10498 }, { "epoch": 2.5178346621905163, "grad_norm": 0.7327696681022644, "learning_rate": 1.609241346230714e-05, "loss": 1.6478, "step": 10500 }, { 
"epoch": 2.5183142497452193, "grad_norm": 0.6201031804084778, "learning_rate": 1.6076424974018707e-05, "loss": 1.5989, "step": 10502 }, { "epoch": 2.518793837299922, "grad_norm": 0.6586251258850098, "learning_rate": 1.6060436485730273e-05, "loss": 1.6133, "step": 10504 }, { "epoch": 2.519273424854625, "grad_norm": 0.6975637674331665, "learning_rate": 1.604444799744184e-05, "loss": 1.6412, "step": 10506 }, { "epoch": 2.519753012409328, "grad_norm": 0.6793755292892456, "learning_rate": 1.602845950915341e-05, "loss": 1.6448, "step": 10508 }, { "epoch": 2.520232599964031, "grad_norm": 0.6533005833625793, "learning_rate": 1.6012471020864976e-05, "loss": 1.6673, "step": 10510 }, { "epoch": 2.520712187518734, "grad_norm": 0.658416211605072, "learning_rate": 1.5996482532576545e-05, "loss": 1.5782, "step": 10512 }, { "epoch": 2.521191775073437, "grad_norm": 0.7086223363876343, "learning_rate": 1.5980494044288115e-05, "loss": 1.6339, "step": 10514 }, { "epoch": 2.52167136262814, "grad_norm": 0.6660764813423157, "learning_rate": 1.596450555599968e-05, "loss": 1.6133, "step": 10516 }, { "epoch": 2.5221509501828425, "grad_norm": 0.6817411780357361, "learning_rate": 1.594851706771125e-05, "loss": 1.6552, "step": 10518 }, { "epoch": 2.5226305377375455, "grad_norm": 0.7060635089874268, "learning_rate": 1.593252857942282e-05, "loss": 1.6059, "step": 10520 }, { "epoch": 2.5231101252922485, "grad_norm": 0.717452347278595, "learning_rate": 1.5916540091134384e-05, "loss": 1.6236, "step": 10522 }, { "epoch": 2.5235897128469515, "grad_norm": 0.6820067167282104, "learning_rate": 1.590055160284595e-05, "loss": 1.6342, "step": 10524 }, { "epoch": 2.5240693004016546, "grad_norm": 0.7788046002388, "learning_rate": 1.588456311455752e-05, "loss": 1.6481, "step": 10526 }, { "epoch": 2.5245488879563576, "grad_norm": 0.7966445684432983, "learning_rate": 1.5868574626269088e-05, "loss": 1.6291, "step": 10528 }, { "epoch": 2.5250284755110606, "grad_norm": 0.6325135827064514, "learning_rate": 
1.5852586137980654e-05, "loss": 1.5858, "step": 10530 }, { "epoch": 2.5255080630657636, "grad_norm": 0.6686004400253296, "learning_rate": 1.5836597649692223e-05, "loss": 1.5944, "step": 10532 }, { "epoch": 2.5259876506204666, "grad_norm": 0.6778841614723206, "learning_rate": 1.5820609161403792e-05, "loss": 1.5717, "step": 10534 }, { "epoch": 2.5264672381751696, "grad_norm": 0.7416860461235046, "learning_rate": 1.5804620673115358e-05, "loss": 1.5879, "step": 10536 }, { "epoch": 2.526946825729872, "grad_norm": 0.6901241540908813, "learning_rate": 1.5788632184826927e-05, "loss": 1.6366, "step": 10538 }, { "epoch": 2.527426413284575, "grad_norm": 0.7021841406822205, "learning_rate": 1.5772643696538496e-05, "loss": 1.5702, "step": 10540 }, { "epoch": 2.527906000839278, "grad_norm": 0.6634772419929504, "learning_rate": 1.5756655208250058e-05, "loss": 1.6222, "step": 10542 }, { "epoch": 2.528385588393981, "grad_norm": 0.7675395011901855, "learning_rate": 1.5740666719961627e-05, "loss": 1.6378, "step": 10544 }, { "epoch": 2.528865175948684, "grad_norm": 0.7742576599121094, "learning_rate": 1.5724678231673196e-05, "loss": 1.5991, "step": 10546 }, { "epoch": 2.529344763503387, "grad_norm": 0.6738306283950806, "learning_rate": 1.5708689743384762e-05, "loss": 1.5695, "step": 10548 }, { "epoch": 2.52982435105809, "grad_norm": 0.7509223222732544, "learning_rate": 1.569270125509633e-05, "loss": 1.63, "step": 10550 }, { "epoch": 2.530303938612793, "grad_norm": 0.6289635896682739, "learning_rate": 1.56767127668079e-05, "loss": 1.5999, "step": 10552 }, { "epoch": 2.530783526167496, "grad_norm": 0.7041516304016113, "learning_rate": 1.5660724278519466e-05, "loss": 1.6419, "step": 10554 }, { "epoch": 2.531263113722199, "grad_norm": 0.6832795739173889, "learning_rate": 1.5644735790231035e-05, "loss": 1.65, "step": 10556 }, { "epoch": 2.531742701276902, "grad_norm": 0.6826574802398682, "learning_rate": 1.5628747301942604e-05, "loss": 1.6361, "step": 10558 }, { "epoch": 2.532222288831605, 
"grad_norm": 0.6925866007804871, "learning_rate": 1.561275881365417e-05, "loss": 1.6242, "step": 10560 }, { "epoch": 2.532701876386308, "grad_norm": 0.7193728685379028, "learning_rate": 1.5596770325365735e-05, "loss": 1.6642, "step": 10562 }, { "epoch": 2.533181463941011, "grad_norm": 0.7896717190742493, "learning_rate": 1.5580781837077305e-05, "loss": 1.62, "step": 10564 }, { "epoch": 2.533661051495714, "grad_norm": 0.66329425573349, "learning_rate": 1.5564793348788874e-05, "loss": 1.5873, "step": 10566 }, { "epoch": 2.534140639050417, "grad_norm": 0.7382948398590088, "learning_rate": 1.554880486050044e-05, "loss": 1.6318, "step": 10568 }, { "epoch": 2.5346202266051194, "grad_norm": 0.8265359401702881, "learning_rate": 1.553281637221201e-05, "loss": 1.6234, "step": 10570 }, { "epoch": 2.5350998141598224, "grad_norm": 0.7350818514823914, "learning_rate": 1.5516827883923577e-05, "loss": 1.585, "step": 10572 }, { "epoch": 2.5355794017145254, "grad_norm": 0.6792868375778198, "learning_rate": 1.5500839395635143e-05, "loss": 1.5941, "step": 10574 }, { "epoch": 2.5360589892692285, "grad_norm": 0.6692653298377991, "learning_rate": 1.5484850907346712e-05, "loss": 1.5849, "step": 10576 }, { "epoch": 2.5365385768239315, "grad_norm": 0.6964331865310669, "learning_rate": 1.5468862419058278e-05, "loss": 1.6219, "step": 10578 }, { "epoch": 2.5370181643786345, "grad_norm": 0.6618159413337708, "learning_rate": 1.5452873930769847e-05, "loss": 1.606, "step": 10580 }, { "epoch": 2.5374977519333375, "grad_norm": 0.7004371285438538, "learning_rate": 1.5436885442481413e-05, "loss": 1.6802, "step": 10582 }, { "epoch": 2.53797733948804, "grad_norm": 0.6576506495475769, "learning_rate": 1.5420896954192982e-05, "loss": 1.6047, "step": 10584 }, { "epoch": 2.538456927042743, "grad_norm": 0.6850501298904419, "learning_rate": 1.540490846590455e-05, "loss": 1.6081, "step": 10586 }, { "epoch": 2.538936514597446, "grad_norm": 0.7142689228057861, "learning_rate": 1.5388919977616117e-05, "loss": 
1.637, "step": 10588 }, { "epoch": 2.539416102152149, "grad_norm": 0.7945790886878967, "learning_rate": 1.5372931489327686e-05, "loss": 1.6238, "step": 10590 }, { "epoch": 2.539895689706852, "grad_norm": 0.7197648882865906, "learning_rate": 1.5356943001039255e-05, "loss": 1.6397, "step": 10592 }, { "epoch": 2.540375277261555, "grad_norm": 0.8586829900741577, "learning_rate": 1.534095451275082e-05, "loss": 1.597, "step": 10594 }, { "epoch": 2.540854864816258, "grad_norm": 0.7212238907814026, "learning_rate": 1.5324966024462386e-05, "loss": 1.6252, "step": 10596 }, { "epoch": 2.541334452370961, "grad_norm": 0.6868704557418823, "learning_rate": 1.5308977536173955e-05, "loss": 1.6245, "step": 10598 }, { "epoch": 2.541814039925664, "grad_norm": 0.6290331482887268, "learning_rate": 1.529298904788552e-05, "loss": 1.5664, "step": 10600 }, { "epoch": 2.542293627480367, "grad_norm": 0.7586652636528015, "learning_rate": 1.527700055959709e-05, "loss": 1.6429, "step": 10602 }, { "epoch": 2.5427732150350697, "grad_norm": 0.8043549060821533, "learning_rate": 1.526101207130866e-05, "loss": 1.6425, "step": 10604 }, { "epoch": 2.5432528025897727, "grad_norm": 0.6419581770896912, "learning_rate": 1.5245023583020227e-05, "loss": 1.6011, "step": 10606 }, { "epoch": 2.5437323901444757, "grad_norm": 0.6763697862625122, "learning_rate": 1.5229035094731794e-05, "loss": 1.6611, "step": 10608 }, { "epoch": 2.5442119776991787, "grad_norm": 0.6895164847373962, "learning_rate": 1.5213046606443363e-05, "loss": 1.655, "step": 10610 }, { "epoch": 2.5446915652538817, "grad_norm": 0.711158037185669, "learning_rate": 1.519705811815493e-05, "loss": 1.6626, "step": 10612 }, { "epoch": 2.5451711528085847, "grad_norm": 0.6882568001747131, "learning_rate": 1.5181069629866496e-05, "loss": 1.6448, "step": 10614 }, { "epoch": 2.5456507403632873, "grad_norm": 0.6909088492393494, "learning_rate": 1.5165081141578064e-05, "loss": 1.6413, "step": 10616 }, { "epoch": 2.5461303279179903, "grad_norm": 
0.7627881169319153, "learning_rate": 1.5149092653289631e-05, "loss": 1.5568, "step": 10618 }, { "epoch": 2.5466099154726933, "grad_norm": 0.8022089004516602, "learning_rate": 1.51331041650012e-05, "loss": 1.5924, "step": 10620 }, { "epoch": 2.5470895030273963, "grad_norm": 0.6536405682563782, "learning_rate": 1.5117115676712767e-05, "loss": 1.6509, "step": 10622 }, { "epoch": 2.5475690905820993, "grad_norm": 0.7468989491462708, "learning_rate": 1.5101127188424335e-05, "loss": 1.6411, "step": 10624 }, { "epoch": 2.5480486781368024, "grad_norm": 0.6465256214141846, "learning_rate": 1.5085138700135904e-05, "loss": 1.6551, "step": 10626 }, { "epoch": 2.5485282656915054, "grad_norm": 0.6743022799491882, "learning_rate": 1.5069150211847471e-05, "loss": 1.6519, "step": 10628 }, { "epoch": 2.5490078532462084, "grad_norm": 0.6675214767456055, "learning_rate": 1.5053161723559039e-05, "loss": 1.6372, "step": 10630 }, { "epoch": 2.5494874408009114, "grad_norm": 0.7847001552581787, "learning_rate": 1.5037173235270604e-05, "loss": 1.6224, "step": 10632 }, { "epoch": 2.5499670283556144, "grad_norm": 0.6913665533065796, "learning_rate": 1.5021184746982172e-05, "loss": 1.6544, "step": 10634 }, { "epoch": 2.550446615910317, "grad_norm": 0.6415286064147949, "learning_rate": 1.5005196258693741e-05, "loss": 1.6018, "step": 10636 }, { "epoch": 2.55092620346502, "grad_norm": 0.6768595576286316, "learning_rate": 1.4989207770405308e-05, "loss": 1.6282, "step": 10638 }, { "epoch": 2.551405791019723, "grad_norm": 0.8262168765068054, "learning_rate": 1.4973219282116877e-05, "loss": 1.6199, "step": 10640 }, { "epoch": 2.551885378574426, "grad_norm": 0.7295670509338379, "learning_rate": 1.4957230793828445e-05, "loss": 1.6108, "step": 10642 }, { "epoch": 2.552364966129129, "grad_norm": 0.6511848568916321, "learning_rate": 1.4941242305540012e-05, "loss": 1.5943, "step": 10644 }, { "epoch": 2.552844553683832, "grad_norm": 0.7120746374130249, "learning_rate": 1.4925253817251581e-05, "loss": 1.6557, 
"step": 10646 }, { "epoch": 2.553324141238535, "grad_norm": 0.6719639897346497, "learning_rate": 1.4909265328963149e-05, "loss": 1.6047, "step": 10648 }, { "epoch": 2.5538037287932376, "grad_norm": 0.6406275033950806, "learning_rate": 1.4893276840674714e-05, "loss": 1.551, "step": 10650 }, { "epoch": 2.5542833163479406, "grad_norm": 0.6432368159294128, "learning_rate": 1.4877288352386282e-05, "loss": 1.6252, "step": 10652 }, { "epoch": 2.5547629039026436, "grad_norm": 0.7734670042991638, "learning_rate": 1.4861299864097849e-05, "loss": 1.6032, "step": 10654 }, { "epoch": 2.5552424914573466, "grad_norm": 0.6918604969978333, "learning_rate": 1.4845311375809418e-05, "loss": 1.6161, "step": 10656 }, { "epoch": 2.5557220790120496, "grad_norm": 0.680238664150238, "learning_rate": 1.4829322887520986e-05, "loss": 1.6779, "step": 10658 }, { "epoch": 2.5562016665667526, "grad_norm": 0.6447405219078064, "learning_rate": 1.4813334399232553e-05, "loss": 1.6344, "step": 10660 }, { "epoch": 2.5566812541214556, "grad_norm": 0.6734899878501892, "learning_rate": 1.4797345910944122e-05, "loss": 1.6056, "step": 10662 }, { "epoch": 2.5571608416761586, "grad_norm": 0.6948286294937134, "learning_rate": 1.478135742265569e-05, "loss": 1.5868, "step": 10664 }, { "epoch": 2.5576404292308617, "grad_norm": 0.7176745533943176, "learning_rate": 1.4765368934367257e-05, "loss": 1.5525, "step": 10666 }, { "epoch": 2.5581200167855647, "grad_norm": 0.824496865272522, "learning_rate": 1.4749380446078823e-05, "loss": 1.6282, "step": 10668 }, { "epoch": 2.5585996043402672, "grad_norm": 0.6513259410858154, "learning_rate": 1.473339195779039e-05, "loss": 1.6408, "step": 10670 }, { "epoch": 2.5590791918949702, "grad_norm": 0.7265354990959167, "learning_rate": 1.4717403469501959e-05, "loss": 1.6167, "step": 10672 }, { "epoch": 2.5595587794496732, "grad_norm": 0.7466129064559937, "learning_rate": 1.4701414981213526e-05, "loss": 1.6053, "step": 10674 }, { "epoch": 2.5600383670043763, "grad_norm": 
0.7244205474853516, "learning_rate": 1.4685426492925094e-05, "loss": 1.6311, "step": 10676 }, { "epoch": 2.5605179545590793, "grad_norm": 0.7442002892494202, "learning_rate": 1.4669438004636663e-05, "loss": 1.6376, "step": 10678 }, { "epoch": 2.5609975421137823, "grad_norm": 0.7377302050590515, "learning_rate": 1.465344951634823e-05, "loss": 1.6624, "step": 10680 }, { "epoch": 2.561477129668485, "grad_norm": 0.7167534232139587, "learning_rate": 1.4637461028059798e-05, "loss": 1.6354, "step": 10682 }, { "epoch": 2.561956717223188, "grad_norm": 0.6711373925209045, "learning_rate": 1.4621472539771367e-05, "loss": 1.6081, "step": 10684 }, { "epoch": 2.562436304777891, "grad_norm": 0.664377748966217, "learning_rate": 1.4605484051482932e-05, "loss": 1.6424, "step": 10686 }, { "epoch": 2.562915892332594, "grad_norm": 0.7070662975311279, "learning_rate": 1.45894955631945e-05, "loss": 1.6318, "step": 10688 }, { "epoch": 2.563395479887297, "grad_norm": 0.6702580451965332, "learning_rate": 1.4573507074906067e-05, "loss": 1.6133, "step": 10690 }, { "epoch": 2.563875067442, "grad_norm": 1.079084873199463, "learning_rate": 1.4557518586617636e-05, "loss": 1.5917, "step": 10692 }, { "epoch": 2.564354654996703, "grad_norm": 0.6660482287406921, "learning_rate": 1.4541530098329204e-05, "loss": 1.6277, "step": 10694 }, { "epoch": 2.564834242551406, "grad_norm": 0.7122556567192078, "learning_rate": 1.4525541610040771e-05, "loss": 1.589, "step": 10696 }, { "epoch": 2.565313830106109, "grad_norm": 0.6822601556777954, "learning_rate": 1.450955312175234e-05, "loss": 1.6736, "step": 10698 }, { "epoch": 2.565793417660812, "grad_norm": 0.7764472365379333, "learning_rate": 1.4493564633463908e-05, "loss": 1.66, "step": 10700 }, { "epoch": 2.5662730052155145, "grad_norm": 0.7319567203521729, "learning_rate": 1.4477576145175475e-05, "loss": 1.6156, "step": 10702 }, { "epoch": 2.5667525927702175, "grad_norm": 0.6977313160896301, "learning_rate": 1.446158765688704e-05, "loss": 1.6274, "step": 10704 
}, { "epoch": 2.5672321803249205, "grad_norm": 0.7143352627754211, "learning_rate": 1.4445599168598608e-05, "loss": 1.6061, "step": 10706 }, { "epoch": 2.5677117678796235, "grad_norm": 0.7208801507949829, "learning_rate": 1.4429610680310177e-05, "loss": 1.629, "step": 10708 }, { "epoch": 2.5681913554343265, "grad_norm": 0.7203874588012695, "learning_rate": 1.4413622192021745e-05, "loss": 1.5836, "step": 10710 }, { "epoch": 2.5686709429890295, "grad_norm": 0.7779393196105957, "learning_rate": 1.4397633703733312e-05, "loss": 1.651, "step": 10712 }, { "epoch": 2.5691505305437325, "grad_norm": 0.7247142791748047, "learning_rate": 1.4381645215444881e-05, "loss": 1.6227, "step": 10714 }, { "epoch": 2.569630118098435, "grad_norm": 0.8244185447692871, "learning_rate": 1.4365656727156448e-05, "loss": 1.6083, "step": 10716 }, { "epoch": 2.570109705653138, "grad_norm": 0.67893385887146, "learning_rate": 1.4349668238868016e-05, "loss": 1.5951, "step": 10718 }, { "epoch": 2.570589293207841, "grad_norm": 0.6872825622558594, "learning_rate": 1.4333679750579585e-05, "loss": 1.6188, "step": 10720 }, { "epoch": 2.571068880762544, "grad_norm": 0.7600519061088562, "learning_rate": 1.4317691262291152e-05, "loss": 1.6216, "step": 10722 }, { "epoch": 2.571548468317247, "grad_norm": 0.7694728374481201, "learning_rate": 1.4301702774002718e-05, "loss": 1.6386, "step": 10724 }, { "epoch": 2.57202805587195, "grad_norm": 0.7767746448516846, "learning_rate": 1.4285714285714285e-05, "loss": 1.638, "step": 10726 }, { "epoch": 2.572507643426653, "grad_norm": 0.6466357707977295, "learning_rate": 1.4269725797425853e-05, "loss": 1.6343, "step": 10728 }, { "epoch": 2.572987230981356, "grad_norm": 0.7749208211898804, "learning_rate": 1.4253737309137422e-05, "loss": 1.6787, "step": 10730 }, { "epoch": 2.573466818536059, "grad_norm": 0.699481725692749, "learning_rate": 1.423774882084899e-05, "loss": 1.6554, "step": 10732 }, { "epoch": 2.573946406090762, "grad_norm": 0.8611012101173401, "learning_rate": 
1.4221760332560557e-05, "loss": 1.6353, "step": 10734 }, { "epoch": 2.5744259936454648, "grad_norm": 0.7607488632202148, "learning_rate": 1.4205771844272126e-05, "loss": 1.6342, "step": 10736 }, { "epoch": 2.5749055812001678, "grad_norm": 0.7225797176361084, "learning_rate": 1.4189783355983693e-05, "loss": 1.68, "step": 10738 }, { "epoch": 2.575385168754871, "grad_norm": 0.7108954191207886, "learning_rate": 1.4173794867695262e-05, "loss": 1.5998, "step": 10740 }, { "epoch": 2.575864756309574, "grad_norm": 0.9067678451538086, "learning_rate": 1.4157806379406826e-05, "loss": 1.6414, "step": 10742 }, { "epoch": 2.576344343864277, "grad_norm": 0.8180708885192871, "learning_rate": 1.4141817891118395e-05, "loss": 1.6063, "step": 10744 }, { "epoch": 2.57682393141898, "grad_norm": 0.6730145215988159, "learning_rate": 1.4125829402829963e-05, "loss": 1.6301, "step": 10746 }, { "epoch": 2.5773035189736824, "grad_norm": 0.6824819445610046, "learning_rate": 1.410984091454153e-05, "loss": 1.6011, "step": 10748 }, { "epoch": 2.5777831065283854, "grad_norm": 0.6601020097732544, "learning_rate": 1.40938524262531e-05, "loss": 1.6327, "step": 10750 }, { "epoch": 2.5782626940830884, "grad_norm": 0.7200961709022522, "learning_rate": 1.4077863937964667e-05, "loss": 1.6632, "step": 10752 }, { "epoch": 2.5787422816377914, "grad_norm": 0.6940547227859497, "learning_rate": 1.4061875449676234e-05, "loss": 1.5656, "step": 10754 }, { "epoch": 2.5792218691924944, "grad_norm": 0.787777841091156, "learning_rate": 1.4045886961387803e-05, "loss": 1.6343, "step": 10756 }, { "epoch": 2.5797014567471974, "grad_norm": 0.8548477292060852, "learning_rate": 1.402989847309937e-05, "loss": 1.6642, "step": 10758 }, { "epoch": 2.5801810443019004, "grad_norm": 0.6857019066810608, "learning_rate": 1.4013909984810936e-05, "loss": 1.5889, "step": 10760 }, { "epoch": 2.5806606318566034, "grad_norm": 0.6906143426895142, "learning_rate": 1.3997921496522504e-05, "loss": 1.6076, "step": 10762 }, { "epoch": 
2.5811402194113064, "grad_norm": 0.6477100253105164, "learning_rate": 1.3981933008234071e-05, "loss": 1.6039, "step": 10764 }, { "epoch": 2.5816198069660095, "grad_norm": 0.6945614814758301, "learning_rate": 1.396594451994564e-05, "loss": 1.611, "step": 10766 }, { "epoch": 2.582099394520712, "grad_norm": 0.7679142355918884, "learning_rate": 1.3949956031657207e-05, "loss": 1.6394, "step": 10768 }, { "epoch": 2.582578982075415, "grad_norm": 0.7259703874588013, "learning_rate": 1.3933967543368775e-05, "loss": 1.5994, "step": 10770 }, { "epoch": 2.583058569630118, "grad_norm": 0.685659646987915, "learning_rate": 1.3917979055080344e-05, "loss": 1.5696, "step": 10772 }, { "epoch": 2.583538157184821, "grad_norm": 0.7272144556045532, "learning_rate": 1.3901990566791911e-05, "loss": 1.6478, "step": 10774 }, { "epoch": 2.584017744739524, "grad_norm": 0.7300522923469543, "learning_rate": 1.3886002078503479e-05, "loss": 1.6246, "step": 10776 }, { "epoch": 2.584497332294227, "grad_norm": 0.7318264842033386, "learning_rate": 1.3870013590215044e-05, "loss": 1.6586, "step": 10778 }, { "epoch": 2.58497691984893, "grad_norm": 0.791695773601532, "learning_rate": 1.3854025101926612e-05, "loss": 1.6488, "step": 10780 }, { "epoch": 2.5854565074036326, "grad_norm": 0.7149094343185425, "learning_rate": 1.3838036613638181e-05, "loss": 1.6399, "step": 10782 }, { "epoch": 2.5859360949583357, "grad_norm": 0.7368173599243164, "learning_rate": 1.3822048125349748e-05, "loss": 1.6684, "step": 10784 }, { "epoch": 2.5864156825130387, "grad_norm": 0.6805135011672974, "learning_rate": 1.3806059637061316e-05, "loss": 1.6115, "step": 10786 }, { "epoch": 2.5868952700677417, "grad_norm": 0.7749119400978088, "learning_rate": 1.3790071148772885e-05, "loss": 1.6248, "step": 10788 }, { "epoch": 2.5873748576224447, "grad_norm": 0.8127108216285706, "learning_rate": 1.3774082660484452e-05, "loss": 1.6515, "step": 10790 }, { "epoch": 2.5878544451771477, "grad_norm": 0.7563222050666809, "learning_rate": 
1.3758094172196021e-05, "loss": 1.6288, "step": 10792 }, { "epoch": 2.5883340327318507, "grad_norm": 0.7060605883598328, "learning_rate": 1.3742105683907589e-05, "loss": 1.5966, "step": 10794 }, { "epoch": 2.5888136202865537, "grad_norm": 0.8380133509635925, "learning_rate": 1.3726117195619154e-05, "loss": 1.6986, "step": 10796 }, { "epoch": 2.5892932078412567, "grad_norm": 0.801169753074646, "learning_rate": 1.3710128707330722e-05, "loss": 1.5999, "step": 10798 }, { "epoch": 2.5897727953959597, "grad_norm": 0.7728160619735718, "learning_rate": 1.369414021904229e-05, "loss": 1.5925, "step": 10800 }, { "epoch": 2.5897727953959597, "eval_loss": 1.708970069885254, "eval_runtime": 331.2753, "eval_samples_per_second": 402.834, "eval_steps_per_second": 12.591, "step": 10800 }, { "epoch": 2.5902523829506623, "grad_norm": 0.7013607621192932, "learning_rate": 1.3678151730753858e-05, "loss": 1.6225, "step": 10802 }, { "epoch": 2.5907319705053653, "grad_norm": 0.7127708196640015, "learning_rate": 1.3662163242465426e-05, "loss": 1.6422, "step": 10804 }, { "epoch": 2.5912115580600683, "grad_norm": 0.7397205233573914, "learning_rate": 1.3646174754176993e-05, "loss": 1.6266, "step": 10806 }, { "epoch": 2.5916911456147713, "grad_norm": 0.833928644657135, "learning_rate": 1.3630186265888562e-05, "loss": 1.6136, "step": 10808 }, { "epoch": 2.5921707331694743, "grad_norm": 0.7043015956878662, "learning_rate": 1.361419777760013e-05, "loss": 1.6224, "step": 10810 }, { "epoch": 2.5926503207241773, "grad_norm": 0.7350032329559326, "learning_rate": 1.3598209289311697e-05, "loss": 1.6222, "step": 10812 }, { "epoch": 2.59312990827888, "grad_norm": 0.655188262462616, "learning_rate": 1.3582220801023263e-05, "loss": 1.6057, "step": 10814 }, { "epoch": 2.593609495833583, "grad_norm": 0.7142622470855713, "learning_rate": 1.356623231273483e-05, "loss": 1.5905, "step": 10816 }, { "epoch": 2.594089083388286, "grad_norm": 0.710344672203064, "learning_rate": 1.3550243824446399e-05, "loss": 1.6108, 
"step": 10818 }, { "epoch": 2.594568670942989, "grad_norm": 0.7370651960372925, "learning_rate": 1.3534255336157966e-05, "loss": 1.6193, "step": 10820 }, { "epoch": 2.595048258497692, "grad_norm": 0.715307891368866, "learning_rate": 1.3518266847869534e-05, "loss": 1.6009, "step": 10822 }, { "epoch": 2.595527846052395, "grad_norm": 0.6794072389602661, "learning_rate": 1.3502278359581103e-05, "loss": 1.6095, "step": 10824 }, { "epoch": 2.596007433607098, "grad_norm": 0.7877379655838013, "learning_rate": 1.348628987129267e-05, "loss": 1.6536, "step": 10826 }, { "epoch": 2.596487021161801, "grad_norm": 0.8335821032524109, "learning_rate": 1.3470301383004238e-05, "loss": 1.6508, "step": 10828 }, { "epoch": 2.596966608716504, "grad_norm": 0.707051694393158, "learning_rate": 1.3454312894715807e-05, "loss": 1.6486, "step": 10830 }, { "epoch": 2.597446196271207, "grad_norm": 0.7840430736541748, "learning_rate": 1.3438324406427371e-05, "loss": 1.6276, "step": 10832 }, { "epoch": 2.5979257838259096, "grad_norm": 0.7611523866653442, "learning_rate": 1.342233591813894e-05, "loss": 1.5785, "step": 10834 }, { "epoch": 2.5984053713806126, "grad_norm": 0.6968637108802795, "learning_rate": 1.3406347429850507e-05, "loss": 1.6339, "step": 10836 }, { "epoch": 2.5988849589353156, "grad_norm": 0.7736181020736694, "learning_rate": 1.3390358941562076e-05, "loss": 1.6374, "step": 10838 }, { "epoch": 2.5993645464900186, "grad_norm": 0.6952282190322876, "learning_rate": 1.3374370453273644e-05, "loss": 1.6049, "step": 10840 }, { "epoch": 2.5998441340447216, "grad_norm": 0.7458938956260681, "learning_rate": 1.3358381964985211e-05, "loss": 1.6337, "step": 10842 }, { "epoch": 2.6003237215994246, "grad_norm": 0.7620350122451782, "learning_rate": 1.334239347669678e-05, "loss": 1.6641, "step": 10844 }, { "epoch": 2.6008033091541276, "grad_norm": 0.8240498304367065, "learning_rate": 1.3326404988408348e-05, "loss": 1.6394, "step": 10846 }, { "epoch": 2.60128289670883, "grad_norm": 0.7517419457435608, 
"learning_rate": 1.3310416500119915e-05, "loss": 1.589, "step": 10848 }, { "epoch": 2.601762484263533, "grad_norm": 0.731480598449707, "learning_rate": 1.329442801183148e-05, "loss": 1.6604, "step": 10850 }, { "epoch": 2.602242071818236, "grad_norm": 0.7585097551345825, "learning_rate": 1.3278439523543048e-05, "loss": 1.6102, "step": 10852 }, { "epoch": 2.602721659372939, "grad_norm": 0.747085690498352, "learning_rate": 1.3262451035254617e-05, "loss": 1.6332, "step": 10854 }, { "epoch": 2.603201246927642, "grad_norm": 0.6982395648956299, "learning_rate": 1.3246462546966185e-05, "loss": 1.5557, "step": 10856 }, { "epoch": 2.603680834482345, "grad_norm": 0.7219375967979431, "learning_rate": 1.3230474058677752e-05, "loss": 1.6445, "step": 10858 }, { "epoch": 2.6041604220370482, "grad_norm": 0.7166317701339722, "learning_rate": 1.3214485570389321e-05, "loss": 1.6118, "step": 10860 }, { "epoch": 2.6046400095917512, "grad_norm": 0.7053154110908508, "learning_rate": 1.3198497082100889e-05, "loss": 1.6233, "step": 10862 }, { "epoch": 2.6051195971464542, "grad_norm": 0.6898757815361023, "learning_rate": 1.3182508593812456e-05, "loss": 1.626, "step": 10864 }, { "epoch": 2.6055991847011573, "grad_norm": 0.6891774535179138, "learning_rate": 1.3166520105524025e-05, "loss": 1.6322, "step": 10866 }, { "epoch": 2.60607877225586, "grad_norm": 0.7381041049957275, "learning_rate": 1.3150531617235589e-05, "loss": 1.6826, "step": 10868 }, { "epoch": 2.606558359810563, "grad_norm": 0.7132360339164734, "learning_rate": 1.3134543128947158e-05, "loss": 1.6102, "step": 10870 }, { "epoch": 2.607037947365266, "grad_norm": 0.776106059551239, "learning_rate": 1.3118554640658726e-05, "loss": 1.6363, "step": 10872 }, { "epoch": 2.607517534919969, "grad_norm": 0.7594299912452698, "learning_rate": 1.3102566152370293e-05, "loss": 1.6051, "step": 10874 }, { "epoch": 2.607997122474672, "grad_norm": 0.6854764223098755, "learning_rate": 1.3086577664081862e-05, "loss": 1.5966, "step": 10876 }, { "epoch": 
2.608476710029375, "grad_norm": 0.7511355876922607, "learning_rate": 1.307058917579343e-05, "loss": 1.6232, "step": 10878 }, { "epoch": 2.6089562975840774, "grad_norm": 0.7854150533676147, "learning_rate": 1.3054600687504997e-05, "loss": 1.5998, "step": 10880 }, { "epoch": 2.6094358851387804, "grad_norm": 0.7024514079093933, "learning_rate": 1.3038612199216566e-05, "loss": 1.6297, "step": 10882 }, { "epoch": 2.6099154726934835, "grad_norm": 0.6484757661819458, "learning_rate": 1.3022623710928133e-05, "loss": 1.5537, "step": 10884 }, { "epoch": 2.6103950602481865, "grad_norm": 0.6780731081962585, "learning_rate": 1.3006635222639699e-05, "loss": 1.6326, "step": 10886 }, { "epoch": 2.6108746478028895, "grad_norm": 0.8236744403839111, "learning_rate": 1.2990646734351266e-05, "loss": 1.6033, "step": 10888 }, { "epoch": 2.6113542353575925, "grad_norm": 0.7023913860321045, "learning_rate": 1.2974658246062835e-05, "loss": 1.6136, "step": 10890 }, { "epoch": 2.6118338229122955, "grad_norm": 0.6649223566055298, "learning_rate": 1.2958669757774403e-05, "loss": 1.6362, "step": 10892 }, { "epoch": 2.6123134104669985, "grad_norm": 0.7049388885498047, "learning_rate": 1.294268126948597e-05, "loss": 1.6011, "step": 10894 }, { "epoch": 2.6127929980217015, "grad_norm": 0.8252013325691223, "learning_rate": 1.292669278119754e-05, "loss": 1.6765, "step": 10896 }, { "epoch": 2.6132725855764045, "grad_norm": 0.7526699900627136, "learning_rate": 1.2910704292909107e-05, "loss": 1.6074, "step": 10898 }, { "epoch": 2.613752173131107, "grad_norm": 0.6607761383056641, "learning_rate": 1.2894715804620674e-05, "loss": 1.6559, "step": 10900 }, { "epoch": 2.61423176068581, "grad_norm": 0.7034802436828613, "learning_rate": 1.2878727316332243e-05, "loss": 1.6079, "step": 10902 }, { "epoch": 2.614711348240513, "grad_norm": 0.7470638751983643, "learning_rate": 1.2862738828043807e-05, "loss": 1.5757, "step": 10904 }, { "epoch": 2.615190935795216, "grad_norm": 0.681937575340271, "learning_rate": 
1.2846750339755376e-05, "loss": 1.6001, "step": 10906 }, { "epoch": 2.615670523349919, "grad_norm": 0.7297748923301697, "learning_rate": 1.2830761851466944e-05, "loss": 1.6403, "step": 10908 }, { "epoch": 2.616150110904622, "grad_norm": 0.7242927551269531, "learning_rate": 1.2814773363178511e-05, "loss": 1.6597, "step": 10910 }, { "epoch": 2.616629698459325, "grad_norm": 0.6844403743743896, "learning_rate": 1.279878487489008e-05, "loss": 1.653, "step": 10912 }, { "epoch": 2.6171092860140277, "grad_norm": 0.680440366268158, "learning_rate": 1.2782796386601648e-05, "loss": 1.6109, "step": 10914 }, { "epoch": 2.6175888735687307, "grad_norm": 0.6733812093734741, "learning_rate": 1.2766807898313215e-05, "loss": 1.6131, "step": 10916 }, { "epoch": 2.6180684611234337, "grad_norm": 0.7013739347457886, "learning_rate": 1.2750819410024784e-05, "loss": 1.6129, "step": 10918 }, { "epoch": 2.6185480486781367, "grad_norm": 0.7526195645332336, "learning_rate": 1.2734830921736351e-05, "loss": 1.6585, "step": 10920 }, { "epoch": 2.6190276362328397, "grad_norm": 0.7239239811897278, "learning_rate": 1.2718842433447919e-05, "loss": 1.6184, "step": 10922 }, { "epoch": 2.6195072237875427, "grad_norm": 0.693473219871521, "learning_rate": 1.2702853945159485e-05, "loss": 1.6489, "step": 10924 }, { "epoch": 2.6199868113422458, "grad_norm": 0.7842479944229126, "learning_rate": 1.2686865456871052e-05, "loss": 1.599, "step": 10926 }, { "epoch": 2.6204663988969488, "grad_norm": 0.7554038763046265, "learning_rate": 1.2670876968582621e-05, "loss": 1.6512, "step": 10928 }, { "epoch": 2.6209459864516518, "grad_norm": 0.6877875328063965, "learning_rate": 1.2654888480294188e-05, "loss": 1.6223, "step": 10930 }, { "epoch": 2.621425574006355, "grad_norm": 0.7165510058403015, "learning_rate": 1.2638899992005756e-05, "loss": 1.6495, "step": 10932 }, { "epoch": 2.6219051615610574, "grad_norm": 0.7018747925758362, "learning_rate": 1.2622911503717325e-05, "loss": 1.6222, "step": 10934 }, { "epoch": 
2.6223847491157604, "grad_norm": 0.6512750387191772, "learning_rate": 1.2606923015428892e-05, "loss": 1.6294, "step": 10936 }, { "epoch": 2.6228643366704634, "grad_norm": 0.6736502647399902, "learning_rate": 1.259093452714046e-05, "loss": 1.6572, "step": 10938 }, { "epoch": 2.6233439242251664, "grad_norm": 0.7374477982521057, "learning_rate": 1.2574946038852029e-05, "loss": 1.6331, "step": 10940 }, { "epoch": 2.6238235117798694, "grad_norm": 0.7841225266456604, "learning_rate": 1.2558957550563594e-05, "loss": 1.6355, "step": 10942 }, { "epoch": 2.6243030993345724, "grad_norm": 0.6844650506973267, "learning_rate": 1.2542969062275162e-05, "loss": 1.6236, "step": 10944 }, { "epoch": 2.624782686889275, "grad_norm": 0.7837991118431091, "learning_rate": 1.252698057398673e-05, "loss": 1.6789, "step": 10946 }, { "epoch": 2.625262274443978, "grad_norm": 0.6870431900024414, "learning_rate": 1.2510992085698298e-05, "loss": 1.6527, "step": 10948 }, { "epoch": 2.625741861998681, "grad_norm": 0.7158839106559753, "learning_rate": 1.2495003597409866e-05, "loss": 1.6402, "step": 10950 }, { "epoch": 2.626221449553384, "grad_norm": 0.694659948348999, "learning_rate": 1.2479015109121433e-05, "loss": 1.6252, "step": 10952 }, { "epoch": 2.626701037108087, "grad_norm": 0.6183851957321167, "learning_rate": 1.2463026620833e-05, "loss": 1.6005, "step": 10954 }, { "epoch": 2.62718062466279, "grad_norm": 0.7208304405212402, "learning_rate": 1.2447038132544568e-05, "loss": 1.6179, "step": 10956 }, { "epoch": 2.627660212217493, "grad_norm": 0.6904534101486206, "learning_rate": 1.2431049644256135e-05, "loss": 1.6321, "step": 10958 }, { "epoch": 2.628139799772196, "grad_norm": 0.700118899345398, "learning_rate": 1.2415061155967704e-05, "loss": 1.6217, "step": 10960 }, { "epoch": 2.628619387326899, "grad_norm": 0.79938805103302, "learning_rate": 1.2399072667679272e-05, "loss": 1.6014, "step": 10962 }, { "epoch": 2.629098974881602, "grad_norm": 0.6527686715126038, "learning_rate": 
1.238308417939084e-05, "loss": 1.6517, "step": 10964 }, { "epoch": 2.6295785624363046, "grad_norm": 0.669331431388855, "learning_rate": 1.2367095691102407e-05, "loss": 1.6089, "step": 10966 }, { "epoch": 2.6300581499910076, "grad_norm": 0.7540653944015503, "learning_rate": 1.2351107202813974e-05, "loss": 1.6411, "step": 10968 }, { "epoch": 2.6305377375457106, "grad_norm": 0.6973647475242615, "learning_rate": 1.2335118714525543e-05, "loss": 1.5718, "step": 10970 }, { "epoch": 2.6310173251004136, "grad_norm": 0.6973733901977539, "learning_rate": 1.231913022623711e-05, "loss": 1.6219, "step": 10972 }, { "epoch": 2.6314969126551166, "grad_norm": 0.7686071395874023, "learning_rate": 1.2303141737948678e-05, "loss": 1.5597, "step": 10974 }, { "epoch": 2.6319765002098197, "grad_norm": 0.716871976852417, "learning_rate": 1.2287153249660245e-05, "loss": 1.6567, "step": 10976 }, { "epoch": 2.6324560877645227, "grad_norm": 0.7558577060699463, "learning_rate": 1.2271164761371813e-05, "loss": 1.6158, "step": 10978 }, { "epoch": 2.6329356753192252, "grad_norm": 0.8083328604698181, "learning_rate": 1.2255176273083382e-05, "loss": 1.6397, "step": 10980 }, { "epoch": 2.6334152628739282, "grad_norm": 0.7338179349899292, "learning_rate": 1.2239187784794947e-05, "loss": 1.6608, "step": 10982 }, { "epoch": 2.6338948504286313, "grad_norm": 0.6954779624938965, "learning_rate": 1.2223199296506515e-05, "loss": 1.6325, "step": 10984 }, { "epoch": 2.6343744379833343, "grad_norm": 0.6635292172431946, "learning_rate": 1.2207210808218084e-05, "loss": 1.6188, "step": 10986 }, { "epoch": 2.6348540255380373, "grad_norm": 0.6484982967376709, "learning_rate": 1.2191222319929651e-05, "loss": 1.6417, "step": 10988 }, { "epoch": 2.6353336130927403, "grad_norm": 0.6980170011520386, "learning_rate": 1.217523383164122e-05, "loss": 1.6218, "step": 10990 }, { "epoch": 2.6358132006474433, "grad_norm": 0.7583498358726501, "learning_rate": 1.2159245343352786e-05, "loss": 1.6171, "step": 10992 }, { "epoch": 
2.6362927882021463, "grad_norm": 0.708967387676239, "learning_rate": 1.2143256855064353e-05, "loss": 1.6186, "step": 10994 }, { "epoch": 2.6367723757568493, "grad_norm": 0.6770802736282349, "learning_rate": 1.2127268366775923e-05, "loss": 1.5944, "step": 10996 }, { "epoch": 2.6372519633115523, "grad_norm": 0.7019736766815186, "learning_rate": 1.211127987848749e-05, "loss": 1.5754, "step": 10998 }, { "epoch": 2.637731550866255, "grad_norm": 0.8829032182693481, "learning_rate": 1.2095291390199057e-05, "loss": 1.6523, "step": 11000 }, { "epoch": 2.638211138420958, "grad_norm": 0.7034205794334412, "learning_rate": 1.2079302901910625e-05, "loss": 1.6216, "step": 11002 }, { "epoch": 2.638690725975661, "grad_norm": 0.7459619045257568, "learning_rate": 1.2063314413622192e-05, "loss": 1.6486, "step": 11004 }, { "epoch": 2.639170313530364, "grad_norm": 0.6747644543647766, "learning_rate": 1.2047325925333761e-05, "loss": 1.6028, "step": 11006 }, { "epoch": 2.639649901085067, "grad_norm": 0.7202240228652954, "learning_rate": 1.2031337437045329e-05, "loss": 1.623, "step": 11008 }, { "epoch": 2.64012948863977, "grad_norm": 0.6957982182502747, "learning_rate": 1.2015348948756894e-05, "loss": 1.6292, "step": 11010 }, { "epoch": 2.6406090761944725, "grad_norm": 0.7497761845588684, "learning_rate": 1.1999360460468463e-05, "loss": 1.6338, "step": 11012 }, { "epoch": 2.6410886637491755, "grad_norm": 0.7521147131919861, "learning_rate": 1.198337197218003e-05, "loss": 1.6464, "step": 11014 }, { "epoch": 2.6415682513038785, "grad_norm": 0.7959204912185669, "learning_rate": 1.19673834838916e-05, "loss": 1.6529, "step": 11016 }, { "epoch": 2.6420478388585815, "grad_norm": 0.8893523812294006, "learning_rate": 1.1951394995603166e-05, "loss": 1.5944, "step": 11018 }, { "epoch": 2.6425274264132845, "grad_norm": 0.7195210456848145, "learning_rate": 1.1935406507314733e-05, "loss": 1.6427, "step": 11020 }, { "epoch": 2.6430070139679875, "grad_norm": 0.7002537846565247, "learning_rate": 
1.1919418019026302e-05, "loss": 1.6262, "step": 11022 }, { "epoch": 2.6434866015226905, "grad_norm": 0.702785074710846, "learning_rate": 1.190342953073787e-05, "loss": 1.6264, "step": 11024 }, { "epoch": 2.6439661890773936, "grad_norm": 0.7198031544685364, "learning_rate": 1.1887441042449437e-05, "loss": 1.6869, "step": 11026 }, { "epoch": 2.6444457766320966, "grad_norm": 0.6851058006286621, "learning_rate": 1.1871452554161004e-05, "loss": 1.5997, "step": 11028 }, { "epoch": 2.6449253641867996, "grad_norm": 0.8523953557014465, "learning_rate": 1.1855464065872572e-05, "loss": 1.6395, "step": 11030 }, { "epoch": 2.645404951741502, "grad_norm": 0.7739126682281494, "learning_rate": 1.183947557758414e-05, "loss": 1.6433, "step": 11032 }, { "epoch": 2.645884539296205, "grad_norm": 0.8116719126701355, "learning_rate": 1.1823487089295708e-05, "loss": 1.6149, "step": 11034 }, { "epoch": 2.646364126850908, "grad_norm": 0.7252166867256165, "learning_rate": 1.1807498601007276e-05, "loss": 1.603, "step": 11036 }, { "epoch": 2.646843714405611, "grad_norm": 0.7106648087501526, "learning_rate": 1.1791510112718843e-05, "loss": 1.6523, "step": 11038 }, { "epoch": 2.647323301960314, "grad_norm": 0.6745586395263672, "learning_rate": 1.177552162443041e-05, "loss": 1.6053, "step": 11040 }, { "epoch": 2.647802889515017, "grad_norm": 0.803250789642334, "learning_rate": 1.175953313614198e-05, "loss": 1.6204, "step": 11042 }, { "epoch": 2.64828247706972, "grad_norm": 0.6801742315292358, "learning_rate": 1.1743544647853547e-05, "loss": 1.5928, "step": 11044 }, { "epoch": 2.6487620646244228, "grad_norm": 0.7319782376289368, "learning_rate": 1.1727556159565112e-05, "loss": 1.6267, "step": 11046 }, { "epoch": 2.6492416521791258, "grad_norm": 0.7102635502815247, "learning_rate": 1.1711567671276682e-05, "loss": 1.5898, "step": 11048 }, { "epoch": 2.649721239733829, "grad_norm": 0.6373254060745239, "learning_rate": 1.1695579182988249e-05, "loss": 1.6527, "step": 11050 }, { "epoch": 
2.650200827288532, "grad_norm": 0.7637274265289307, "learning_rate": 1.1679590694699816e-05, "loss": 1.6071, "step": 11052 }, { "epoch": 2.650680414843235, "grad_norm": 1.059818148612976, "learning_rate": 1.1663602206411384e-05, "loss": 1.6478, "step": 11054 }, { "epoch": 2.651160002397938, "grad_norm": 0.7572793364524841, "learning_rate": 1.1647613718122951e-05, "loss": 1.6176, "step": 11056 }, { "epoch": 2.651639589952641, "grad_norm": 0.6592146754264832, "learning_rate": 1.163162522983452e-05, "loss": 1.5771, "step": 11058 }, { "epoch": 2.652119177507344, "grad_norm": 0.6784149408340454, "learning_rate": 1.1615636741546088e-05, "loss": 1.6093, "step": 11060 }, { "epoch": 2.652598765062047, "grad_norm": 0.724530816078186, "learning_rate": 1.1599648253257655e-05, "loss": 1.6296, "step": 11062 }, { "epoch": 2.65307835261675, "grad_norm": 0.7104493379592896, "learning_rate": 1.1583659764969222e-05, "loss": 1.5684, "step": 11064 }, { "epoch": 2.6535579401714524, "grad_norm": 0.9583975672721863, "learning_rate": 1.156767127668079e-05, "loss": 1.6224, "step": 11066 }, { "epoch": 2.6540375277261554, "grad_norm": 0.7135509848594666, "learning_rate": 1.1551682788392359e-05, "loss": 1.636, "step": 11068 }, { "epoch": 2.6545171152808584, "grad_norm": 0.7769715785980225, "learning_rate": 1.1535694300103926e-05, "loss": 1.5967, "step": 11070 }, { "epoch": 2.6549967028355614, "grad_norm": 0.6736999154090881, "learning_rate": 1.1519705811815494e-05, "loss": 1.626, "step": 11072 }, { "epoch": 2.6554762903902644, "grad_norm": 0.7403941750526428, "learning_rate": 1.1503717323527061e-05, "loss": 1.6495, "step": 11074 }, { "epoch": 2.6559558779449675, "grad_norm": 0.7210782170295715, "learning_rate": 1.1487728835238628e-05, "loss": 1.6278, "step": 11076 }, { "epoch": 2.65643546549967, "grad_norm": 0.7688258290290833, "learning_rate": 1.1471740346950196e-05, "loss": 1.6114, "step": 11078 }, { "epoch": 2.656915053054373, "grad_norm": 0.8301407098770142, "learning_rate": 
1.1455751858661765e-05, "loss": 1.5947, "step": 11080 }, { "epoch": 2.657394640609076, "grad_norm": 0.6897489428520203, "learning_rate": 1.143976337037333e-05, "loss": 1.6205, "step": 11082 }, { "epoch": 2.657874228163779, "grad_norm": 0.6721181869506836, "learning_rate": 1.14237748820849e-05, "loss": 1.6328, "step": 11084 }, { "epoch": 2.658353815718482, "grad_norm": 0.6892438530921936, "learning_rate": 1.1407786393796467e-05, "loss": 1.5776, "step": 11086 }, { "epoch": 2.658833403273185, "grad_norm": 0.8589502573013306, "learning_rate": 1.1391797905508035e-05, "loss": 1.6881, "step": 11088 }, { "epoch": 2.659312990827888, "grad_norm": 0.7461434602737427, "learning_rate": 1.1375809417219604e-05, "loss": 1.6484, "step": 11090 }, { "epoch": 2.659792578382591, "grad_norm": 0.8269992470741272, "learning_rate": 1.135982092893117e-05, "loss": 1.5999, "step": 11092 }, { "epoch": 2.660272165937294, "grad_norm": 0.6763894557952881, "learning_rate": 1.1343832440642738e-05, "loss": 1.6154, "step": 11094 }, { "epoch": 2.660751753491997, "grad_norm": 0.8915654420852661, "learning_rate": 1.1327843952354306e-05, "loss": 1.6653, "step": 11096 }, { "epoch": 2.6612313410466997, "grad_norm": 0.6952832937240601, "learning_rate": 1.1311855464065873e-05, "loss": 1.7151, "step": 11098 }, { "epoch": 2.6617109286014027, "grad_norm": 0.721984326839447, "learning_rate": 1.129586697577744e-05, "loss": 1.6184, "step": 11100 }, { "epoch": 2.6621905161561057, "grad_norm": 0.7020556926727295, "learning_rate": 1.1279878487489008e-05, "loss": 1.6578, "step": 11102 }, { "epoch": 2.6626701037108087, "grad_norm": 0.7553297281265259, "learning_rate": 1.1263889999200575e-05, "loss": 1.6377, "step": 11104 }, { "epoch": 2.6631496912655117, "grad_norm": 0.6710344552993774, "learning_rate": 1.1247901510912144e-05, "loss": 1.6259, "step": 11106 }, { "epoch": 2.6636292788202147, "grad_norm": 0.6894142031669617, "learning_rate": 1.1231913022623712e-05, "loss": 1.5734, "step": 11108 }, { "epoch": 
2.6641088663749177, "grad_norm": 0.7245759963989258, "learning_rate": 1.121592453433528e-05, "loss": 1.6491, "step": 11110 }, { "epoch": 2.6645884539296203, "grad_norm": 0.8037406802177429, "learning_rate": 1.1199936046046847e-05, "loss": 1.6201, "step": 11112 }, { "epoch": 2.6650680414843233, "grad_norm": 0.781032919883728, "learning_rate": 1.1183947557758414e-05, "loss": 1.6123, "step": 11114 }, { "epoch": 2.6655476290390263, "grad_norm": 0.696061909198761, "learning_rate": 1.1167959069469983e-05, "loss": 1.6729, "step": 11116 }, { "epoch": 2.6660272165937293, "grad_norm": 0.6677605509757996, "learning_rate": 1.1151970581181549e-05, "loss": 1.5654, "step": 11118 }, { "epoch": 2.6665068041484323, "grad_norm": 0.7024529576301575, "learning_rate": 1.1135982092893118e-05, "loss": 1.647, "step": 11120 }, { "epoch": 2.6669863917031353, "grad_norm": 0.7086523175239563, "learning_rate": 1.1119993604604685e-05, "loss": 1.6556, "step": 11122 }, { "epoch": 2.6674659792578383, "grad_norm": 0.7124443054199219, "learning_rate": 1.1104005116316253e-05, "loss": 1.6066, "step": 11124 }, { "epoch": 2.6679455668125414, "grad_norm": 0.6167515516281128, "learning_rate": 1.1088016628027822e-05, "loss": 1.5821, "step": 11126 }, { "epoch": 2.6684251543672444, "grad_norm": 0.6408182382583618, "learning_rate": 1.1072028139739387e-05, "loss": 1.631, "step": 11128 }, { "epoch": 2.6689047419219474, "grad_norm": 0.6640773415565491, "learning_rate": 1.1056039651450955e-05, "loss": 1.6361, "step": 11130 }, { "epoch": 2.66938432947665, "grad_norm": 0.7227541208267212, "learning_rate": 1.1040051163162524e-05, "loss": 1.6395, "step": 11132 }, { "epoch": 2.669863917031353, "grad_norm": 0.7047887444496155, "learning_rate": 1.1024062674874091e-05, "loss": 1.6226, "step": 11134 }, { "epoch": 2.670343504586056, "grad_norm": 0.7055714726448059, "learning_rate": 1.1008074186585659e-05, "loss": 1.6165, "step": 11136 }, { "epoch": 2.670823092140759, "grad_norm": 0.6442376971244812, "learning_rate": 
1.0992085698297226e-05, "loss": 1.616, "step": 11138 }, { "epoch": 2.671302679695462, "grad_norm": 0.8316052556037903, "learning_rate": 1.0976097210008794e-05, "loss": 1.6237, "step": 11140 }, { "epoch": 2.671782267250165, "grad_norm": 0.7186545133590698, "learning_rate": 1.0960108721720363e-05, "loss": 1.6222, "step": 11142 }, { "epoch": 2.6722618548048676, "grad_norm": 0.6950579881668091, "learning_rate": 1.094412023343193e-05, "loss": 1.6061, "step": 11144 }, { "epoch": 2.6727414423595706, "grad_norm": 0.7200846672058105, "learning_rate": 1.0928131745143497e-05, "loss": 1.5865, "step": 11146 }, { "epoch": 2.6732210299142736, "grad_norm": 0.7511730194091797, "learning_rate": 1.0912143256855065e-05, "loss": 1.661, "step": 11148 }, { "epoch": 2.6737006174689766, "grad_norm": 0.8135468363761902, "learning_rate": 1.0896154768566632e-05, "loss": 1.6566, "step": 11150 }, { "epoch": 2.6741802050236796, "grad_norm": 0.7722132205963135, "learning_rate": 1.0880166280278201e-05, "loss": 1.6379, "step": 11152 }, { "epoch": 2.6746597925783826, "grad_norm": 0.7491798996925354, "learning_rate": 1.0864177791989767e-05, "loss": 1.5893, "step": 11154 }, { "epoch": 2.6751393801330856, "grad_norm": 0.6792373657226562, "learning_rate": 1.0848189303701334e-05, "loss": 1.5741, "step": 11156 }, { "epoch": 2.6756189676877886, "grad_norm": 0.7353643774986267, "learning_rate": 1.0832200815412903e-05, "loss": 1.6249, "step": 11158 }, { "epoch": 2.6760985552424916, "grad_norm": 0.7274590730667114, "learning_rate": 1.0816212327124471e-05, "loss": 1.6273, "step": 11160 }, { "epoch": 2.6765781427971946, "grad_norm": 0.6705948114395142, "learning_rate": 1.0800223838836038e-05, "loss": 1.6124, "step": 11162 }, { "epoch": 2.677057730351897, "grad_norm": 0.7585946321487427, "learning_rate": 1.0784235350547606e-05, "loss": 1.5938, "step": 11164 }, { "epoch": 2.6775373179066, "grad_norm": 0.6783523559570312, "learning_rate": 1.0768246862259173e-05, "loss": 1.6073, "step": 11166 }, { "epoch": 
2.678016905461303, "grad_norm": 0.7032739520072937, "learning_rate": 1.0752258373970742e-05, "loss": 1.6457, "step": 11168 }, { "epoch": 2.6784964930160062, "grad_norm": 0.7506272792816162, "learning_rate": 1.073626988568231e-05, "loss": 1.6201, "step": 11170 }, { "epoch": 2.6789760805707092, "grad_norm": 0.6511099934577942, "learning_rate": 1.0720281397393877e-05, "loss": 1.5889, "step": 11172 }, { "epoch": 2.6794556681254122, "grad_norm": 0.6968287229537964, "learning_rate": 1.0704292909105444e-05, "loss": 1.633, "step": 11174 }, { "epoch": 2.6799352556801153, "grad_norm": 0.6902340650558472, "learning_rate": 1.0688304420817012e-05, "loss": 1.6222, "step": 11176 }, { "epoch": 2.680414843234818, "grad_norm": 0.6369337439537048, "learning_rate": 1.067231593252858e-05, "loss": 1.6368, "step": 11178 }, { "epoch": 2.680894430789521, "grad_norm": 0.8458722233772278, "learning_rate": 1.0656327444240148e-05, "loss": 1.6477, "step": 11180 }, { "epoch": 2.681374018344224, "grad_norm": 0.7065027356147766, "learning_rate": 1.0640338955951714e-05, "loss": 1.6106, "step": 11182 }, { "epoch": 2.681853605898927, "grad_norm": 0.7671485543251038, "learning_rate": 1.0624350467663283e-05, "loss": 1.6356, "step": 11184 }, { "epoch": 2.68233319345363, "grad_norm": 0.6409327983856201, "learning_rate": 1.060836197937485e-05, "loss": 1.6438, "step": 11186 }, { "epoch": 2.682812781008333, "grad_norm": 0.6515001654624939, "learning_rate": 1.059237349108642e-05, "loss": 1.6579, "step": 11188 }, { "epoch": 2.683292368563036, "grad_norm": 0.7631815075874329, "learning_rate": 1.0576385002797987e-05, "loss": 1.6423, "step": 11190 }, { "epoch": 2.683771956117739, "grad_norm": 0.8306704163551331, "learning_rate": 1.0560396514509553e-05, "loss": 1.5659, "step": 11192 }, { "epoch": 2.684251543672442, "grad_norm": 0.8171567916870117, "learning_rate": 1.0544408026221122e-05, "loss": 1.646, "step": 11194 }, { "epoch": 2.684731131227145, "grad_norm": 0.701374351978302, "learning_rate": 
1.0528419537932689e-05, "loss": 1.6636, "step": 11196 }, { "epoch": 2.6852107187818475, "grad_norm": 0.710883617401123, "learning_rate": 1.0512431049644256e-05, "loss": 1.6315, "step": 11198 }, { "epoch": 2.6856903063365505, "grad_norm": 0.7030364871025085, "learning_rate": 1.0496442561355824e-05, "loss": 1.677, "step": 11200 }, { "epoch": 2.6856903063365505, "eval_loss": 1.7068787813186646, "eval_runtime": 331.2375, "eval_samples_per_second": 402.88, "eval_steps_per_second": 12.592, "step": 11200 }, { "epoch": 2.6861698938912535, "grad_norm": 0.6745384335517883, "learning_rate": 1.0480454073067391e-05, "loss": 1.5831, "step": 11202 }, { "epoch": 2.6866494814459565, "grad_norm": 0.7341179847717285, "learning_rate": 1.046446558477896e-05, "loss": 1.6158, "step": 11204 }, { "epoch": 2.6871290690006595, "grad_norm": 0.75367671251297, "learning_rate": 1.0448477096490528e-05, "loss": 1.5675, "step": 11206 }, { "epoch": 2.6876086565553625, "grad_norm": 0.6866568326950073, "learning_rate": 1.0432488608202095e-05, "loss": 1.605, "step": 11208 }, { "epoch": 2.688088244110065, "grad_norm": 0.6963616013526917, "learning_rate": 1.0416500119913663e-05, "loss": 1.6202, "step": 11210 }, { "epoch": 2.688567831664768, "grad_norm": 0.64943927526474, "learning_rate": 1.040051163162523e-05, "loss": 1.5818, "step": 11212 }, { "epoch": 2.689047419219471, "grad_norm": 0.662453830242157, "learning_rate": 1.0384523143336799e-05, "loss": 1.6373, "step": 11214 }, { "epoch": 2.689527006774174, "grad_norm": 0.7564813494682312, "learning_rate": 1.0368534655048366e-05, "loss": 1.68, "step": 11216 }, { "epoch": 2.690006594328877, "grad_norm": 0.7839130163192749, "learning_rate": 1.0352546166759932e-05, "loss": 1.5808, "step": 11218 }, { "epoch": 2.69048618188358, "grad_norm": 0.7916223406791687, "learning_rate": 1.0336557678471501e-05, "loss": 1.6186, "step": 11220 }, { "epoch": 2.690965769438283, "grad_norm": 0.6885735392570496, "learning_rate": 1.0320569190183069e-05, "loss": 1.5806, "step": 
11222 }, { "epoch": 2.691445356992986, "grad_norm": 0.6570850610733032, "learning_rate": 1.0304580701894636e-05, "loss": 1.6582, "step": 11224 }, { "epoch": 2.691924944547689, "grad_norm": 0.6469263434410095, "learning_rate": 1.0288592213606205e-05, "loss": 1.6312, "step": 11226 }, { "epoch": 2.692404532102392, "grad_norm": 0.7418261170387268, "learning_rate": 1.027260372531777e-05, "loss": 1.6315, "step": 11228 }, { "epoch": 2.6928841196570947, "grad_norm": 0.9007003307342529, "learning_rate": 1.025661523702934e-05, "loss": 1.6404, "step": 11230 }, { "epoch": 2.6933637072117977, "grad_norm": 0.6830894351005554, "learning_rate": 1.0240626748740907e-05, "loss": 1.5895, "step": 11232 }, { "epoch": 2.6938432947665008, "grad_norm": 0.7235361337661743, "learning_rate": 1.0224638260452475e-05, "loss": 1.6086, "step": 11234 }, { "epoch": 2.6943228823212038, "grad_norm": 0.7280957102775574, "learning_rate": 1.0208649772164042e-05, "loss": 1.6414, "step": 11236 }, { "epoch": 2.6948024698759068, "grad_norm": 0.6981220841407776, "learning_rate": 1.019266128387561e-05, "loss": 1.6239, "step": 11238 }, { "epoch": 2.69528205743061, "grad_norm": 0.7113608121871948, "learning_rate": 1.0176672795587178e-05, "loss": 1.6332, "step": 11240 }, { "epoch": 2.695761644985313, "grad_norm": 0.7713492512702942, "learning_rate": 1.0160684307298746e-05, "loss": 1.6023, "step": 11242 }, { "epoch": 2.6962412325400154, "grad_norm": 0.650823712348938, "learning_rate": 1.0144695819010313e-05, "loss": 1.5414, "step": 11244 }, { "epoch": 2.6967208200947184, "grad_norm": 0.6380943059921265, "learning_rate": 1.012870733072188e-05, "loss": 1.6, "step": 11246 }, { "epoch": 2.6972004076494214, "grad_norm": 0.6310796141624451, "learning_rate": 1.0112718842433448e-05, "loss": 1.6387, "step": 11248 }, { "epoch": 2.6976799952041244, "grad_norm": 0.6993439793586731, "learning_rate": 1.0096730354145015e-05, "loss": 1.634, "step": 11250 }, { "epoch": 2.6981595827588274, "grad_norm": 0.7428319454193115, 
"learning_rate": 1.0080741865856585e-05, "loss": 1.6151, "step": 11252 }, { "epoch": 2.6986391703135304, "grad_norm": 0.7638795971870422, "learning_rate": 1.006475337756815e-05, "loss": 1.6181, "step": 11254 }, { "epoch": 2.6991187578682334, "grad_norm": 0.666671633720398, "learning_rate": 1.004876488927972e-05, "loss": 1.6233, "step": 11256 }, { "epoch": 2.6995983454229364, "grad_norm": 0.6700485348701477, "learning_rate": 1.0032776400991287e-05, "loss": 1.6307, "step": 11258 }, { "epoch": 2.7000779329776394, "grad_norm": 0.6790406107902527, "learning_rate": 1.0016787912702854e-05, "loss": 1.6487, "step": 11260 }, { "epoch": 2.7005575205323424, "grad_norm": 0.6686114072799683, "learning_rate": 1.0000799424414423e-05, "loss": 1.651, "step": 11262 }, { "epoch": 2.701037108087045, "grad_norm": 0.7332746386528015, "learning_rate": 9.984810936125989e-06, "loss": 1.5847, "step": 11264 }, { "epoch": 2.701516695641748, "grad_norm": 0.7041499018669128, "learning_rate": 9.968822447837558e-06, "loss": 1.6291, "step": 11266 }, { "epoch": 2.701996283196451, "grad_norm": 0.6934981942176819, "learning_rate": 9.952833959549125e-06, "loss": 1.6065, "step": 11268 }, { "epoch": 2.702475870751154, "grad_norm": 0.8703316450119019, "learning_rate": 9.936845471260693e-06, "loss": 1.5767, "step": 11270 }, { "epoch": 2.702955458305857, "grad_norm": 0.7769125699996948, "learning_rate": 9.920856982972262e-06, "loss": 1.6077, "step": 11272 }, { "epoch": 2.70343504586056, "grad_norm": 0.6899728775024414, "learning_rate": 9.904868494683828e-06, "loss": 1.6464, "step": 11274 }, { "epoch": 2.7039146334152626, "grad_norm": 0.6946173906326294, "learning_rate": 9.888880006395395e-06, "loss": 1.6469, "step": 11276 }, { "epoch": 2.7043942209699656, "grad_norm": 0.7727266550064087, "learning_rate": 9.872891518106964e-06, "loss": 1.6255, "step": 11278 }, { "epoch": 2.7048738085246686, "grad_norm": 0.7169292569160461, "learning_rate": 9.856903029818531e-06, "loss": 1.6606, "step": 11280 }, { "epoch": 
2.7053533960793716, "grad_norm": 0.7110976576805115, "learning_rate": 9.840914541530099e-06, "loss": 1.6426, "step": 11282 }, { "epoch": 2.7058329836340747, "grad_norm": 0.8281223177909851, "learning_rate": 9.824926053241666e-06, "loss": 1.6049, "step": 11284 }, { "epoch": 2.7063125711887777, "grad_norm": 0.7339139580726624, "learning_rate": 9.808937564953234e-06, "loss": 1.656, "step": 11286 }, { "epoch": 2.7067921587434807, "grad_norm": 0.8082202076911926, "learning_rate": 9.792949076664803e-06, "loss": 1.5886, "step": 11288 }, { "epoch": 2.7072717462981837, "grad_norm": 0.709723949432373, "learning_rate": 9.77696058837637e-06, "loss": 1.6051, "step": 11290 }, { "epoch": 2.7077513338528867, "grad_norm": 0.6887457966804504, "learning_rate": 9.760972100087938e-06, "loss": 1.6876, "step": 11292 }, { "epoch": 2.7082309214075897, "grad_norm": 0.7429313659667969, "learning_rate": 9.744983611799505e-06, "loss": 1.6137, "step": 11294 }, { "epoch": 2.7087105089622923, "grad_norm": 0.7210909724235535, "learning_rate": 9.728995123511072e-06, "loss": 1.667, "step": 11296 }, { "epoch": 2.7091900965169953, "grad_norm": 0.8245394825935364, "learning_rate": 9.713006635222641e-06, "loss": 1.622, "step": 11298 }, { "epoch": 2.7096696840716983, "grad_norm": 0.706934928894043, "learning_rate": 9.697018146934207e-06, "loss": 1.6512, "step": 11300 }, { "epoch": 2.7101492716264013, "grad_norm": 0.7317407131195068, "learning_rate": 9.681029658645774e-06, "loss": 1.6623, "step": 11302 }, { "epoch": 2.7106288591811043, "grad_norm": 0.7047302722930908, "learning_rate": 9.665041170357344e-06, "loss": 1.6306, "step": 11304 }, { "epoch": 2.7111084467358073, "grad_norm": 0.8009446859359741, "learning_rate": 9.649052682068911e-06, "loss": 1.6071, "step": 11306 }, { "epoch": 2.7115880342905103, "grad_norm": 0.701378345489502, "learning_rate": 9.633064193780478e-06, "loss": 1.5439, "step": 11308 }, { "epoch": 2.712067621845213, "grad_norm": 0.6673853397369385, "learning_rate": 
9.617075705492046e-06, "loss": 1.5707, "step": 11310 }, { "epoch": 2.712547209399916, "grad_norm": 0.7092868685722351, "learning_rate": 9.601087217203613e-06, "loss": 1.5989, "step": 11312 }, { "epoch": 2.713026796954619, "grad_norm": 0.7476891279220581, "learning_rate": 9.585098728915182e-06, "loss": 1.6744, "step": 11314 }, { "epoch": 2.713506384509322, "grad_norm": 0.7088529467582703, "learning_rate": 9.56911024062675e-06, "loss": 1.6115, "step": 11316 }, { "epoch": 2.713985972064025, "grad_norm": 0.7602643370628357, "learning_rate": 9.553121752338317e-06, "loss": 1.6257, "step": 11318 }, { "epoch": 2.714465559618728, "grad_norm": 0.7294578552246094, "learning_rate": 9.537133264049884e-06, "loss": 1.6479, "step": 11320 }, { "epoch": 2.714945147173431, "grad_norm": 0.6951786875724792, "learning_rate": 9.521144775761452e-06, "loss": 1.5326, "step": 11322 }, { "epoch": 2.715424734728134, "grad_norm": 0.6974689364433289, "learning_rate": 9.505156287473021e-06, "loss": 1.6399, "step": 11324 }, { "epoch": 2.715904322282837, "grad_norm": 0.7135364413261414, "learning_rate": 9.489167799184588e-06, "loss": 1.6646, "step": 11326 }, { "epoch": 2.71638390983754, "grad_norm": 0.7295703887939453, "learning_rate": 9.473179310896154e-06, "loss": 1.5964, "step": 11328 }, { "epoch": 2.7168634973922425, "grad_norm": 0.6657286882400513, "learning_rate": 9.457190822607723e-06, "loss": 1.5889, "step": 11330 }, { "epoch": 2.7173430849469455, "grad_norm": 0.6646488308906555, "learning_rate": 9.44120233431929e-06, "loss": 1.5792, "step": 11332 }, { "epoch": 2.7178226725016486, "grad_norm": 0.7240404486656189, "learning_rate": 9.425213846030858e-06, "loss": 1.6021, "step": 11334 }, { "epoch": 2.7183022600563516, "grad_norm": 0.7057510018348694, "learning_rate": 9.409225357742425e-06, "loss": 1.6578, "step": 11336 }, { "epoch": 2.7187818476110546, "grad_norm": 0.829738974571228, "learning_rate": 9.393236869453993e-06, "loss": 1.6289, "step": 11338 }, { "epoch": 2.7192614351657576, 
"grad_norm": 0.7150835990905762, "learning_rate": 9.377248381165562e-06, "loss": 1.6557, "step": 11340 }, { "epoch": 2.71974102272046, "grad_norm": 0.6793140172958374, "learning_rate": 9.361259892877129e-06, "loss": 1.5993, "step": 11342 }, { "epoch": 2.720220610275163, "grad_norm": 0.7166807651519775, "learning_rate": 9.345271404588697e-06, "loss": 1.643, "step": 11344 }, { "epoch": 2.720700197829866, "grad_norm": 0.8322452306747437, "learning_rate": 9.329282916300264e-06, "loss": 1.6225, "step": 11346 }, { "epoch": 2.721179785384569, "grad_norm": 0.7298682928085327, "learning_rate": 9.313294428011831e-06, "loss": 1.6361, "step": 11348 }, { "epoch": 2.721659372939272, "grad_norm": 0.7367530465126038, "learning_rate": 9.2973059397234e-06, "loss": 1.598, "step": 11350 }, { "epoch": 2.722138960493975, "grad_norm": 0.706445574760437, "learning_rate": 9.281317451434968e-06, "loss": 1.6255, "step": 11352 }, { "epoch": 2.722618548048678, "grad_norm": 0.718135416507721, "learning_rate": 9.265328963146533e-06, "loss": 1.6282, "step": 11354 }, { "epoch": 2.723098135603381, "grad_norm": 0.6443890333175659, "learning_rate": 9.249340474858103e-06, "loss": 1.6232, "step": 11356 }, { "epoch": 2.723577723158084, "grad_norm": 0.7358860373497009, "learning_rate": 9.23335198656967e-06, "loss": 1.6415, "step": 11358 }, { "epoch": 2.7240573107127872, "grad_norm": 0.7783441543579102, "learning_rate": 9.217363498281237e-06, "loss": 1.6138, "step": 11360 }, { "epoch": 2.72453689826749, "grad_norm": 0.7999815344810486, "learning_rate": 9.201375009992806e-06, "loss": 1.6699, "step": 11362 }, { "epoch": 2.725016485822193, "grad_norm": 0.720745325088501, "learning_rate": 9.185386521704372e-06, "loss": 1.6333, "step": 11364 }, { "epoch": 2.725496073376896, "grad_norm": 0.8370382785797119, "learning_rate": 9.169398033415941e-06, "loss": 1.6268, "step": 11366 }, { "epoch": 2.725975660931599, "grad_norm": 0.6872696280479431, "learning_rate": 9.153409545127509e-06, "loss": 1.6557, "step": 11368 
}, { "epoch": 2.726455248486302, "grad_norm": 0.7190517783164978, "learning_rate": 9.137421056839076e-06, "loss": 1.6397, "step": 11370 }, { "epoch": 2.726934836041005, "grad_norm": 0.6797504425048828, "learning_rate": 9.121432568550645e-06, "loss": 1.5716, "step": 11372 }, { "epoch": 2.727414423595708, "grad_norm": 0.7689613699913025, "learning_rate": 9.10544408026221e-06, "loss": 1.6086, "step": 11374 }, { "epoch": 2.7278940111504104, "grad_norm": 0.6512105464935303, "learning_rate": 9.08945559197378e-06, "loss": 1.6255, "step": 11376 }, { "epoch": 2.7283735987051134, "grad_norm": 0.6864239573478699, "learning_rate": 9.073467103685347e-06, "loss": 1.6226, "step": 11378 }, { "epoch": 2.7288531862598164, "grad_norm": 0.7326651811599731, "learning_rate": 9.057478615396915e-06, "loss": 1.638, "step": 11380 }, { "epoch": 2.7293327738145194, "grad_norm": 0.7028729319572449, "learning_rate": 9.041490127108482e-06, "loss": 1.629, "step": 11382 }, { "epoch": 2.7298123613692225, "grad_norm": 0.702629804611206, "learning_rate": 9.02550163882005e-06, "loss": 1.6412, "step": 11384 }, { "epoch": 2.7302919489239255, "grad_norm": 0.9520564675331116, "learning_rate": 9.009513150531617e-06, "loss": 1.6008, "step": 11386 }, { "epoch": 2.7307715364786285, "grad_norm": 0.7583456039428711, "learning_rate": 8.993524662243186e-06, "loss": 1.6157, "step": 11388 }, { "epoch": 2.7312511240333315, "grad_norm": 0.659792959690094, "learning_rate": 8.977536173954753e-06, "loss": 1.624, "step": 11390 }, { "epoch": 2.7317307115880345, "grad_norm": 0.7074634432792664, "learning_rate": 8.96154768566632e-06, "loss": 1.5882, "step": 11392 }, { "epoch": 2.7322102991427375, "grad_norm": 0.7610148191452026, "learning_rate": 8.945559197377888e-06, "loss": 1.6306, "step": 11394 }, { "epoch": 2.73268988669744, "grad_norm": 0.8390698432922363, "learning_rate": 8.929570709089456e-06, "loss": 1.6367, "step": 11396 }, { "epoch": 2.733169474252143, "grad_norm": 0.6895484924316406, "learning_rate": 
8.913582220801025e-06, "loss": 1.6434, "step": 11398 }, { "epoch": 2.733649061806846, "grad_norm": 0.7396335601806641, "learning_rate": 8.89759373251259e-06, "loss": 1.6469, "step": 11400 }, { "epoch": 2.734128649361549, "grad_norm": 0.7007299065589905, "learning_rate": 8.88160524422416e-06, "loss": 1.6183, "step": 11402 }, { "epoch": 2.734608236916252, "grad_norm": 0.6609092354774475, "learning_rate": 8.865616755935727e-06, "loss": 1.68, "step": 11404 }, { "epoch": 2.735087824470955, "grad_norm": 0.724422037601471, "learning_rate": 8.849628267647294e-06, "loss": 1.6085, "step": 11406 }, { "epoch": 2.7355674120256577, "grad_norm": 0.8005499839782715, "learning_rate": 8.833639779358863e-06, "loss": 1.6406, "step": 11408 }, { "epoch": 2.7360469995803607, "grad_norm": 0.7243871092796326, "learning_rate": 8.817651291070429e-06, "loss": 1.6394, "step": 11410 }, { "epoch": 2.7365265871350637, "grad_norm": 0.6945455074310303, "learning_rate": 8.801662802781998e-06, "loss": 1.6625, "step": 11412 }, { "epoch": 2.7370061746897667, "grad_norm": 0.7397179007530212, "learning_rate": 8.785674314493565e-06, "loss": 1.6227, "step": 11414 }, { "epoch": 2.7374857622444697, "grad_norm": 0.6458991765975952, "learning_rate": 8.769685826205133e-06, "loss": 1.6156, "step": 11416 }, { "epoch": 2.7379653497991727, "grad_norm": 0.6762190461158752, "learning_rate": 8.7536973379167e-06, "loss": 1.646, "step": 11418 }, { "epoch": 2.7384449373538757, "grad_norm": 0.6921707391738892, "learning_rate": 8.737708849628268e-06, "loss": 1.5872, "step": 11420 }, { "epoch": 2.7389245249085787, "grad_norm": 0.703620433807373, "learning_rate": 8.721720361339835e-06, "loss": 1.5906, "step": 11422 }, { "epoch": 2.7394041124632817, "grad_norm": 0.697053849697113, "learning_rate": 8.705731873051404e-06, "loss": 1.5913, "step": 11424 }, { "epoch": 2.7398837000179848, "grad_norm": 0.6750263571739197, "learning_rate": 8.689743384762972e-06, "loss": 1.5951, "step": 11426 }, { "epoch": 2.7403632875726873, 
"grad_norm": 1.037588119506836, "learning_rate": 8.673754896474539e-06, "loss": 1.6659, "step": 11428 }, { "epoch": 2.7408428751273903, "grad_norm": 0.7827330231666565, "learning_rate": 8.657766408186106e-06, "loss": 1.6343, "step": 11430 }, { "epoch": 2.7413224626820933, "grad_norm": 0.660149097442627, "learning_rate": 8.641777919897674e-06, "loss": 1.628, "step": 11432 }, { "epoch": 2.7418020502367964, "grad_norm": 0.7516499757766724, "learning_rate": 8.625789431609243e-06, "loss": 1.6034, "step": 11434 }, { "epoch": 2.7422816377914994, "grad_norm": 0.7763311266899109, "learning_rate": 8.609800943320808e-06, "loss": 1.6499, "step": 11436 }, { "epoch": 2.7427612253462024, "grad_norm": 0.6978511214256287, "learning_rate": 8.593812455032378e-06, "loss": 1.6148, "step": 11438 }, { "epoch": 2.7432408129009054, "grad_norm": 0.6642120480537415, "learning_rate": 8.577823966743945e-06, "loss": 1.6354, "step": 11440 }, { "epoch": 2.743720400455608, "grad_norm": 0.6846423745155334, "learning_rate": 8.561835478455512e-06, "loss": 1.6393, "step": 11442 }, { "epoch": 2.744199988010311, "grad_norm": 0.7636954188346863, "learning_rate": 8.545846990167081e-06, "loss": 1.6443, "step": 11444 }, { "epoch": 2.744679575565014, "grad_norm": 0.7196562886238098, "learning_rate": 8.529858501878647e-06, "loss": 1.6483, "step": 11446 }, { "epoch": 2.745159163119717, "grad_norm": 0.6642315983772278, "learning_rate": 8.513870013590215e-06, "loss": 1.6594, "step": 11448 }, { "epoch": 2.74563875067442, "grad_norm": 0.6509325504302979, "learning_rate": 8.497881525301784e-06, "loss": 1.6225, "step": 11450 }, { "epoch": 2.746118338229123, "grad_norm": 0.7846240997314453, "learning_rate": 8.481893037013351e-06, "loss": 1.6179, "step": 11452 }, { "epoch": 2.746597925783826, "grad_norm": 0.7295287251472473, "learning_rate": 8.465904548724918e-06, "loss": 1.5985, "step": 11454 }, { "epoch": 2.747077513338529, "grad_norm": 0.8385120630264282, "learning_rate": 8.449916060436486e-06, "loss": 1.6374, 
"step": 11456 }, { "epoch": 2.747557100893232, "grad_norm": 0.7303578853607178, "learning_rate": 8.433927572148053e-06, "loss": 1.5996, "step": 11458 }, { "epoch": 2.748036688447935, "grad_norm": 0.6580714583396912, "learning_rate": 8.417939083859622e-06, "loss": 1.6463, "step": 11460 }, { "epoch": 2.7485162760026376, "grad_norm": 0.746943473815918, "learning_rate": 8.40195059557119e-06, "loss": 1.5741, "step": 11462 }, { "epoch": 2.7489958635573406, "grad_norm": 0.7002219557762146, "learning_rate": 8.385962107282757e-06, "loss": 1.653, "step": 11464 }, { "epoch": 2.7494754511120436, "grad_norm": 0.712823748588562, "learning_rate": 8.369973618994324e-06, "loss": 1.6262, "step": 11466 }, { "epoch": 2.7499550386667466, "grad_norm": 0.7018297910690308, "learning_rate": 8.353985130705892e-06, "loss": 1.613, "step": 11468 }, { "epoch": 2.7504346262214496, "grad_norm": 0.6922919154167175, "learning_rate": 8.337996642417461e-06, "loss": 1.6515, "step": 11470 }, { "epoch": 2.7509142137761526, "grad_norm": 0.665011465549469, "learning_rate": 8.322008154129028e-06, "loss": 1.6082, "step": 11472 }, { "epoch": 2.751393801330855, "grad_norm": 0.7298151254653931, "learning_rate": 8.306019665840594e-06, "loss": 1.5934, "step": 11474 }, { "epoch": 2.751873388885558, "grad_norm": 0.662388026714325, "learning_rate": 8.290031177552163e-06, "loss": 1.6303, "step": 11476 }, { "epoch": 2.7523529764402612, "grad_norm": 0.641771674156189, "learning_rate": 8.27404268926373e-06, "loss": 1.6479, "step": 11478 }, { "epoch": 2.7528325639949642, "grad_norm": 0.7899652123451233, "learning_rate": 8.258054200975298e-06, "loss": 1.6069, "step": 11480 }, { "epoch": 2.7533121515496672, "grad_norm": 0.7570120692253113, "learning_rate": 8.242065712686865e-06, "loss": 1.6122, "step": 11482 }, { "epoch": 2.7537917391043703, "grad_norm": 0.7671220898628235, "learning_rate": 8.226077224398433e-06, "loss": 1.6204, "step": 11484 }, { "epoch": 2.7542713266590733, "grad_norm": 0.6819235682487488, 
"learning_rate": 8.210088736110002e-06, "loss": 1.634, "step": 11486 }, { "epoch": 2.7547509142137763, "grad_norm": 0.7157623171806335, "learning_rate": 8.19410024782157e-06, "loss": 1.6147, "step": 11488 }, { "epoch": 2.7552305017684793, "grad_norm": 0.7588821649551392, "learning_rate": 8.178111759533137e-06, "loss": 1.6663, "step": 11490 }, { "epoch": 2.7557100893231823, "grad_norm": 0.734657347202301, "learning_rate": 8.162123271244704e-06, "loss": 1.5964, "step": 11492 }, { "epoch": 2.756189676877885, "grad_norm": 0.7310928702354431, "learning_rate": 8.146134782956271e-06, "loss": 1.6372, "step": 11494 }, { "epoch": 2.756669264432588, "grad_norm": 0.6955455541610718, "learning_rate": 8.13014629466784e-06, "loss": 1.6293, "step": 11496 }, { "epoch": 2.757148851987291, "grad_norm": 0.724034309387207, "learning_rate": 8.114157806379408e-06, "loss": 1.6674, "step": 11498 }, { "epoch": 2.757628439541994, "grad_norm": 0.6898744702339172, "learning_rate": 8.098169318090974e-06, "loss": 1.6395, "step": 11500 }, { "epoch": 2.758108027096697, "grad_norm": 0.7607200145721436, "learning_rate": 8.082180829802543e-06, "loss": 1.6128, "step": 11502 }, { "epoch": 2.7585876146514, "grad_norm": 0.7010622620582581, "learning_rate": 8.06619234151411e-06, "loss": 1.6583, "step": 11504 }, { "epoch": 2.759067202206103, "grad_norm": 0.670780599117279, "learning_rate": 8.050203853225677e-06, "loss": 1.6316, "step": 11506 }, { "epoch": 2.7595467897608055, "grad_norm": 0.6607992649078369, "learning_rate": 8.034215364937247e-06, "loss": 1.6165, "step": 11508 }, { "epoch": 2.7600263773155085, "grad_norm": 0.7517655491828918, "learning_rate": 8.018226876648812e-06, "loss": 1.688, "step": 11510 }, { "epoch": 2.7605059648702115, "grad_norm": 0.7075660228729248, "learning_rate": 8.002238388360381e-06, "loss": 1.6374, "step": 11512 }, { "epoch": 2.7609855524249145, "grad_norm": 0.7092084884643555, "learning_rate": 7.986249900071949e-06, "loss": 1.603, "step": 11514 }, { "epoch": 
2.7614651399796175, "grad_norm": 0.6846609711647034, "learning_rate": 7.970261411783516e-06, "loss": 1.5867, "step": 11516 }, { "epoch": 2.7619447275343205, "grad_norm": 0.6291020512580872, "learning_rate": 7.954272923495084e-06, "loss": 1.5535, "step": 11518 }, { "epoch": 2.7624243150890235, "grad_norm": 0.6861134171485901, "learning_rate": 7.938284435206651e-06, "loss": 1.5862, "step": 11520 }, { "epoch": 2.7629039026437265, "grad_norm": 0.6966245174407959, "learning_rate": 7.92229594691822e-06, "loss": 1.6032, "step": 11522 }, { "epoch": 2.7633834901984295, "grad_norm": 0.6909865140914917, "learning_rate": 7.906307458629787e-06, "loss": 1.6313, "step": 11524 }, { "epoch": 2.7638630777531326, "grad_norm": 0.6718422770500183, "learning_rate": 7.890318970341355e-06, "loss": 1.5967, "step": 11526 }, { "epoch": 2.764342665307835, "grad_norm": 0.7364253997802734, "learning_rate": 7.874330482052922e-06, "loss": 1.6306, "step": 11528 }, { "epoch": 2.764822252862538, "grad_norm": 0.6400423645973206, "learning_rate": 7.85834199376449e-06, "loss": 1.5896, "step": 11530 }, { "epoch": 2.765301840417241, "grad_norm": 0.7234786152839661, "learning_rate": 7.842353505476057e-06, "loss": 1.6318, "step": 11532 }, { "epoch": 2.765781427971944, "grad_norm": 0.7648892998695374, "learning_rate": 7.826365017187626e-06, "loss": 1.6111, "step": 11534 }, { "epoch": 2.766261015526647, "grad_norm": 0.7020738124847412, "learning_rate": 7.810376528899192e-06, "loss": 1.5929, "step": 11536 }, { "epoch": 2.76674060308135, "grad_norm": 0.7249913811683655, "learning_rate": 7.79438804061076e-06, "loss": 1.627, "step": 11538 }, { "epoch": 2.7672201906360527, "grad_norm": 0.7087380290031433, "learning_rate": 7.778399552322328e-06, "loss": 1.6112, "step": 11540 }, { "epoch": 2.7676997781907557, "grad_norm": 0.7764968276023865, "learning_rate": 7.762411064033896e-06, "loss": 1.5898, "step": 11542 }, { "epoch": 2.7681793657454588, "grad_norm": 0.7081678509712219, "learning_rate": 7.746422575745465e-06, 
"loss": 1.6646, "step": 11544 }, { "epoch": 2.7686589533001618, "grad_norm": 0.907955527305603, "learning_rate": 7.73043408745703e-06, "loss": 1.7096, "step": 11546 }, { "epoch": 2.7691385408548648, "grad_norm": 0.6550078988075256, "learning_rate": 7.7144455991686e-06, "loss": 1.6385, "step": 11548 }, { "epoch": 2.769618128409568, "grad_norm": 0.7473899126052856, "learning_rate": 7.698457110880167e-06, "loss": 1.6557, "step": 11550 }, { "epoch": 2.770097715964271, "grad_norm": 0.6804804801940918, "learning_rate": 7.682468622591734e-06, "loss": 1.613, "step": 11552 }, { "epoch": 2.770577303518974, "grad_norm": 0.7167413234710693, "learning_rate": 7.666480134303302e-06, "loss": 1.5824, "step": 11554 }, { "epoch": 2.771056891073677, "grad_norm": 0.9914330244064331, "learning_rate": 7.650491646014869e-06, "loss": 1.6416, "step": 11556 }, { "epoch": 2.77153647862838, "grad_norm": 0.7441843748092651, "learning_rate": 7.634503157726436e-06, "loss": 1.5992, "step": 11558 }, { "epoch": 2.7720160661830824, "grad_norm": 0.6827793121337891, "learning_rate": 7.6185146694380055e-06, "loss": 1.5953, "step": 11560 }, { "epoch": 2.7724956537377854, "grad_norm": 0.7138874530792236, "learning_rate": 7.602526181149573e-06, "loss": 1.6285, "step": 11562 }, { "epoch": 2.7729752412924884, "grad_norm": 0.7586669325828552, "learning_rate": 7.5865376928611395e-06, "loss": 1.584, "step": 11564 }, { "epoch": 2.7734548288471914, "grad_norm": 0.6896252036094666, "learning_rate": 7.570549204572708e-06, "loss": 1.6821, "step": 11566 }, { "epoch": 2.7739344164018944, "grad_norm": 0.7046811580657959, "learning_rate": 7.554560716284276e-06, "loss": 1.5963, "step": 11568 }, { "epoch": 2.7744140039565974, "grad_norm": 0.7923179268836975, "learning_rate": 7.538572227995843e-06, "loss": 1.6785, "step": 11570 }, { "epoch": 2.7748935915113004, "grad_norm": 0.748551070690155, "learning_rate": 7.522583739707412e-06, "loss": 1.5731, "step": 11572 }, { "epoch": 2.775373179066003, "grad_norm": 
0.6444424986839294, "learning_rate": 7.506595251418978e-06, "loss": 1.6612, "step": 11574 }, { "epoch": 2.775852766620706, "grad_norm": 0.7614960670471191, "learning_rate": 7.490606763130546e-06, "loss": 1.625, "step": 11576 }, { "epoch": 2.776332354175409, "grad_norm": 0.7010897994041443, "learning_rate": 7.474618274842115e-06, "loss": 1.6797, "step": 11578 }, { "epoch": 2.776811941730112, "grad_norm": 0.7861680388450623, "learning_rate": 7.458629786553682e-06, "loss": 1.6106, "step": 11580 }, { "epoch": 2.777291529284815, "grad_norm": 0.6860038638114929, "learning_rate": 7.4426412982652486e-06, "loss": 1.6031, "step": 11582 }, { "epoch": 2.777771116839518, "grad_norm": 0.7228990197181702, "learning_rate": 7.426652809976817e-06, "loss": 1.6318, "step": 11584 }, { "epoch": 2.778250704394221, "grad_norm": 0.6628226637840271, "learning_rate": 7.410664321688385e-06, "loss": 1.6427, "step": 11586 }, { "epoch": 2.778730291948924, "grad_norm": 0.7101489901542664, "learning_rate": 7.3946758333999525e-06, "loss": 1.6268, "step": 11588 }, { "epoch": 2.779209879503627, "grad_norm": 0.672817051410675, "learning_rate": 7.378687345111521e-06, "loss": 1.6416, "step": 11590 }, { "epoch": 2.77968946705833, "grad_norm": 0.705810010433197, "learning_rate": 7.362698856823087e-06, "loss": 1.6255, "step": 11592 }, { "epoch": 2.7801690546130327, "grad_norm": 0.6715546250343323, "learning_rate": 7.3467103685346555e-06, "loss": 1.6645, "step": 11594 }, { "epoch": 2.7806486421677357, "grad_norm": 0.7174694538116455, "learning_rate": 7.330721880246223e-06, "loss": 1.6145, "step": 11596 }, { "epoch": 2.7811282297224387, "grad_norm": 0.7508087754249573, "learning_rate": 7.314733391957791e-06, "loss": 1.6075, "step": 11598 }, { "epoch": 2.7816078172771417, "grad_norm": 0.7073272466659546, "learning_rate": 7.298744903669358e-06, "loss": 1.6297, "step": 11600 }, { "epoch": 2.7816078172771417, "eval_loss": 1.7052762508392334, "eval_runtime": 331.1781, "eval_samples_per_second": 402.952, 
"eval_steps_per_second": 12.594, "step": 11600 }, { "epoch": 2.7820874048318447, "grad_norm": 1.0579419136047363, "learning_rate": 7.282756415380926e-06, "loss": 1.6009, "step": 11602 }, { "epoch": 2.7825669923865477, "grad_norm": 0.7403685450553894, "learning_rate": 7.266767927092494e-06, "loss": 1.6776, "step": 11604 }, { "epoch": 2.7830465799412503, "grad_norm": 0.6719491481781006, "learning_rate": 7.2507794388040615e-06, "loss": 1.5889, "step": 11606 }, { "epoch": 2.7835261674959533, "grad_norm": 0.72605299949646, "learning_rate": 7.23479095051563e-06, "loss": 1.6274, "step": 11608 }, { "epoch": 2.7840057550506563, "grad_norm": 0.7861446142196655, "learning_rate": 7.218802462227196e-06, "loss": 1.6194, "step": 11610 }, { "epoch": 2.7844853426053593, "grad_norm": 0.6496665477752686, "learning_rate": 7.2028139739387646e-06, "loss": 1.581, "step": 11612 }, { "epoch": 2.7849649301600623, "grad_norm": 0.6909419894218445, "learning_rate": 7.186825485650332e-06, "loss": 1.5726, "step": 11614 }, { "epoch": 2.7854445177147653, "grad_norm": 0.6972635984420776, "learning_rate": 7.1708369973619e-06, "loss": 1.6129, "step": 11616 }, { "epoch": 2.7859241052694683, "grad_norm": 0.7029706835746765, "learning_rate": 7.154848509073467e-06, "loss": 1.625, "step": 11618 }, { "epoch": 2.7864036928241713, "grad_norm": 0.7554833889007568, "learning_rate": 7.138860020785035e-06, "loss": 1.6175, "step": 11620 }, { "epoch": 2.7868832803788743, "grad_norm": 0.6933375597000122, "learning_rate": 7.122871532496602e-06, "loss": 1.6451, "step": 11622 }, { "epoch": 2.7873628679335773, "grad_norm": 0.6906677484512329, "learning_rate": 7.106883044208171e-06, "loss": 1.6114, "step": 11624 }, { "epoch": 2.78784245548828, "grad_norm": 0.6442714929580688, "learning_rate": 7.090894555919739e-06, "loss": 1.6139, "step": 11626 }, { "epoch": 2.788322043042983, "grad_norm": 0.7153184413909912, "learning_rate": 7.074906067631305e-06, "loss": 1.6545, "step": 11628 }, { "epoch": 2.788801630597686, 
"grad_norm": 0.6840987205505371, "learning_rate": 7.058917579342874e-06, "loss": 1.6022, "step": 11630 }, { "epoch": 2.789281218152389, "grad_norm": 0.7174234390258789, "learning_rate": 7.042929091054441e-06, "loss": 1.6419, "step": 11632 }, { "epoch": 2.789760805707092, "grad_norm": 0.6796619296073914, "learning_rate": 7.026940602766009e-06, "loss": 1.6044, "step": 11634 }, { "epoch": 2.790240393261795, "grad_norm": 0.7060830593109131, "learning_rate": 7.010952114477576e-06, "loss": 1.6763, "step": 11636 }, { "epoch": 2.790719980816498, "grad_norm": 0.7388666868209839, "learning_rate": 6.994963626189144e-06, "loss": 1.6383, "step": 11638 }, { "epoch": 2.7911995683712005, "grad_norm": 0.7026063203811646, "learning_rate": 6.9789751379007115e-06, "loss": 1.5982, "step": 11640 }, { "epoch": 2.7916791559259035, "grad_norm": 0.6539665460586548, "learning_rate": 6.96298664961228e-06, "loss": 1.5721, "step": 11642 }, { "epoch": 2.7921587434806066, "grad_norm": 0.7049320340156555, "learning_rate": 6.946998161323848e-06, "loss": 1.6507, "step": 11644 }, { "epoch": 2.7926383310353096, "grad_norm": 0.9898470044136047, "learning_rate": 6.9310096730354145e-06, "loss": 1.6491, "step": 11646 }, { "epoch": 2.7931179185900126, "grad_norm": 0.816662609577179, "learning_rate": 6.915021184746982e-06, "loss": 1.6459, "step": 11648 }, { "epoch": 2.7935975061447156, "grad_norm": 0.7837159633636475, "learning_rate": 6.89903269645855e-06, "loss": 1.6065, "step": 11650 }, { "epoch": 2.7940770936994186, "grad_norm": 0.8147327303886414, "learning_rate": 6.883044208170118e-06, "loss": 1.6559, "step": 11652 }, { "epoch": 2.7945566812541216, "grad_norm": 0.6834242343902588, "learning_rate": 6.867055719881685e-06, "loss": 1.6543, "step": 11654 }, { "epoch": 2.7950362688088246, "grad_norm": 0.819953203201294, "learning_rate": 6.851067231593253e-06, "loss": 1.6218, "step": 11656 }, { "epoch": 2.7955158563635276, "grad_norm": 0.8699454069137573, "learning_rate": 6.8350787433048206e-06, "loss": 
1.5801, "step": 11658 }, { "epoch": 2.79599544391823, "grad_norm": 0.6959753632545471, "learning_rate": 6.819090255016389e-06, "loss": 1.5864, "step": 11660 }, { "epoch": 2.796475031472933, "grad_norm": 0.6595829129219055, "learning_rate": 6.803101766727957e-06, "loss": 1.6528, "step": 11662 }, { "epoch": 2.796954619027636, "grad_norm": 0.6964848041534424, "learning_rate": 6.787113278439524e-06, "loss": 1.62, "step": 11664 }, { "epoch": 2.797434206582339, "grad_norm": 0.7251077890396118, "learning_rate": 6.771124790151091e-06, "loss": 1.6106, "step": 11666 }, { "epoch": 2.797913794137042, "grad_norm": 0.830782949924469, "learning_rate": 6.755136301862659e-06, "loss": 1.5484, "step": 11668 }, { "epoch": 2.7983933816917452, "grad_norm": 0.6621502637863159, "learning_rate": 6.7391478135742275e-06, "loss": 1.6287, "step": 11670 }, { "epoch": 2.798872969246448, "grad_norm": 0.763106107711792, "learning_rate": 6.723159325285795e-06, "loss": 1.6798, "step": 11672 }, { "epoch": 2.799352556801151, "grad_norm": 0.7223814129829407, "learning_rate": 6.707170836997361e-06, "loss": 1.6263, "step": 11674 }, { "epoch": 2.799832144355854, "grad_norm": 0.6980445384979248, "learning_rate": 6.69118234870893e-06, "loss": 1.587, "step": 11676 }, { "epoch": 2.800311731910557, "grad_norm": 0.6284154057502747, "learning_rate": 6.675193860420498e-06, "loss": 1.6196, "step": 11678 }, { "epoch": 2.80079131946526, "grad_norm": 0.6989080309867859, "learning_rate": 6.659205372132066e-06, "loss": 1.5922, "step": 11680 }, { "epoch": 2.801270907019963, "grad_norm": 0.6689056754112244, "learning_rate": 6.643216883843633e-06, "loss": 1.6097, "step": 11682 }, { "epoch": 2.801750494574666, "grad_norm": 0.6703574657440186, "learning_rate": 6.6272283955552e-06, "loss": 1.6361, "step": 11684 }, { "epoch": 2.802230082129369, "grad_norm": 0.8364019393920898, "learning_rate": 6.611239907266768e-06, "loss": 1.6125, "step": 11686 }, { "epoch": 2.802709669684072, "grad_norm": 0.8548974990844727, 
"learning_rate": 6.5952514189783365e-06, "loss": 1.6009, "step": 11688 }, { "epoch": 2.803189257238775, "grad_norm": 0.7126105427742004, "learning_rate": 6.579262930689904e-06, "loss": 1.6157, "step": 11690 }, { "epoch": 2.8036688447934774, "grad_norm": 0.790573239326477, "learning_rate": 6.5632744424014705e-06, "loss": 1.677, "step": 11692 }, { "epoch": 2.8041484323481805, "grad_norm": 0.9060056805610657, "learning_rate": 6.547285954113039e-06, "loss": 1.6138, "step": 11694 }, { "epoch": 2.8046280199028835, "grad_norm": 0.7002848386764526, "learning_rate": 6.531297465824607e-06, "loss": 1.633, "step": 11696 }, { "epoch": 2.8051076074575865, "grad_norm": 0.7526127099990845, "learning_rate": 6.515308977536174e-06, "loss": 1.6398, "step": 11698 }, { "epoch": 2.8055871950122895, "grad_norm": 0.7055938243865967, "learning_rate": 6.499320489247742e-06, "loss": 1.6268, "step": 11700 }, { "epoch": 2.8060667825669925, "grad_norm": 0.7494547963142395, "learning_rate": 6.483332000959309e-06, "loss": 1.5438, "step": 11702 }, { "epoch": 2.8065463701216955, "grad_norm": 0.7225499153137207, "learning_rate": 6.467343512670877e-06, "loss": 1.5792, "step": 11704 }, { "epoch": 2.807025957676398, "grad_norm": 0.6796382069587708, "learning_rate": 6.451355024382446e-06, "loss": 1.6296, "step": 11706 }, { "epoch": 2.807505545231101, "grad_norm": 0.7135850191116333, "learning_rate": 6.435366536094013e-06, "loss": 1.6008, "step": 11708 }, { "epoch": 2.807985132785804, "grad_norm": 0.6856582164764404, "learning_rate": 6.4193780478055796e-06, "loss": 1.6002, "step": 11710 }, { "epoch": 2.808464720340507, "grad_norm": 0.697533905506134, "learning_rate": 6.403389559517148e-06, "loss": 1.6507, "step": 11712 }, { "epoch": 2.80894430789521, "grad_norm": 0.7339172959327698, "learning_rate": 6.387401071228716e-06, "loss": 1.6668, "step": 11714 }, { "epoch": 2.809423895449913, "grad_norm": 0.762242317199707, "learning_rate": 6.3714125829402834e-06, "loss": 1.6157, "step": 11716 }, { "epoch": 
2.809903483004616, "grad_norm": 0.699923038482666, "learning_rate": 6.35542409465185e-06, "loss": 1.6487, "step": 11718 }, { "epoch": 2.810383070559319, "grad_norm": 0.6791304349899292, "learning_rate": 6.339435606363418e-06, "loss": 1.638, "step": 11720 }, { "epoch": 2.810862658114022, "grad_norm": 0.8702058792114258, "learning_rate": 6.3234471180749865e-06, "loss": 1.5946, "step": 11722 }, { "epoch": 2.811342245668725, "grad_norm": 0.6736220717430115, "learning_rate": 6.307458629786554e-06, "loss": 1.5958, "step": 11724 }, { "epoch": 2.8118218332234277, "grad_norm": 0.7241149544715881, "learning_rate": 6.291470141498122e-06, "loss": 1.5777, "step": 11726 }, { "epoch": 2.8123014207781307, "grad_norm": 0.7561489343643188, "learning_rate": 6.275481653209689e-06, "loss": 1.6122, "step": 11728 }, { "epoch": 2.8127810083328337, "grad_norm": 0.691887378692627, "learning_rate": 6.259493164921257e-06, "loss": 1.6159, "step": 11730 }, { "epoch": 2.8132605958875367, "grad_norm": 0.8146578073501587, "learning_rate": 6.243504676632825e-06, "loss": 1.5788, "step": 11732 }, { "epoch": 2.8137401834422398, "grad_norm": 0.7138683199882507, "learning_rate": 6.227516188344392e-06, "loss": 1.6234, "step": 11734 }, { "epoch": 2.8142197709969428, "grad_norm": 0.7562537789344788, "learning_rate": 6.21152770005596e-06, "loss": 1.6313, "step": 11736 }, { "epoch": 2.8146993585516453, "grad_norm": 0.7153006792068481, "learning_rate": 6.195539211767528e-06, "loss": 1.6245, "step": 11738 }, { "epoch": 2.8151789461063483, "grad_norm": 0.703458845615387, "learning_rate": 6.1795507234790956e-06, "loss": 1.6324, "step": 11740 }, { "epoch": 2.8156585336610513, "grad_norm": 0.7394338250160217, "learning_rate": 6.163562235190663e-06, "loss": 1.5818, "step": 11742 }, { "epoch": 2.8161381212157544, "grad_norm": 0.7320677042007446, "learning_rate": 6.14757374690223e-06, "loss": 1.6539, "step": 11744 }, { "epoch": 2.8166177087704574, "grad_norm": 0.6886640191078186, "learning_rate": 
6.131585258613799e-06, "loss": 1.6267, "step": 11746 }, { "epoch": 2.8170972963251604, "grad_norm": 0.7740932703018188, "learning_rate": 6.115596770325366e-06, "loss": 1.6113, "step": 11748 }, { "epoch": 2.8175768838798634, "grad_norm": 0.6900630593299866, "learning_rate": 6.099608282036933e-06, "loss": 1.6487, "step": 11750 }, { "epoch": 2.8180564714345664, "grad_norm": 0.6537785530090332, "learning_rate": 6.083619793748501e-06, "loss": 1.6305, "step": 11752 }, { "epoch": 2.8185360589892694, "grad_norm": 0.6659940481185913, "learning_rate": 6.067631305460069e-06, "loss": 1.6114, "step": 11754 }, { "epoch": 2.8190156465439724, "grad_norm": 0.7645663619041443, "learning_rate": 6.051642817171637e-06, "loss": 1.636, "step": 11756 }, { "epoch": 2.819495234098675, "grad_norm": 0.7266834378242493, "learning_rate": 6.035654328883205e-06, "loss": 1.5997, "step": 11758 }, { "epoch": 2.819974821653378, "grad_norm": 0.7888368964195251, "learning_rate": 6.019665840594772e-06, "loss": 1.6042, "step": 11760 }, { "epoch": 2.820454409208081, "grad_norm": 0.6943578720092773, "learning_rate": 6.0036773523063394e-06, "loss": 1.6384, "step": 11762 }, { "epoch": 2.820933996762784, "grad_norm": 0.6717579960823059, "learning_rate": 5.987688864017908e-06, "loss": 1.621, "step": 11764 }, { "epoch": 2.821413584317487, "grad_norm": 0.7226349115371704, "learning_rate": 5.971700375729475e-06, "loss": 1.6043, "step": 11766 }, { "epoch": 2.82189317187219, "grad_norm": 0.7370363473892212, "learning_rate": 5.9557118874410425e-06, "loss": 1.6358, "step": 11768 }, { "epoch": 2.822372759426893, "grad_norm": 0.8530606627464294, "learning_rate": 5.93972339915261e-06, "loss": 1.6266, "step": 11770 }, { "epoch": 2.8228523469815956, "grad_norm": 0.794382631778717, "learning_rate": 5.923734910864178e-06, "loss": 1.6084, "step": 11772 }, { "epoch": 2.8233319345362986, "grad_norm": 0.7036386728286743, "learning_rate": 5.907746422575746e-06, "loss": 1.6268, "step": 11774 }, { "epoch": 2.8238115220910016, 
"grad_norm": 0.7820142507553101, "learning_rate": 5.891757934287314e-06, "loss": 1.6768, "step": 11776 }, { "epoch": 2.8242911096457046, "grad_norm": 0.6689326167106628, "learning_rate": 5.875769445998881e-06, "loss": 1.5968, "step": 11778 }, { "epoch": 2.8247706972004076, "grad_norm": 0.693340003490448, "learning_rate": 5.8597809577104485e-06, "loss": 1.6296, "step": 11780 }, { "epoch": 2.8252502847551106, "grad_norm": 0.6986399292945862, "learning_rate": 5.843792469422017e-06, "loss": 1.5913, "step": 11782 }, { "epoch": 2.8257298723098137, "grad_norm": 0.6984077095985413, "learning_rate": 5.827803981133584e-06, "loss": 1.6573, "step": 11784 }, { "epoch": 2.8262094598645167, "grad_norm": 0.8080878853797913, "learning_rate": 5.8118154928451515e-06, "loss": 1.596, "step": 11786 }, { "epoch": 2.8266890474192197, "grad_norm": 0.8053566217422485, "learning_rate": 5.79582700455672e-06, "loss": 1.573, "step": 11788 }, { "epoch": 2.8271686349739227, "grad_norm": 0.7165510654449463, "learning_rate": 5.779838516268287e-06, "loss": 1.6064, "step": 11790 }, { "epoch": 2.8276482225286252, "grad_norm": 0.8403264880180359, "learning_rate": 5.763850027979855e-06, "loss": 1.6499, "step": 11792 }, { "epoch": 2.8281278100833283, "grad_norm": 0.642299234867096, "learning_rate": 5.747861539691422e-06, "loss": 1.598, "step": 11794 }, { "epoch": 2.8286073976380313, "grad_norm": 0.6851344108581543, "learning_rate": 5.73187305140299e-06, "loss": 1.5897, "step": 11796 }, { "epoch": 2.8290869851927343, "grad_norm": 0.7119179368019104, "learning_rate": 5.715884563114558e-06, "loss": 1.6413, "step": 11798 }, { "epoch": 2.8295665727474373, "grad_norm": 0.709835946559906, "learning_rate": 5.699896074826126e-06, "loss": 1.6218, "step": 11800 }, { "epoch": 2.8300461603021403, "grad_norm": 0.6749287247657776, "learning_rate": 5.683907586537693e-06, "loss": 1.6317, "step": 11802 }, { "epoch": 2.830525747856843, "grad_norm": 0.7277793884277344, "learning_rate": 5.667919098249261e-06, "loss": 1.6609, 
"step": 11804 }, { "epoch": 2.831005335411546, "grad_norm": 0.6793290972709656, "learning_rate": 5.651930609960829e-06, "loss": 1.6459, "step": 11806 }, { "epoch": 2.831484922966249, "grad_norm": 0.6692298650741577, "learning_rate": 5.635942121672396e-06, "loss": 1.6197, "step": 11808 }, { "epoch": 2.831964510520952, "grad_norm": 0.7139792442321777, "learning_rate": 5.619953633383964e-06, "loss": 1.5985, "step": 11810 }, { "epoch": 2.832444098075655, "grad_norm": 0.6847903728485107, "learning_rate": 5.603965145095531e-06, "loss": 1.6203, "step": 11812 }, { "epoch": 2.832923685630358, "grad_norm": 0.7031301856040955, "learning_rate": 5.587976656807099e-06, "loss": 1.5932, "step": 11814 }, { "epoch": 2.833403273185061, "grad_norm": 0.7346205711364746, "learning_rate": 5.571988168518667e-06, "loss": 1.6313, "step": 11816 }, { "epoch": 2.833882860739764, "grad_norm": 0.7582030892372131, "learning_rate": 5.555999680230235e-06, "loss": 1.5948, "step": 11818 }, { "epoch": 2.834362448294467, "grad_norm": 0.760908842086792, "learning_rate": 5.5400111919418015e-06, "loss": 1.6382, "step": 11820 }, { "epoch": 2.83484203584917, "grad_norm": 0.7251384854316711, "learning_rate": 5.52402270365337e-06, "loss": 1.6046, "step": 11822 }, { "epoch": 2.8353216234038725, "grad_norm": 0.758451521396637, "learning_rate": 5.508034215364938e-06, "loss": 1.6345, "step": 11824 }, { "epoch": 2.8358012109585755, "grad_norm": 0.772689938545227, "learning_rate": 5.492045727076505e-06, "loss": 1.6722, "step": 11826 }, { "epoch": 2.8362807985132785, "grad_norm": 0.7200405597686768, "learning_rate": 5.476057238788073e-06, "loss": 1.6142, "step": 11828 }, { "epoch": 2.8367603860679815, "grad_norm": 0.8470343351364136, "learning_rate": 5.46006875049964e-06, "loss": 1.6098, "step": 11830 }, { "epoch": 2.8372399736226845, "grad_norm": 0.7371212244033813, "learning_rate": 5.444080262211208e-06, "loss": 1.6369, "step": 11832 }, { "epoch": 2.8377195611773876, "grad_norm": 0.6682790517807007, 
"learning_rate": 5.428091773922776e-06, "loss": 1.6378, "step": 11834 }, { "epoch": 2.8381991487320906, "grad_norm": 0.7289266586303711, "learning_rate": 5.412103285634343e-06, "loss": 1.5711, "step": 11836 }, { "epoch": 2.838678736286793, "grad_norm": 0.6797116994857788, "learning_rate": 5.396114797345911e-06, "loss": 1.5899, "step": 11838 }, { "epoch": 2.839158323841496, "grad_norm": 0.6530433893203735, "learning_rate": 5.380126309057479e-06, "loss": 1.5419, "step": 11840 }, { "epoch": 2.839637911396199, "grad_norm": 0.7232608795166016, "learning_rate": 5.364137820769047e-06, "loss": 1.5921, "step": 11842 }, { "epoch": 2.840117498950902, "grad_norm": 0.6176049113273621, "learning_rate": 5.3481493324806144e-06, "loss": 1.624, "step": 11844 }, { "epoch": 2.840597086505605, "grad_norm": 0.6893401145935059, "learning_rate": 5.332160844192182e-06, "loss": 1.6216, "step": 11846 }, { "epoch": 2.841076674060308, "grad_norm": 0.7289633750915527, "learning_rate": 5.316172355903749e-06, "loss": 1.6077, "step": 11848 }, { "epoch": 2.841556261615011, "grad_norm": 0.7608917355537415, "learning_rate": 5.3001838676153175e-06, "loss": 1.6513, "step": 11850 }, { "epoch": 2.842035849169714, "grad_norm": 0.652492105960846, "learning_rate": 5.284195379326885e-06, "loss": 1.5937, "step": 11852 }, { "epoch": 2.842515436724417, "grad_norm": 0.7906776666641235, "learning_rate": 5.268206891038452e-06, "loss": 1.6658, "step": 11854 }, { "epoch": 2.84299502427912, "grad_norm": 0.6336293816566467, "learning_rate": 5.2522184027500205e-06, "loss": 1.6489, "step": 11856 }, { "epoch": 2.8434746118338228, "grad_norm": 0.6862633228302002, "learning_rate": 5.236229914461588e-06, "loss": 1.6539, "step": 11858 }, { "epoch": 2.843954199388526, "grad_norm": 0.7390397787094116, "learning_rate": 5.220241426173156e-06, "loss": 1.6212, "step": 11860 }, { "epoch": 2.844433786943229, "grad_norm": 0.691399335861206, "learning_rate": 5.204252937884723e-06, "loss": 1.6383, "step": 11862 }, { "epoch": 
2.844913374497932, "grad_norm": 0.6867002844810486, "learning_rate": 5.188264449596291e-06, "loss": 1.6199, "step": 11864 }, { "epoch": 2.845392962052635, "grad_norm": 0.6635928750038147, "learning_rate": 5.172275961307858e-06, "loss": 1.6048, "step": 11866 }, { "epoch": 2.845872549607338, "grad_norm": 0.6883673667907715, "learning_rate": 5.1562874730194266e-06, "loss": 1.5763, "step": 11868 }, { "epoch": 2.8463521371620404, "grad_norm": 0.8671238422393799, "learning_rate": 5.140298984730994e-06, "loss": 1.6455, "step": 11870 }, { "epoch": 2.8468317247167434, "grad_norm": 0.7194375395774841, "learning_rate": 5.124310496442561e-06, "loss": 1.6476, "step": 11872 }, { "epoch": 2.8473113122714464, "grad_norm": 0.6678900718688965, "learning_rate": 5.10832200815413e-06, "loss": 1.6044, "step": 11874 }, { "epoch": 2.8477908998261494, "grad_norm": 0.7583816051483154, "learning_rate": 5.092333519865697e-06, "loss": 1.6013, "step": 11876 }, { "epoch": 2.8482704873808524, "grad_norm": 0.7518280148506165, "learning_rate": 5.076345031577265e-06, "loss": 1.602, "step": 11878 }, { "epoch": 2.8487500749355554, "grad_norm": 0.734056293964386, "learning_rate": 5.060356543288832e-06, "loss": 1.6302, "step": 11880 }, { "epoch": 2.8492296624902584, "grad_norm": 0.7374541759490967, "learning_rate": 5.0443680550004e-06, "loss": 1.6139, "step": 11882 }, { "epoch": 2.8497092500449615, "grad_norm": 0.6819682121276855, "learning_rate": 5.028379566711967e-06, "loss": 1.635, "step": 11884 }, { "epoch": 2.8501888375996645, "grad_norm": 0.7248060703277588, "learning_rate": 5.012391078423536e-06, "loss": 1.555, "step": 11886 }, { "epoch": 2.8506684251543675, "grad_norm": 0.6628329157829285, "learning_rate": 4.996402590135103e-06, "loss": 1.6154, "step": 11888 }, { "epoch": 2.85114801270907, "grad_norm": 0.6480242609977722, "learning_rate": 4.9804141018466704e-06, "loss": 1.6381, "step": 11890 }, { "epoch": 2.851627600263773, "grad_norm": 0.7211325168609619, "learning_rate": 4.964425613558239e-06, 
"loss": 1.6523, "step": 11892 }, { "epoch": 2.852107187818476, "grad_norm": 0.7033253312110901, "learning_rate": 4.948437125269806e-06, "loss": 1.6751, "step": 11894 }, { "epoch": 2.852586775373179, "grad_norm": 0.6845990419387817, "learning_rate": 4.9324486369813735e-06, "loss": 1.6094, "step": 11896 }, { "epoch": 2.853066362927882, "grad_norm": 0.7319762110710144, "learning_rate": 4.916460148692941e-06, "loss": 1.6325, "step": 11898 }, { "epoch": 2.853545950482585, "grad_norm": 0.6335577368736267, "learning_rate": 4.900471660404509e-06, "loss": 1.6355, "step": 11900 }, { "epoch": 2.854025538037288, "grad_norm": 0.6753028631210327, "learning_rate": 4.8844831721160765e-06, "loss": 1.654, "step": 11902 }, { "epoch": 2.8545051255919907, "grad_norm": 0.7734463214874268, "learning_rate": 4.868494683827645e-06, "loss": 1.6059, "step": 11904 }, { "epoch": 2.8549847131466937, "grad_norm": 0.7915253043174744, "learning_rate": 4.852506195539212e-06, "loss": 1.6344, "step": 11906 }, { "epoch": 2.8554643007013967, "grad_norm": 0.6791313290596008, "learning_rate": 4.8365177072507795e-06, "loss": 1.5933, "step": 11908 }, { "epoch": 2.8559438882560997, "grad_norm": 0.6939966082572937, "learning_rate": 4.820529218962348e-06, "loss": 1.611, "step": 11910 }, { "epoch": 2.8564234758108027, "grad_norm": 0.7389323115348816, "learning_rate": 4.804540730673915e-06, "loss": 1.634, "step": 11912 }, { "epoch": 2.8569030633655057, "grad_norm": 0.7957252264022827, "learning_rate": 4.7885522423854825e-06, "loss": 1.6339, "step": 11914 }, { "epoch": 2.8573826509202087, "grad_norm": 0.948513388633728, "learning_rate": 4.77256375409705e-06, "loss": 1.6237, "step": 11916 }, { "epoch": 2.8578622384749117, "grad_norm": 0.716463565826416, "learning_rate": 4.756575265808618e-06, "loss": 1.6445, "step": 11918 }, { "epoch": 2.8583418260296147, "grad_norm": 0.7002072930335999, "learning_rate": 4.7405867775201856e-06, "loss": 1.6335, "step": 11920 }, { "epoch": 2.8588214135843177, "grad_norm": 
0.712690532207489, "learning_rate": 4.724598289231753e-06, "loss": 1.6338, "step": 11922 }, { "epoch": 2.8593010011390203, "grad_norm": 0.70566725730896, "learning_rate": 4.708609800943321e-06, "loss": 1.6519, "step": 11924 }, { "epoch": 2.8597805886937233, "grad_norm": 0.7480860352516174, "learning_rate": 4.692621312654889e-06, "loss": 1.5651, "step": 11926 }, { "epoch": 2.8602601762484263, "grad_norm": 0.7802610397338867, "learning_rate": 4.676632824366457e-06, "loss": 1.6105, "step": 11928 }, { "epoch": 2.8607397638031293, "grad_norm": 0.6606208682060242, "learning_rate": 4.660644336078024e-06, "loss": 1.6737, "step": 11930 }, { "epoch": 2.8612193513578323, "grad_norm": 0.6260578632354736, "learning_rate": 4.644655847789592e-06, "loss": 1.6189, "step": 11932 }, { "epoch": 2.8616989389125354, "grad_norm": 0.6711176633834839, "learning_rate": 4.628667359501159e-06, "loss": 1.6152, "step": 11934 }, { "epoch": 2.862178526467238, "grad_norm": 0.7056280374526978, "learning_rate": 4.612678871212727e-06, "loss": 1.6274, "step": 11936 }, { "epoch": 2.862658114021941, "grad_norm": 0.6359205842018127, "learning_rate": 4.596690382924295e-06, "loss": 1.6115, "step": 11938 }, { "epoch": 2.863137701576644, "grad_norm": 0.6786330342292786, "learning_rate": 4.580701894635862e-06, "loss": 1.6095, "step": 11940 }, { "epoch": 2.863617289131347, "grad_norm": 0.7003027200698853, "learning_rate": 4.56471340634743e-06, "loss": 1.5897, "step": 11942 }, { "epoch": 2.86409687668605, "grad_norm": 0.6950631141662598, "learning_rate": 4.548724918058998e-06, "loss": 1.6346, "step": 11944 }, { "epoch": 2.864576464240753, "grad_norm": 0.6990659236907959, "learning_rate": 4.532736429770566e-06, "loss": 1.5843, "step": 11946 }, { "epoch": 2.865056051795456, "grad_norm": 0.7275398969650269, "learning_rate": 4.5167479414821325e-06, "loss": 1.5975, "step": 11948 }, { "epoch": 2.865535639350159, "grad_norm": 0.6868926286697388, "learning_rate": 4.500759453193701e-06, "loss": 1.6215, "step": 11950 }, 
{ "epoch": 2.866015226904862, "grad_norm": 0.6572771668434143, "learning_rate": 4.484770964905268e-06, "loss": 1.617, "step": 11952 }, { "epoch": 2.866494814459565, "grad_norm": 0.6959673166275024, "learning_rate": 4.468782476616836e-06, "loss": 1.6536, "step": 11954 }, { "epoch": 2.8669744020142676, "grad_norm": 0.7670828700065613, "learning_rate": 4.452793988328404e-06, "loss": 1.6225, "step": 11956 }, { "epoch": 2.8674539895689706, "grad_norm": 0.7053151726722717, "learning_rate": 4.436805500039971e-06, "loss": 1.5915, "step": 11958 }, { "epoch": 2.8679335771236736, "grad_norm": 0.7300362586975098, "learning_rate": 4.420817011751539e-06, "loss": 1.69, "step": 11960 }, { "epoch": 2.8684131646783766, "grad_norm": 0.768601655960083, "learning_rate": 4.404828523463107e-06, "loss": 1.6084, "step": 11962 }, { "epoch": 2.8688927522330796, "grad_norm": 0.6612595319747925, "learning_rate": 4.388840035174675e-06, "loss": 1.6261, "step": 11964 }, { "epoch": 2.8693723397877826, "grad_norm": 0.6750278472900391, "learning_rate": 4.3728515468862416e-06, "loss": 1.6005, "step": 11966 }, { "epoch": 2.8698519273424856, "grad_norm": 0.7870577573776245, "learning_rate": 4.35686305859781e-06, "loss": 1.6313, "step": 11968 }, { "epoch": 2.870331514897188, "grad_norm": 0.7777563333511353, "learning_rate": 4.340874570309377e-06, "loss": 1.6135, "step": 11970 }, { "epoch": 2.870811102451891, "grad_norm": 0.7834455966949463, "learning_rate": 4.3248860820209454e-06, "loss": 1.5926, "step": 11972 }, { "epoch": 2.871290690006594, "grad_norm": 0.7197349071502686, "learning_rate": 4.308897593732513e-06, "loss": 1.6177, "step": 11974 }, { "epoch": 2.871770277561297, "grad_norm": 0.7128521203994751, "learning_rate": 4.29290910544408e-06, "loss": 1.6167, "step": 11976 }, { "epoch": 2.8722498651160002, "grad_norm": 0.6983156800270081, "learning_rate": 4.2769206171556485e-06, "loss": 1.6007, "step": 11978 }, { "epoch": 2.8727294526707032, "grad_norm": 0.6714699864387512, "learning_rate": 
4.260932128867216e-06, "loss": 1.6034, "step": 11980 }, { "epoch": 2.8732090402254062, "grad_norm": 0.6749136447906494, "learning_rate": 4.244943640578783e-06, "loss": 1.6302, "step": 11982 }, { "epoch": 2.8736886277801093, "grad_norm": 0.7194207310676575, "learning_rate": 4.228955152290351e-06, "loss": 1.6477, "step": 11984 }, { "epoch": 2.8741682153348123, "grad_norm": 0.7331058382987976, "learning_rate": 4.212966664001919e-06, "loss": 1.6471, "step": 11986 }, { "epoch": 2.8746478028895153, "grad_norm": 0.8135141730308533, "learning_rate": 4.196978175713487e-06, "loss": 1.626, "step": 11988 }, { "epoch": 2.875127390444218, "grad_norm": 0.8842312097549438, "learning_rate": 4.1809896874250545e-06, "loss": 1.6206, "step": 11990 }, { "epoch": 2.875606977998921, "grad_norm": 0.7144836187362671, "learning_rate": 4.165001199136622e-06, "loss": 1.633, "step": 11992 }, { "epoch": 2.876086565553624, "grad_norm": 0.7710250020027161, "learning_rate": 4.149012710848189e-06, "loss": 1.6586, "step": 11994 }, { "epoch": 2.876566153108327, "grad_norm": 0.8246757984161377, "learning_rate": 4.1330242225597575e-06, "loss": 1.6085, "step": 11996 }, { "epoch": 2.87704574066303, "grad_norm": 0.6644957065582275, "learning_rate": 4.117035734271325e-06, "loss": 1.614, "step": 11998 }, { "epoch": 2.877525328217733, "grad_norm": 0.7266626954078674, "learning_rate": 4.101047245982892e-06, "loss": 1.6192, "step": 12000 }, { "epoch": 2.877525328217733, "eval_loss": 1.7038612365722656, "eval_runtime": 331.2326, "eval_samples_per_second": 402.886, "eval_steps_per_second": 12.592, "step": 12000 }, { "epoch": 2.8780049157724354, "grad_norm": 0.7459209561347961, "learning_rate": 4.08505875769446e-06, "loss": 1.6393, "step": 12002 }, { "epoch": 2.8784845033271385, "grad_norm": Infinity, "learning_rate": 4.077064513550244e-06, "loss": 1.6428, "step": 12004 }, { "epoch": 2.8789640908818415, "grad_norm": 0.6494587659835815, "learning_rate": 4.061076025261812e-06, "loss": 1.616, "step": 12006 }, { 
"epoch": 2.8794436784365445, "grad_norm": 0.7861286997795105, "learning_rate": 4.045087536973379e-06, "loss": 1.603, "step": 12008 }, { "epoch": 2.8799232659912475, "grad_norm": 0.7365432381629944, "learning_rate": 4.029099048684947e-06, "loss": 1.6075, "step": 12010 }, { "epoch": 2.8804028535459505, "grad_norm": 0.820220410823822, "learning_rate": 4.013110560396515e-06, "loss": 1.6596, "step": 12012 }, { "epoch": 2.8808824411006535, "grad_norm": 0.682213306427002, "learning_rate": 3.997122072108082e-06, "loss": 1.6198, "step": 12014 }, { "epoch": 2.8813620286553565, "grad_norm": 0.6914762854576111, "learning_rate": 3.98113358381965e-06, "loss": 1.6196, "step": 12016 }, { "epoch": 2.8818416162100595, "grad_norm": 0.7902858257293701, "learning_rate": 3.965145095531218e-06, "loss": 1.6017, "step": 12018 }, { "epoch": 2.8823212037647625, "grad_norm": 0.6804274320602417, "learning_rate": 3.949156607242786e-06, "loss": 1.5876, "step": 12020 }, { "epoch": 2.882800791319465, "grad_norm": 0.8267788290977478, "learning_rate": 3.9331681189543525e-06, "loss": 1.6632, "step": 12022 }, { "epoch": 2.883280378874168, "grad_norm": 0.7423467040061951, "learning_rate": 3.917179630665921e-06, "loss": 1.6381, "step": 12024 }, { "epoch": 2.883759966428871, "grad_norm": 0.7011227607727051, "learning_rate": 3.901191142377488e-06, "loss": 1.6468, "step": 12026 }, { "epoch": 2.884239553983574, "grad_norm": 0.8503292798995972, "learning_rate": 3.885202654089056e-06, "loss": 1.6205, "step": 12028 }, { "epoch": 2.884719141538277, "grad_norm": 0.6693927049636841, "learning_rate": 3.869214165800624e-06, "loss": 1.635, "step": 12030 }, { "epoch": 2.88519872909298, "grad_norm": 0.6989386677742004, "learning_rate": 3.853225677512191e-06, "loss": 1.6453, "step": 12032 }, { "epoch": 2.885678316647683, "grad_norm": 0.765982449054718, "learning_rate": 3.837237189223759e-06, "loss": 1.5914, "step": 12034 }, { "epoch": 2.8861579042023857, "grad_norm": 0.7430548071861267, "learning_rate": 
3.821248700935327e-06, "loss": 1.595, "step": 12036 }, { "epoch": 2.8866374917570887, "grad_norm": 0.7052782773971558, "learning_rate": 3.8052602126468946e-06, "loss": 1.5836, "step": 12038 }, { "epoch": 2.8871170793117917, "grad_norm": 0.8655644655227661, "learning_rate": 3.789271724358462e-06, "loss": 1.6585, "step": 12040 }, { "epoch": 2.8875966668664947, "grad_norm": 0.7610220909118652, "learning_rate": 3.77328323607003e-06, "loss": 1.6117, "step": 12042 }, { "epoch": 2.8880762544211978, "grad_norm": 0.703325629234314, "learning_rate": 3.7572947477815972e-06, "loss": 1.6754, "step": 12044 }, { "epoch": 2.8885558419759008, "grad_norm": 0.6565271615982056, "learning_rate": 3.741306259493165e-06, "loss": 1.5784, "step": 12046 }, { "epoch": 2.8890354295306038, "grad_norm": 0.6942965388298035, "learning_rate": 3.7253177712047333e-06, "loss": 1.6435, "step": 12048 }, { "epoch": 2.889515017085307, "grad_norm": 0.6747040152549744, "learning_rate": 3.7093292829163003e-06, "loss": 1.5911, "step": 12050 }, { "epoch": 2.88999460464001, "grad_norm": 0.6562249064445496, "learning_rate": 3.6933407946278685e-06, "loss": 1.5471, "step": 12052 }, { "epoch": 2.890474192194713, "grad_norm": 0.6873058676719666, "learning_rate": 3.6773523063394355e-06, "loss": 1.6158, "step": 12054 }, { "epoch": 2.8909537797494154, "grad_norm": 0.8246073126792908, "learning_rate": 3.6613638180510037e-06, "loss": 1.6441, "step": 12056 }, { "epoch": 2.8914333673041184, "grad_norm": 0.6529651880264282, "learning_rate": 3.645375329762571e-06, "loss": 1.6188, "step": 12058 }, { "epoch": 2.8919129548588214, "grad_norm": 0.6908509135246277, "learning_rate": 3.629386841474139e-06, "loss": 1.6519, "step": 12060 }, { "epoch": 2.8923925424135244, "grad_norm": 0.7388949990272522, "learning_rate": 3.6133983531857063e-06, "loss": 1.6236, "step": 12062 }, { "epoch": 2.8928721299682274, "grad_norm": 0.7150157690048218, "learning_rate": 3.597409864897274e-06, "loss": 1.6338, "step": 12064 }, { "epoch": 
2.8933517175229304, "grad_norm": 0.6989811062812805, "learning_rate": 3.581421376608842e-06, "loss": 1.5941, "step": 12066 }, { "epoch": 2.893831305077633, "grad_norm": 0.7299280762672424, "learning_rate": 3.5654328883204093e-06, "loss": 1.6082, "step": 12068 }, { "epoch": 2.894310892632336, "grad_norm": 0.6845502257347107, "learning_rate": 3.549444400031977e-06, "loss": 1.6358, "step": 12070 }, { "epoch": 2.894790480187039, "grad_norm": 0.7551816701889038, "learning_rate": 3.5334559117435446e-06, "loss": 1.6013, "step": 12072 }, { "epoch": 2.895270067741742, "grad_norm": 0.8458673357963562, "learning_rate": 3.517467423455113e-06, "loss": 1.6146, "step": 12074 }, { "epoch": 2.895749655296445, "grad_norm": 0.7706674933433533, "learning_rate": 3.5014789351666798e-06, "loss": 1.6684, "step": 12076 }, { "epoch": 2.896229242851148, "grad_norm": 0.6862460374832153, "learning_rate": 3.485490446878248e-06, "loss": 1.5882, "step": 12078 }, { "epoch": 2.896708830405851, "grad_norm": 0.7150545120239258, "learning_rate": 3.469501958589815e-06, "loss": 1.6185, "step": 12080 }, { "epoch": 2.897188417960554, "grad_norm": 0.7701267600059509, "learning_rate": 3.4535134703013832e-06, "loss": 1.6689, "step": 12082 }, { "epoch": 2.897668005515257, "grad_norm": 0.6972388625144958, "learning_rate": 3.437524982012951e-06, "loss": 1.5682, "step": 12084 }, { "epoch": 2.89814759306996, "grad_norm": 0.7370589375495911, "learning_rate": 3.4215364937245184e-06, "loss": 1.6712, "step": 12086 }, { "epoch": 2.8986271806246626, "grad_norm": 0.716402530670166, "learning_rate": 3.4055480054360862e-06, "loss": 1.6174, "step": 12088 }, { "epoch": 2.8991067681793656, "grad_norm": 0.7495001554489136, "learning_rate": 3.3895595171476536e-06, "loss": 1.6125, "step": 12090 }, { "epoch": 2.8995863557340686, "grad_norm": 0.7016059756278992, "learning_rate": 3.3735710288592215e-06, "loss": 1.6203, "step": 12092 }, { "epoch": 2.9000659432887717, "grad_norm": 0.7445346713066101, "learning_rate": 
3.357582540570789e-06, "loss": 1.6621, "step": 12094 }, { "epoch": 2.9005455308434747, "grad_norm": 0.705980658531189, "learning_rate": 3.341594052282357e-06, "loss": 1.5774, "step": 12096 }, { "epoch": 2.9010251183981777, "grad_norm": 0.7115206718444824, "learning_rate": 3.325605563993925e-06, "loss": 1.6095, "step": 12098 }, { "epoch": 2.9015047059528807, "grad_norm": 0.6831516623497009, "learning_rate": 3.3096170757054923e-06, "loss": 1.6429, "step": 12100 }, { "epoch": 2.9019842935075832, "grad_norm": 0.7082229852676392, "learning_rate": 3.29362858741706e-06, "loss": 1.5787, "step": 12102 }, { "epoch": 2.9024638810622863, "grad_norm": 0.6578922867774963, "learning_rate": 3.2776400991286275e-06, "loss": 1.609, "step": 12104 }, { "epoch": 2.9029434686169893, "grad_norm": 0.7752401232719421, "learning_rate": 3.2616516108401953e-06, "loss": 1.6781, "step": 12106 }, { "epoch": 2.9034230561716923, "grad_norm": 0.7450330853462219, "learning_rate": 3.2456631225517627e-06, "loss": 1.6094, "step": 12108 }, { "epoch": 2.9039026437263953, "grad_norm": 0.7971760034561157, "learning_rate": 3.2296746342633305e-06, "loss": 1.6288, "step": 12110 }, { "epoch": 2.9043822312810983, "grad_norm": 0.7568903565406799, "learning_rate": 3.213686145974898e-06, "loss": 1.6709, "step": 12112 }, { "epoch": 2.9048618188358013, "grad_norm": 0.7296586632728577, "learning_rate": 3.1976976576864658e-06, "loss": 1.5688, "step": 12114 }, { "epoch": 2.9053414063905043, "grad_norm": 0.6925960779190063, "learning_rate": 3.181709169398034e-06, "loss": 1.635, "step": 12116 }, { "epoch": 2.9058209939452073, "grad_norm": 0.766981840133667, "learning_rate": 3.165720681109601e-06, "loss": 1.6396, "step": 12118 }, { "epoch": 2.9063005814999103, "grad_norm": 0.7426764369010925, "learning_rate": 3.149732192821169e-06, "loss": 1.5898, "step": 12120 }, { "epoch": 2.906780169054613, "grad_norm": 0.6865041851997375, "learning_rate": 3.1337437045327366e-06, "loss": 1.5938, "step": 12122 }, { "epoch": 
2.907259756609316, "grad_norm": 0.6866689920425415, "learning_rate": 3.1177552162443044e-06, "loss": 1.6689, "step": 12124 }, { "epoch": 2.907739344164019, "grad_norm": 0.6939504742622375, "learning_rate": 3.101766727955872e-06, "loss": 1.6428, "step": 12126 }, { "epoch": 2.908218931718722, "grad_norm": 0.654066264629364, "learning_rate": 3.0857782396674396e-06, "loss": 1.6207, "step": 12128 }, { "epoch": 2.908698519273425, "grad_norm": 0.6906245350837708, "learning_rate": 3.0697897513790074e-06, "loss": 1.594, "step": 12130 }, { "epoch": 2.909178106828128, "grad_norm": 0.8436627984046936, "learning_rate": 3.053801263090575e-06, "loss": 1.6792, "step": 12132 }, { "epoch": 2.9096576943828305, "grad_norm": 0.8132093548774719, "learning_rate": 3.0378127748021427e-06, "loss": 1.5775, "step": 12134 }, { "epoch": 2.9101372819375335, "grad_norm": 0.7389913201332092, "learning_rate": 3.02182428651371e-06, "loss": 1.612, "step": 12136 }, { "epoch": 2.9106168694922365, "grad_norm": 0.6557602882385254, "learning_rate": 3.005835798225278e-06, "loss": 1.6461, "step": 12138 }, { "epoch": 2.9110964570469395, "grad_norm": 0.6748431324958801, "learning_rate": 2.9898473099368457e-06, "loss": 1.6048, "step": 12140 }, { "epoch": 2.9115760446016425, "grad_norm": 0.7229704856872559, "learning_rate": 2.9738588216484135e-06, "loss": 1.6075, "step": 12142 }, { "epoch": 2.9120556321563456, "grad_norm": 0.7257249355316162, "learning_rate": 2.957870333359981e-06, "loss": 1.6146, "step": 12144 }, { "epoch": 2.9125352197110486, "grad_norm": 0.6923632621765137, "learning_rate": 2.9418818450715487e-06, "loss": 1.5925, "step": 12146 }, { "epoch": 2.9130148072657516, "grad_norm": 0.6860981583595276, "learning_rate": 2.925893356783116e-06, "loss": 1.644, "step": 12148 }, { "epoch": 2.9134943948204546, "grad_norm": 0.6932970881462097, "learning_rate": 2.909904868494684e-06, "loss": 1.6061, "step": 12150 }, { "epoch": 2.9139739823751576, "grad_norm": 0.7855769395828247, "learning_rate": 
2.8939163802062517e-06, "loss": 1.6259, "step": 12152 }, { "epoch": 2.91445356992986, "grad_norm": 0.7510201930999756, "learning_rate": 2.877927891917819e-06, "loss": 1.5982, "step": 12154 }, { "epoch": 2.914933157484563, "grad_norm": 0.7857456803321838, "learning_rate": 2.861939403629387e-06, "loss": 1.6468, "step": 12156 }, { "epoch": 2.915412745039266, "grad_norm": 0.7166239023208618, "learning_rate": 2.8459509153409548e-06, "loss": 1.6784, "step": 12158 }, { "epoch": 2.915892332593969, "grad_norm": 0.7134708166122437, "learning_rate": 2.8299624270525226e-06, "loss": 1.6237, "step": 12160 }, { "epoch": 2.916371920148672, "grad_norm": 0.7864059209823608, "learning_rate": 2.81397393876409e-06, "loss": 1.6314, "step": 12162 }, { "epoch": 2.916851507703375, "grad_norm": 0.6536415815353394, "learning_rate": 2.797985450475658e-06, "loss": 1.6006, "step": 12164 }, { "epoch": 2.917331095258078, "grad_norm": 0.7249706983566284, "learning_rate": 2.781996962187225e-06, "loss": 1.5993, "step": 12166 }, { "epoch": 2.9178106828127808, "grad_norm": 0.6609278321266174, "learning_rate": 2.766008473898793e-06, "loss": 1.6245, "step": 12168 }, { "epoch": 2.918290270367484, "grad_norm": 0.6808717250823975, "learning_rate": 2.7500199856103604e-06, "loss": 1.6066, "step": 12170 }, { "epoch": 2.918769857922187, "grad_norm": 0.7548129558563232, "learning_rate": 2.7340314973219282e-06, "loss": 1.6624, "step": 12172 }, { "epoch": 2.91924944547689, "grad_norm": 0.7210598587989807, "learning_rate": 2.718043009033496e-06, "loss": 1.5692, "step": 12174 }, { "epoch": 2.919729033031593, "grad_norm": 0.7670364379882812, "learning_rate": 2.702054520745064e-06, "loss": 1.6218, "step": 12176 }, { "epoch": 2.920208620586296, "grad_norm": 0.7470827698707581, "learning_rate": 2.6860660324566312e-06, "loss": 1.6518, "step": 12178 }, { "epoch": 2.920688208140999, "grad_norm": 0.7739981412887573, "learning_rate": 2.670077544168199e-06, "loss": 1.6287, "step": 12180 }, { "epoch": 2.921167795695702, 
"grad_norm": 0.681985080242157, "learning_rate": 2.6540890558797665e-06, "loss": 1.6244, "step": 12182 }, { "epoch": 2.921647383250405, "grad_norm": 0.694303572177887, "learning_rate": 2.6381005675913343e-06, "loss": 1.6357, "step": 12184 }, { "epoch": 2.922126970805108, "grad_norm": 0.6754750609397888, "learning_rate": 2.622112079302902e-06, "loss": 1.6173, "step": 12186 }, { "epoch": 2.9226065583598104, "grad_norm": 0.6784439086914062, "learning_rate": 2.6061235910144695e-06, "loss": 1.5936, "step": 12188 }, { "epoch": 2.9230861459145134, "grad_norm": 0.7702857255935669, "learning_rate": 2.5901351027260377e-06, "loss": 1.6475, "step": 12190 }, { "epoch": 2.9235657334692164, "grad_norm": 0.7782331109046936, "learning_rate": 2.574146614437605e-06, "loss": 1.6091, "step": 12192 }, { "epoch": 2.9240453210239195, "grad_norm": 0.8424715995788574, "learning_rate": 2.558158126149173e-06, "loss": 1.5911, "step": 12194 }, { "epoch": 2.9245249085786225, "grad_norm": 0.7462125420570374, "learning_rate": 2.5421696378607403e-06, "loss": 1.6332, "step": 12196 }, { "epoch": 2.9250044961333255, "grad_norm": 0.6633976697921753, "learning_rate": 2.526181149572308e-06, "loss": 1.6027, "step": 12198 }, { "epoch": 2.925484083688028, "grad_norm": 0.6866486668586731, "learning_rate": 2.5101926612838755e-06, "loss": 1.6045, "step": 12200 }, { "epoch": 2.925963671242731, "grad_norm": 0.7188611030578613, "learning_rate": 2.4942041729954434e-06, "loss": 1.614, "step": 12202 }, { "epoch": 2.926443258797434, "grad_norm": 0.6940524578094482, "learning_rate": 2.4782156847070108e-06, "loss": 1.5959, "step": 12204 }, { "epoch": 2.926922846352137, "grad_norm": 0.7331332564353943, "learning_rate": 2.462227196418579e-06, "loss": 1.6437, "step": 12206 }, { "epoch": 2.92740243390684, "grad_norm": 0.7029302716255188, "learning_rate": 2.4462387081301464e-06, "loss": 1.5794, "step": 12208 }, { "epoch": 2.927882021461543, "grad_norm": 0.6613962650299072, "learning_rate": 2.430250219841714e-06, "loss": 
1.6386, "step": 12210 }, { "epoch": 2.928361609016246, "grad_norm": 0.7766419649124146, "learning_rate": 2.4142617315532816e-06, "loss": 1.6162, "step": 12212 }, { "epoch": 2.928841196570949, "grad_norm": 0.7204955220222473, "learning_rate": 2.3982732432648494e-06, "loss": 1.6747, "step": 12214 }, { "epoch": 2.929320784125652, "grad_norm": 0.6723090410232544, "learning_rate": 2.3822847549764172e-06, "loss": 1.5757, "step": 12216 }, { "epoch": 2.929800371680355, "grad_norm": 0.6887076497077942, "learning_rate": 2.3662962666879846e-06, "loss": 1.5917, "step": 12218 }, { "epoch": 2.9302799592350577, "grad_norm": 0.757494330406189, "learning_rate": 2.3503077783995524e-06, "loss": 1.6517, "step": 12220 }, { "epoch": 2.9307595467897607, "grad_norm": 0.6501168608665466, "learning_rate": 2.33431929011112e-06, "loss": 1.6762, "step": 12222 }, { "epoch": 2.9312391343444637, "grad_norm": 0.7621583342552185, "learning_rate": 2.318330801822688e-06, "loss": 1.6551, "step": 12224 }, { "epoch": 2.9317187218991667, "grad_norm": 0.7070362567901611, "learning_rate": 2.3023423135342555e-06, "loss": 1.6094, "step": 12226 }, { "epoch": 2.9321983094538697, "grad_norm": 0.6862632036209106, "learning_rate": 2.2863538252458233e-06, "loss": 1.6276, "step": 12228 }, { "epoch": 2.9326778970085727, "grad_norm": 0.7369387149810791, "learning_rate": 2.2703653369573907e-06, "loss": 1.6007, "step": 12230 }, { "epoch": 2.9331574845632757, "grad_norm": 0.7410715818405151, "learning_rate": 2.2543768486689585e-06, "loss": 1.5924, "step": 12232 }, { "epoch": 2.9336370721179783, "grad_norm": 0.728442370891571, "learning_rate": 2.238388360380526e-06, "loss": 1.5779, "step": 12234 }, { "epoch": 2.9341166596726813, "grad_norm": 0.6967246532440186, "learning_rate": 2.2223998720920937e-06, "loss": 1.6035, "step": 12236 }, { "epoch": 2.9345962472273843, "grad_norm": 0.7108117341995239, "learning_rate": 2.206411383803661e-06, "loss": 1.6183, "step": 12238 }, { "epoch": 2.9350758347820873, "grad_norm": 
0.6980534791946411, "learning_rate": 2.1904228955152294e-06, "loss": 1.6214, "step": 12240 }, { "epoch": 2.9355554223367903, "grad_norm": 0.70053631067276, "learning_rate": 2.1744344072267967e-06, "loss": 1.6396, "step": 12242 }, { "epoch": 2.9360350098914934, "grad_norm": 0.685453474521637, "learning_rate": 2.1584459189383646e-06, "loss": 1.6292, "step": 12244 }, { "epoch": 2.9365145974461964, "grad_norm": 0.7124674916267395, "learning_rate": 2.1424574306499324e-06, "loss": 1.6405, "step": 12246 }, { "epoch": 2.9369941850008994, "grad_norm": 0.7793580889701843, "learning_rate": 2.1264689423614998e-06, "loss": 1.6318, "step": 12248 }, { "epoch": 2.9374737725556024, "grad_norm": 0.8268436193466187, "learning_rate": 2.1104804540730676e-06, "loss": 1.6682, "step": 12250 }, { "epoch": 2.9379533601103054, "grad_norm": 0.7746102809906006, "learning_rate": 2.094491965784635e-06, "loss": 1.6211, "step": 12252 }, { "epoch": 2.938432947665008, "grad_norm": 0.6512143611907959, "learning_rate": 2.078503477496203e-06, "loss": 1.6577, "step": 12254 }, { "epoch": 2.938912535219711, "grad_norm": 0.7636014223098755, "learning_rate": 2.0625149892077706e-06, "loss": 1.5947, "step": 12256 }, { "epoch": 2.939392122774414, "grad_norm": 0.642645001411438, "learning_rate": 2.0465265009193384e-06, "loss": 1.6208, "step": 12258 }, { "epoch": 2.939871710329117, "grad_norm": 0.7914702296257019, "learning_rate": 2.030538012630906e-06, "loss": 1.6282, "step": 12260 }, { "epoch": 2.94035129788382, "grad_norm": 0.6959459781646729, "learning_rate": 2.0145495243424736e-06, "loss": 1.5783, "step": 12262 }, { "epoch": 2.940830885438523, "grad_norm": 0.7103701829910278, "learning_rate": 1.998561036054041e-06, "loss": 1.6191, "step": 12264 }, { "epoch": 2.9413104729932256, "grad_norm": 0.8076565861701965, "learning_rate": 1.982572547765609e-06, "loss": 1.6604, "step": 12266 }, { "epoch": 2.9417900605479286, "grad_norm": 0.7106682062149048, "learning_rate": 1.9665840594771763e-06, "loss": 1.5865, 
"step": 12268 }, { "epoch": 2.9422696481026316, "grad_norm": 0.7153822183609009, "learning_rate": 1.950595571188744e-06, "loss": 1.6056, "step": 12270 }, { "epoch": 2.9427492356573346, "grad_norm": 0.7158863544464111, "learning_rate": 1.934607082900312e-06, "loss": 1.6361, "step": 12272 }, { "epoch": 2.9432288232120376, "grad_norm": 0.704164981842041, "learning_rate": 1.9186185946118797e-06, "loss": 1.6103, "step": 12274 }, { "epoch": 2.9437084107667406, "grad_norm": 0.6199699640274048, "learning_rate": 1.9026301063234473e-06, "loss": 1.5876, "step": 12276 }, { "epoch": 2.9441879983214436, "grad_norm": 0.717969536781311, "learning_rate": 1.886641618035015e-06, "loss": 1.6463, "step": 12278 }, { "epoch": 2.9446675858761466, "grad_norm": 0.6930253505706787, "learning_rate": 1.8706531297465825e-06, "loss": 1.615, "step": 12280 }, { "epoch": 2.9451471734308496, "grad_norm": 0.7275232076644897, "learning_rate": 1.8546646414581501e-06, "loss": 1.6242, "step": 12282 }, { "epoch": 2.9456267609855527, "grad_norm": 0.7248890995979309, "learning_rate": 1.8386761531697177e-06, "loss": 1.6144, "step": 12284 }, { "epoch": 2.946106348540255, "grad_norm": 0.8950987458229065, "learning_rate": 1.8226876648812856e-06, "loss": 1.5995, "step": 12286 }, { "epoch": 2.9465859360949582, "grad_norm": 0.651711642742157, "learning_rate": 1.8066991765928532e-06, "loss": 1.6019, "step": 12288 }, { "epoch": 2.9470655236496612, "grad_norm": 0.7241664528846741, "learning_rate": 1.790710688304421e-06, "loss": 1.64, "step": 12290 }, { "epoch": 2.9475451112043642, "grad_norm": 0.7115432620048523, "learning_rate": 1.7747222000159886e-06, "loss": 1.5702, "step": 12292 }, { "epoch": 2.9480246987590673, "grad_norm": 0.7523689866065979, "learning_rate": 1.7587337117275564e-06, "loss": 1.6378, "step": 12294 }, { "epoch": 2.9485042863137703, "grad_norm": 0.8613162636756897, "learning_rate": 1.742745223439124e-06, "loss": 1.6372, "step": 12296 }, { "epoch": 2.9489838738684733, "grad_norm": 
0.7028841376304626, "learning_rate": 1.7267567351506916e-06, "loss": 1.6379, "step": 12298 }, { "epoch": 2.949463461423176, "grad_norm": 0.6819335222244263, "learning_rate": 1.7107682468622592e-06, "loss": 1.6073, "step": 12300 }, { "epoch": 2.949943048977879, "grad_norm": 0.6837725639343262, "learning_rate": 1.6947797585738268e-06, "loss": 1.5839, "step": 12302 }, { "epoch": 2.950422636532582, "grad_norm": 0.7073822617530823, "learning_rate": 1.6787912702853944e-06, "loss": 1.6601, "step": 12304 }, { "epoch": 2.950902224087285, "grad_norm": 0.7692134380340576, "learning_rate": 1.6628027819969625e-06, "loss": 1.6499, "step": 12306 }, { "epoch": 2.951381811641988, "grad_norm": 0.7239872217178345, "learning_rate": 1.64681429370853e-06, "loss": 1.6415, "step": 12308 }, { "epoch": 2.951861399196691, "grad_norm": 0.6936225295066833, "learning_rate": 1.6308258054200977e-06, "loss": 1.6561, "step": 12310 }, { "epoch": 2.952340986751394, "grad_norm": 0.7138931751251221, "learning_rate": 1.6148373171316653e-06, "loss": 1.5638, "step": 12312 }, { "epoch": 2.952820574306097, "grad_norm": 0.8389565944671631, "learning_rate": 1.5988488288432329e-06, "loss": 1.6602, "step": 12314 }, { "epoch": 2.9533001618608, "grad_norm": 0.8079691529273987, "learning_rate": 1.5828603405548005e-06, "loss": 1.6578, "step": 12316 }, { "epoch": 2.953779749415503, "grad_norm": 0.6805813312530518, "learning_rate": 1.5668718522663683e-06, "loss": 1.6401, "step": 12318 }, { "epoch": 2.9542593369702055, "grad_norm": 0.6804506778717041, "learning_rate": 1.550883363977936e-06, "loss": 1.6642, "step": 12320 }, { "epoch": 2.9547389245249085, "grad_norm": 0.6949424147605896, "learning_rate": 1.5348948756895037e-06, "loss": 1.5584, "step": 12322 }, { "epoch": 2.9552185120796115, "grad_norm": 0.6846922636032104, "learning_rate": 1.5189063874010713e-06, "loss": 1.596, "step": 12324 }, { "epoch": 2.9556980996343145, "grad_norm": 0.7132629156112671, "learning_rate": 1.502917899112639e-06, "loss": 1.6207, "step": 
12326 }, { "epoch": 2.9561776871890175, "grad_norm": 0.7141411900520325, "learning_rate": 1.4869294108242068e-06, "loss": 1.6133, "step": 12328 }, { "epoch": 2.9566572747437205, "grad_norm": 0.7051303386688232, "learning_rate": 1.4709409225357744e-06, "loss": 1.5906, "step": 12330 }, { "epoch": 2.957136862298423, "grad_norm": 0.6730098724365234, "learning_rate": 1.454952434247342e-06, "loss": 1.6167, "step": 12332 }, { "epoch": 2.957616449853126, "grad_norm": 0.7205888032913208, "learning_rate": 1.4389639459589096e-06, "loss": 1.5992, "step": 12334 }, { "epoch": 2.958096037407829, "grad_norm": 0.7576385140419006, "learning_rate": 1.4229754576704774e-06, "loss": 1.6101, "step": 12336 }, { "epoch": 2.958575624962532, "grad_norm": 0.6566855311393738, "learning_rate": 1.406986969382045e-06, "loss": 1.6165, "step": 12338 }, { "epoch": 2.959055212517235, "grad_norm": 0.702817440032959, "learning_rate": 1.3909984810936126e-06, "loss": 1.6021, "step": 12340 }, { "epoch": 2.959534800071938, "grad_norm": 0.740626335144043, "learning_rate": 1.3750099928051802e-06, "loss": 1.5549, "step": 12342 }, { "epoch": 2.960014387626641, "grad_norm": 0.7785112857818604, "learning_rate": 1.359021504516748e-06, "loss": 1.64, "step": 12344 }, { "epoch": 2.960493975181344, "grad_norm": 0.6558559536933899, "learning_rate": 1.3430330162283156e-06, "loss": 1.5641, "step": 12346 }, { "epoch": 2.960973562736047, "grad_norm": 0.8216441869735718, "learning_rate": 1.3270445279398832e-06, "loss": 1.6193, "step": 12348 }, { "epoch": 2.96145315029075, "grad_norm": 0.7331695556640625, "learning_rate": 1.311056039651451e-06, "loss": 1.6097, "step": 12350 }, { "epoch": 2.9619327378454527, "grad_norm": 0.6761910915374756, "learning_rate": 1.2950675513630189e-06, "loss": 1.6308, "step": 12352 }, { "epoch": 2.9624123254001558, "grad_norm": 0.7683796286582947, "learning_rate": 1.2790790630745865e-06, "loss": 1.604, "step": 12354 }, { "epoch": 2.9628919129548588, "grad_norm": 0.7342469692230225, 
"learning_rate": 1.263090574786154e-06, "loss": 1.5814, "step": 12356 }, { "epoch": 2.9633715005095618, "grad_norm": 0.7335415482521057, "learning_rate": 1.2471020864977217e-06, "loss": 1.6166, "step": 12358 }, { "epoch": 2.963851088064265, "grad_norm": 0.7283056378364563, "learning_rate": 1.2311135982092895e-06, "loss": 1.6555, "step": 12360 }, { "epoch": 2.964330675618968, "grad_norm": 0.6956340670585632, "learning_rate": 1.215125109920857e-06, "loss": 1.6217, "step": 12362 }, { "epoch": 2.964810263173671, "grad_norm": 0.7280511856079102, "learning_rate": 1.1991366216324247e-06, "loss": 1.5874, "step": 12364 }, { "epoch": 2.9652898507283734, "grad_norm": 0.6614075899124146, "learning_rate": 1.1831481333439923e-06, "loss": 1.6635, "step": 12366 }, { "epoch": 2.9657694382830764, "grad_norm": 0.7692427635192871, "learning_rate": 1.16715964505556e-06, "loss": 1.5736, "step": 12368 }, { "epoch": 2.9662490258377794, "grad_norm": 0.6451560854911804, "learning_rate": 1.1511711567671277e-06, "loss": 1.6141, "step": 12370 }, { "epoch": 2.9667286133924824, "grad_norm": 0.8356853127479553, "learning_rate": 1.1351826684786953e-06, "loss": 1.5785, "step": 12372 }, { "epoch": 2.9672082009471854, "grad_norm": 0.6709420680999756, "learning_rate": 1.119194180190263e-06, "loss": 1.5963, "step": 12374 }, { "epoch": 2.9676877885018884, "grad_norm": 0.853738009929657, "learning_rate": 1.1032056919018306e-06, "loss": 1.6388, "step": 12376 }, { "epoch": 2.9681673760565914, "grad_norm": 0.776681125164032, "learning_rate": 1.0872172036133984e-06, "loss": 1.6515, "step": 12378 }, { "epoch": 2.9686469636112944, "grad_norm": 0.6504598259925842, "learning_rate": 1.0712287153249662e-06, "loss": 1.6029, "step": 12380 }, { "epoch": 2.9691265511659974, "grad_norm": 0.7277162671089172, "learning_rate": 1.0552402270365338e-06, "loss": 1.5955, "step": 12382 }, { "epoch": 2.9696061387207005, "grad_norm": 0.7317724823951721, "learning_rate": 1.0392517387481014e-06, "loss": 1.6503, "step": 12384 }, { 
"epoch": 2.970085726275403, "grad_norm": 0.7247269749641418, "learning_rate": 1.0232632504596692e-06, "loss": 1.5782, "step": 12386 }, { "epoch": 2.970565313830106, "grad_norm": 0.7228003144264221, "learning_rate": 1.0072747621712368e-06, "loss": 1.5905, "step": 12388 }, { "epoch": 2.971044901384809, "grad_norm": 0.7145763039588928, "learning_rate": 9.912862738828044e-07, "loss": 1.6012, "step": 12390 }, { "epoch": 2.971524488939512, "grad_norm": 0.7046102285385132, "learning_rate": 9.75297785594372e-07, "loss": 1.5912, "step": 12392 }, { "epoch": 2.972004076494215, "grad_norm": 0.7306727766990662, "learning_rate": 9.593092973059399e-07, "loss": 1.6241, "step": 12394 }, { "epoch": 2.972483664048918, "grad_norm": 0.6759101152420044, "learning_rate": 9.433208090175075e-07, "loss": 1.5959, "step": 12396 }, { "epoch": 2.9729632516036206, "grad_norm": 0.6941449046134949, "learning_rate": 9.273323207290751e-07, "loss": 1.6475, "step": 12398 }, { "epoch": 2.9734428391583236, "grad_norm": 0.6840864419937134, "learning_rate": 9.113438324406428e-07, "loss": 1.5758, "step": 12400 }, { "epoch": 2.9734428391583236, "eval_loss": 1.7035709619522095, "eval_runtime": 331.256, "eval_samples_per_second": 402.858, "eval_steps_per_second": 12.591, "step": 12400 }, { "epoch": 2.9739224267130266, "grad_norm": 0.6694924235343933, "learning_rate": 8.953553441522105e-07, "loss": 1.6459, "step": 12402 }, { "epoch": 2.9744020142677297, "grad_norm": 0.7647275328636169, "learning_rate": 8.793668558637782e-07, "loss": 1.6281, "step": 12404 }, { "epoch": 2.9748816018224327, "grad_norm": 0.6271288394927979, "learning_rate": 8.633783675753458e-07, "loss": 1.6136, "step": 12406 }, { "epoch": 2.9753611893771357, "grad_norm": 0.7538509964942932, "learning_rate": 8.473898792869134e-07, "loss": 1.6341, "step": 12408 }, { "epoch": 2.9758407769318387, "grad_norm": 0.8610097169876099, "learning_rate": 8.314013909984812e-07, "loss": 1.6353, "step": 12410 }, { "epoch": 2.9763203644865417, "grad_norm": 
0.708824098110199, "learning_rate": 8.154129027100488e-07, "loss": 1.6384, "step": 12412 }, { "epoch": 2.9767999520412447, "grad_norm": 0.6431678533554077, "learning_rate": 7.994244144216164e-07, "loss": 1.6056, "step": 12414 }, { "epoch": 2.9772795395959477, "grad_norm": 0.7068302035331726, "learning_rate": 7.834359261331841e-07, "loss": 1.6104, "step": 12416 }, { "epoch": 2.9777591271506503, "grad_norm": 0.7223995923995972, "learning_rate": 7.674474378447519e-07, "loss": 1.592, "step": 12418 }, { "epoch": 2.9782387147053533, "grad_norm": 0.5942600965499878, "learning_rate": 7.514589495563195e-07, "loss": 1.5576, "step": 12420 }, { "epoch": 2.9787183022600563, "grad_norm": 0.7260330319404602, "learning_rate": 7.354704612678872e-07, "loss": 1.608, "step": 12422 }, { "epoch": 2.9791978898147593, "grad_norm": 0.8144661784172058, "learning_rate": 7.194819729794548e-07, "loss": 1.6082, "step": 12424 }, { "epoch": 2.9796774773694623, "grad_norm": 0.6630839705467224, "learning_rate": 7.034934846910225e-07, "loss": 1.5872, "step": 12426 }, { "epoch": 2.9801570649241653, "grad_norm": 0.6996806859970093, "learning_rate": 6.875049964025901e-07, "loss": 1.6007, "step": 12428 }, { "epoch": 2.9806366524788683, "grad_norm": 0.7363983988761902, "learning_rate": 6.715165081141578e-07, "loss": 1.6087, "step": 12430 }, { "epoch": 2.981116240033571, "grad_norm": 0.7379962205886841, "learning_rate": 6.555280198257255e-07, "loss": 1.6659, "step": 12432 }, { "epoch": 2.981595827588274, "grad_norm": 0.818171501159668, "learning_rate": 6.395395315372932e-07, "loss": 1.6507, "step": 12434 }, { "epoch": 2.982075415142977, "grad_norm": 0.7679448127746582, "learning_rate": 6.235510432488608e-07, "loss": 1.566, "step": 12436 }, { "epoch": 2.98255500269768, "grad_norm": 0.7824479937553406, "learning_rate": 6.075625549604286e-07, "loss": 1.6259, "step": 12438 }, { "epoch": 2.983034590252383, "grad_norm": 0.7053433656692505, "learning_rate": 5.915740666719962e-07, "loss": 1.6402, "step": 12440 }, 
{ "epoch": 2.983514177807086, "grad_norm": 0.6903310418128967, "learning_rate": 5.755855783835639e-07, "loss": 1.6109, "step": 12442 }, { "epoch": 2.983993765361789, "grad_norm": 0.6778095364570618, "learning_rate": 5.595970900951315e-07, "loss": 1.6276, "step": 12444 }, { "epoch": 2.984473352916492, "grad_norm": 0.6853286027908325, "learning_rate": 5.436086018066992e-07, "loss": 1.6334, "step": 12446 }, { "epoch": 2.984952940471195, "grad_norm": 0.6490159034729004, "learning_rate": 5.276201135182669e-07, "loss": 1.5842, "step": 12448 }, { "epoch": 2.985432528025898, "grad_norm": 0.688663125038147, "learning_rate": 5.116316252298346e-07, "loss": 1.6627, "step": 12450 }, { "epoch": 2.9859121155806005, "grad_norm": 0.7631375193595886, "learning_rate": 4.956431369414022e-07, "loss": 1.6071, "step": 12452 }, { "epoch": 2.9863917031353036, "grad_norm": 0.7223824262619019, "learning_rate": 4.796546486529699e-07, "loss": 1.6481, "step": 12454 }, { "epoch": 2.9868712906900066, "grad_norm": 0.8014418482780457, "learning_rate": 4.6366616036453753e-07, "loss": 1.6348, "step": 12456 }, { "epoch": 2.9873508782447096, "grad_norm": 0.8039677739143372, "learning_rate": 4.4767767207610524e-07, "loss": 1.616, "step": 12458 }, { "epoch": 2.9878304657994126, "grad_norm": 0.7603790760040283, "learning_rate": 4.316891837876729e-07, "loss": 1.586, "step": 12460 }, { "epoch": 2.9883100533541156, "grad_norm": 0.6727774143218994, "learning_rate": 4.157006954992406e-07, "loss": 1.5953, "step": 12462 }, { "epoch": 2.988789640908818, "grad_norm": 0.6439036726951599, "learning_rate": 3.997122072108082e-07, "loss": 1.6137, "step": 12464 }, { "epoch": 2.989269228463521, "grad_norm": 0.7651889324188232, "learning_rate": 3.8372371892237593e-07, "loss": 1.6063, "step": 12466 }, { "epoch": 2.989748816018224, "grad_norm": 0.7807048559188843, "learning_rate": 3.677352306339436e-07, "loss": 1.6524, "step": 12468 }, { "epoch": 2.990228403572927, "grad_norm": 0.6553511619567871, "learning_rate": 
3.5174674234551125e-07, "loss": 1.6162, "step": 12470 }, { "epoch": 2.99070799112763, "grad_norm": 0.6660603880882263, "learning_rate": 3.357582540570789e-07, "loss": 1.6107, "step": 12472 }, { "epoch": 2.991187578682333, "grad_norm": 0.6653023362159729, "learning_rate": 3.197697657686466e-07, "loss": 1.6043, "step": 12474 }, { "epoch": 2.991667166237036, "grad_norm": 0.7384434938430786, "learning_rate": 3.037812774802143e-07, "loss": 1.6184, "step": 12476 }, { "epoch": 2.9921467537917392, "grad_norm": 0.6814661026000977, "learning_rate": 2.8779278919178193e-07, "loss": 1.6718, "step": 12478 }, { "epoch": 2.9926263413464422, "grad_norm": 0.6913767457008362, "learning_rate": 2.718043009033496e-07, "loss": 1.6621, "step": 12480 }, { "epoch": 2.9931059289011452, "grad_norm": 0.7228371500968933, "learning_rate": 2.558158126149173e-07, "loss": 1.6581, "step": 12482 }, { "epoch": 2.993585516455848, "grad_norm": 0.8075593113899231, "learning_rate": 2.3982732432648496e-07, "loss": 1.5761, "step": 12484 }, { "epoch": 2.994065104010551, "grad_norm": 0.7104683518409729, "learning_rate": 2.2383883603805262e-07, "loss": 1.6696, "step": 12486 }, { "epoch": 2.994544691565254, "grad_norm": 0.726905882358551, "learning_rate": 2.078503477496203e-07, "loss": 1.6234, "step": 12488 }, { "epoch": 2.995024279119957, "grad_norm": 0.7258397340774536, "learning_rate": 1.9186185946118797e-07, "loss": 1.6201, "step": 12490 }, { "epoch": 2.99550386667466, "grad_norm": 0.738516092300415, "learning_rate": 1.7587337117275562e-07, "loss": 1.6019, "step": 12492 }, { "epoch": 2.995983454229363, "grad_norm": 0.6959344148635864, "learning_rate": 1.598848828843233e-07, "loss": 1.6498, "step": 12494 }, { "epoch": 2.996463041784066, "grad_norm": 0.674135684967041, "learning_rate": 1.4389639459589097e-07, "loss": 1.593, "step": 12496 }, { "epoch": 2.9969426293387684, "grad_norm": 0.665223240852356, "learning_rate": 1.2790790630745865e-07, "loss": 1.6387, "step": 12498 }, { "epoch": 2.9974222168934714, 
"grad_norm": 0.7403987050056458, "learning_rate": 1.1191941801902631e-07, "loss": 1.6524, "step": 12500 }, { "epoch": 2.9979018044481744, "grad_norm": 0.7446317672729492, "learning_rate": 9.593092973059398e-08, "loss": 1.5833, "step": 12502 }, { "epoch": 2.9983813920028775, "grad_norm": 0.650127649307251, "learning_rate": 7.994244144216165e-08, "loss": 1.6108, "step": 12504 }, { "epoch": 2.9988609795575805, "grad_norm": 0.7468810677528381, "learning_rate": 6.395395315372933e-08, "loss": 1.566, "step": 12506 }, { "epoch": 2.9993405671122835, "grad_norm": 0.7813684940338135, "learning_rate": 4.796546486529699e-08, "loss": 1.5821, "step": 12508 }, { "epoch": 2.9998201546669865, "grad_norm": 0.6694828867912292, "learning_rate": 3.197697657686466e-08, "loss": 1.633, "step": 12510 }, { "epoch": 2.9998201546669865, "step": 12510, "total_flos": 6.254153730663383e+17, "train_loss": 1.7348804631298014, "train_runtime": 40313.2591, "train_samples_per_second": 79.447, "train_steps_per_second": 0.31 }, { "epoch": 2.9998201546669865, "eval_loss": 1.7033987045288086, "eval_runtime": 331.1659, "eval_samples_per_second": 402.967, "eval_steps_per_second": 12.595, "step": 12510 }, { "epoch": 2.9998201546669865, "eval_loss": 1.7047816514968872, "eval_runtime": 331.0722, "eval_samples_per_second": 403.081, "eval_steps_per_second": 12.598, "step": 12510 } ], "logging_steps": 2, "max_steps": 12510, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.254153730663383e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }