{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996395746981438, "eval_steps": 500, "global_step": 1849, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005406379527842854, "grad_norm": 5.835691452026367, "learning_rate": 5.405405405405406e-08, "loss": 0.8753, "step": 1 }, { "epoch": 0.001081275905568571, "grad_norm": 5.94018030166626, "learning_rate": 1.0810810810810812e-07, "loss": 0.892, "step": 2 }, { "epoch": 0.0016219138583528565, "grad_norm": 6.048556327819824, "learning_rate": 1.6216216216216218e-07, "loss": 0.8786, "step": 3 }, { "epoch": 0.002162551811137142, "grad_norm": 5.985991477966309, "learning_rate": 2.1621621621621625e-07, "loss": 0.8707, "step": 4 }, { "epoch": 0.0027031897639214274, "grad_norm": 5.788306713104248, "learning_rate": 2.702702702702703e-07, "loss": 0.8625, "step": 5 }, { "epoch": 0.003243827716705713, "grad_norm": 6.294447422027588, "learning_rate": 3.2432432432432436e-07, "loss": 0.9267, "step": 6 }, { "epoch": 0.003784465669489998, "grad_norm": 6.075927734375, "learning_rate": 3.7837837837837843e-07, "loss": 0.9002, "step": 7 }, { "epoch": 0.004325103622274284, "grad_norm": 6.027771472930908, "learning_rate": 4.324324324324325e-07, "loss": 0.8858, "step": 8 }, { "epoch": 0.004865741575058569, "grad_norm": 5.661100387573242, "learning_rate": 4.864864864864865e-07, "loss": 0.8382, "step": 9 }, { "epoch": 0.005406379527842855, "grad_norm": 5.359391689300537, "learning_rate": 5.405405405405406e-07, "loss": 0.8345, "step": 10 }, { "epoch": 0.00594701748062714, "grad_norm": 5.4135613441467285, "learning_rate": 5.945945945945947e-07, "loss": 0.8613, "step": 11 }, { "epoch": 0.006487655433411426, "grad_norm": 5.188356876373291, "learning_rate": 6.486486486486487e-07, "loss": 0.8316, "step": 12 }, { "epoch": 0.0070282933861957105, "grad_norm": 5.179812908172607, "learning_rate": 7.027027027027028e-07, "loss": 0.8574, "step": 13 }, { "epoch": 0.007568931338979996, "grad_norm": 4.420322418212891, "learning_rate": 7.567567567567569e-07, "loss": 0.8381, "step": 14 }, { "epoch": 0.008109569291764282, "grad_norm": 4.343968868255615, "learning_rate": 8.108108108108109e-07, "loss": 0.8244, "step": 15 }, { "epoch": 0.008650207244548567, "grad_norm": 4.318638801574707, "learning_rate": 8.64864864864865e-07, "loss": 0.8635, "step": 16 }, { "epoch": 0.009190845197332853, "grad_norm": 4.086209774017334, "learning_rate": 9.189189189189191e-07, "loss": 0.8179, "step": 17 }, { "epoch": 0.009731483150117138, "grad_norm": 4.0021467208862305, "learning_rate": 9.72972972972973e-07, "loss": 0.7978, "step": 18 }, { "epoch": 0.010272121102901424, "grad_norm": 2.3173251152038574, "learning_rate": 1.027027027027027e-06, "loss": 0.7756, "step": 19 }, { "epoch": 0.01081275905568571, "grad_norm": 2.2132866382598877, "learning_rate": 1.0810810810810812e-06, "loss": 0.761, "step": 20 }, { "epoch": 0.011353397008469995, "grad_norm": 2.2799596786499023, "learning_rate": 1.1351351351351352e-06, "loss": 0.7687, "step": 21 }, { "epoch": 0.01189403496125428, "grad_norm": 2.0921261310577393, "learning_rate": 1.1891891891891893e-06, "loss": 0.7825, "step": 22 }, { "epoch": 0.012434672914038566, "grad_norm": 1.862625002861023, "learning_rate": 1.2432432432432434e-06, "loss": 0.762, "step": 23 }, { "epoch": 0.012975310866822852, "grad_norm": 1.887032389640808, "learning_rate": 1.2972972972972974e-06, "loss": 0.7788, "step": 24 }, { "epoch": 0.013515948819607137, "grad_norm": 1.483746886253357, "learning_rate": 1.3513513513513515e-06, "loss": 0.7347, "step": 25 }, { "epoch": 0.014056586772391421, "grad_norm": 2.0825283527374268, "learning_rate": 1.4054054054054056e-06, "loss": 0.733, "step": 26 }, { "epoch": 0.014597224725175707, "grad_norm": 2.5244522094726562, "learning_rate": 1.4594594594594596e-06, "loss": 0.738, "step": 27 }, { "epoch": 0.015137862677959992, "grad_norm": 2.7919321060180664, "learning_rate": 1.5135135135135137e-06, "loss": 0.7233, "step": 28 }, { "epoch": 0.015678500630744278, "grad_norm": 2.6368916034698486, "learning_rate": 1.5675675675675678e-06, "loss": 0.7231, "step": 29 }, { "epoch": 0.016219138583528563, "grad_norm": 2.4845619201660156, "learning_rate": 1.6216216216216219e-06, "loss": 0.7093, "step": 30 }, { "epoch": 0.01675977653631285, "grad_norm": 2.3388469219207764, "learning_rate": 1.675675675675676e-06, "loss": 0.7384, "step": 31 }, { "epoch": 0.017300414489097134, "grad_norm": 2.0697882175445557, "learning_rate": 1.72972972972973e-06, "loss": 0.724, "step": 32 }, { "epoch": 0.01784105244188142, "grad_norm": 1.7929456233978271, "learning_rate": 1.783783783783784e-06, "loss": 0.7247, "step": 33 }, { "epoch": 0.018381690394665706, "grad_norm": 1.1819241046905518, "learning_rate": 1.8378378378378381e-06, "loss": 0.6814, "step": 34 }, { "epoch": 0.01892232834744999, "grad_norm": 0.9916073679924011, "learning_rate": 1.8918918918918922e-06, "loss": 0.7039, "step": 35 }, { "epoch": 0.019462966300234277, "grad_norm": 1.0272109508514404, "learning_rate": 1.945945945945946e-06, "loss": 0.6968, "step": 36 }, { "epoch": 0.020003604253018562, "grad_norm": 1.2608418464660645, "learning_rate": 2.0000000000000003e-06, "loss": 0.7107, "step": 37 }, { "epoch": 0.020544242205802848, "grad_norm": 1.2174832820892334, "learning_rate": 2.054054054054054e-06, "loss": 0.6771, "step": 38 }, { "epoch": 0.021084880158587133, "grad_norm": 1.0341556072235107, "learning_rate": 2.1081081081081085e-06, "loss": 0.6796, "step": 39 }, { "epoch": 0.02162551811137142, "grad_norm": 0.9752543568611145, "learning_rate": 2.1621621621621623e-06, "loss": 0.6607, "step": 40 }, { "epoch": 0.022166156064155704, "grad_norm": 0.8276258707046509, "learning_rate": 2.2162162162162166e-06, "loss": 0.6314, "step": 41 }, { "epoch": 0.02270679401693999, "grad_norm": 0.9401139616966248, "learning_rate": 2.2702702702702705e-06, "loss": 0.6805, "step": 42 }, { "epoch": 0.023247431969724276, "grad_norm": 0.7445069551467896, "learning_rate": 2.3243243243243247e-06, "loss": 0.6743, "step": 43 }, { "epoch": 0.02378806992250856, "grad_norm": 0.6744734644889832, "learning_rate": 2.3783783783783786e-06, "loss": 0.6249, "step": 44 }, { "epoch": 0.024328707875292847, "grad_norm": 0.8370453715324402, "learning_rate": 2.432432432432433e-06, "loss": 0.6669, "step": 45 }, { "epoch": 0.024869345828077132, "grad_norm": 0.7774664759635925, "learning_rate": 2.4864864864864867e-06, "loss": 0.6698, "step": 46 }, { "epoch": 0.025409983780861418, "grad_norm": 0.7434698343276978, "learning_rate": 2.540540540540541e-06, "loss": 0.6347, "step": 47 }, { "epoch": 0.025950621733645703, "grad_norm": 0.7597132325172424, "learning_rate": 2.594594594594595e-06, "loss": 0.6217, "step": 48 }, { "epoch": 0.02649125968642999, "grad_norm": 0.7307024598121643, "learning_rate": 2.648648648648649e-06, "loss": 0.6406, "step": 49 }, { "epoch": 0.027031897639214274, "grad_norm": 0.6316570043563843, "learning_rate": 2.702702702702703e-06, "loss": 0.5985, "step": 50 }, { "epoch": 0.02757253559199856, "grad_norm": 0.58868408203125, "learning_rate": 2.7567567567567573e-06, "loss": 0.6157, "step": 51 }, { "epoch": 0.028113173544782842, "grad_norm": 0.6674655675888062, "learning_rate": 2.810810810810811e-06, "loss": 0.6382, "step": 52 }, { "epoch": 0.028653811497567128, "grad_norm": 0.6769363880157471, "learning_rate": 2.8648648648648654e-06, "loss": 0.6284, "step": 53 }, { "epoch": 0.029194449450351413, "grad_norm": 0.5872988104820251, "learning_rate": 2.9189189189189193e-06, "loss": 0.6095, "step": 54 }, { "epoch": 0.0297350874031357, "grad_norm": 0.5742107033729553, "learning_rate": 2.9729729729729736e-06, "loss": 0.6609, "step": 55 }, { "epoch": 0.030275725355919984, "grad_norm": 0.5116291046142578, "learning_rate": 3.0270270270270274e-06, "loss": 0.5959, "step": 56 }, { "epoch": 0.03081636330870427, "grad_norm": 0.4740088880062103, "learning_rate": 3.0810810810810817e-06, "loss": 0.5925, "step": 57 }, { "epoch": 0.031357001261488555, "grad_norm": 0.536938488483429, "learning_rate": 3.1351351351351356e-06, "loss": 0.6281, "step": 58 }, { "epoch": 0.03189763921427284, "grad_norm": 0.5394325852394104, "learning_rate": 3.1891891891891894e-06, "loss": 0.604, "step": 59 }, { "epoch": 0.03243827716705713, "grad_norm": 0.5232502222061157, "learning_rate": 3.2432432432432437e-06, "loss": 0.5988, "step": 60 }, { "epoch": 0.03297891511984141, "grad_norm": 0.5132797360420227, "learning_rate": 3.2972972972972976e-06, "loss": 0.6177, "step": 61 }, { "epoch": 0.0335195530726257, "grad_norm": 0.46831756830215454, "learning_rate": 3.351351351351352e-06, "loss": 0.6274, "step": 62 }, { "epoch": 0.03406019102540998, "grad_norm": 0.4727836549282074, "learning_rate": 3.4054054054054057e-06, "loss": 0.6218, "step": 63 }, { "epoch": 0.03460082897819427, "grad_norm": 0.4939032196998596, "learning_rate": 3.45945945945946e-06, "loss": 0.5846, "step": 64 }, { "epoch": 0.035141466930978554, "grad_norm": 0.46765533089637756, "learning_rate": 3.513513513513514e-06, "loss": 0.6012, "step": 65 }, { "epoch": 0.03568210488376284, "grad_norm": 0.41743600368499756, "learning_rate": 3.567567567567568e-06, "loss": 0.6112, "step": 66 }, { "epoch": 0.036222742836547125, "grad_norm": 0.46013137698173523, "learning_rate": 3.621621621621622e-06, "loss": 0.5885, "step": 67 }, { "epoch": 0.03676338078933141, "grad_norm": 0.4405370354652405, "learning_rate": 3.6756756756756763e-06, "loss": 0.6377, "step": 68 }, { "epoch": 0.0373040187421157, "grad_norm": 0.41007307171821594, "learning_rate": 3.72972972972973e-06, "loss": 0.6025, "step": 69 }, { "epoch": 0.03784465669489998, "grad_norm": 0.3900786340236664, "learning_rate": 3.7837837837837844e-06, "loss": 0.5646, "step": 70 }, { "epoch": 0.03838529464768427, "grad_norm": 0.45452040433883667, "learning_rate": 3.837837837837838e-06, "loss": 0.5868, "step": 71 }, { "epoch": 0.03892593260046855, "grad_norm": 0.4392114579677582, "learning_rate": 3.891891891891892e-06, "loss": 0.5681, "step": 72 }, { "epoch": 0.03946657055325284, "grad_norm": 0.4254467189311981, "learning_rate": 3.945945945945947e-06, "loss": 0.6258, "step": 73 }, { "epoch": 0.040007208506037124, "grad_norm": 0.44197121262550354, "learning_rate": 4.000000000000001e-06, "loss": 0.6088, "step": 74 }, { "epoch": 0.04054784645882141, "grad_norm": 0.4894886910915375, "learning_rate": 4.0540540540540545e-06, "loss": 0.596, "step": 75 }, { "epoch": 0.041088484411605695, "grad_norm": 0.4831041395664215, "learning_rate": 4.108108108108108e-06, "loss": 0.5854, "step": 76 }, { "epoch": 0.04162912236438998, "grad_norm": 0.4371893107891083, "learning_rate": 4.162162162162163e-06, "loss": 0.5775, "step": 77 }, { "epoch": 0.04216976031717427, "grad_norm": 0.4326671361923218, "learning_rate": 4.216216216216217e-06, "loss": 0.5871, "step": 78 }, { "epoch": 0.04271039826995855, "grad_norm": 0.4272334575653076, "learning_rate": 4.270270270270271e-06, "loss": 0.5637, "step": 79 }, { "epoch": 0.04325103622274284, "grad_norm": 0.4943785071372986, "learning_rate": 4.324324324324325e-06, "loss": 0.5609, "step": 80 }, { "epoch": 0.04379167417552712, "grad_norm": 0.3759538531303406, "learning_rate": 4.378378378378379e-06, "loss": 0.5881, "step": 81 }, { "epoch": 0.04433231212831141, "grad_norm": 0.4269590377807617, "learning_rate": 4.432432432432433e-06, "loss": 0.5822, "step": 82 }, { "epoch": 0.044872950081095694, "grad_norm": 0.4101639986038208, "learning_rate": 4.486486486486487e-06, "loss": 0.5925, "step": 83 }, { "epoch": 0.04541358803387998, "grad_norm": 0.40165165066719055, "learning_rate": 4.540540540540541e-06, "loss": 0.5304, "step": 84 }, { "epoch": 0.045954225986664266, "grad_norm": 0.3949192762374878, "learning_rate": 4.594594594594596e-06, "loss": 0.5503, "step": 85 }, { "epoch": 0.04649486393944855, "grad_norm": 0.4039553999900818, "learning_rate": 4.6486486486486495e-06, "loss": 0.5634, "step": 86 }, { "epoch": 0.04703550189223284, "grad_norm": 0.4268937110900879, "learning_rate": 4.702702702702703e-06, "loss": 0.5854, "step": 87 }, { "epoch": 0.04757613984501712, "grad_norm": 0.420392245054245, "learning_rate": 4.756756756756757e-06, "loss": 0.5554, "step": 88 }, { "epoch": 0.04811677779780141, "grad_norm": 0.38791415095329285, "learning_rate": 4.810810810810811e-06, "loss": 0.5662, "step": 89 }, { "epoch": 0.04865741575058569, "grad_norm": 0.38142862915992737, "learning_rate": 4.864864864864866e-06, "loss": 0.5621, "step": 90 }, { "epoch": 0.04919805370336998, "grad_norm": 0.4095216691493988, "learning_rate": 4.91891891891892e-06, "loss": 0.5567, "step": 91 }, { "epoch": 0.049738691656154264, "grad_norm": 0.40666601061820984, "learning_rate": 4.9729729729729735e-06, "loss": 0.6048, "step": 92 }, { "epoch": 0.05027932960893855, "grad_norm": 0.37900641560554504, "learning_rate": 5.027027027027027e-06, "loss": 0.57, "step": 93 }, { "epoch": 0.050819967561722836, "grad_norm": 0.4188796579837799, "learning_rate": 5.081081081081082e-06, "loss": 0.5547, "step": 94 }, { "epoch": 0.05136060551450712, "grad_norm": 0.3917129933834076, "learning_rate": 5.135135135135135e-06, "loss": 0.5994, "step": 95 }, { "epoch": 0.05190124346729141, "grad_norm": 0.3745051920413971, "learning_rate": 5.18918918918919e-06, "loss": 0.5645, "step": 96 }, { "epoch": 0.05244188142007569, "grad_norm": 0.4055512249469757, "learning_rate": 5.243243243243244e-06, "loss": 0.5516, "step": 97 }, { "epoch": 0.05298251937285998, "grad_norm": 0.40357547998428345, "learning_rate": 5.297297297297298e-06, "loss": 0.5414, "step": 98 }, { "epoch": 0.05352315732564426, "grad_norm": 0.37698817253112793, "learning_rate": 5.351351351351351e-06, "loss": 0.5494, "step": 99 }, { "epoch": 0.05406379527842855, "grad_norm": 0.45397427678108215, "learning_rate": 5.405405405405406e-06, "loss": 0.5574, "step": 100 }, { "epoch": 0.054604433231212834, "grad_norm": 0.4567568600177765, "learning_rate": 5.45945945945946e-06, "loss": 0.578, "step": 101 }, { "epoch": 0.05514507118399712, "grad_norm": 0.4371408224105835, "learning_rate": 5.513513513513515e-06, "loss": 0.5614, "step": 102 }, { "epoch": 0.0556857091367814, "grad_norm": 0.4275542199611664, "learning_rate": 5.567567567567568e-06, "loss": 0.5884, "step": 103 }, { "epoch": 0.056226347089565684, "grad_norm": 0.4597940742969513, "learning_rate": 5.621621621621622e-06, "loss": 0.5431, "step": 104 }, { "epoch": 0.05676698504234997, "grad_norm": 0.47299715876579285, "learning_rate": 5.675675675675676e-06, "loss": 0.5648, "step": 105 }, { "epoch": 0.057307622995134255, "grad_norm": 0.435273140668869, "learning_rate": 5.729729729729731e-06, "loss": 0.5281, "step": 106 }, { "epoch": 0.05784826094791854, "grad_norm": 0.5185096859931946, "learning_rate": 5.783783783783784e-06, "loss": 0.5975, "step": 107 }, { "epoch": 0.058388898900702826, "grad_norm": 0.4125944674015045, "learning_rate": 5.837837837837839e-06, "loss": 0.5627, "step": 108 }, { "epoch": 0.05892953685348711, "grad_norm": 0.4223628640174866, "learning_rate": 5.8918918918918924e-06, "loss": 0.5734, "step": 109 }, { "epoch": 0.0594701748062714, "grad_norm": 0.4168817698955536, "learning_rate": 5.945945945945947e-06, "loss": 0.5575, "step": 110 }, { "epoch": 0.06001081275905568, "grad_norm": 0.4130726754665375, "learning_rate": 6e-06, "loss": 0.5319, "step": 111 }, { "epoch": 0.06055145071183997, "grad_norm": 0.4145527482032776, "learning_rate": 6.054054054054055e-06, "loss": 0.5544, "step": 112 }, { "epoch": 0.061092088664624254, "grad_norm": 0.47130388021469116, "learning_rate": 6.108108108108109e-06, "loss": 0.5454, "step": 113 }, { "epoch": 0.06163272661740854, "grad_norm": 0.4709829092025757, "learning_rate": 6.162162162162163e-06, "loss": 0.5477, "step": 114 }, { "epoch": 0.062173364570192825, "grad_norm": 0.4437786340713501, "learning_rate": 6.2162162162162164e-06, "loss": 0.5337, "step": 115 }, { "epoch": 0.06271400252297711, "grad_norm": 0.44709983468055725, "learning_rate": 6.270270270270271e-06, "loss": 0.5584, "step": 116 }, { "epoch": 0.0632546404757614, "grad_norm": 0.438896119594574, "learning_rate": 6.324324324324325e-06, "loss": 0.5374, "step": 117 }, { "epoch": 0.06379527842854568, "grad_norm": 0.4727858901023865, "learning_rate": 6.378378378378379e-06, "loss": 0.5907, "step": 118 }, { "epoch": 0.06433591638132997, "grad_norm": 0.5302643179893494, "learning_rate": 6.432432432432433e-06, "loss": 0.5583, "step": 119 }, { "epoch": 0.06487655433411425, "grad_norm": 0.49883052706718445, "learning_rate": 6.486486486486487e-06, "loss": 0.5728, "step": 120 }, { "epoch": 0.06541719228689855, "grad_norm": 0.45870596170425415, "learning_rate": 6.540540540540541e-06, "loss": 0.5372, "step": 121 }, { "epoch": 0.06595783023968282, "grad_norm": 0.5096645951271057, "learning_rate": 6.594594594594595e-06, "loss": 0.5361, "step": 122 }, { "epoch": 0.06649846819246712, "grad_norm": 0.5785489678382874, "learning_rate": 6.648648648648649e-06, "loss": 0.5449, "step": 123 }, { "epoch": 0.0670391061452514, "grad_norm": 0.48732200264930725, "learning_rate": 6.702702702702704e-06, "loss": 0.5647, "step": 124 }, { "epoch": 0.06757974409803569, "grad_norm": 0.5112847089767456, "learning_rate": 6.7567567567567575e-06, "loss": 0.5576, "step": 125 }, { "epoch": 0.06812038205081997, "grad_norm": 0.5451067686080933, "learning_rate": 6.810810810810811e-06, "loss": 0.5321, "step": 126 }, { "epoch": 0.06866102000360426, "grad_norm": 0.4540032744407654, "learning_rate": 6.864864864864865e-06, "loss": 0.5252, "step": 127 }, { "epoch": 0.06920165795638854, "grad_norm": 0.48304489254951477, "learning_rate": 6.91891891891892e-06, "loss": 0.5905, "step": 128 }, { "epoch": 0.06974229590917283, "grad_norm": 0.44558167457580566, "learning_rate": 6.972972972972973e-06, "loss": 0.569, "step": 129 }, { "epoch": 0.07028293386195711, "grad_norm": 0.4131074547767639, "learning_rate": 7.027027027027028e-06, "loss": 0.5453, "step": 130 }, { "epoch": 0.0708235718147414, "grad_norm": 0.3999936878681183, "learning_rate": 7.0810810810810815e-06, "loss": 0.5302, "step": 131 }, { "epoch": 0.07136420976752568, "grad_norm": 0.43754464387893677, "learning_rate": 7.135135135135136e-06, "loss": 0.5523, "step": 132 }, { "epoch": 0.07190484772030997, "grad_norm": 0.4095590114593506, "learning_rate": 7.189189189189189e-06, "loss": 0.5603, "step": 133 }, { "epoch": 0.07244548567309425, "grad_norm": 0.4144512712955475, "learning_rate": 7.243243243243244e-06, "loss": 0.5747, "step": 134 }, { "epoch": 0.07298612362587854, "grad_norm": 0.41802504658699036, "learning_rate": 7.297297297297298e-06, "loss": 0.5991, "step": 135 }, { "epoch": 0.07352676157866282, "grad_norm": 0.42772114276885986, "learning_rate": 7.3513513513513525e-06, "loss": 0.5364, "step": 136 }, { "epoch": 0.07406739953144711, "grad_norm": 0.4586148262023926, "learning_rate": 7.4054054054054055e-06, "loss": 0.5732, "step": 137 }, { "epoch": 0.0746080374842314, "grad_norm": 0.4603786766529083, "learning_rate": 7.45945945945946e-06, "loss": 0.5481, "step": 138 }, { "epoch": 0.07514867543701567, "grad_norm": 0.4603879153728485, "learning_rate": 7.513513513513514e-06, "loss": 0.5536, "step": 139 }, { "epoch": 0.07568931338979996, "grad_norm": 0.46576988697052, "learning_rate": 7.567567567567569e-06, "loss": 0.5667, "step": 140 }, { "epoch": 0.07622995134258424, "grad_norm": 0.4343358874320984, "learning_rate": 7.621621621621622e-06, "loss": 0.5316, "step": 141 }, { "epoch": 0.07677058929536854, "grad_norm": 0.5072187185287476, "learning_rate": 7.675675675675676e-06, "loss": 0.5577, "step": 142 }, { "epoch": 0.07731122724815281, "grad_norm": 0.3946337401866913, "learning_rate": 7.72972972972973e-06, "loss": 0.5349, "step": 143 }, { "epoch": 0.0778518652009371, "grad_norm": 0.4240807294845581, "learning_rate": 7.783783783783784e-06, "loss": 0.5337, "step": 144 }, { "epoch": 0.07839250315372139, "grad_norm": 0.4701116681098938, "learning_rate": 7.837837837837838e-06, "loss": 0.5159, "step": 145 }, { "epoch": 0.07893314110650568, "grad_norm": 0.5360553860664368, "learning_rate": 7.891891891891894e-06, "loss": 0.5657, "step": 146 }, { "epoch": 0.07947377905928996, "grad_norm": 0.46461114287376404, "learning_rate": 7.945945945945946e-06, "loss": 0.5631, "step": 147 }, { "epoch": 0.08001441701207425, "grad_norm": 0.5180739760398865, "learning_rate": 8.000000000000001e-06, "loss": 0.5461, "step": 148 }, { "epoch": 0.08055505496485853, "grad_norm": 0.4858033359050751, "learning_rate": 8.054054054054055e-06, "loss": 0.5493, "step": 149 }, { "epoch": 0.08109569291764282, "grad_norm": 0.4371548593044281, "learning_rate": 8.108108108108109e-06, "loss": 0.5284, "step": 150 }, { "epoch": 0.0816363308704271, "grad_norm": 0.46319711208343506, "learning_rate": 8.162162162162163e-06, "loss": 0.5401, "step": 151 }, { "epoch": 0.08217696882321139, "grad_norm": 0.563747763633728, "learning_rate": 8.216216216216217e-06, "loss": 0.5532, "step": 152 }, { "epoch": 0.08271760677599567, "grad_norm": 0.45033544301986694, "learning_rate": 8.27027027027027e-06, "loss": 0.5396, "step": 153 }, { "epoch": 0.08325824472877996, "grad_norm": 0.5136594772338867, "learning_rate": 8.324324324324326e-06, "loss": 0.5382, "step": 154 }, { "epoch": 0.08379888268156424, "grad_norm": 0.5209782123565674, "learning_rate": 8.378378378378378e-06, "loss": 0.5493, "step": 155 }, { "epoch": 0.08433952063434853, "grad_norm": 0.5323936343193054, "learning_rate": 8.432432432432434e-06, "loss": 0.539, "step": 156 }, { "epoch": 0.08488015858713281, "grad_norm": 0.4908245801925659, "learning_rate": 8.486486486486488e-06, "loss": 0.5233, "step": 157 }, { "epoch": 0.0854207965399171, "grad_norm": 0.5659683346748352, "learning_rate": 8.540540540540542e-06, "loss": 0.5293, "step": 158 }, { "epoch": 0.08596143449270138, "grad_norm": 0.5987280011177063, "learning_rate": 8.594594594594595e-06, "loss": 0.5394, "step": 159 }, { "epoch": 0.08650207244548568, "grad_norm": 0.48582640290260315, "learning_rate": 8.64864864864865e-06, "loss": 0.5356, "step": 160 }, { "epoch": 0.08704271039826995, "grad_norm": 0.5271715521812439, "learning_rate": 8.702702702702703e-06, "loss": 0.493, "step": 161 }, { "epoch": 0.08758334835105425, "grad_norm": 0.668293833732605, "learning_rate": 8.756756756756759e-06, "loss": 0.4996, "step": 162 }, { "epoch": 0.08812398630383853, "grad_norm": 0.4799709916114807, "learning_rate": 8.810810810810811e-06, "loss": 0.5161, "step": 163 }, { "epoch": 0.08866462425662282, "grad_norm": 0.43464991450309753, "learning_rate": 8.864864864864866e-06, "loss": 0.5431, "step": 164 }, { "epoch": 0.0892052622094071, "grad_norm": 0.5938690900802612, "learning_rate": 8.91891891891892e-06, "loss": 0.5677, "step": 165 }, { "epoch": 0.08974590016219139, "grad_norm": 0.575090765953064, "learning_rate": 8.972972972972974e-06, "loss": 0.5506, "step": 166 }, { "epoch": 0.09028653811497567, "grad_norm": 0.44394662976264954, "learning_rate": 9.027027027027028e-06, "loss": 0.5415, "step": 167 }, { "epoch": 0.09082717606775996, "grad_norm": 0.45902568101882935, "learning_rate": 9.081081081081082e-06, "loss": 0.501, "step": 168 }, { "epoch": 0.09136781402054424, "grad_norm": 0.6057916283607483, "learning_rate": 9.135135135135136e-06, "loss": 0.5383, "step": 169 }, { "epoch": 0.09190845197332853, "grad_norm": 0.4782038927078247, "learning_rate": 9.189189189189191e-06, "loss": 0.5522, "step": 170 }, { "epoch": 0.09244908992611281, "grad_norm": 0.5645440816879272, "learning_rate": 9.243243243243243e-06, "loss": 0.5395, "step": 171 }, { "epoch": 0.0929897278788971, "grad_norm": 0.5265817046165466, "learning_rate": 9.297297297297299e-06, "loss": 0.5352, "step": 172 }, { "epoch": 0.09353036583168138, "grad_norm": 0.504550039768219, "learning_rate": 9.351351351351353e-06, "loss": 0.5538, "step": 173 }, { "epoch": 0.09407100378446567, "grad_norm": 0.5427402257919312, "learning_rate": 9.405405405405407e-06, "loss": 0.5307, "step": 174 }, { "epoch": 0.09461164173724995, "grad_norm": 0.5136035084724426, "learning_rate": 9.45945945945946e-06, "loss": 0.5412, "step": 175 }, { "epoch": 0.09515227969003424, "grad_norm": 0.5547382831573486, "learning_rate": 9.513513513513514e-06, "loss": 0.5452, "step": 176 }, { "epoch": 0.09569291764281852, "grad_norm": 0.4894575774669647, "learning_rate": 9.567567567567568e-06, "loss": 0.5645, "step": 177 }, { "epoch": 0.09623355559560282, "grad_norm": 0.521987795829773, "learning_rate": 9.621621621621622e-06, "loss": 0.5371, "step": 178 }, { "epoch": 0.0967741935483871, "grad_norm": 0.48432838916778564, "learning_rate": 9.675675675675676e-06, "loss": 0.4921, "step": 179 }, { "epoch": 0.09731483150117139, "grad_norm": 0.5000583529472351, "learning_rate": 9.729729729729732e-06, "loss": 0.5463, "step": 180 }, { "epoch": 0.09785546945395567, "grad_norm": 0.5252205729484558, "learning_rate": 9.783783783783785e-06, "loss": 0.5571, "step": 181 }, { "epoch": 0.09839610740673996, "grad_norm": 0.49048709869384766, "learning_rate": 9.83783783783784e-06, "loss": 0.5456, "step": 182 }, { "epoch": 0.09893674535952424, "grad_norm": 0.5005491375923157, "learning_rate": 9.891891891891893e-06, "loss": 0.5263, "step": 183 }, { "epoch": 0.09947738331230853, "grad_norm": 0.47705668210983276, "learning_rate": 9.945945945945947e-06, "loss": 0.5358, "step": 184 }, { "epoch": 0.10001802126509281, "grad_norm": 0.4576117694377899, "learning_rate": 1e-05, "loss": 0.5025, "step": 185 }, { "epoch": 0.1005586592178771, "grad_norm": 0.5019325613975525, "learning_rate": 9.999991088865861e-06, "loss": 0.5123, "step": 186 }, { "epoch": 0.10109929717066138, "grad_norm": 0.47290462255477905, "learning_rate": 9.999964355495207e-06, "loss": 0.5141, "step": 187 }, { "epoch": 0.10163993512344567, "grad_norm": 0.6163212656974792, "learning_rate": 9.999919799983327e-06, "loss": 0.5468, "step": 188 }, { "epoch": 0.10218057307622995, "grad_norm": 0.5143344402313232, "learning_rate": 9.99985742248904e-06, "loss": 0.5433, "step": 189 }, { "epoch": 0.10272121102901424, "grad_norm": 0.5387307405471802, "learning_rate": 9.999777223234682e-06, "loss": 0.5117, "step": 190 }, { "epoch": 0.10326184898179852, "grad_norm": 0.6247243881225586, "learning_rate": 9.999679202506126e-06, "loss": 0.5298, "step": 191 }, { "epoch": 0.10380248693458281, "grad_norm": 0.4776528477668762, "learning_rate": 9.999563360652757e-06, "loss": 0.5227, "step": 192 }, { "epoch": 0.10434312488736709, "grad_norm": 0.5106874108314514, "learning_rate": 9.999429698087491e-06, "loss": 0.5197, "step": 193 }, { "epoch": 0.10488376284015138, "grad_norm": 0.5041946768760681, "learning_rate": 9.99927821528676e-06, "loss": 0.5162, "step": 194 }, { "epoch": 0.10542440079293566, "grad_norm": 0.5362434387207031, "learning_rate": 9.999108912790521e-06, "loss": 0.5103, "step": 195 }, { "epoch": 0.10596503874571996, "grad_norm": 0.4504891335964203, "learning_rate": 9.99892179120224e-06, "loss": 0.5253, "step": 196 }, { "epoch": 0.10650567669850423, "grad_norm": 0.5226731896400452, "learning_rate": 9.99871685118891e-06, "loss": 0.5392, "step": 197 }, { "epoch": 0.10704631465128853, "grad_norm": 0.46790215373039246, "learning_rate": 9.998494093481022e-06, "loss": 0.4986, "step": 198 }, { "epoch": 0.1075869526040728, "grad_norm": 0.4980153441429138, "learning_rate": 9.998253518872592e-06, "loss": 0.5568, "step": 199 }, { "epoch": 0.1081275905568571, "grad_norm": 0.5192104578018188, "learning_rate": 9.997995128221131e-06, "loss": 0.564, "step": 200 }, { "epoch": 0.10866822850964138, "grad_norm": 0.48999977111816406, "learning_rate": 9.997718922447669e-06, "loss": 0.5315, "step": 201 }, { "epoch": 0.10920886646242567, "grad_norm": 0.5758963823318481, "learning_rate": 9.99742490253672e-06, "loss": 0.5428, "step": 202 }, { "epoch": 0.10974950441520995, "grad_norm": 0.4924217164516449, "learning_rate": 9.99711306953631e-06, "loss": 0.5164, "step": 203 }, { "epoch": 0.11029014236799424, "grad_norm": 0.5533744692802429, "learning_rate": 9.99678342455795e-06, "loss": 0.5286, "step": 204 }, { "epoch": 0.11083078032077852, "grad_norm": 0.6308779120445251, "learning_rate": 9.996435968776646e-06, "loss": 0.5488, "step": 205 }, { "epoch": 0.1113714182735628, "grad_norm": 0.49895963072776794, "learning_rate": 9.996070703430888e-06, "loss": 0.5228, "step": 206 }, { "epoch": 0.11191205622634709, "grad_norm": 0.5760313868522644, "learning_rate": 9.995687629822647e-06, "loss": 0.5259, "step": 207 }, { "epoch": 0.11245269417913137, "grad_norm": 0.5599102973937988, "learning_rate": 9.99528674931737e-06, "loss": 0.5191, "step": 208 }, { "epoch": 0.11299333213191566, "grad_norm": 0.7119779586791992, "learning_rate": 9.99486806334398e-06, "loss": 0.5246, "step": 209 }, { "epoch": 0.11353397008469994, "grad_norm": 0.5256053805351257, "learning_rate": 9.994431573394861e-06, "loss": 0.52, "step": 210 }, { "epoch": 0.11407460803748423, "grad_norm": 0.6746430397033691, "learning_rate": 9.993977281025862e-06, "loss": 0.5519, "step": 211 }, { "epoch": 0.11461524599026851, "grad_norm": 0.5388162732124329, "learning_rate": 9.993505187856289e-06, "loss": 0.5353, "step": 212 }, { "epoch": 0.1151558839430528, "grad_norm": 0.6086297631263733, "learning_rate": 9.993015295568893e-06, "loss": 0.509, "step": 213 }, { "epoch": 0.11569652189583708, "grad_norm": 0.5672162771224976, "learning_rate": 9.992507605909873e-06, "loss": 0.5215, "step": 214 }, { "epoch": 0.11623715984862137, "grad_norm": 0.6110180616378784, "learning_rate": 9.991982120688865e-06, "loss": 0.5482, "step": 215 }, { "epoch": 0.11677779780140565, "grad_norm": 0.5936868786811829, "learning_rate": 9.99143884177894e-06, "loss": 0.5016, "step": 216 }, { "epoch": 0.11731843575418995, "grad_norm": 0.5300704836845398, "learning_rate": 9.990877771116588e-06, "loss": 0.5477, "step": 217 }, { "epoch": 0.11785907370697422, "grad_norm": 0.5491863489151001, "learning_rate": 9.99029891070172e-06, "loss": 0.5311, "step": 218 }, { "epoch": 0.11839971165975852, "grad_norm": 0.590416669845581, "learning_rate": 9.989702262597656e-06, "loss": 0.5245, "step": 219 }, { "epoch": 0.1189403496125428, "grad_norm": 0.4550315737724304, "learning_rate": 9.989087828931121e-06, "loss": 0.513, "step": 220 }, { "epoch": 0.11948098756532709, "grad_norm": 0.6941486597061157, "learning_rate": 9.988455611892237e-06, "loss": 0.5153, "step": 221 }, { "epoch": 0.12002162551811137, "grad_norm": 0.5239282846450806, "learning_rate": 9.987805613734508e-06, "loss": 0.5329, "step": 222 }, { "epoch": 0.12056226347089566, "grad_norm": 0.5700839161872864, "learning_rate": 9.987137836774827e-06, "loss": 0.5224, "step": 223 }, { "epoch": 0.12110290142367994, "grad_norm": 0.6924121379852295, "learning_rate": 9.986452283393452e-06, "loss": 0.5256, "step": 224 }, { "epoch": 0.12164353937646423, "grad_norm": 0.5878159999847412, "learning_rate": 9.985748956034007e-06, "loss": 0.5569, "step": 225 }, { "epoch": 0.12218417732924851, "grad_norm": 0.5620668530464172, "learning_rate": 9.985027857203469e-06, "loss": 0.5307, "step": 226 }, { "epoch": 0.1227248152820328, "grad_norm": 0.6018951535224915, "learning_rate": 9.984288989472162e-06, "loss": 0.517, "step": 227 }, { "epoch": 0.12326545323481708, "grad_norm": 0.43050438165664673, "learning_rate": 9.983532355473744e-06, "loss": 0.5052, "step": 228 }, { "epoch": 0.12380609118760137, "grad_norm": 0.5617673993110657, "learning_rate": 9.982757957905204e-06, "loss": 0.5355, "step": 229 }, { "epoch": 0.12434672914038565, "grad_norm": 0.5786687135696411, "learning_rate": 9.981965799526846e-06, "loss": 0.523, "step": 230 }, { "epoch": 0.12488736709316994, "grad_norm": 0.42592427134513855, "learning_rate": 9.981155883162281e-06, "loss": 0.5169, "step": 231 }, { "epoch": 0.12542800504595422, "grad_norm": 0.4949047267436981, "learning_rate": 9.980328211698418e-06, "loss": 0.5368, "step": 232 }, { "epoch": 0.1259686429987385, "grad_norm": 0.49621617794036865, "learning_rate": 9.979482788085455e-06, "loss": 0.5401, "step": 233 }, { "epoch": 0.1265092809515228, "grad_norm": 0.5175781846046448, "learning_rate": 9.978619615336866e-06, "loss": 0.5167, "step": 234 }, { "epoch": 0.12704991890430709, "grad_norm": 0.5909814834594727, "learning_rate": 9.977738696529387e-06, "loss": 0.5132, "step": 235 }, { "epoch": 0.12759055685709136, "grad_norm": 0.43574607372283936, "learning_rate": 9.976840034803014e-06, "loss": 0.5355, "step": 236 }, { "epoch": 0.12813119480987564, "grad_norm": 0.4887847602367401, "learning_rate": 9.975923633360985e-06, "loss": 0.5362, "step": 237 }, { "epoch": 0.12867183276265995, "grad_norm": 0.5564954280853271, "learning_rate": 9.974989495469771e-06, "loss": 0.5436, "step": 238 }, { "epoch": 0.12921247071544423, "grad_norm": 0.4811561405658722, "learning_rate": 9.974037624459063e-06, "loss": 0.5005, "step": 239 }, { "epoch": 0.1297531086682285, "grad_norm": 0.5640829205513, "learning_rate": 9.973068023721761e-06, "loss": 0.4908, "step": 240 }, { "epoch": 0.13029374662101278, "grad_norm": 0.558860719203949, "learning_rate": 9.972080696713962e-06, "loss": 0.5369, "step": 241 }, { "epoch": 0.1308343845737971, "grad_norm": 0.5138370394706726, "learning_rate": 9.971075646954946e-06, "loss": 0.545, "step": 242 }, { "epoch": 0.13137502252658137, "grad_norm": 0.558138906955719, "learning_rate": 9.970052878027169e-06, "loss": 0.5029, "step": 243 }, { "epoch": 0.13191566047936565, "grad_norm": 0.5365882515907288, "learning_rate": 9.969012393576241e-06, "loss": 0.5164, "step": 244 }, { "epoch": 0.13245629843214993, "grad_norm": 0.6326988935470581, "learning_rate": 9.967954197310922e-06, "loss": 0.5089, "step": 245 }, { "epoch": 0.13299693638493423, "grad_norm": 0.5007409453392029, "learning_rate": 9.966878293003102e-06, "loss": 0.4909, "step": 246 }, { "epoch": 0.1335375743377185, "grad_norm": 0.5736434459686279, "learning_rate": 9.965784684487794e-06, "loss": 0.5446, "step": 247 }, { "epoch": 0.1340782122905028, "grad_norm": 0.4436694383621216, "learning_rate": 9.964673375663114e-06, "loss": 0.5096, "step": 248 }, { "epoch": 0.13461885024328707, "grad_norm": 0.4315429627895355, "learning_rate": 9.96354437049027e-06, "loss": 0.506, "step": 249 }, { "epoch": 0.13515948819607138, "grad_norm": 0.5133793354034424, "learning_rate": 9.962397672993552e-06, "loss": 0.5142, "step": 250 }, { "epoch": 0.13570012614885565, "grad_norm": 0.4259757697582245, "learning_rate": 9.961233287260305e-06, "loss": 0.5126, "step": 251 }, { "epoch": 0.13624076410163993, "grad_norm": 0.497167706489563, "learning_rate": 9.96005121744093e-06, "loss": 0.5345, "step": 252 }, { "epoch": 0.1367814020544242, "grad_norm": 0.5054571628570557, "learning_rate": 9.958851467748863e-06, "loss": 0.5147, "step": 253 }, { "epoch": 0.13732204000720852, "grad_norm": 0.48127052187919617, "learning_rate": 9.957634042460551e-06, "loss": 0.4974, "step": 254 }, { "epoch": 0.1378626779599928, "grad_norm": 0.5029913187026978, "learning_rate": 9.956398945915455e-06, "loss": 0.5036, "step": 255 }, { "epoch": 0.13840331591277708, "grad_norm": 0.4597805142402649, "learning_rate": 9.955146182516015e-06, "loss": 0.4872, "step": 256 }, { "epoch": 0.13894395386556135, "grad_norm": 0.45765337347984314, "learning_rate": 9.95387575672765e-06, "loss": 0.5019, "step": 257 }, { "epoch": 0.13948459181834566, "grad_norm": 0.5424937009811401, "learning_rate": 9.952587673078738e-06, "loss": 0.5358, "step": 258 }, { "epoch": 0.14002522977112994, "grad_norm": 0.6009652018547058, "learning_rate": 9.951281936160587e-06, "loss": 0.533, "step": 259 }, { "epoch": 0.14056586772391422, "grad_norm": 0.5036184191703796, "learning_rate": 9.949958550627436e-06, "loss": 0.4852, "step": 260 }, { "epoch": 0.1411065056766985, "grad_norm": 0.6518417000770569, "learning_rate": 9.948617521196438e-06, "loss": 0.538, "step": 261 }, { "epoch": 0.1416471436294828, "grad_norm": 0.5519372224807739, "learning_rate": 9.947258852647623e-06, "loss": 0.5216, "step": 262 }, { "epoch": 0.14218778158226708, "grad_norm": 0.6252304911613464, "learning_rate": 9.945882549823906e-06, "loss": 0.5188, "step": 263 }, { "epoch": 0.14272841953505136, "grad_norm": 0.5159186124801636, "learning_rate": 9.944488617631053e-06, "loss": 0.5178, "step": 264 }, { "epoch": 0.14326905748783564, "grad_norm": 0.5060036182403564, "learning_rate": 9.943077061037672e-06, "loss": 0.5083, "step": 265 }, { "epoch": 0.14380969544061994, "grad_norm": 0.579529345035553, "learning_rate": 9.94164788507519e-06, "loss": 0.4991, "step": 266 }, { "epoch": 0.14435033339340422, "grad_norm": 0.5514207482337952, "learning_rate": 9.940201094837838e-06, "loss": 0.5154, "step": 267 }, { "epoch": 0.1448909713461885, "grad_norm": 0.5269582867622375, "learning_rate": 9.938736695482636e-06, "loss": 0.4937, "step": 268 }, { "epoch": 0.14543160929897278, "grad_norm": 0.5171031951904297, "learning_rate": 9.937254692229363e-06, "loss": 0.5024, "step": 269 }, { "epoch": 0.1459722472517571, "grad_norm": 0.5391225814819336, "learning_rate": 9.935755090360554e-06, "loss": 0.4961, "step": 270 }, { "epoch": 0.14651288520454137, "grad_norm": 0.47390222549438477, "learning_rate": 9.93423789522147e-06, "loss": 0.4855, "step": 271 }, { "epoch": 0.14705352315732564, "grad_norm": 0.4980723261833191, "learning_rate": 9.932703112220084e-06, "loss": 0.5481, "step": 272 }, { "epoch": 0.14759416111010992, "grad_norm": 0.5040086507797241, "learning_rate": 9.931150746827055e-06, "loss": 0.52, "step": 273 }, { "epoch": 0.14813479906289423, "grad_norm": 0.5215423107147217, "learning_rate": 9.929580804575718e-06, "loss": 0.5113, "step": 274 }, { "epoch": 0.1486754370156785, "grad_norm": 0.44872352480888367, "learning_rate": 9.927993291062064e-06, "loss": 0.4919, "step": 275 }, { "epoch": 0.1492160749684628, "grad_norm": 0.5068681240081787, "learning_rate": 9.926388211944707e-06, "loss": 0.5243, "step": 276 }, { "epoch": 0.14975671292124706, "grad_norm": 0.5543198585510254, "learning_rate": 9.924765572944879e-06, "loss": 0.5083, "step": 277 }, { "epoch": 0.15029735087403134, "grad_norm": 0.4373819828033447, "learning_rate": 9.9231253798464e-06, "loss": 0.5, "step": 278 }, { "epoch": 0.15083798882681565, "grad_norm": 0.4874158501625061, "learning_rate": 9.921467638495666e-06, "loss": 0.5336, "step": 279 }, { "epoch": 0.15137862677959993, "grad_norm": 0.4793112874031067, "learning_rate": 9.919792354801614e-06, "loss": 0.5135, "step": 280 }, { "epoch": 0.1519192647323842, "grad_norm": 0.4696117341518402, "learning_rate": 9.91809953473572e-06, "loss": 0.5018, "step": 281 }, { "epoch": 0.15245990268516849, "grad_norm": 0.5002115964889526, "learning_rate": 9.916389184331957e-06, "loss": 0.5332, "step": 282 }, { "epoch": 0.1530005406379528, "grad_norm": 0.4873870015144348, "learning_rate": 9.914661309686796e-06, "loss": 0.5238, "step": 283 }, { "epoch": 0.15354117859073707, "grad_norm": 0.5112310647964478, "learning_rate": 9.912915916959162e-06, "loss": 0.5145, "step": 284 }, { "epoch": 0.15408181654352135, "grad_norm": 0.5636197328567505, "learning_rate": 9.911153012370427e-06, "loss": 0.5095, "step": 285 }, { "epoch": 0.15462245449630563, "grad_norm": 0.4921044409275055, "learning_rate": 9.909372602204385e-06, "loss": 0.5083, "step": 286 }, { "epoch": 0.15516309244908993, "grad_norm": 0.5235900282859802, "learning_rate": 9.907574692807223e-06, "loss": 0.5126, "step": 287 }, { "epoch": 0.1557037304018742, "grad_norm": 0.5021162033081055, "learning_rate": 9.905759290587506e-06, "loss": 0.5067, "step": 288 }, { "epoch": 0.1562443683546585, "grad_norm": 0.5399244427680969, "learning_rate": 9.903926402016153e-06, "loss": 0.5082, "step": 289 }, { "epoch": 0.15678500630744277, "grad_norm": 0.48548075556755066, "learning_rate": 9.902076033626409e-06, "loss": 0.5091, "step": 290 }, { "epoch": 0.15732564426022708, "grad_norm": 0.5076701045036316, "learning_rate": 9.900208192013825e-06, "loss": 0.5002, "step": 291 }, { "epoch": 0.15786628221301136, "grad_norm": 0.5822921991348267, "learning_rate": 9.898322883836239e-06, "loss": 0.5074, "step": 292 }, { "epoch": 0.15840692016579563, "grad_norm": 0.4363133907318115, "learning_rate": 9.896420115813741e-06, "loss": 0.5065, "step": 293 }, { "epoch": 0.1589475581185799, "grad_norm": 0.47977545857429504, "learning_rate": 9.894499894728665e-06, "loss": 0.477, "step": 294 }, { "epoch": 0.15948819607136422, "grad_norm": 0.5231459736824036, "learning_rate": 9.892562227425541e-06, "loss": 0.5164, "step": 295 }, { "epoch": 0.1600288340241485, "grad_norm": 0.4468756318092346, "learning_rate": 9.890607120811104e-06, "loss": 0.4966, "step": 296 }, { "epoch": 0.16056947197693278, "grad_norm": 0.5080546736717224, "learning_rate": 9.888634581854235e-06, "loss": 0.4717, "step": 297 }, { "epoch": 0.16111010992971705, "grad_norm": 0.6117534637451172, "learning_rate": 9.88664461758596e-06, "loss": 0.5435, "step": 298 }, { "epoch": 0.16165074788250136, "grad_norm": 0.44875776767730713, "learning_rate": 9.884637235099414e-06, "loss": 0.5162, "step": 299 }, { "epoch": 0.16219138583528564, "grad_norm": 0.5946277379989624, "learning_rate": 9.882612441549817e-06, "loss": 0.5086, "step": 300 }, { "epoch": 0.16273202378806992, "grad_norm": 0.6129415035247803, "learning_rate": 9.880570244154455e-06, "loss": 0.5252, "step": 301 }, { "epoch": 0.1632726617408542, "grad_norm": 0.5008973479270935, "learning_rate": 9.878510650192644e-06, "loss": 0.5181, "step": 302 }, { "epoch": 0.1638132996936385, "grad_norm": 0.6963375210762024, "learning_rate": 9.876433667005711e-06, "loss": 0.5238, "step": 303 }, { "epoch": 0.16435393764642278, "grad_norm": 0.4780685305595398, "learning_rate": 9.874339301996968e-06, "loss": 0.5107, "step": 304 }, { "epoch": 0.16489457559920706, "grad_norm": 0.47462719678878784, "learning_rate": 9.87222756263168e-06, "loss": 0.5003, "step": 305 }, { "epoch": 0.16543521355199134, "grad_norm": 0.6904825568199158, "learning_rate": 9.870098456437045e-06, "loss": 0.5256, "step": 306 }, { "epoch": 0.16597585150477565, "grad_norm": 0.4498540759086609, "learning_rate": 9.867951991002162e-06, "loss": 0.5088, "step": 307 }, { "epoch": 0.16651648945755992, "grad_norm": 0.5996077656745911, "learning_rate": 9.865788173978011e-06, "loss": 0.5045, "step": 308 }, { "epoch": 0.1670571274103442, "grad_norm": 0.5990262627601624, "learning_rate": 9.863607013077414e-06, "loss": 0.499, "step": 309 }, { "epoch": 0.16759776536312848, "grad_norm": 0.456752210855484, "learning_rate": 9.86140851607502e-06, "loss": 0.5118, "step": 310 }, { "epoch": 0.1681384033159128, "grad_norm": 0.5640468001365662, "learning_rate": 9.85919269080727e-06, "loss": 0.5099, "step": 311 }, { "epoch": 0.16867904126869707, "grad_norm": 0.4840059280395508, "learning_rate": 9.856959545172369e-06, "loss": 0.5047, "step": 312 }, { "epoch": 0.16921967922148134, "grad_norm": 0.4675785005092621, "learning_rate": 9.854709087130261e-06, "loss": 0.5176, "step": 313 }, { "epoch": 0.16976031717426562, "grad_norm": 0.48285573720932007, "learning_rate": 9.852441324702599e-06, "loss": 0.515, "step": 314 }, { "epoch": 0.17030095512704993, "grad_norm": 0.5241273045539856, "learning_rate": 9.850156265972722e-06, "loss": 0.5132, "step": 315 }, { "epoch": 0.1708415930798342, "grad_norm": 0.5342344641685486, "learning_rate": 9.847853919085608e-06, "loss": 0.5348, "step": 316 }, { "epoch": 0.1713822310326185, "grad_norm": 0.5915160179138184, "learning_rate": 9.845534292247872e-06, "loss": 0.4903, "step": 317 }, { "epoch": 0.17192286898540277, "grad_norm": 0.5160459280014038, "learning_rate": 9.843197393727713e-06, "loss": 0.5048, "step": 318 }, { "epoch": 0.17246350693818707, "grad_norm": 0.4898647665977478, "learning_rate": 9.8408432318549e-06, "loss": 0.5076, "step": 319 }, { "epoch": 0.17300414489097135, "grad_norm": 0.4921559989452362, "learning_rate": 9.838471815020731e-06, "loss": 0.5231, "step": 320 }, { "epoch": 0.17354478284375563, "grad_norm": 0.5118461847305298, "learning_rate": 9.836083151678014e-06, "loss": 0.5057, "step": 321 }, { "epoch": 0.1740854207965399, "grad_norm": 0.48118114471435547, "learning_rate": 9.833677250341027e-06, "loss": 0.5581, "step": 322 }, { "epoch": 0.17462605874932421, "grad_norm": 0.4235428273677826, "learning_rate": 9.831254119585497e-06, "loss": 0.4938, "step": 323 }, { "epoch": 0.1751666967021085, "grad_norm": 0.49432626366615295, "learning_rate": 9.828813768048555e-06, "loss": 0.503, "step": 324 }, { "epoch": 0.17570733465489277, "grad_norm": 0.4676252007484436, "learning_rate": 9.826356204428726e-06, "loss": 0.5017, "step": 325 }, { "epoch": 0.17624797260767705, "grad_norm": 0.48674294352531433, "learning_rate": 9.823881437485882e-06, "loss": 0.5122, "step": 326 }, { "epoch": 0.17678861056046136, "grad_norm": 0.5041030645370483, "learning_rate": 9.821389476041212e-06, "loss": 0.5014, "step": 327 }, { "epoch": 0.17732924851324564, "grad_norm": 0.5799689888954163, "learning_rate": 9.8188803289772e-06, "loss": 0.5245, "step": 328 }, { "epoch": 0.17786988646602991, "grad_norm": 0.5120707750320435, "learning_rate": 9.816354005237583e-06, "loss": 0.4898, "step": 329 }, { "epoch": 0.1784105244188142, "grad_norm": 0.5724909901618958, "learning_rate": 9.813810513827324e-06, "loss": 0.5398, "step": 330 }, { "epoch": 0.1789511623715985, "grad_norm": 0.6064598560333252, "learning_rate": 9.811249863812581e-06, "loss": 0.4939, "step": 331 }, { "epoch": 0.17949180032438278, "grad_norm": 0.5473314523696899, "learning_rate": 9.808672064320672e-06, "loss": 0.5113, "step": 332 }, { "epoch": 0.18003243827716706, "grad_norm": 0.45414504408836365, "learning_rate": 9.806077124540045e-06, "loss": 0.4815, "step": 333 }, { "epoch": 0.18057307622995133, "grad_norm": 0.48928454518318176, "learning_rate": 9.803465053720242e-06, "loss": 0.502, "step": 334 }, { "epoch": 0.18111371418273564, "grad_norm": 0.4803617000579834, "learning_rate": 9.800835861171869e-06, "loss": 0.4851, "step": 335 }, { "epoch": 0.18165435213551992, "grad_norm": 0.4396837055683136, "learning_rate": 9.798189556266559e-06, "loss": 0.4956, "step": 336 }, { "epoch": 0.1821949900883042, "grad_norm": 0.5100455284118652, "learning_rate": 9.795526148436945e-06, "loss": 0.5282, "step": 337 }, { "epoch": 0.18273562804108848, "grad_norm": 0.426038920879364, "learning_rate": 9.792845647176621e-06, "loss": 0.4697, "step": 338 }, { "epoch": 0.18327626599387278, "grad_norm": 0.42569661140441895, "learning_rate": 9.790148062040108e-06, "loss": 0.4948, "step": 339 }, { "epoch": 0.18381690394665706, "grad_norm": 0.5453207492828369, "learning_rate": 9.787433402642823e-06, "loss": 0.5067, "step": 340 }, { "epoch": 0.18435754189944134, "grad_norm": 0.5373483300209045, "learning_rate": 9.784701678661045e-06, "loss": 0.5227, "step": 341 }, { "epoch": 0.18489817985222562, "grad_norm": 0.5656136274337769, "learning_rate": 9.781952899831876e-06, "loss": 0.5163, "step": 342 }, { "epoch": 0.1854388178050099, "grad_norm": 0.5008209347724915, "learning_rate": 9.779187075953215e-06, "loss": 0.5129, "step": 343 }, { "epoch": 0.1859794557577942, "grad_norm": 0.583916962146759, "learning_rate": 9.776404216883709e-06, "loss": 0.503, "step": 344 }, { "epoch": 0.18652009371057848, "grad_norm": 0.5473840832710266, "learning_rate": 9.77360433254273e-06, "loss": 0.5407, "step": 345 }, { "epoch": 0.18706073166336276, "grad_norm": 0.5509775280952454, "learning_rate": 9.770787432910336e-06, "loss": 0.4781, "step": 346 }, { "epoch": 0.18760136961614704, "grad_norm": 0.6003471612930298, "learning_rate": 9.767953528027238e-06, "loss": 0.5375, "step": 347 }, { "epoch": 0.18814200756893135, "grad_norm": 0.43554067611694336, "learning_rate": 9.765102627994757e-06, "loss": 0.4962, "step": 348 }, { "epoch": 0.18868264552171563, "grad_norm": 0.6562443375587463, "learning_rate": 9.762234742974793e-06, "loss": 0.4952, "step": 349 }, { "epoch": 0.1892232834744999, "grad_norm": 0.5603171586990356, "learning_rate": 9.759349883189788e-06, "loss": 0.4783, "step": 350 }, { "epoch": 0.18976392142728418, "grad_norm": 0.5634536147117615, "learning_rate": 9.756448058922697e-06, "loss": 0.4884, "step": 351 }, { "epoch": 0.1903045593800685, "grad_norm": 0.4945487082004547, "learning_rate": 9.753529280516931e-06, "loss": 0.5012, "step": 352 }, { "epoch": 0.19084519733285277, "grad_norm": 0.588554322719574, "learning_rate": 9.750593558376347e-06, "loss": 0.5098, "step": 353 }, { "epoch": 0.19138583528563705, "grad_norm": 0.4570363163948059, "learning_rate": 9.747640902965185e-06, "loss": 0.4675, "step": 354 }, { "epoch": 0.19192647323842132, "grad_norm": 0.6398181915283203, "learning_rate": 9.74467132480805e-06, "loss": 0.5402, "step": 355 }, { "epoch": 0.19246711119120563, "grad_norm": 0.4467182159423828, "learning_rate": 9.741684834489866e-06, "loss": 0.4916, "step": 356 }, { "epoch": 0.1930077491439899, "grad_norm": 0.5539352297782898, "learning_rate": 9.738681442655842e-06, "loss": 0.4996, "step": 357 }, { "epoch": 0.1935483870967742, "grad_norm": 0.5078352689743042, "learning_rate": 9.735661160011424e-06, "loss": 0.5055, "step": 358 }, { "epoch": 0.19408902504955847, "grad_norm": 0.4970700442790985, "learning_rate": 9.732623997322274e-06, "loss": 0.5247, "step": 359 }, { "epoch": 0.19462966300234277, "grad_norm": 0.4787460267543793, "learning_rate": 9.729569965414214e-06, "loss": 0.5114, "step": 360 }, { "epoch": 0.19517030095512705, "grad_norm": 0.5646668672561646, "learning_rate": 9.726499075173201e-06, "loss": 0.4905, "step": 361 }, { "epoch": 0.19571093890791133, "grad_norm": 0.5013266801834106, "learning_rate": 9.723411337545283e-06, "loss": 0.5112, "step": 362 }, { "epoch": 0.1962515768606956, "grad_norm": 0.5415170192718506, "learning_rate": 9.720306763536553e-06, "loss": 0.4905, "step": 363 }, { "epoch": 0.19679221481347992, "grad_norm": 0.443282812833786, "learning_rate": 9.717185364213127e-06, "loss": 0.5082, "step": 364 }, { "epoch": 0.1973328527662642, "grad_norm": 0.6220279932022095, "learning_rate": 9.714047150701082e-06, "loss": 0.5117, "step": 365 }, { "epoch": 0.19787349071904847, "grad_norm": 0.46816128492355347, "learning_rate": 9.710892134186438e-06, "loss": 0.5067, "step": 366 }, { "epoch": 0.19841412867183275, "grad_norm": 0.578245222568512, "learning_rate": 9.707720325915105e-06, "loss": 0.518, "step": 367 }, { "epoch": 0.19895476662461706, "grad_norm": 0.5336887240409851, "learning_rate": 9.704531737192847e-06, "loss": 0.513, "step": 368 }, { "epoch": 0.19949540457740134, "grad_norm": 0.5477902889251709, "learning_rate": 9.701326379385238e-06, "loss": 0.4937, "step": 369 }, { "epoch": 0.20003604253018561, "grad_norm": 0.486288845539093, "learning_rate": 9.698104263917632e-06, "loss": 0.4928, "step": 370 }, { "epoch": 0.2005766804829699, "grad_norm": 0.4892939031124115, "learning_rate": 9.694865402275105e-06, "loss": 0.5004, "step": 371 }, { "epoch": 0.2011173184357542, "grad_norm": 0.4445298910140991, "learning_rate": 9.691609806002433e-06, "loss": 0.4874, "step": 372 }, { "epoch": 0.20165795638853848, "grad_norm": 0.4402690827846527, "learning_rate": 9.688337486704038e-06, "loss": 0.4714, "step": 373 }, { "epoch": 0.20219859434132276, "grad_norm": 0.5016452670097351, "learning_rate": 9.68504845604395e-06, "loss": 0.5061, "step": 374 }, { "epoch": 0.20273923229410704, "grad_norm": 0.4491643011569977, "learning_rate": 9.681742725745762e-06, "loss": 0.5089, "step": 375 }, { "epoch": 0.20327987024689134, "grad_norm": 0.45842084288597107, "learning_rate": 9.678420307592602e-06, "loss": 0.4886, "step": 376 }, { "epoch": 0.20382050819967562, "grad_norm": 0.4361850619316101, "learning_rate": 9.675081213427076e-06, "loss": 0.5183, "step": 377 }, { "epoch": 0.2043611461524599, "grad_norm": 0.5042484998703003, "learning_rate": 9.671725455151226e-06, "loss": 0.4974, "step": 378 }, { "epoch": 0.20490178410524418, "grad_norm": 0.47856807708740234, "learning_rate": 9.668353044726498e-06, "loss": 0.4957, "step": 379 }, { "epoch": 0.20544242205802848, "grad_norm": 0.5412836074829102, "learning_rate": 9.664963994173695e-06, "loss": 0.5324, "step": 380 }, { "epoch": 0.20598306001081276, "grad_norm": 0.5093710422515869, "learning_rate": 9.66155831557293e-06, "loss": 0.5335, "step": 381 }, { "epoch": 0.20652369796359704, "grad_norm": 0.4847930073738098, "learning_rate": 9.658136021063585e-06, "loss": 0.4609, "step": 382 }, { "epoch": 0.20706433591638132, "grad_norm": 0.48776888847351074, "learning_rate": 9.65469712284427e-06, "loss": 0.5006, "step": 383 }, { "epoch": 0.20760497386916563, "grad_norm": 0.5098462104797363, "learning_rate": 9.651241633172782e-06, "loss": 0.5021, "step": 384 }, { "epoch": 0.2081456118219499, "grad_norm": 0.4696345031261444, "learning_rate": 9.647769564366048e-06, "loss": 0.4987, "step": 385 }, { "epoch": 0.20868624977473418, "grad_norm": 0.5066925883293152, "learning_rate": 9.644280928800101e-06, "loss": 0.5182, "step": 386 }, { "epoch": 0.20922688772751846, "grad_norm": 0.5005125403404236, "learning_rate": 9.640775738910019e-06, "loss": 0.5005, "step": 387 }, { "epoch": 0.20976752568030277, "grad_norm": 0.43679603934288025, "learning_rate": 9.63725400718989e-06, "loss": 0.4928, "step": 388 }, { "epoch": 0.21030816363308705, "grad_norm": 0.5588410496711731, "learning_rate": 9.633715746192762e-06, "loss": 0.5374, "step": 389 }, { "epoch": 0.21084880158587133, "grad_norm": 0.4636860489845276, "learning_rate": 9.630160968530601e-06, "loss": 0.5011, "step": 390 }, { "epoch": 0.2113894395386556, "grad_norm": 0.5035771131515503, "learning_rate": 9.626589686874252e-06, "loss": 0.5152, "step": 391 }, { "epoch": 0.2119300774914399, "grad_norm": 0.5176652669906616, "learning_rate": 9.62300191395338e-06, "loss": 0.4931, "step": 392 }, { "epoch": 0.2124707154442242, "grad_norm": 0.4670732319355011, "learning_rate": 9.619397662556434e-06, "loss": 0.5099, "step": 393 }, { "epoch": 0.21301135339700847, "grad_norm": 0.5705310702323914, "learning_rate": 9.615776945530603e-06, "loss": 0.5244, "step": 394 }, { "epoch": 0.21355199134979275, "grad_norm": 0.48803743720054626, "learning_rate": 9.612139775781766e-06, "loss": 0.4953, "step": 395 }, { "epoch": 0.21409262930257705, "grad_norm": 0.6569446921348572, "learning_rate": 9.608486166274444e-06, "loss": 0.5086, "step": 396 }, { "epoch": 0.21463326725536133, "grad_norm": 0.5075775980949402, "learning_rate": 9.60481613003176e-06, "loss": 0.5471, "step": 397 }, { "epoch": 0.2151739052081456, "grad_norm": 0.5804044604301453, "learning_rate": 9.601129680135386e-06, "loss": 0.5075, "step": 398 }, { "epoch": 0.2157145431609299, "grad_norm": 0.4748011827468872, "learning_rate": 9.597426829725504e-06, "loss": 0.4741, "step": 399 }, { "epoch": 0.2162551811137142, "grad_norm": 0.5516924858093262, "learning_rate": 9.593707592000751e-06, "loss": 0.5141, "step": 400 }, { "epoch": 0.21679581906649847, "grad_norm": 0.49007630348205566, "learning_rate": 9.58997198021818e-06, "loss": 0.4965, "step": 401 }, { "epoch": 0.21733645701928275, "grad_norm": 0.49923837184906006, "learning_rate": 9.586220007693205e-06, "loss": 0.4953, "step": 402 }, { "epoch": 0.21787709497206703, "grad_norm": 0.4540408253669739, "learning_rate": 9.582451687799557e-06, "loss": 0.4577, "step": 403 }, { "epoch": 0.21841773292485134, "grad_norm": 0.4953221380710602, "learning_rate": 9.578667033969238e-06, "loss": 0.5014, "step": 404 }, { "epoch": 0.21895837087763562, "grad_norm": 0.5561023354530334, "learning_rate": 9.574866059692471e-06, "loss": 0.5158, "step": 405 }, { "epoch": 0.2194990088304199, "grad_norm": 0.42774730920791626, "learning_rate": 9.571048778517655e-06, "loss": 0.4998, "step": 406 }, { "epoch": 0.22003964678320417, "grad_norm": 0.5168089866638184, "learning_rate": 9.567215204051307e-06, "loss": 0.5156, "step": 407 }, { "epoch": 0.22058028473598848, "grad_norm": 0.45053431391716003, "learning_rate": 9.563365349958032e-06, "loss": 0.5138, "step": 408 }, { "epoch": 0.22112092268877276, "grad_norm": 0.4745253026485443, "learning_rate": 9.55949922996045e-06, "loss": 0.5122, "step": 409 }, { "epoch": 0.22166156064155704, "grad_norm": 0.49608004093170166, "learning_rate": 9.555616857839171e-06, "loss": 0.4882, "step": 410 }, { "epoch": 0.22220219859434132, "grad_norm": 0.46152451634407043, "learning_rate": 9.551718247432732e-06, "loss": 0.4709, "step": 411 }, { "epoch": 0.2227428365471256, "grad_norm": 0.4751157760620117, "learning_rate": 9.547803412637542e-06, "loss": 0.4729, "step": 412 }, { "epoch": 0.2232834744999099, "grad_norm": 0.5150904655456543, "learning_rate": 9.543872367407854e-06, "loss": 0.5082, "step": 413 }, { "epoch": 0.22382411245269418, "grad_norm": 0.46366235613822937, "learning_rate": 9.539925125755695e-06, "loss": 0.4977, "step": 414 }, { "epoch": 0.22436475040547846, "grad_norm": 0.5216416120529175, "learning_rate": 9.535961701750825e-06, "loss": 0.513, "step": 415 }, { "epoch": 0.22490538835826274, "grad_norm": 0.465658962726593, "learning_rate": 9.531982109520686e-06, "loss": 0.4844, "step": 416 }, { "epoch": 0.22544602631104704, "grad_norm": 0.48046615719795227, "learning_rate": 9.527986363250348e-06, "loss": 0.4781, "step": 417 }, { "epoch": 0.22598666426383132, "grad_norm": 0.47210782766342163, "learning_rate": 9.523974477182465e-06, "loss": 0.4951, "step": 418 }, { "epoch": 0.2265273022166156, "grad_norm": 0.4686312973499298, "learning_rate": 9.519946465617217e-06, "loss": 0.5024, "step": 419 }, { "epoch": 0.22706794016939988, "grad_norm": 0.46087512373924255, "learning_rate": 9.515902342912268e-06, "loss": 0.4894, "step": 420 }, { "epoch": 0.22760857812218419, "grad_norm": 0.5393832921981812, "learning_rate": 9.511842123482703e-06, "loss": 0.4985, "step": 421 }, { "epoch": 0.22814921607496846, "grad_norm": 0.47389426827430725, "learning_rate": 9.507765821800988e-06, "loss": 0.5029, "step": 422 }, { "epoch": 0.22868985402775274, "grad_norm": 0.5276787281036377, "learning_rate": 9.503673452396909e-06, "loss": 0.5029, "step": 423 }, { "epoch": 0.22923049198053702, "grad_norm": 0.5731882452964783, "learning_rate": 9.499565029857529e-06, "loss": 0.5116, "step": 424 }, { "epoch": 0.22977112993332133, "grad_norm": 0.48996415734291077, "learning_rate": 9.49544056882713e-06, "loss": 0.5103, "step": 425 }, { "epoch": 0.2303117678861056, "grad_norm": 0.5771133303642273, "learning_rate": 9.491300084007162e-06, "loss": 0.4913, "step": 426 }, { "epoch": 0.23085240583888988, "grad_norm": 0.42285990715026855, "learning_rate": 9.48714359015619e-06, "loss": 0.4914, "step": 427 }, { "epoch": 0.23139304379167416, "grad_norm": 0.595855712890625, "learning_rate": 9.482971102089845e-06, "loss": 0.518, "step": 428 }, { "epoch": 0.23193368174445847, "grad_norm": 0.41206789016723633, "learning_rate": 9.478782634680765e-06, "loss": 0.4696, "step": 429 }, { "epoch": 0.23247431969724275, "grad_norm": 0.57419753074646, "learning_rate": 9.47457820285855e-06, "loss": 0.4933, "step": 430 }, { "epoch": 0.23301495765002703, "grad_norm": 0.4686283469200134, "learning_rate": 9.470357821609703e-06, "loss": 0.4914, "step": 431 }, { "epoch": 0.2335555956028113, "grad_norm": 0.4878191351890564, "learning_rate": 9.466121505977577e-06, "loss": 0.4819, "step": 432 }, { "epoch": 0.2340962335555956, "grad_norm": 0.5020343661308289, "learning_rate": 9.461869271062322e-06, "loss": 0.508, "step": 433 }, { "epoch": 0.2346368715083799, "grad_norm": 0.48259827494621277, "learning_rate": 9.457601132020832e-06, "loss": 0.4999, "step": 434 }, { "epoch": 0.23517750946116417, "grad_norm": 0.439022034406662, "learning_rate": 9.453317104066693e-06, "loss": 0.4849, "step": 435 }, { "epoch": 0.23571814741394845, "grad_norm": 0.43228328227996826, "learning_rate": 9.44901720247012e-06, "loss": 0.4841, "step": 436 }, { "epoch": 0.23625878536673275, "grad_norm": 0.43123963475227356, "learning_rate": 9.444701442557917e-06, "loss": 0.5022, "step": 437 }, { "epoch": 0.23679942331951703, "grad_norm": 0.4314519166946411, "learning_rate": 9.440369839713407e-06, "loss": 0.479, "step": 438 }, { "epoch": 0.2373400612723013, "grad_norm": 0.4455517828464508, "learning_rate": 9.436022409376391e-06, "loss": 0.5356, "step": 439 }, { "epoch": 0.2378806992250856, "grad_norm": 0.4760914742946625, "learning_rate": 9.431659167043079e-06, "loss": 0.5081, "step": 440 }, { "epoch": 0.2384213371778699, "grad_norm": 0.4999728798866272, "learning_rate": 9.427280128266049e-06, "loss": 0.518, "step": 441 }, { "epoch": 0.23896197513065418, "grad_norm": 0.397093802690506, "learning_rate": 9.422885308654183e-06, "loss": 0.5257, "step": 442 }, { "epoch": 0.23950261308343845, "grad_norm": 0.4785946011543274, "learning_rate": 9.418474723872609e-06, "loss": 0.4708, "step": 443 }, { "epoch": 0.24004325103622273, "grad_norm": 0.42722031474113464, "learning_rate": 9.414048389642652e-06, "loss": 0.508, "step": 444 }, { "epoch": 0.24058388898900704, "grad_norm": 0.4705936908721924, "learning_rate": 9.409606321741776e-06, "loss": 0.4882, "step": 445 }, { "epoch": 0.24112452694179132, "grad_norm": 0.5673285722732544, "learning_rate": 9.405148536003527e-06, "loss": 0.4983, "step": 446 }, { "epoch": 0.2416651648945756, "grad_norm": 0.48440027236938477, "learning_rate": 9.400675048317473e-06, "loss": 0.5226, "step": 447 }, { "epoch": 0.24220580284735987, "grad_norm": 0.5096259117126465, "learning_rate": 9.396185874629158e-06, "loss": 0.4979, "step": 448 }, { "epoch": 0.24274644080014418, "grad_norm": 0.5506010055541992, "learning_rate": 9.391681030940031e-06, "loss": 0.4923, "step": 449 }, { "epoch": 0.24328707875292846, "grad_norm": 0.5160098075866699, "learning_rate": 9.387160533307398e-06, "loss": 0.5082, "step": 450 }, { "epoch": 0.24382771670571274, "grad_norm": 0.483447790145874, "learning_rate": 9.382624397844363e-06, "loss": 0.5007, "step": 451 }, { "epoch": 0.24436835465849702, "grad_norm": 0.4803040325641632, "learning_rate": 9.378072640719773e-06, "loss": 0.4877, "step": 452 }, { "epoch": 0.24490899261128132, "grad_norm": 0.5114092230796814, "learning_rate": 9.373505278158152e-06, "loss": 0.5008, "step": 453 }, { "epoch": 0.2454496305640656, "grad_norm": 0.45896992087364197, "learning_rate": 9.368922326439655e-06, "loss": 0.4837, "step": 454 }, { "epoch": 0.24599026851684988, "grad_norm": 0.5090027451515198, "learning_rate": 9.364323801900002e-06, "loss": 0.478, "step": 455 }, { "epoch": 0.24653090646963416, "grad_norm": 0.441892147064209, "learning_rate": 9.359709720930417e-06, "loss": 0.511, "step": 456 }, { "epoch": 0.24707154442241847, "grad_norm": 0.45809268951416016, "learning_rate": 9.355080099977579e-06, "loss": 0.5109, "step": 457 }, { "epoch": 0.24761218237520274, "grad_norm": 0.45157548785209656, "learning_rate": 9.350434955543557e-06, "loss": 0.4929, "step": 458 }, { "epoch": 0.24815282032798702, "grad_norm": 0.4648664891719818, "learning_rate": 9.345774304185756e-06, "loss": 0.4809, "step": 459 }, { "epoch": 0.2486934582807713, "grad_norm": 0.5375930666923523, "learning_rate": 9.341098162516848e-06, "loss": 0.4998, "step": 460 }, { "epoch": 0.2492340962335556, "grad_norm": 0.42750316858291626, "learning_rate": 9.336406547204726e-06, "loss": 0.4819, "step": 461 }, { "epoch": 0.2497747341863399, "grad_norm": 0.5194140076637268, "learning_rate": 9.331699474972434e-06, "loss": 0.4908, "step": 462 }, { "epoch": 0.2503153721391242, "grad_norm": 0.5086356401443481, "learning_rate": 9.326976962598113e-06, "loss": 0.4798, "step": 463 }, { "epoch": 0.25085601009190844, "grad_norm": 0.4391303062438965, "learning_rate": 9.322239026914938e-06, "loss": 0.4989, "step": 464 }, { "epoch": 0.25139664804469275, "grad_norm": 0.5237901210784912, "learning_rate": 9.317485684811065e-06, "loss": 0.5179, "step": 465 }, { "epoch": 0.251937285997477, "grad_norm": 0.38452693819999695, "learning_rate": 9.31271695322956e-06, "loss": 0.4581, "step": 466 }, { "epoch": 0.2524779239502613, "grad_norm": 0.4975660443305969, "learning_rate": 9.307932849168341e-06, "loss": 0.4567, "step": 467 }, { "epoch": 0.2530185619030456, "grad_norm": 0.4287153482437134, "learning_rate": 9.303133389680134e-06, "loss": 0.5002, "step": 468 }, { "epoch": 0.25355919985582986, "grad_norm": 0.5507779717445374, "learning_rate": 9.298318591872381e-06, "loss": 0.4925, "step": 469 }, { "epoch": 0.25409983780861417, "grad_norm": 0.4791695475578308, "learning_rate": 9.293488472907213e-06, "loss": 0.5269, "step": 470 }, { "epoch": 0.2546404757613985, "grad_norm": 0.44029316306114197, "learning_rate": 9.288643050001362e-06, "loss": 0.4994, "step": 471 }, { "epoch": 0.25518111371418273, "grad_norm": 0.40624865889549255, "learning_rate": 9.283782340426112e-06, "loss": 0.4808, "step": 472 }, { "epoch": 0.25572175166696703, "grad_norm": 0.44208696484565735, "learning_rate": 9.278906361507238e-06, "loss": 0.4944, "step": 473 }, { "epoch": 0.2562623896197513, "grad_norm": 0.45168185234069824, "learning_rate": 9.274015130624943e-06, "loss": 0.4632, "step": 474 }, { "epoch": 0.2568030275725356, "grad_norm": 0.4674747586250305, "learning_rate": 9.26910866521379e-06, "loss": 0.5001, "step": 475 }, { "epoch": 0.2573436655253199, "grad_norm": 0.4525250792503357, "learning_rate": 9.264186982762649e-06, "loss": 0.4734, "step": 476 }, { "epoch": 0.25788430347810415, "grad_norm": 0.48848944902420044, "learning_rate": 9.25925010081463e-06, "loss": 0.5054, "step": 477 }, { "epoch": 0.25842494143088846, "grad_norm": 0.4272785484790802, "learning_rate": 9.254298036967015e-06, "loss": 0.4886, "step": 478 }, { "epoch": 0.25896557938367276, "grad_norm": 0.6393560171127319, "learning_rate": 9.249330808871213e-06, "loss": 0.4915, "step": 479 }, { "epoch": 0.259506217336457, "grad_norm": 0.4338541328907013, "learning_rate": 9.244348434232676e-06, "loss": 0.4974, "step": 480 }, { "epoch": 0.2600468552892413, "grad_norm": 0.5929692387580872, "learning_rate": 9.239350930810843e-06, "loss": 0.4978, "step": 481 }, { "epoch": 0.26058749324202557, "grad_norm": 0.523623526096344, "learning_rate": 9.23433831641909e-06, "loss": 0.5338, "step": 482 }, { "epoch": 0.2611281311948099, "grad_norm": 0.45850881934165955, "learning_rate": 9.229310608924643e-06, "loss": 0.4928, "step": 483 }, { "epoch": 0.2616687691475942, "grad_norm": 0.566050112247467, "learning_rate": 9.224267826248536e-06, "loss": 0.4998, "step": 484 }, { "epoch": 0.26220940710037843, "grad_norm": 0.5277413129806519, "learning_rate": 9.219209986365533e-06, "loss": 0.4935, "step": 485 }, { "epoch": 0.26275004505316274, "grad_norm": 0.5275083780288696, "learning_rate": 9.21413710730407e-06, "loss": 0.5195, "step": 486 }, { "epoch": 0.263290683005947, "grad_norm": 0.4609350860118866, "learning_rate": 9.20904920714619e-06, "loss": 0.5127, "step": 487 }, { "epoch": 0.2638313209587313, "grad_norm": 0.5389688611030579, "learning_rate": 9.203946304027476e-06, "loss": 0.501, "step": 488 }, { "epoch": 0.2643719589115156, "grad_norm": 0.41184860467910767, "learning_rate": 9.198828416136991e-06, "loss": 0.4591, "step": 489 }, { "epoch": 0.26491259686429985, "grad_norm": 0.5245027542114258, "learning_rate": 9.193695561717207e-06, "loss": 0.4657, "step": 490 }, { "epoch": 0.26545323481708416, "grad_norm": 0.4098646938800812, "learning_rate": 9.188547759063948e-06, "loss": 0.5049, "step": 491 }, { "epoch": 0.26599387276986847, "grad_norm": 0.5717266798019409, "learning_rate": 9.183385026526317e-06, "loss": 0.5137, "step": 492 }, { "epoch": 0.2665345107226527, "grad_norm": 0.48556917905807495, "learning_rate": 9.178207382506634e-06, "loss": 0.4805, "step": 493 }, { "epoch": 0.267075148675437, "grad_norm": 0.4661046862602234, "learning_rate": 9.173014845460375e-06, "loss": 0.4607, "step": 494 }, { "epoch": 0.2676157866282213, "grad_norm": 0.4387494921684265, "learning_rate": 9.167807433896091e-06, "loss": 0.4992, "step": 495 }, { "epoch": 0.2681564245810056, "grad_norm": 0.5127182006835938, "learning_rate": 9.162585166375367e-06, "loss": 0.5167, "step": 496 }, { "epoch": 0.2686970625337899, "grad_norm": 0.42990031838417053, "learning_rate": 9.157348061512728e-06, "loss": 0.4858, "step": 497 }, { "epoch": 0.26923770048657414, "grad_norm": 0.4679833650588989, "learning_rate": 9.152096137975593e-06, "loss": 0.5, "step": 498 }, { "epoch": 0.26977833843935844, "grad_norm": 0.4706438183784485, "learning_rate": 9.146829414484198e-06, "loss": 0.4923, "step": 499 }, { "epoch": 0.27031897639214275, "grad_norm": 0.4618656635284424, "learning_rate": 9.14154790981154e-06, "loss": 0.5015, "step": 500 }, { "epoch": 0.270859614344927, "grad_norm": 0.473890095949173, "learning_rate": 9.136251642783294e-06, "loss": 0.5052, "step": 501 }, { "epoch": 0.2714002522977113, "grad_norm": 0.4302096962928772, "learning_rate": 9.130940632277757e-06, "loss": 0.5216, "step": 502 }, { "epoch": 0.27194089025049556, "grad_norm": 0.45936453342437744, "learning_rate": 9.125614897225785e-06, "loss": 0.5082, "step": 503 }, { "epoch": 0.27248152820327987, "grad_norm": 0.42327070236206055, "learning_rate": 9.120274456610708e-06, "loss": 0.4788, "step": 504 }, { "epoch": 0.27302216615606417, "grad_norm": 0.4344567656517029, "learning_rate": 9.114919329468283e-06, "loss": 0.4613, "step": 505 }, { "epoch": 0.2735628041088484, "grad_norm": 0.44039013981819153, "learning_rate": 9.10954953488661e-06, "loss": 0.4888, "step": 506 }, { "epoch": 0.27410344206163273, "grad_norm": 0.4592479169368744, "learning_rate": 9.104165092006075e-06, "loss": 0.4997, "step": 507 }, { "epoch": 0.27464408001441704, "grad_norm": 0.4962833523750305, "learning_rate": 9.098766020019273e-06, "loss": 0.4599, "step": 508 }, { "epoch": 0.2751847179672013, "grad_norm": 0.4496838450431824, "learning_rate": 9.09335233817095e-06, "loss": 0.4969, "step": 509 }, { "epoch": 0.2757253559199856, "grad_norm": 0.44309988617897034, "learning_rate": 9.08792406575792e-06, "loss": 0.5136, "step": 510 }, { "epoch": 0.27626599387276984, "grad_norm": 0.4315456748008728, "learning_rate": 9.082481222129008e-06, "loss": 0.4811, "step": 511 }, { "epoch": 0.27680663182555415, "grad_norm": 0.45502516627311707, "learning_rate": 9.07702382668498e-06, "loss": 0.4764, "step": 512 }, { "epoch": 0.27734726977833846, "grad_norm": 0.41209426522254944, "learning_rate": 9.071551898878471e-06, "loss": 0.468, "step": 513 }, { "epoch": 0.2778879077311227, "grad_norm": 0.4675307273864746, "learning_rate": 9.066065458213908e-06, "loss": 0.4903, "step": 514 }, { "epoch": 0.278428545683907, "grad_norm": 0.46809569001197815, "learning_rate": 9.06056452424746e-06, "loss": 0.5279, "step": 515 }, { "epoch": 0.2789691836366913, "grad_norm": 0.5060775876045227, "learning_rate": 9.055049116586951e-06, "loss": 0.4866, "step": 516 }, { "epoch": 0.27950982158947557, "grad_norm": 0.5153331756591797, "learning_rate": 9.049519254891793e-06, "loss": 0.5124, "step": 517 }, { "epoch": 0.2800504595422599, "grad_norm": 0.4824049472808838, "learning_rate": 9.04397495887292e-06, "loss": 0.4711, "step": 518 }, { "epoch": 0.28059109749504413, "grad_norm": 0.4388289153575897, "learning_rate": 9.038416248292725e-06, "loss": 0.4769, "step": 519 }, { "epoch": 0.28113173544782843, "grad_norm": 0.5226829051971436, "learning_rate": 9.03284314296497e-06, "loss": 0.5135, "step": 520 }, { "epoch": 0.28167237340061274, "grad_norm": 0.436776340007782, "learning_rate": 9.02725566275473e-06, "loss": 0.5041, "step": 521 }, { "epoch": 0.282213011353397, "grad_norm": 0.5271431803703308, "learning_rate": 9.021653827578322e-06, "loss": 0.4836, "step": 522 }, { "epoch": 0.2827536493061813, "grad_norm": 0.5045011043548584, "learning_rate": 9.016037657403225e-06, "loss": 0.487, "step": 523 }, { "epoch": 0.2832942872589656, "grad_norm": 0.4657753109931946, "learning_rate": 9.01040717224802e-06, "loss": 0.4895, "step": 524 }, { "epoch": 0.28383492521174986, "grad_norm": 0.4497890770435333, "learning_rate": 9.004762392182307e-06, "loss": 0.4913, "step": 525 }, { "epoch": 0.28437556316453416, "grad_norm": 0.5092249512672424, "learning_rate": 8.999103337326646e-06, "loss": 0.5007, "step": 526 }, { "epoch": 0.2849162011173184, "grad_norm": 0.48387986421585083, "learning_rate": 8.993430027852476e-06, "loss": 0.5121, "step": 527 }, { "epoch": 0.2854568390701027, "grad_norm": 0.4973430037498474, "learning_rate": 8.987742483982044e-06, "loss": 0.4928, "step": 528 }, { "epoch": 0.285997477022887, "grad_norm": 0.44857463240623474, "learning_rate": 8.982040725988337e-06, "loss": 0.5141, "step": 529 }, { "epoch": 0.2865381149756713, "grad_norm": 0.48959702253341675, "learning_rate": 8.976324774195005e-06, "loss": 0.5099, "step": 530 }, { "epoch": 0.2870787529284556, "grad_norm": 0.4419333040714264, "learning_rate": 8.970594648976299e-06, "loss": 0.4859, "step": 531 }, { "epoch": 0.2876193908812399, "grad_norm": 0.43844538927078247, "learning_rate": 8.964850370756978e-06, "loss": 0.4931, "step": 532 }, { "epoch": 0.28816002883402414, "grad_norm": 0.5034130811691284, "learning_rate": 8.95909196001226e-06, "loss": 0.4897, "step": 533 }, { "epoch": 0.28870066678680845, "grad_norm": 0.4126366972923279, "learning_rate": 8.953319437267731e-06, "loss": 0.5, "step": 534 }, { "epoch": 0.2892413047395927, "grad_norm": 0.4978686571121216, "learning_rate": 8.947532823099284e-06, "loss": 0.4674, "step": 535 }, { "epoch": 0.289781942692377, "grad_norm": 0.49937352538108826, "learning_rate": 8.941732138133032e-06, "loss": 0.4687, "step": 536 }, { "epoch": 0.2903225806451613, "grad_norm": 0.5455271601676941, "learning_rate": 8.935917403045251e-06, "loss": 0.4956, "step": 537 }, { "epoch": 0.29086321859794556, "grad_norm": 0.46256354451179504, "learning_rate": 8.930088638562296e-06, "loss": 0.4607, "step": 538 }, { "epoch": 0.29140385655072987, "grad_norm": 0.5940908193588257, "learning_rate": 8.924245865460523e-06, "loss": 0.4889, "step": 539 }, { "epoch": 0.2919444945035142, "grad_norm": 0.4837222099304199, "learning_rate": 8.918389104566232e-06, "loss": 0.5081, "step": 540 }, { "epoch": 0.2924851324562984, "grad_norm": 0.5419607162475586, "learning_rate": 8.912518376755572e-06, "loss": 0.4951, "step": 541 }, { "epoch": 0.29302577040908273, "grad_norm": 0.47228190302848816, "learning_rate": 8.906633702954482e-06, "loss": 0.4769, "step": 542 }, { "epoch": 0.293566408361867, "grad_norm": 0.4912036061286926, "learning_rate": 8.900735104138605e-06, "loss": 0.4911, "step": 543 }, { "epoch": 0.2941070463146513, "grad_norm": 0.5826781392097473, "learning_rate": 8.894822601333228e-06, "loss": 0.4808, "step": 544 }, { "epoch": 0.2946476842674356, "grad_norm": 0.5110523104667664, "learning_rate": 8.888896215613192e-06, "loss": 0.4882, "step": 545 }, { "epoch": 0.29518832222021985, "grad_norm": 0.5374977588653564, "learning_rate": 8.882955968102822e-06, "loss": 0.488, "step": 546 }, { "epoch": 0.29572896017300415, "grad_norm": 0.49603360891342163, "learning_rate": 8.877001879975857e-06, "loss": 0.4979, "step": 547 }, { "epoch": 0.29626959812578846, "grad_norm": 0.5069612264633179, "learning_rate": 8.87103397245537e-06, "loss": 0.498, "step": 548 }, { "epoch": 0.2968102360785727, "grad_norm": 0.5196537375450134, "learning_rate": 8.865052266813686e-06, "loss": 0.4882, "step": 549 }, { "epoch": 0.297350874031357, "grad_norm": 0.4779670536518097, "learning_rate": 8.85905678437232e-06, "loss": 0.483, "step": 550 }, { "epoch": 0.29789151198414127, "grad_norm": 0.5233485102653503, "learning_rate": 8.853047546501893e-06, "loss": 0.4952, "step": 551 }, { "epoch": 0.2984321499369256, "grad_norm": 0.4517951011657715, "learning_rate": 8.847024574622051e-06, "loss": 0.5205, "step": 552 }, { "epoch": 0.2989727878897099, "grad_norm": 0.5014324188232422, "learning_rate": 8.840987890201404e-06, "loss": 0.501, "step": 553 }, { "epoch": 0.29951342584249413, "grad_norm": 0.45734500885009766, "learning_rate": 8.834937514757428e-06, "loss": 0.473, "step": 554 }, { "epoch": 0.30005406379527844, "grad_norm": 0.4451104998588562, "learning_rate": 8.828873469856408e-06, "loss": 0.5119, "step": 555 }, { "epoch": 0.3005947017480627, "grad_norm": 0.4849705398082733, "learning_rate": 8.822795777113352e-06, "loss": 0.499, "step": 556 }, { "epoch": 0.301135339700847, "grad_norm": 0.5042188167572021, "learning_rate": 8.816704458191913e-06, "loss": 0.512, "step": 557 }, { "epoch": 0.3016759776536313, "grad_norm": 0.4437592923641205, "learning_rate": 8.810599534804315e-06, "loss": 0.5103, "step": 558 }, { "epoch": 0.30221661560641555, "grad_norm": 0.46505382657051086, "learning_rate": 8.804481028711274e-06, "loss": 0.4759, "step": 559 }, { "epoch": 0.30275725355919986, "grad_norm": 0.5045806169509888, "learning_rate": 8.798348961721925e-06, "loss": 0.4896, "step": 560 }, { "epoch": 0.30329789151198416, "grad_norm": 0.4548962116241455, "learning_rate": 8.792203355693731e-06, "loss": 0.4548, "step": 561 }, { "epoch": 0.3038385294647684, "grad_norm": 0.4087465703487396, "learning_rate": 8.786044232532423e-06, "loss": 0.4975, "step": 562 }, { "epoch": 0.3043791674175527, "grad_norm": 0.5508294105529785, "learning_rate": 8.77987161419191e-06, "loss": 0.4907, "step": 563 }, { "epoch": 0.30491980537033697, "grad_norm": 0.53659987449646, "learning_rate": 8.773685522674205e-06, "loss": 0.494, "step": 564 }, { "epoch": 0.3054604433231213, "grad_norm": 0.5059062242507935, "learning_rate": 8.767485980029342e-06, "loss": 0.4904, "step": 565 }, { "epoch": 0.3060010812759056, "grad_norm": 0.462995320558548, "learning_rate": 8.761273008355306e-06, "loss": 0.483, "step": 566 }, { "epoch": 0.30654171922868984, "grad_norm": 0.6344414353370667, "learning_rate": 8.755046629797944e-06, "loss": 0.5041, "step": 567 }, { "epoch": 0.30708235718147414, "grad_norm": 0.455564022064209, "learning_rate": 8.748806866550895e-06, "loss": 0.4515, "step": 568 }, { "epoch": 0.30762299513425845, "grad_norm": 0.5680974721908569, "learning_rate": 8.742553740855507e-06, "loss": 0.5019, "step": 569 }, { "epoch": 0.3081636330870427, "grad_norm": 0.500893771648407, "learning_rate": 8.736287275000755e-06, "loss": 0.5346, "step": 570 }, { "epoch": 0.308704271039827, "grad_norm": 0.43272846937179565, "learning_rate": 8.730007491323167e-06, "loss": 0.4932, "step": 571 }, { "epoch": 0.30924490899261126, "grad_norm": 0.4665529727935791, "learning_rate": 8.723714412206741e-06, "loss": 0.5207, "step": 572 }, { "epoch": 0.30978554694539556, "grad_norm": 0.5202204585075378, "learning_rate": 8.717408060082865e-06, "loss": 0.5138, "step": 573 }, { "epoch": 0.31032618489817987, "grad_norm": 0.4143356680870056, "learning_rate": 8.711088457430239e-06, "loss": 0.4977, "step": 574 }, { "epoch": 0.3108668228509641, "grad_norm": 0.4690699875354767, "learning_rate": 8.704755626774796e-06, "loss": 0.4854, "step": 575 }, { "epoch": 0.3114074608037484, "grad_norm": 0.5123147964477539, "learning_rate": 8.698409590689616e-06, "loss": 0.493, "step": 576 }, { "epoch": 0.31194809875653273, "grad_norm": 0.38649624586105347, "learning_rate": 8.692050371794849e-06, "loss": 0.4737, "step": 577 }, { "epoch": 0.312488736709317, "grad_norm": 0.4339773952960968, "learning_rate": 8.685677992757637e-06, "loss": 0.485, "step": 578 }, { "epoch": 0.3130293746621013, "grad_norm": 0.5341055989265442, "learning_rate": 8.67929247629203e-06, "loss": 0.4933, "step": 579 }, { "epoch": 0.31357001261488554, "grad_norm": 0.38680076599121094, "learning_rate": 8.672893845158908e-06, "loss": 0.5145, "step": 580 }, { "epoch": 0.31411065056766985, "grad_norm": 0.5113509893417358, "learning_rate": 8.66648212216589e-06, "loss": 0.4792, "step": 581 }, { "epoch": 0.31465128852045415, "grad_norm": 0.41235169768333435, "learning_rate": 8.660057330167267e-06, "loss": 0.4734, "step": 582 }, { "epoch": 0.3151919264732384, "grad_norm": 0.4803949296474457, "learning_rate": 8.653619492063916e-06, "loss": 0.4898, "step": 583 }, { "epoch": 0.3157325644260227, "grad_norm": 0.4755302965641022, "learning_rate": 8.647168630803208e-06, "loss": 0.4968, "step": 584 }, { "epoch": 0.316273202378807, "grad_norm": 0.4384644329547882, "learning_rate": 8.640704769378943e-06, "loss": 0.4922, "step": 585 }, { "epoch": 0.31681384033159127, "grad_norm": 0.5590741038322449, "learning_rate": 8.634227930831252e-06, "loss": 0.4828, "step": 586 }, { "epoch": 0.3173544782843756, "grad_norm": 0.45347392559051514, "learning_rate": 8.627738138246529e-06, "loss": 0.468, "step": 587 }, { "epoch": 0.3178951162371598, "grad_norm": 0.4747249186038971, "learning_rate": 8.621235414757337e-06, "loss": 0.4905, "step": 588 }, { "epoch": 0.31843575418994413, "grad_norm": 0.4371531903743744, "learning_rate": 8.61471978354233e-06, "loss": 0.4806, "step": 589 }, { "epoch": 0.31897639214272844, "grad_norm": 0.5220611095428467, "learning_rate": 8.608191267826179e-06, "loss": 0.4872, "step": 590 }, { "epoch": 0.3195170300955127, "grad_norm": 0.46987447142601013, "learning_rate": 8.60164989087947e-06, "loss": 0.523, "step": 591 }, { "epoch": 0.320057668048297, "grad_norm": 0.5529326796531677, "learning_rate": 8.595095676018645e-06, "loss": 0.5054, "step": 592 }, { "epoch": 0.3205983060010813, "grad_norm": 0.4231288731098175, "learning_rate": 8.588528646605893e-06, "loss": 0.4985, "step": 593 }, { "epoch": 0.32113894395386555, "grad_norm": 0.5058132410049438, "learning_rate": 8.581948826049086e-06, "loss": 0.4989, "step": 594 }, { "epoch": 0.32167958190664986, "grad_norm": 0.43005961179733276, "learning_rate": 8.575356237801695e-06, "loss": 0.4898, "step": 595 }, { "epoch": 0.3222202198594341, "grad_norm": 0.4978388547897339, "learning_rate": 8.56875090536269e-06, "loss": 0.4666, "step": 596 }, { "epoch": 0.3227608578122184, "grad_norm": 0.5346764326095581, "learning_rate": 8.562132852276474e-06, "loss": 0.4828, "step": 597 }, { "epoch": 0.3233014957650027, "grad_norm": 0.5223882794380188, "learning_rate": 8.555502102132792e-06, "loss": 0.4619, "step": 598 }, { "epoch": 0.323842133717787, "grad_norm": 0.5179974436759949, "learning_rate": 8.548858678566643e-06, "loss": 0.46, "step": 599 }, { "epoch": 0.3243827716705713, "grad_norm": 0.44731220602989197, "learning_rate": 8.542202605258204e-06, "loss": 0.4712, "step": 600 }, { "epoch": 0.3249234096233556, "grad_norm": 0.5143573880195618, "learning_rate": 8.535533905932739e-06, "loss": 0.4841, "step": 601 }, { "epoch": 0.32546404757613984, "grad_norm": 0.40094849467277527, "learning_rate": 8.528852604360518e-06, "loss": 0.4825, "step": 602 }, { "epoch": 0.32600468552892414, "grad_norm": 0.4948832094669342, "learning_rate": 8.52215872435673e-06, "loss": 0.5066, "step": 603 }, { "epoch": 0.3265453234817084, "grad_norm": 0.43757307529449463, "learning_rate": 8.515452289781403e-06, "loss": 0.4831, "step": 604 }, { "epoch": 0.3270859614344927, "grad_norm": 0.4551491141319275, "learning_rate": 8.50873332453931e-06, "loss": 0.4628, "step": 605 }, { "epoch": 0.327626599387277, "grad_norm": 0.45467811822891235, "learning_rate": 8.50200185257989e-06, "loss": 0.4813, "step": 606 }, { "epoch": 0.32816723734006126, "grad_norm": 0.4058467745780945, "learning_rate": 8.495257897897166e-06, "loss": 0.4756, "step": 607 }, { "epoch": 0.32870787529284556, "grad_norm": 0.4405519664287567, "learning_rate": 8.48850148452965e-06, "loss": 0.4964, "step": 608 }, { "epoch": 0.32924851324562987, "grad_norm": 0.39016538858413696, "learning_rate": 8.481732636560266e-06, "loss": 0.4736, "step": 609 }, { "epoch": 0.3297891511984141, "grad_norm": 0.4444190561771393, "learning_rate": 8.474951378116253e-06, "loss": 0.4904, "step": 610 }, { "epoch": 0.3303297891511984, "grad_norm": 0.45072081685066223, "learning_rate": 8.468157733369102e-06, "loss": 0.4906, "step": 611 }, { "epoch": 0.3308704271039827, "grad_norm": 0.40805482864379883, "learning_rate": 8.461351726534438e-06, "loss": 0.4781, "step": 612 }, { "epoch": 0.331411065056767, "grad_norm": 0.4450664520263672, "learning_rate": 8.454533381871957e-06, "loss": 0.4985, "step": 613 }, { "epoch": 0.3319517030095513, "grad_norm": 0.465251088142395, "learning_rate": 8.447702723685335e-06, "loss": 0.4911, "step": 614 }, { "epoch": 0.33249234096233554, "grad_norm": 0.49976640939712524, "learning_rate": 8.440859776322137e-06, "loss": 0.4977, "step": 615 }, { "epoch": 0.33303297891511985, "grad_norm": 0.5387666821479797, "learning_rate": 8.43400456417373e-06, "loss": 0.4938, "step": 616 }, { "epoch": 0.3335736168679041, "grad_norm": 0.3886103630065918, "learning_rate": 8.4271371116752e-06, "loss": 0.4753, "step": 617 }, { "epoch": 0.3341142548206884, "grad_norm": 0.4702761173248291, "learning_rate": 8.420257443305264e-06, "loss": 0.4991, "step": 618 }, { "epoch": 0.3346548927734727, "grad_norm": 0.4505619406700134, "learning_rate": 8.41336558358618e-06, "loss": 0.4913, "step": 619 }, { "epoch": 0.33519553072625696, "grad_norm": 0.4063532054424286, "learning_rate": 8.406461557083666e-06, "loss": 0.4866, "step": 620 }, { "epoch": 0.33573616867904127, "grad_norm": 0.44660359621047974, "learning_rate": 8.399545388406798e-06, "loss": 0.5116, "step": 621 }, { "epoch": 0.3362768066318256, "grad_norm": 0.41896873712539673, "learning_rate": 8.392617102207945e-06, "loss": 0.4966, "step": 622 }, { "epoch": 0.3368174445846098, "grad_norm": 0.44783973693847656, "learning_rate": 8.38567672318266e-06, "loss": 0.4843, "step": 623 }, { "epoch": 0.33735808253739413, "grad_norm": 0.4662441611289978, "learning_rate": 8.3787242760696e-06, "loss": 0.4646, "step": 624 }, { "epoch": 0.3378987204901784, "grad_norm": 0.44338029623031616, "learning_rate": 8.371759785650444e-06, "loss": 0.4847, "step": 625 }, { "epoch": 0.3384393584429627, "grad_norm": 0.4882574677467346, "learning_rate": 8.364783276749794e-06, "loss": 0.489, "step": 626 }, { "epoch": 0.338979996395747, "grad_norm": 0.4160021245479584, "learning_rate": 8.357794774235094e-06, "loss": 0.504, "step": 627 }, { "epoch": 0.33952063434853125, "grad_norm": 0.40824034810066223, "learning_rate": 8.350794303016533e-06, "loss": 0.4683, "step": 628 }, { "epoch": 0.34006127230131555, "grad_norm": 0.5091646909713745, "learning_rate": 8.343781888046971e-06, "loss": 0.5207, "step": 629 }, { "epoch": 0.34060191025409986, "grad_norm": 0.43365058302879333, "learning_rate": 8.336757554321832e-06, "loss": 0.5062, "step": 630 }, { "epoch": 0.3411425482068841, "grad_norm": 0.4349713623523712, "learning_rate": 8.329721326879032e-06, "loss": 0.4745, "step": 631 }, { "epoch": 0.3416831861596684, "grad_norm": 0.4307449460029602, "learning_rate": 8.322673230798877e-06, "loss": 0.4845, "step": 632 }, { "epoch": 0.34222382411245267, "grad_norm": 0.4474685788154602, "learning_rate": 8.315613291203977e-06, "loss": 0.4928, "step": 633 }, { "epoch": 0.342764462065237, "grad_norm": 0.4700278043746948, "learning_rate": 8.30854153325916e-06, "loss": 0.5178, "step": 634 }, { "epoch": 0.3433051000180213, "grad_norm": 0.42883479595184326, "learning_rate": 8.30145798217138e-06, "loss": 0.4828, "step": 635 }, { "epoch": 0.34384573797080553, "grad_norm": 0.45476993918418884, "learning_rate": 8.294362663189626e-06, "loss": 0.4882, "step": 636 }, { "epoch": 0.34438637592358984, "grad_norm": 0.40888112783432007, "learning_rate": 8.287255601604834e-06, "loss": 0.4728, "step": 637 }, { "epoch": 0.34492701387637414, "grad_norm": 0.41027674078941345, "learning_rate": 8.280136822749796e-06, "loss": 0.4612, "step": 638 }, { "epoch": 0.3454676518291584, "grad_norm": 0.3990921080112457, "learning_rate": 8.27300635199907e-06, "loss": 0.4977, "step": 639 }, { "epoch": 0.3460082897819427, "grad_norm": 0.39647355675697327, "learning_rate": 8.265864214768883e-06, "loss": 0.4817, "step": 640 }, { "epoch": 0.34654892773472695, "grad_norm": 0.40276244282722473, "learning_rate": 8.25871043651706e-06, "loss": 0.4652, "step": 641 }, { "epoch": 0.34708956568751126, "grad_norm": 0.4234772026538849, "learning_rate": 8.25154504274291e-06, "loss": 0.4882, "step": 642 }, { "epoch": 0.34763020364029557, "grad_norm": 0.49208852648735046, "learning_rate": 8.244368058987145e-06, "loss": 0.5014, "step": 643 }, { "epoch": 0.3481708415930798, "grad_norm": 0.3916458189487457, "learning_rate": 8.237179510831792e-06, "loss": 0.4678, "step": 644 }, { "epoch": 0.3487114795458641, "grad_norm": 0.43457022309303284, "learning_rate": 8.229979423900095e-06, "loss": 0.4866, "step": 645 }, { "epoch": 0.34925211749864843, "grad_norm": 0.44561585783958435, "learning_rate": 8.222767823856435e-06, "loss": 0.491, "step": 646 }, { "epoch": 0.3497927554514327, "grad_norm": 0.3655422031879425, "learning_rate": 8.215544736406223e-06, "loss": 0.4497, "step": 647 }, { "epoch": 0.350333393404217, "grad_norm": 0.47153764963150024, "learning_rate": 8.20831018729582e-06, "loss": 0.5342, "step": 648 }, { "epoch": 0.35087403135700124, "grad_norm": 0.4188770651817322, "learning_rate": 8.20106420231244e-06, "loss": 0.4742, "step": 649 }, { "epoch": 0.35141466930978554, "grad_norm": 0.45185157656669617, "learning_rate": 8.193806807284064e-06, "loss": 0.4623, "step": 650 }, { "epoch": 0.35195530726256985, "grad_norm": 0.49852705001831055, "learning_rate": 8.186538028079338e-06, "loss": 0.4728, "step": 651 }, { "epoch": 0.3524959452153541, "grad_norm": 0.46935099363327026, "learning_rate": 8.179257890607489e-06, "loss": 0.4709, "step": 652 }, { "epoch": 0.3530365831681384, "grad_norm": 0.4840806722640991, "learning_rate": 8.171966420818227e-06, "loss": 0.4988, "step": 653 }, { "epoch": 0.3535772211209227, "grad_norm": 0.45912444591522217, "learning_rate": 8.164663644701662e-06, "loss": 0.4933, "step": 654 }, { "epoch": 0.35411785907370696, "grad_norm": 0.4942997694015503, "learning_rate": 8.157349588288202e-06, "loss": 0.4731, "step": 655 }, { "epoch": 0.35465849702649127, "grad_norm": 0.4257151782512665, "learning_rate": 8.150024277648458e-06, "loss": 0.487, "step": 656 }, { "epoch": 0.3551991349792755, "grad_norm": 0.544456958770752, "learning_rate": 8.142687738893161e-06, "loss": 0.5117, "step": 657 }, { "epoch": 0.35573977293205983, "grad_norm": 0.5224266648292542, "learning_rate": 8.135339998173064e-06, "loss": 0.4894, "step": 658 }, { "epoch": 0.35628041088484413, "grad_norm": 0.46919405460357666, "learning_rate": 8.12798108167885e-06, "loss": 0.4719, "step": 659 }, { "epoch": 0.3568210488376284, "grad_norm": 0.7049233913421631, "learning_rate": 8.120611015641036e-06, "loss": 0.5216, "step": 660 }, { "epoch": 0.3573616867904127, "grad_norm": 0.4920409619808197, "learning_rate": 8.113229826329876e-06, "loss": 0.4922, "step": 661 }, { "epoch": 0.357902324743197, "grad_norm": 0.5132108330726624, "learning_rate": 8.105837540055284e-06, "loss": 0.4894, "step": 662 }, { "epoch": 0.35844296269598125, "grad_norm": 0.5030608177185059, "learning_rate": 8.098434183166716e-06, "loss": 0.4765, "step": 663 }, { "epoch": 0.35898360064876556, "grad_norm": 0.5627034306526184, "learning_rate": 8.091019782053097e-06, "loss": 0.4862, "step": 664 }, { "epoch": 0.3595242386015498, "grad_norm": 0.5231481790542603, "learning_rate": 8.083594363142717e-06, "loss": 0.5224, "step": 665 }, { "epoch": 0.3600648765543341, "grad_norm": 0.5793126225471497, "learning_rate": 8.076157952903134e-06, "loss": 0.5063, "step": 666 }, { "epoch": 0.3606055145071184, "grad_norm": 0.5222432017326355, "learning_rate": 8.068710577841093e-06, "loss": 0.4578, "step": 667 }, { "epoch": 0.36114615245990267, "grad_norm": 0.4852503836154938, "learning_rate": 8.061252264502415e-06, "loss": 0.4936, "step": 668 }, { "epoch": 0.361686790412687, "grad_norm": 0.4867151379585266, "learning_rate": 8.053783039471909e-06, "loss": 0.4957, "step": 669 }, { "epoch": 0.3622274283654713, "grad_norm": 0.45587924122810364, "learning_rate": 8.046302929373286e-06, "loss": 0.4655, "step": 670 }, { "epoch": 0.36276806631825553, "grad_norm": 0.47155067324638367, "learning_rate": 8.038811960869051e-06, "loss": 0.487, "step": 671 }, { "epoch": 0.36330870427103984, "grad_norm": 0.4418538510799408, "learning_rate": 8.031310160660411e-06, "loss": 0.4724, "step": 672 }, { "epoch": 0.3638493422238241, "grad_norm": 0.404681533575058, "learning_rate": 8.023797555487188e-06, "loss": 0.4892, "step": 673 }, { "epoch": 0.3643899801766084, "grad_norm": 0.45918798446655273, "learning_rate": 8.016274172127715e-06, "loss": 0.5012, "step": 674 }, { "epoch": 0.3649306181293927, "grad_norm": 0.4333856701850891, "learning_rate": 8.008740037398742e-06, "loss": 0.4868, "step": 675 }, { "epoch": 0.36547125608217695, "grad_norm": 0.3893619477748871, "learning_rate": 8.001195178155344e-06, "loss": 0.4684, "step": 676 }, { "epoch": 0.36601189403496126, "grad_norm": 0.434662789106369, "learning_rate": 7.99363962129082e-06, "loss": 0.4814, "step": 677 }, { "epoch": 0.36655253198774557, "grad_norm": 0.4455735981464386, "learning_rate": 7.986073393736607e-06, "loss": 0.4856, "step": 678 }, { "epoch": 0.3670931699405298, "grad_norm": 0.39852529764175415, "learning_rate": 7.978496522462167e-06, "loss": 0.4811, "step": 679 }, { "epoch": 0.3676338078933141, "grad_norm": 0.42947253584861755, "learning_rate": 7.97090903447491e-06, "loss": 0.4687, "step": 680 }, { "epoch": 0.3681744458460984, "grad_norm": 0.4797091782093048, "learning_rate": 7.963310956820085e-06, "loss": 0.4892, "step": 681 }, { "epoch": 0.3687150837988827, "grad_norm": 0.449841171503067, "learning_rate": 7.955702316580686e-06, "loss": 0.4918, "step": 682 }, { "epoch": 0.369255721751667, "grad_norm": 0.4748377203941345, "learning_rate": 7.94808314087736e-06, "loss": 0.5122, "step": 683 }, { "epoch": 0.36979635970445124, "grad_norm": 0.45039764046669006, "learning_rate": 7.940453456868304e-06, "loss": 0.4818, "step": 684 }, { "epoch": 0.37033699765723554, "grad_norm": 0.44947609305381775, "learning_rate": 7.932813291749177e-06, "loss": 0.4906, "step": 685 }, { "epoch": 0.3708776356100198, "grad_norm": 0.46139925718307495, "learning_rate": 7.925162672752989e-06, "loss": 0.4888, "step": 686 }, { "epoch": 0.3714182735628041, "grad_norm": 0.49717041850090027, "learning_rate": 7.917501627150019e-06, "loss": 0.4981, "step": 687 }, { "epoch": 0.3719589115155884, "grad_norm": 0.5299156904220581, "learning_rate": 7.90983018224771e-06, "loss": 0.4948, "step": 688 }, { "epoch": 0.37249954946837266, "grad_norm": 0.4835863411426544, "learning_rate": 7.902148365390567e-06, "loss": 0.4915, "step": 689 }, { "epoch": 0.37304018742115697, "grad_norm": 0.5481423735618591, "learning_rate": 7.894456203960075e-06, "loss": 0.4889, "step": 690 }, { "epoch": 0.37358082537394127, "grad_norm": 0.419506311416626, "learning_rate": 7.886753725374586e-06, "loss": 0.4839, "step": 691 }, { "epoch": 0.3741214633267255, "grad_norm": 0.429317831993103, "learning_rate": 7.879040957089229e-06, "loss": 0.4709, "step": 692 }, { "epoch": 0.37466210127950983, "grad_norm": 0.47440069913864136, "learning_rate": 7.871317926595804e-06, "loss": 0.4777, "step": 693 }, { "epoch": 0.3752027392322941, "grad_norm": 0.5026919841766357, "learning_rate": 7.8635846614227e-06, "loss": 0.4749, "step": 694 }, { "epoch": 0.3757433771850784, "grad_norm": 0.39894619584083557, "learning_rate": 7.855841189134784e-06, "loss": 0.4627, "step": 695 }, { "epoch": 0.3762840151378627, "grad_norm": 0.4291825294494629, "learning_rate": 7.848087537333298e-06, "loss": 0.4667, "step": 696 }, { "epoch": 0.37682465309064694, "grad_norm": 0.3873690068721771, "learning_rate": 7.84032373365578e-06, "loss": 0.4774, "step": 697 }, { "epoch": 0.37736529104343125, "grad_norm": 0.3892592489719391, "learning_rate": 7.832549805775945e-06, "loss": 0.5081, "step": 698 }, { "epoch": 0.37790592899621556, "grad_norm": 0.40017616748809814, "learning_rate": 7.8247657814036e-06, "loss": 0.4783, "step": 699 }, { "epoch": 0.3784465669489998, "grad_norm": 0.41424062848091125, "learning_rate": 7.81697168828454e-06, "loss": 0.4904, "step": 700 }, { "epoch": 0.3789872049017841, "grad_norm": 0.42307886481285095, "learning_rate": 7.809167554200446e-06, "loss": 0.4948, "step": 701 }, { "epoch": 0.37952784285456836, "grad_norm": 0.3783946931362152, "learning_rate": 7.801353406968795e-06, "loss": 0.4778, "step": 702 }, { "epoch": 0.38006848080735267, "grad_norm": 0.39104709029197693, "learning_rate": 7.793529274442753e-06, "loss": 0.4661, "step": 703 }, { "epoch": 0.380609118760137, "grad_norm": 0.4255848824977875, "learning_rate": 7.785695184511074e-06, "loss": 0.4629, "step": 704 }, { "epoch": 0.38114975671292123, "grad_norm": 0.37515324354171753, "learning_rate": 7.777851165098012e-06, "loss": 0.4744, "step": 705 }, { "epoch": 0.38169039466570553, "grad_norm": 0.43768253922462463, "learning_rate": 7.769997244163209e-06, "loss": 0.4941, "step": 706 }, { "epoch": 0.38223103261848984, "grad_norm": 0.4131484925746918, "learning_rate": 7.762133449701603e-06, "loss": 0.4912, "step": 707 }, { "epoch": 0.3827716705712741, "grad_norm": 0.42033693194389343, "learning_rate": 7.754259809743325e-06, "loss": 0.4606, "step": 708 }, { "epoch": 0.3833123085240584, "grad_norm": 0.3850057125091553, "learning_rate": 7.746376352353599e-06, "loss": 0.4715, "step": 709 }, { "epoch": 0.38385294647684265, "grad_norm": 0.4940873086452484, "learning_rate": 7.738483105632644e-06, "loss": 0.5257, "step": 710 }, { "epoch": 0.38439358442962696, "grad_norm": 0.4676145315170288, "learning_rate": 7.730580097715575e-06, "loss": 0.4917, "step": 711 }, { "epoch": 0.38493422238241126, "grad_norm": 0.41766825318336487, "learning_rate": 7.722667356772291e-06, "loss": 0.4738, "step": 712 }, { "epoch": 0.3854748603351955, "grad_norm": 0.4553625285625458, "learning_rate": 7.714744911007395e-06, "loss": 0.4839, "step": 713 }, { "epoch": 0.3860154982879798, "grad_norm": 0.3935423195362091, "learning_rate": 7.706812788660075e-06, "loss": 0.4889, "step": 714 }, { "epoch": 0.3865561362407641, "grad_norm": 0.42542070150375366, "learning_rate": 7.698871018004016e-06, "loss": 0.5031, "step": 715 }, { "epoch": 0.3870967741935484, "grad_norm": 0.48952916264533997, "learning_rate": 7.690919627347292e-06, "loss": 0.4594, "step": 716 }, { "epoch": 0.3876374121463327, "grad_norm": 0.4403664469718933, "learning_rate": 7.682958645032265e-06, "loss": 0.458, "step": 717 }, { "epoch": 0.38817805009911693, "grad_norm": 0.3886347711086273, "learning_rate": 7.674988099435487e-06, "loss": 0.471, "step": 718 }, { "epoch": 0.38871868805190124, "grad_norm": 0.4349733889102936, "learning_rate": 7.667008018967598e-06, "loss": 0.4992, "step": 719 }, { "epoch": 0.38925932600468555, "grad_norm": 0.48294758796691895, "learning_rate": 7.65901843207323e-06, "loss": 0.487, "step": 720 }, { "epoch": 0.3897999639574698, "grad_norm": 0.4309777319431305, "learning_rate": 7.651019367230886e-06, "loss": 0.4614, "step": 721 }, { "epoch": 0.3903406019102541, "grad_norm": 0.4028390049934387, "learning_rate": 7.643010852952871e-06, "loss": 0.4942, "step": 722 }, { "epoch": 0.3908812398630384, "grad_norm": 0.4195624887943268, "learning_rate": 7.634992917785156e-06, "loss": 0.4883, "step": 723 }, { "epoch": 0.39142187781582266, "grad_norm": 0.38650834560394287, "learning_rate": 7.626965590307305e-06, "loss": 0.5055, "step": 724 }, { "epoch": 0.39196251576860697, "grad_norm": 0.4400707185268402, "learning_rate": 7.6189288991323505e-06, "loss": 0.4653, "step": 725 }, { "epoch": 0.3925031537213912, "grad_norm": 0.3416772484779358, "learning_rate": 7.610882872906709e-06, "loss": 0.4676, "step": 726 }, { "epoch": 0.3930437916741755, "grad_norm": 0.44608405232429504, "learning_rate": 7.602827540310065e-06, "loss": 0.4804, "step": 727 }, { "epoch": 0.39358442962695983, "grad_norm": 0.39614206552505493, "learning_rate": 7.594762930055281e-06, "loss": 0.4783, "step": 728 }, { "epoch": 0.3941250675797441, "grad_norm": 0.4028596580028534, "learning_rate": 7.586689070888284e-06, "loss": 0.4848, "step": 729 }, { "epoch": 0.3946657055325284, "grad_norm": 0.37662696838378906, "learning_rate": 7.578605991587974e-06, "loss": 0.4751, "step": 730 }, { "epoch": 0.3952063434853127, "grad_norm": 0.480343222618103, "learning_rate": 7.570513720966108e-06, "loss": 0.4842, "step": 731 }, { "epoch": 0.39574698143809695, "grad_norm": 0.43461012840270996, "learning_rate": 7.562412287867214e-06, "loss": 0.445, "step": 732 }, { "epoch": 0.39628761939088125, "grad_norm": 0.37615707516670227, "learning_rate": 7.5543017211684745e-06, "loss": 0.5055, "step": 733 }, { "epoch": 0.3968282573436655, "grad_norm": 0.3956639766693115, "learning_rate": 7.5461820497796255e-06, "loss": 0.4696, "step": 734 }, { "epoch": 0.3973688952964498, "grad_norm": 0.5006547570228577, "learning_rate": 7.5380533026428625e-06, "loss": 0.5176, "step": 735 }, { "epoch": 0.3979095332492341, "grad_norm": 0.37995660305023193, "learning_rate": 7.529915508732725e-06, "loss": 0.4962, "step": 736 }, { "epoch": 0.39845017120201837, "grad_norm": 0.41486889123916626, "learning_rate": 7.521768697056004e-06, "loss": 0.489, "step": 737 }, { "epoch": 0.3989908091548027, "grad_norm": 0.49411487579345703, "learning_rate": 7.513612896651632e-06, "loss": 0.4543, "step": 738 }, { "epoch": 0.399531447107587, "grad_norm": 0.4323795437812805, "learning_rate": 7.505448136590583e-06, "loss": 0.4707, "step": 739 }, { "epoch": 0.40007208506037123, "grad_norm": 0.3963027596473694, "learning_rate": 7.497274445975762e-06, "loss": 0.4838, "step": 740 }, { "epoch": 0.40061272301315554, "grad_norm": 0.3969276547431946, "learning_rate": 7.489091853941914e-06, "loss": 0.484, "step": 741 }, { "epoch": 0.4011533609659398, "grad_norm": 0.43876469135284424, "learning_rate": 7.480900389655508e-06, "loss": 0.464, "step": 742 }, { "epoch": 0.4016939989187241, "grad_norm": 0.39766424894332886, "learning_rate": 7.4727000823146386e-06, "loss": 0.4979, "step": 743 }, { "epoch": 0.4022346368715084, "grad_norm": 0.3988225758075714, "learning_rate": 7.464490961148921e-06, "loss": 0.4645, "step": 744 }, { "epoch": 0.40277527482429265, "grad_norm": 0.5512990951538086, "learning_rate": 7.4562730554193875e-06, "loss": 0.4922, "step": 745 }, { "epoch": 0.40331591277707696, "grad_norm": 0.4058268368244171, "learning_rate": 7.448046394418383e-06, "loss": 0.4796, "step": 746 }, { "epoch": 0.40385655072986126, "grad_norm": 0.3984447717666626, "learning_rate": 7.439811007469457e-06, "loss": 0.4694, "step": 747 }, { "epoch": 0.4043971886826455, "grad_norm": 0.41148069500923157, "learning_rate": 7.431566923927267e-06, "loss": 0.4757, "step": 748 }, { "epoch": 0.4049378266354298, "grad_norm": 0.429737389087677, "learning_rate": 7.423314173177467e-06, "loss": 0.4949, "step": 749 }, { "epoch": 0.40547846458821407, "grad_norm": 0.35777562856674194, "learning_rate": 7.415052784636603e-06, "loss": 0.4704, "step": 750 }, { "epoch": 0.4060191025409984, "grad_norm": 0.40855300426483154, "learning_rate": 7.406782787752011e-06, "loss": 0.4648, "step": 751 }, { "epoch": 0.4065597404937827, "grad_norm": 0.4542819857597351, "learning_rate": 7.398504212001714e-06, "loss": 0.4896, "step": 752 }, { "epoch": 0.40710037844656694, "grad_norm": 0.3928791880607605, "learning_rate": 7.390217086894309e-06, "loss": 0.5082, "step": 753 }, { "epoch": 0.40764101639935124, "grad_norm": 0.39957326650619507, "learning_rate": 7.3819214419688725e-06, "loss": 0.4752, "step": 754 }, { "epoch": 0.4081816543521355, "grad_norm": 0.4252714216709137, "learning_rate": 7.373617306794844e-06, "loss": 0.4828, "step": 755 }, { "epoch": 0.4087222923049198, "grad_norm": 0.45451775193214417, "learning_rate": 7.365304710971928e-06, "loss": 0.4776, "step": 756 }, { "epoch": 0.4092629302577041, "grad_norm": 0.42666348814964294, "learning_rate": 7.3569836841299905e-06, "loss": 0.4805, "step": 757 }, { "epoch": 0.40980356821048836, "grad_norm": 0.44532275199890137, "learning_rate": 7.348654255928941e-06, "loss": 0.4683, "step": 758 }, { "epoch": 0.41034420616327266, "grad_norm": 0.540521502494812, "learning_rate": 7.340316456058644e-06, "loss": 0.4717, "step": 759 }, { "epoch": 0.41088484411605697, "grad_norm": 0.4525167942047119, "learning_rate": 7.331970314238799e-06, "loss": 0.4782, "step": 760 }, { "epoch": 0.4114254820688412, "grad_norm": 0.44031041860580444, "learning_rate": 7.323615860218844e-06, "loss": 0.4947, "step": 761 }, { "epoch": 0.4119661200216255, "grad_norm": 0.5076286792755127, "learning_rate": 7.31525312377784e-06, "loss": 0.4638, "step": 762 }, { "epoch": 0.4125067579744098, "grad_norm": 0.4349268674850464, "learning_rate": 7.306882134724376e-06, "loss": 0.4782, "step": 763 }, { "epoch": 0.4130473959271941, "grad_norm": 0.44942405819892883, "learning_rate": 7.298502922896453e-06, "loss": 0.5115, "step": 764 }, { "epoch": 0.4135880338799784, "grad_norm": 0.4894666075706482, "learning_rate": 7.290115518161385e-06, "loss": 0.5035, "step": 765 }, { "epoch": 0.41412867183276264, "grad_norm": 0.3696083128452301, "learning_rate": 7.281719950415686e-06, "loss": 0.4537, "step": 766 }, { "epoch": 0.41466930978554695, "grad_norm": 0.40760135650634766, "learning_rate": 7.273316249584969e-06, "loss": 0.4418, "step": 767 }, { "epoch": 0.41520994773833125, "grad_norm": 0.47130587697029114, "learning_rate": 7.2649044456238334e-06, "loss": 0.4972, "step": 768 }, { "epoch": 0.4157505856911155, "grad_norm": 0.4279153048992157, "learning_rate": 7.256484568515769e-06, "loss": 0.4963, "step": 769 }, { "epoch": 0.4162912236438998, "grad_norm": 0.4578527808189392, "learning_rate": 7.248056648273034e-06, "loss": 0.4888, "step": 770 }, { "epoch": 0.41683186159668406, "grad_norm": 0.42948973178863525, "learning_rate": 7.239620714936561e-06, "loss": 0.4877, "step": 771 }, { "epoch": 0.41737249954946837, "grad_norm": 0.44241803884506226, "learning_rate": 7.231176798575843e-06, "loss": 0.4791, "step": 772 }, { "epoch": 0.4179131375022527, "grad_norm": 0.48078736662864685, "learning_rate": 7.22272492928883e-06, "loss": 0.4744, "step": 773 }, { "epoch": 0.4184537754550369, "grad_norm": 0.4113394618034363, "learning_rate": 7.214265137201817e-06, "loss": 0.4807, "step": 774 }, { "epoch": 0.41899441340782123, "grad_norm": 0.48756808042526245, "learning_rate": 7.205797452469341e-06, "loss": 0.5125, "step": 775 }, { "epoch": 0.41953505136060554, "grad_norm": 0.47686880826950073, "learning_rate": 7.197321905274071e-06, "loss": 0.4727, "step": 776 }, { "epoch": 0.4200756893133898, "grad_norm": 0.42904406785964966, "learning_rate": 7.188838525826702e-06, "loss": 0.4668, "step": 777 }, { "epoch": 0.4206163272661741, "grad_norm": 0.4416086971759796, "learning_rate": 7.18034734436585e-06, "loss": 0.4768, "step": 778 }, { "epoch": 0.42115696521895835, "grad_norm": 0.4766569137573242, "learning_rate": 7.171848391157935e-06, "loss": 0.4908, "step": 779 }, { "epoch": 0.42169760317174265, "grad_norm": 0.39499804377555847, "learning_rate": 7.163341696497084e-06, "loss": 0.4706, "step": 780 }, { "epoch": 0.42223824112452696, "grad_norm": 0.4001219868659973, "learning_rate": 7.154827290705012e-06, "loss": 0.4891, "step": 781 }, { "epoch": 0.4227788790773112, "grad_norm": 0.4044910669326782, "learning_rate": 7.146305204130928e-06, "loss": 0.4542, "step": 782 }, { "epoch": 0.4233195170300955, "grad_norm": 0.3734391927719116, "learning_rate": 7.137775467151411e-06, "loss": 0.4741, "step": 783 }, { "epoch": 0.4238601549828798, "grad_norm": 0.4248086214065552, "learning_rate": 7.129238110170315e-06, "loss": 0.4902, "step": 784 }, { "epoch": 0.4244007929356641, "grad_norm": 0.3903302550315857, "learning_rate": 7.120693163618656e-06, "loss": 0.4493, "step": 785 }, { "epoch": 0.4249414308884484, "grad_norm": 0.412576824426651, "learning_rate": 7.112140657954495e-06, "loss": 0.4939, "step": 786 }, { "epoch": 0.42548206884123263, "grad_norm": 0.39094048738479614, "learning_rate": 7.103580623662845e-06, "loss": 0.4403, "step": 787 }, { "epoch": 0.42602270679401694, "grad_norm": 0.4399993419647217, "learning_rate": 7.0950130912555515e-06, "loss": 0.4582, "step": 788 }, { "epoch": 0.42656334474680124, "grad_norm": 0.41360506415367126, "learning_rate": 7.086438091271186e-06, "loss": 0.4824, "step": 789 }, { "epoch": 0.4271039826995855, "grad_norm": 0.44524022936820984, "learning_rate": 7.077855654274939e-06, "loss": 0.4711, "step": 790 }, { "epoch": 0.4276446206523698, "grad_norm": 0.4516412913799286, "learning_rate": 7.069265810858509e-06, "loss": 0.4893, "step": 791 }, { "epoch": 0.4281852586051541, "grad_norm": 0.4142738878726959, "learning_rate": 7.0606685916399945e-06, "loss": 0.4555, "step": 792 }, { "epoch": 0.42872589655793836, "grad_norm": 0.4141763746738434, "learning_rate": 7.052064027263785e-06, "loss": 0.4387, "step": 793 }, { "epoch": 0.42926653451072266, "grad_norm": 0.47192102670669556, "learning_rate": 7.043452148400452e-06, "loss": 0.4812, "step": 794 }, { "epoch": 0.4298071724635069, "grad_norm": 0.43239060044288635, "learning_rate": 7.034832985746638e-06, "loss": 0.503, "step": 795 }, { "epoch": 0.4303478104162912, "grad_norm": 0.46152111887931824, "learning_rate": 7.026206570024949e-06, "loss": 0.4824, "step": 796 }, { "epoch": 0.4308884483690755, "grad_norm": 0.4815097451210022, "learning_rate": 7.017572931983846e-06, "loss": 0.4558, "step": 797 }, { "epoch": 0.4314290863218598, "grad_norm": 0.4330691397190094, "learning_rate": 7.00893210239753e-06, "loss": 0.4493, "step": 798 }, { "epoch": 0.4319697242746441, "grad_norm": 0.44782209396362305, "learning_rate": 7.000284112065836e-06, "loss": 0.4737, "step": 799 }, { "epoch": 0.4325103622274284, "grad_norm": 0.43315109610557556, "learning_rate": 6.9916289918141265e-06, "loss": 0.4533, "step": 800 }, { "epoch": 0.43305100018021264, "grad_norm": 0.42613768577575684, "learning_rate": 6.982966772493176e-06, "loss": 0.4517, "step": 801 }, { "epoch": 0.43359163813299695, "grad_norm": 0.39703595638275146, "learning_rate": 6.974297484979066e-06, "loss": 0.4651, "step": 802 }, { "epoch": 0.4341322760857812, "grad_norm": 0.4882420599460602, "learning_rate": 6.965621160173066e-06, "loss": 0.4816, "step": 803 }, { "epoch": 0.4346729140385655, "grad_norm": 0.4013866186141968, "learning_rate": 6.9569378290015375e-06, "loss": 0.4545, "step": 804 }, { "epoch": 0.4352135519913498, "grad_norm": 0.41527172923088074, "learning_rate": 6.948247522415811e-06, "loss": 0.4567, "step": 805 }, { "epoch": 0.43575418994413406, "grad_norm": 0.4296116530895233, "learning_rate": 6.939550271392079e-06, "loss": 0.4499, "step": 806 }, { "epoch": 0.43629482789691837, "grad_norm": 0.4318053722381592, "learning_rate": 6.930846106931292e-06, "loss": 0.4873, "step": 807 }, { "epoch": 0.4368354658497027, "grad_norm": 0.39466047286987305, "learning_rate": 6.922135060059043e-06, "loss": 0.4705, "step": 808 }, { "epoch": 0.4373761038024869, "grad_norm": 0.4394930601119995, "learning_rate": 6.913417161825449e-06, "loss": 0.4816, "step": 809 }, { "epoch": 0.43791674175527123, "grad_norm": 0.4361076354980469, "learning_rate": 6.904692443305059e-06, "loss": 0.496, "step": 810 }, { "epoch": 0.4384573797080555, "grad_norm": 0.4172879755496979, "learning_rate": 6.895960935596728e-06, "loss": 0.49, "step": 811 }, { "epoch": 0.4389980176608398, "grad_norm": 0.4652804732322693, "learning_rate": 6.8872226698235065e-06, "loss": 0.468, "step": 812 }, { "epoch": 0.4395386556136241, "grad_norm": 0.39598575234413147, "learning_rate": 6.8784776771325426e-06, "loss": 0.4487, "step": 813 }, { "epoch": 0.44007929356640835, "grad_norm": 0.5070300102233887, "learning_rate": 6.869725988694955e-06, "loss": 0.453, "step": 814 }, { "epoch": 0.44061993151919265, "grad_norm": 0.4058806598186493, "learning_rate": 6.860967635705732e-06, "loss": 0.4652, "step": 815 }, { "epoch": 0.44116056947197696, "grad_norm": 0.4511767029762268, "learning_rate": 6.8522026493836144e-06, "loss": 0.4801, "step": 816 }, { "epoch": 0.4417012074247612, "grad_norm": 0.435837984085083, "learning_rate": 6.843431060970995e-06, "loss": 0.4772, "step": 817 }, { "epoch": 0.4422418453775455, "grad_norm": 0.4215855002403259, "learning_rate": 6.834652901733789e-06, "loss": 0.4633, "step": 818 }, { "epoch": 0.44278248333032977, "grad_norm": 0.43643590807914734, "learning_rate": 6.825868202961343e-06, "loss": 0.4891, "step": 819 }, { "epoch": 0.4433231212831141, "grad_norm": 0.42145484685897827, "learning_rate": 6.8170769959663045e-06, "loss": 0.4537, "step": 820 }, { "epoch": 0.4438637592358984, "grad_norm": 0.44420287013053894, "learning_rate": 6.808279312084525e-06, "loss": 0.4894, "step": 821 }, { "epoch": 0.44440439718868263, "grad_norm": 0.4486287534236908, "learning_rate": 6.799475182674942e-06, "loss": 0.4629, "step": 822 }, { "epoch": 0.44494503514146694, "grad_norm": 0.4760923385620117, "learning_rate": 6.790664639119464e-06, "loss": 0.4876, "step": 823 }, { "epoch": 0.4454856730942512, "grad_norm": 0.46989408135414124, "learning_rate": 6.781847712822869e-06, "loss": 0.4719, "step": 824 }, { "epoch": 0.4460263110470355, "grad_norm": 0.46125340461730957, "learning_rate": 6.773024435212678e-06, "loss": 0.4733, "step": 825 }, { "epoch": 0.4465669489998198, "grad_norm": 0.45252951979637146, "learning_rate": 6.76419483773906e-06, "loss": 0.4937, "step": 826 }, { "epoch": 0.44710758695260405, "grad_norm": 0.40651631355285645, "learning_rate": 6.755358951874701e-06, "loss": 0.4674, "step": 827 }, { "epoch": 0.44764822490538836, "grad_norm": 0.5222921967506409, "learning_rate": 6.7465168091147094e-06, "loss": 0.4767, "step": 828 }, { "epoch": 0.44818886285817267, "grad_norm": 0.34470462799072266, "learning_rate": 6.737668440976494e-06, "loss": 0.4687, "step": 829 }, { "epoch": 0.4487295008109569, "grad_norm": 0.5007984042167664, "learning_rate": 6.728813878999652e-06, "loss": 0.5117, "step": 830 }, { "epoch": 0.4492701387637412, "grad_norm": 0.43586406111717224, "learning_rate": 6.719953154745857e-06, "loss": 0.4643, "step": 831 }, { "epoch": 0.4498107767165255, "grad_norm": 0.3484852910041809, "learning_rate": 6.7110862997987525e-06, "loss": 0.4711, "step": 832 }, { "epoch": 0.4503514146693098, "grad_norm": 0.5028504133224487, "learning_rate": 6.70221334576383e-06, "loss": 0.4679, "step": 833 }, { "epoch": 0.4508920526220941, "grad_norm": 0.4104493260383606, "learning_rate": 6.693334324268328e-06, "loss": 0.4639, "step": 834 }, { "epoch": 0.45143269057487834, "grad_norm": 0.40598517656326294, "learning_rate": 6.684449266961101e-06, "loss": 0.4794, "step": 835 }, { "epoch": 0.45197332852766264, "grad_norm": 0.3934710621833801, "learning_rate": 6.675558205512527e-06, "loss": 0.4734, "step": 836 }, { "epoch": 0.45251396648044695, "grad_norm": 0.39573705196380615, "learning_rate": 6.666661171614382e-06, "loss": 0.4828, "step": 837 }, { "epoch": 0.4530546044332312, "grad_norm": 0.39194008708000183, "learning_rate": 6.657758196979732e-06, "loss": 0.4396, "step": 838 }, { "epoch": 0.4535952423860155, "grad_norm": 0.38524988293647766, "learning_rate": 6.648849313342816e-06, "loss": 0.5242, "step": 839 }, { "epoch": 0.45413588033879976, "grad_norm": 0.4037633538246155, "learning_rate": 6.6399345524589366e-06, "loss": 0.4575, "step": 840 }, { "epoch": 0.45467651829158406, "grad_norm": 0.3950909376144409, "learning_rate": 6.631013946104348e-06, "loss": 0.4784, "step": 841 }, { "epoch": 0.45521715624436837, "grad_norm": 0.3955569565296173, "learning_rate": 6.622087526076135e-06, "loss": 0.4869, "step": 842 }, { "epoch": 0.4557577941971526, "grad_norm": 0.425361692905426, "learning_rate": 6.613155324192111e-06, "loss": 0.4649, "step": 843 }, { "epoch": 0.45629843214993693, "grad_norm": 0.41891539096832275, "learning_rate": 6.604217372290693e-06, "loss": 0.5171, "step": 844 }, { "epoch": 0.45683907010272123, "grad_norm": 0.41283082962036133, "learning_rate": 6.5952737022308e-06, "loss": 0.4919, "step": 845 }, { "epoch": 0.4573797080555055, "grad_norm": 0.49672916531562805, "learning_rate": 6.586324345891727e-06, "loss": 0.472, "step": 846 }, { "epoch": 0.4579203460082898, "grad_norm": 0.4132554829120636, "learning_rate": 6.57736933517304e-06, "loss": 0.4787, "step": 847 }, { "epoch": 0.45846098396107404, "grad_norm": 0.4273938834667206, "learning_rate": 6.568408701994459e-06, "loss": 0.4754, "step": 848 }, { "epoch": 0.45900162191385835, "grad_norm": 0.47689104080200195, "learning_rate": 6.559442478295745e-06, "loss": 0.4945, "step": 849 }, { "epoch": 0.45954225986664266, "grad_norm": 0.4251175820827484, "learning_rate": 6.550470696036591e-06, "loss": 0.4706, "step": 850 }, { "epoch": 0.4600828978194269, "grad_norm": 0.5029627680778503, "learning_rate": 6.541493387196496e-06, "loss": 0.464, "step": 851 }, { "epoch": 0.4606235357722112, "grad_norm": 0.43976134061813354, "learning_rate": 6.5325105837746604e-06, "loss": 0.4781, "step": 852 }, { "epoch": 0.4611641737249955, "grad_norm": 0.45708853006362915, "learning_rate": 6.523522317789874e-06, "loss": 0.4608, "step": 853 }, { "epoch": 0.46170481167777977, "grad_norm": 0.47303131222724915, "learning_rate": 6.514528621280391e-06, "loss": 0.5097, "step": 854 }, { "epoch": 0.4622454496305641, "grad_norm": 0.4358423948287964, "learning_rate": 6.5055295263038286e-06, "loss": 0.4851, "step": 855 }, { "epoch": 0.4627860875833483, "grad_norm": 0.41981184482574463, "learning_rate": 6.496525064937042e-06, "loss": 0.5024, "step": 856 }, { "epoch": 0.46332672553613263, "grad_norm": 0.4504159688949585, "learning_rate": 6.487515269276015e-06, "loss": 0.468, "step": 857 }, { "epoch": 0.46386736348891694, "grad_norm": 0.5347965955734253, "learning_rate": 6.478500171435751e-06, "loss": 0.4995, "step": 858 }, { "epoch": 0.4644080014417012, "grad_norm": 0.39357173442840576, "learning_rate": 6.469479803550144e-06, "loss": 0.4734, "step": 859 }, { "epoch": 0.4649486393944855, "grad_norm": 0.38938194513320923, "learning_rate": 6.460454197771881e-06, "loss": 0.5136, "step": 860 }, { "epoch": 0.4654892773472698, "grad_norm": 0.47099724411964417, "learning_rate": 6.451423386272312e-06, "loss": 0.4934, "step": 861 }, { "epoch": 0.46602991530005405, "grad_norm": 0.4051066040992737, "learning_rate": 6.442387401241349e-06, "loss": 0.4642, "step": 862 }, { "epoch": 0.46657055325283836, "grad_norm": 0.43694064021110535, "learning_rate": 6.433346274887341e-06, "loss": 0.484, "step": 863 }, { "epoch": 0.4671111912056226, "grad_norm": 0.42491498589515686, "learning_rate": 6.4243000394369626e-06, "loss": 0.4813, "step": 864 }, { "epoch": 0.4676518291584069, "grad_norm": 0.40995004773139954, "learning_rate": 6.415248727135103e-06, "loss": 0.446, "step": 865 }, { "epoch": 0.4681924671111912, "grad_norm": 0.440571129322052, "learning_rate": 6.406192370244742e-06, "loss": 0.4958, "step": 866 }, { "epoch": 0.4687331050639755, "grad_norm": 0.43054115772247314, "learning_rate": 6.397131001046849e-06, "loss": 0.4953, "step": 867 }, { "epoch": 0.4692737430167598, "grad_norm": 0.4248604476451874, "learning_rate": 6.38806465184025e-06, "loss": 0.4782, "step": 868 }, { "epoch": 0.4698143809695441, "grad_norm": 0.3913900852203369, "learning_rate": 6.378993354941529e-06, "loss": 0.4648, "step": 869 }, { "epoch": 0.47035501892232834, "grad_norm": 0.41105592250823975, "learning_rate": 6.3699171426849036e-06, "loss": 0.4961, "step": 870 }, { "epoch": 0.47089565687511264, "grad_norm": 0.3705733120441437, "learning_rate": 6.3608360474221106e-06, "loss": 0.4559, "step": 871 }, { "epoch": 0.4714362948278969, "grad_norm": 0.3702208995819092, "learning_rate": 6.3517501015222924e-06, "loss": 0.4727, "step": 872 }, { "epoch": 0.4719769327806812, "grad_norm": 0.3946548104286194, "learning_rate": 6.342659337371884e-06, "loss": 0.4848, "step": 873 }, { "epoch": 0.4725175707334655, "grad_norm": 0.3584488034248352, "learning_rate": 6.333563787374493e-06, "loss": 0.458, "step": 874 }, { "epoch": 0.47305820868624976, "grad_norm": 0.36788806319236755, "learning_rate": 6.3244634839507834e-06, "loss": 0.4674, "step": 875 }, { "epoch": 0.47359884663903407, "grad_norm": 0.3621140420436859, "learning_rate": 6.315358459538367e-06, "loss": 0.4718, "step": 876 }, { "epoch": 0.47413948459181837, "grad_norm": 0.375000923871994, "learning_rate": 6.3062487465916825e-06, "loss": 0.4812, "step": 877 }, { "epoch": 0.4746801225446026, "grad_norm": 0.44487372040748596, "learning_rate": 6.297134377581877e-06, "loss": 0.4746, "step": 878 }, { "epoch": 0.47522076049738693, "grad_norm": 0.3941539525985718, "learning_rate": 6.2880153849966966e-06, "loss": 0.4762, "step": 879 }, { "epoch": 0.4757613984501712, "grad_norm": 0.4312276244163513, "learning_rate": 6.2788918013403695e-06, "loss": 0.4952, "step": 880 }, { "epoch": 0.4763020364029555, "grad_norm": 0.41254860162734985, "learning_rate": 6.269763659133486e-06, "loss": 0.4963, "step": 881 }, { "epoch": 0.4768426743557398, "grad_norm": 0.40565529465675354, "learning_rate": 6.2606309909128845e-06, "loss": 0.4787, "step": 882 }, { "epoch": 0.47738331230852404, "grad_norm": 0.4065866470336914, "learning_rate": 6.251493829231539e-06, "loss": 0.4723, "step": 883 }, { "epoch": 0.47792395026130835, "grad_norm": 0.43057599663734436, "learning_rate": 6.24235220665844e-06, "loss": 0.4792, "step": 884 }, { "epoch": 0.47846458821409266, "grad_norm": 0.3824271857738495, "learning_rate": 6.233206155778476e-06, "loss": 0.4817, "step": 885 }, { "epoch": 0.4790052261668769, "grad_norm": 0.46750858426094055, "learning_rate": 6.224055709192323e-06, "loss": 0.4613, "step": 886 }, { "epoch": 0.4795458641196612, "grad_norm": 0.45582181215286255, "learning_rate": 6.21490089951632e-06, "loss": 0.4958, "step": 887 }, { "epoch": 0.48008650207244546, "grad_norm": 0.4065682291984558, "learning_rate": 6.205741759382365e-06, "loss": 0.4928, "step": 888 }, { "epoch": 0.48062714002522977, "grad_norm": 0.4958328604698181, "learning_rate": 6.1965783214377895e-06, "loss": 0.4648, "step": 889 }, { "epoch": 0.4811677779780141, "grad_norm": 0.38294878602027893, "learning_rate": 6.187410618345241e-06, "loss": 0.4905, "step": 890 }, { "epoch": 0.48170841593079833, "grad_norm": 0.471400648355484, "learning_rate": 6.178238682782574e-06, "loss": 0.4835, "step": 891 }, { "epoch": 0.48224905388358263, "grad_norm": 0.5207462310791016, "learning_rate": 6.169062547442724e-06, "loss": 0.4912, "step": 892 }, { "epoch": 0.4827896918363669, "grad_norm": 0.4611525535583496, "learning_rate": 6.159882245033606e-06, "loss": 0.5036, "step": 893 }, { "epoch": 0.4833303297891512, "grad_norm": 0.4598880708217621, "learning_rate": 6.150697808277979e-06, "loss": 0.4472, "step": 894 }, { "epoch": 0.4838709677419355, "grad_norm": 0.4637363851070404, "learning_rate": 6.141509269913343e-06, "loss": 0.4849, "step": 895 }, { "epoch": 0.48441160569471975, "grad_norm": 0.5085048675537109, "learning_rate": 6.132316662691815e-06, "loss": 0.467, "step": 896 }, { "epoch": 0.48495224364750406, "grad_norm": 0.4294266104698181, "learning_rate": 6.123120019380021e-06, "loss": 0.4781, "step": 897 }, { "epoch": 0.48549288160028836, "grad_norm": 0.47353968024253845, "learning_rate": 6.1139193727589665e-06, "loss": 0.4969, "step": 898 }, { "epoch": 0.4860335195530726, "grad_norm": 0.5444623827934265, "learning_rate": 6.1047147556239325e-06, "loss": 0.4863, "step": 899 }, { "epoch": 0.4865741575058569, "grad_norm": 0.39669960737228394, "learning_rate": 6.095506200784349e-06, "loss": 0.485, "step": 900 }, { "epoch": 0.48711479545864117, "grad_norm": 0.5122884511947632, "learning_rate": 6.086293741063685e-06, "loss": 0.4742, "step": 901 }, { "epoch": 0.4876554334114255, "grad_norm": 0.4148077070713043, "learning_rate": 6.077077409299323e-06, "loss": 0.4846, "step": 902 }, { "epoch": 0.4881960713642098, "grad_norm": 0.3915378153324127, "learning_rate": 6.067857238342451e-06, "loss": 0.4665, "step": 903 }, { "epoch": 0.48873670931699403, "grad_norm": 0.47468093037605286, "learning_rate": 6.058633261057945e-06, "loss": 0.5034, "step": 904 }, { "epoch": 0.48927734726977834, "grad_norm": 0.3712773323059082, "learning_rate": 6.049405510324237e-06, "loss": 0.4721, "step": 905 }, { "epoch": 0.48981798522256265, "grad_norm": 0.4807882010936737, "learning_rate": 6.040174019033226e-06, "loss": 0.4779, "step": 906 }, { "epoch": 0.4903586231753469, "grad_norm": 0.4945826530456543, "learning_rate": 6.030938820090128e-06, "loss": 0.4796, "step": 907 }, { "epoch": 0.4908992611281312, "grad_norm": 0.41042837500572205, "learning_rate": 6.021699946413384e-06, "loss": 0.491, "step": 908 }, { "epoch": 0.49143989908091545, "grad_norm": 0.5063190460205078, "learning_rate": 6.012457430934532e-06, "loss": 0.464, "step": 909 }, { "epoch": 0.49198053703369976, "grad_norm": 0.4003697335720062, "learning_rate": 6.003211306598089e-06, "loss": 0.4639, "step": 910 }, { "epoch": 0.49252117498648407, "grad_norm": 0.4417046010494232, "learning_rate": 5.993961606361436e-06, "loss": 0.4691, "step": 911 }, { "epoch": 0.4930618129392683, "grad_norm": 0.45925286412239075, "learning_rate": 5.984708363194702e-06, "loss": 0.4819, "step": 912 }, { "epoch": 0.4936024508920526, "grad_norm": 0.44486504793167114, "learning_rate": 5.975451610080643e-06, "loss": 0.5092, "step": 913 }, { "epoch": 0.49414308884483693, "grad_norm": 0.409686803817749, "learning_rate": 5.966191380014524e-06, "loss": 0.4812, "step": 914 }, { "epoch": 0.4946837267976212, "grad_norm": 0.4472230076789856, "learning_rate": 5.956927706004012e-06, "loss": 0.4983, "step": 915 }, { "epoch": 0.4952243647504055, "grad_norm": 0.5017169117927551, "learning_rate": 5.947660621069038e-06, "loss": 0.474, "step": 916 }, { "epoch": 0.49576500270318974, "grad_norm": 0.4999132752418518, "learning_rate": 5.938390158241701e-06, "loss": 0.4736, "step": 917 }, { "epoch": 0.49630564065597405, "grad_norm": 0.42812034487724304, "learning_rate": 5.929116350566132e-06, "loss": 0.4802, "step": 918 }, { "epoch": 0.49684627860875835, "grad_norm": 0.3980429470539093, "learning_rate": 5.919839231098392e-06, "loss": 0.4909, "step": 919 }, { "epoch": 0.4973869165615426, "grad_norm": 0.4948299527168274, "learning_rate": 5.910558832906341e-06, "loss": 0.4603, "step": 920 }, { "epoch": 0.4979275545143269, "grad_norm": 0.3767414689064026, "learning_rate": 5.90127518906953e-06, "loss": 0.4828, "step": 921 }, { "epoch": 0.4984681924671112, "grad_norm": 0.4088006019592285, "learning_rate": 5.891988332679075e-06, "loss": 0.4912, "step": 922 }, { "epoch": 0.49900883041989547, "grad_norm": 0.40145042538642883, "learning_rate": 5.882698296837549e-06, "loss": 0.4714, "step": 923 }, { "epoch": 0.4995494683726798, "grad_norm": 0.3793385922908783, "learning_rate": 5.87340511465885e-06, "loss": 0.4727, "step": 924 }, { "epoch": 0.500090106325464, "grad_norm": 0.40813931822776794, "learning_rate": 5.864108819268098e-06, "loss": 0.5169, "step": 925 }, { "epoch": 0.5006307442782484, "grad_norm": 0.40652281045913696, "learning_rate": 5.8548094438015065e-06, "loss": 0.4698, "step": 926 }, { "epoch": 0.5011713822310326, "grad_norm": 0.37487828731536865, "learning_rate": 5.8455070214062685e-06, "loss": 0.4714, "step": 927 }, { "epoch": 0.5017120201838169, "grad_norm": 0.36619752645492554, "learning_rate": 5.8362015852404365e-06, "loss": 0.4425, "step": 928 }, { "epoch": 0.5022526581366011, "grad_norm": 0.4049976170063019, "learning_rate": 5.826893168472807e-06, "loss": 0.4801, "step": 929 }, { "epoch": 0.5027932960893855, "grad_norm": 0.36154064536094666, "learning_rate": 5.8175818042828e-06, "loss": 0.4694, "step": 930 }, { "epoch": 0.5033339340421698, "grad_norm": 0.3709295690059662, "learning_rate": 5.808267525860343e-06, "loss": 0.4757, "step": 931 }, { "epoch": 0.503874571994954, "grad_norm": 0.4130777418613434, "learning_rate": 5.798950366405748e-06, "loss": 0.4681, "step": 932 }, { "epoch": 0.5044152099477384, "grad_norm": 0.42704373598098755, "learning_rate": 5.789630359129599e-06, "loss": 0.5039, "step": 933 }, { "epoch": 0.5049558479005226, "grad_norm": 0.398743212223053, "learning_rate": 5.780307537252629e-06, "loss": 0.4582, "step": 934 }, { "epoch": 0.5054964858533069, "grad_norm": 0.37964576482772827, "learning_rate": 5.770981934005606e-06, "loss": 0.4758, "step": 935 }, { "epoch": 0.5060371238060912, "grad_norm": 0.4298136234283447, "learning_rate": 5.76165358262921e-06, "loss": 0.4637, "step": 936 }, { "epoch": 0.5065777617588755, "grad_norm": 0.3951083719730377, "learning_rate": 5.752322516373916e-06, "loss": 0.4877, "step": 937 }, { "epoch": 0.5071183997116597, "grad_norm": 0.4326460063457489, "learning_rate": 5.742988768499879e-06, "loss": 0.4555, "step": 938 }, { "epoch": 0.5076590376644441, "grad_norm": 0.3702709376811981, "learning_rate": 5.733652372276809e-06, "loss": 0.495, "step": 939 }, { "epoch": 0.5081996756172283, "grad_norm": 0.41388028860092163, "learning_rate": 5.724313360983859e-06, "loss": 0.4855, "step": 940 }, { "epoch": 0.5087403135700126, "grad_norm": 0.40203750133514404, "learning_rate": 5.7149717679095026e-06, "loss": 0.4885, "step": 941 }, { "epoch": 0.509280951522797, "grad_norm": 0.39383724331855774, "learning_rate": 5.705627626351415e-06, "loss": 0.4634, "step": 942 }, { "epoch": 0.5098215894755812, "grad_norm": 0.37974879145622253, "learning_rate": 5.6962809696163536e-06, "loss": 0.4613, "step": 943 }, { "epoch": 0.5103622274283655, "grad_norm": 0.44113990664482117, "learning_rate": 5.686931831020044e-06, "loss": 0.4702, "step": 944 }, { "epoch": 0.5109028653811497, "grad_norm": 0.3676075339317322, "learning_rate": 5.6775802438870596e-06, "loss": 0.4384, "step": 945 }, { "epoch": 0.5114435033339341, "grad_norm": 0.3680996894836426, "learning_rate": 5.668226241550698e-06, "loss": 0.4823, "step": 946 }, { "epoch": 0.5119841412867183, "grad_norm": 0.37592819333076477, "learning_rate": 5.658869857352866e-06, "loss": 0.4891, "step": 947 }, { "epoch": 0.5125247792395026, "grad_norm": 0.38988274335861206, "learning_rate": 5.649511124643962e-06, "loss": 0.5056, "step": 948 }, { "epoch": 0.5130654171922869, "grad_norm": 0.3609144687652588, "learning_rate": 5.640150076782755e-06, "loss": 0.4541, "step": 949 }, { "epoch": 0.5136060551450712, "grad_norm": 0.41311243176460266, "learning_rate": 5.630786747136269e-06, "loss": 0.4958, "step": 950 }, { "epoch": 0.5141466930978554, "grad_norm": 0.3682233393192291, "learning_rate": 5.621421169079655e-06, "loss": 0.4919, "step": 951 }, { "epoch": 0.5146873310506398, "grad_norm": 0.36114174127578735, "learning_rate": 5.612053375996082e-06, "loss": 0.4825, "step": 952 }, { "epoch": 0.515227969003424, "grad_norm": 0.40716037154197693, "learning_rate": 5.6026834012766155e-06, "loss": 0.4529, "step": 953 }, { "epoch": 0.5157686069562083, "grad_norm": 0.40229591727256775, "learning_rate": 5.593311278320097e-06, "loss": 0.4857, "step": 954 }, { "epoch": 0.5163092449089927, "grad_norm": 0.3767363727092743, "learning_rate": 5.583937040533023e-06, "loss": 0.4939, "step": 955 }, { "epoch": 0.5168498828617769, "grad_norm": 0.3691665232181549, "learning_rate": 5.574560721329431e-06, "loss": 0.4705, "step": 956 }, { "epoch": 0.5173905208145612, "grad_norm": 0.38738739490509033, "learning_rate": 5.565182354130776e-06, "loss": 0.4676, "step": 957 }, { "epoch": 0.5179311587673455, "grad_norm": 0.370933473110199, "learning_rate": 5.555801972365812e-06, "loss": 0.4713, "step": 958 }, { "epoch": 0.5184717967201298, "grad_norm": 0.38803374767303467, "learning_rate": 5.5464196094704745e-06, "loss": 0.4836, "step": 959 }, { "epoch": 0.519012434672914, "grad_norm": 0.396415114402771, "learning_rate": 5.537035298887764e-06, "loss": 0.4738, "step": 960 }, { "epoch": 0.5195530726256983, "grad_norm": 0.4398602247238159, "learning_rate": 5.527649074067618e-06, "loss": 0.5144, "step": 961 }, { "epoch": 0.5200937105784826, "grad_norm": 0.3995663821697235, "learning_rate": 5.5182609684668024e-06, "loss": 0.5048, "step": 962 }, { "epoch": 0.5206343485312669, "grad_norm": 0.41245123744010925, "learning_rate": 5.508871015548781e-06, "loss": 0.4697, "step": 963 }, { "epoch": 0.5211749864840511, "grad_norm": 0.3744891285896301, "learning_rate": 5.49947924878361e-06, "loss": 0.4669, "step": 964 }, { "epoch": 0.5217156244368355, "grad_norm": 0.42122045159339905, "learning_rate": 5.490085701647805e-06, "loss": 0.4859, "step": 965 }, { "epoch": 0.5222562623896198, "grad_norm": 0.3975967466831207, "learning_rate": 5.480690407624227e-06, "loss": 0.4549, "step": 966 }, { "epoch": 0.522796900342404, "grad_norm": 0.41702109575271606, "learning_rate": 5.47129340020197e-06, "loss": 0.4944, "step": 967 }, { "epoch": 0.5233375382951884, "grad_norm": 0.4080027937889099, "learning_rate": 5.461894712876228e-06, "loss": 0.4705, "step": 968 }, { "epoch": 0.5238781762479726, "grad_norm": 0.42316797375679016, "learning_rate": 5.45249437914819e-06, "loss": 0.4567, "step": 969 }, { "epoch": 0.5244188142007569, "grad_norm": 0.3980869948863983, "learning_rate": 5.443092432524906e-06, "loss": 0.4738, "step": 970 }, { "epoch": 0.5249594521535412, "grad_norm": 0.42186301946640015, "learning_rate": 5.433688906519183e-06, "loss": 0.4854, "step": 971 }, { "epoch": 0.5255000901063255, "grad_norm": 0.3881399631500244, "learning_rate": 5.424283834649451e-06, "loss": 0.4716, "step": 972 }, { "epoch": 0.5260407280591097, "grad_norm": 0.4174569249153137, "learning_rate": 5.414877250439654e-06, "loss": 0.4897, "step": 973 }, { "epoch": 0.526581366011894, "grad_norm": 0.44264698028564453, "learning_rate": 5.405469187419126e-06, "loss": 0.4815, "step": 974 }, { "epoch": 0.5271220039646783, "grad_norm": 0.32237735390663147, "learning_rate": 5.39605967912247e-06, "loss": 0.4508, "step": 975 }, { "epoch": 0.5276626419174626, "grad_norm": 0.47621771693229675, "learning_rate": 5.386648759089441e-06, "loss": 0.5093, "step": 976 }, { "epoch": 0.5282032798702468, "grad_norm": 0.4255550801753998, "learning_rate": 5.3772364608648304e-06, "loss": 0.5067, "step": 977 }, { "epoch": 0.5287439178230312, "grad_norm": 0.35732802748680115, "learning_rate": 5.367822817998338e-06, "loss": 0.4611, "step": 978 }, { "epoch": 0.5292845557758155, "grad_norm": 0.3863273859024048, "learning_rate": 5.358407864044456e-06, "loss": 0.4833, "step": 979 }, { "epoch": 0.5298251937285997, "grad_norm": 0.3596065938472748, "learning_rate": 5.348991632562355e-06, "loss": 0.5005, "step": 980 }, { "epoch": 0.5303658316813841, "grad_norm": 0.39100930094718933, "learning_rate": 5.339574157115752e-06, "loss": 0.4815, "step": 981 }, { "epoch": 0.5309064696341683, "grad_norm": 0.38288214802742004, "learning_rate": 5.330155471272804e-06, "loss": 0.4522, "step": 982 }, { "epoch": 0.5314471075869526, "grad_norm": 0.3723912835121155, "learning_rate": 5.320735608605979e-06, "loss": 0.4618, "step": 983 }, { "epoch": 0.5319877455397369, "grad_norm": 0.3716428279876709, "learning_rate": 5.311314602691943e-06, "loss": 0.4565, "step": 984 }, { "epoch": 0.5325283834925212, "grad_norm": 0.4149164855480194, "learning_rate": 5.301892487111431e-06, "loss": 0.4597, "step": 985 }, { "epoch": 0.5330690214453054, "grad_norm": 0.45434221625328064, "learning_rate": 5.292469295449141e-06, "loss": 0.472, "step": 986 }, { "epoch": 0.5336096593980898, "grad_norm": 0.35414353013038635, "learning_rate": 5.2830450612936e-06, "loss": 0.4724, "step": 987 }, { "epoch": 0.534150297350874, "grad_norm": 0.41073864698410034, "learning_rate": 5.273619818237058e-06, "loss": 0.4936, "step": 988 }, { "epoch": 0.5346909353036583, "grad_norm": 0.3850623667240143, "learning_rate": 5.264193599875353e-06, "loss": 0.4932, "step": 989 }, { "epoch": 0.5352315732564425, "grad_norm": 0.3790433406829834, "learning_rate": 5.254766439807807e-06, "loss": 0.4615, "step": 990 }, { "epoch": 0.5357722112092269, "grad_norm": 0.3931194841861725, "learning_rate": 5.245338371637091e-06, "loss": 0.4915, "step": 991 }, { "epoch": 0.5363128491620112, "grad_norm": 0.36426377296447754, "learning_rate": 5.235909428969119e-06, "loss": 0.4598, "step": 992 }, { "epoch": 0.5368534871147954, "grad_norm": 0.381425142288208, "learning_rate": 5.226479645412923e-06, "loss": 0.4681, "step": 993 }, { "epoch": 0.5373941250675798, "grad_norm": 0.38973844051361084, "learning_rate": 5.2170490545805255e-06, "loss": 0.4858, "step": 994 }, { "epoch": 0.537934763020364, "grad_norm": 0.35494348406791687, "learning_rate": 5.207617690086831e-06, "loss": 0.4866, "step": 995 }, { "epoch": 0.5384754009731483, "grad_norm": 0.38420799374580383, "learning_rate": 5.1981855855495035e-06, "loss": 0.4588, "step": 996 }, { "epoch": 0.5390160389259326, "grad_norm": 0.4316636621952057, "learning_rate": 5.188752774588841e-06, "loss": 0.4792, "step": 997 }, { "epoch": 0.5395566768787169, "grad_norm": 0.3582872748374939, "learning_rate": 5.179319290827661e-06, "loss": 0.4911, "step": 998 }, { "epoch": 0.5400973148315011, "grad_norm": 0.38559308648109436, "learning_rate": 5.16988516789118e-06, "loss": 0.4771, "step": 999 }, { "epoch": 0.5406379527842855, "grad_norm": 0.4075615406036377, "learning_rate": 5.16045043940689e-06, "loss": 0.494, "step": 1000 }, { "epoch": 0.5411785907370698, "grad_norm": 0.39124950766563416, "learning_rate": 5.151015139004445e-06, "loss": 0.4745, "step": 1001 }, { "epoch": 0.541719228689854, "grad_norm": 0.42799654603004456, "learning_rate": 5.141579300315536e-06, "loss": 0.4366, "step": 1002 }, { "epoch": 0.5422598666426384, "grad_norm": 0.4033052921295166, "learning_rate": 5.132142956973773e-06, "loss": 0.4618, "step": 1003 }, { "epoch": 0.5428005045954226, "grad_norm": 0.41250860691070557, "learning_rate": 5.122706142614562e-06, "loss": 0.4412, "step": 1004 }, { "epoch": 0.5433411425482069, "grad_norm": 0.37778806686401367, "learning_rate": 5.113268890874994e-06, "loss": 0.4824, "step": 1005 }, { "epoch": 0.5438817805009911, "grad_norm": 0.39247915148735046, "learning_rate": 5.103831235393714e-06, "loss": 0.4853, "step": 1006 }, { "epoch": 0.5444224184537755, "grad_norm": 0.3986344039440155, "learning_rate": 5.094393209810806e-06, "loss": 0.4623, "step": 1007 }, { "epoch": 0.5449630564065597, "grad_norm": 0.3868162930011749, "learning_rate": 5.084954847767677e-06, "loss": 0.4603, "step": 1008 }, { "epoch": 0.545503694359344, "grad_norm": 0.39990749955177307, "learning_rate": 5.07551618290693e-06, "loss": 0.4866, "step": 1009 }, { "epoch": 0.5460443323121283, "grad_norm": 0.3816811442375183, "learning_rate": 5.06607724887225e-06, "loss": 0.4705, "step": 1010 }, { "epoch": 0.5465849702649126, "grad_norm": 0.39409664273262024, "learning_rate": 5.056638079308277e-06, "loss": 0.4685, "step": 1011 }, { "epoch": 0.5471256082176968, "grad_norm": 0.37422487139701843, "learning_rate": 5.047198707860496e-06, "loss": 0.4707, "step": 1012 }, { "epoch": 0.5476662461704812, "grad_norm": 0.37773483991622925, "learning_rate": 5.037759168175109e-06, "loss": 0.488, "step": 1013 }, { "epoch": 0.5482068841232655, "grad_norm": 0.4065093994140625, "learning_rate": 5.028319493898916e-06, "loss": 0.5103, "step": 1014 }, { "epoch": 0.5487475220760497, "grad_norm": 0.40094494819641113, "learning_rate": 5.018879718679199e-06, "loss": 0.4757, "step": 1015 }, { "epoch": 0.5492881600288341, "grad_norm": 0.3680800199508667, "learning_rate": 5.009439876163601e-06, "loss": 0.4682, "step": 1016 }, { "epoch": 0.5498287979816183, "grad_norm": 0.3533041775226593, "learning_rate": 5e-06, "loss": 0.4541, "step": 1017 }, { "epoch": 0.5503694359344026, "grad_norm": 0.38887202739715576, "learning_rate": 4.9905601238364006e-06, "loss": 0.4604, "step": 1018 }, { "epoch": 0.5509100738871869, "grad_norm": 0.38735654950141907, "learning_rate": 4.981120281320801e-06, "loss": 0.474, "step": 1019 }, { "epoch": 0.5514507118399712, "grad_norm": 0.3694019615650177, "learning_rate": 4.971680506101086e-06, "loss": 0.4734, "step": 1020 }, { "epoch": 0.5519913497927554, "grad_norm": 0.36134111881256104, "learning_rate": 4.9622408318248925e-06, "loss": 0.454, "step": 1021 }, { "epoch": 0.5525319877455397, "grad_norm": 0.40273579955101013, "learning_rate": 4.952801292139505e-06, "loss": 0.5016, "step": 1022 }, { "epoch": 0.553072625698324, "grad_norm": 0.39331087470054626, "learning_rate": 4.9433619206917234e-06, "loss": 0.4658, "step": 1023 }, { "epoch": 0.5536132636511083, "grad_norm": 0.40813592076301575, "learning_rate": 4.933922751127753e-06, "loss": 0.4767, "step": 1024 }, { "epoch": 0.5541539016038926, "grad_norm": 0.37781664729118347, "learning_rate": 4.924483817093071e-06, "loss": 0.4625, "step": 1025 }, { "epoch": 0.5546945395566769, "grad_norm": 0.420913964509964, "learning_rate": 4.915045152232324e-06, "loss": 0.4834, "step": 1026 }, { "epoch": 0.5552351775094612, "grad_norm": 0.3994501233100891, "learning_rate": 4.9056067901891945e-06, "loss": 0.496, "step": 1027 }, { "epoch": 0.5557758154622454, "grad_norm": 0.471672922372818, "learning_rate": 4.896168764606289e-06, "loss": 0.4774, "step": 1028 }, { "epoch": 0.5563164534150298, "grad_norm": 0.4291553497314453, "learning_rate": 4.886731109125007e-06, "loss": 0.4909, "step": 1029 }, { "epoch": 0.556857091367814, "grad_norm": 0.3480027914047241, "learning_rate": 4.87729385738544e-06, "loss": 0.4796, "step": 1030 }, { "epoch": 0.5573977293205983, "grad_norm": 0.448752760887146, "learning_rate": 4.867857043026229e-06, "loss": 0.4718, "step": 1031 }, { "epoch": 0.5579383672733826, "grad_norm": 0.429802805185318, "learning_rate": 4.858420699684464e-06, "loss": 0.4695, "step": 1032 }, { "epoch": 0.5584790052261669, "grad_norm": 0.40032947063446045, "learning_rate": 4.848984860995557e-06, "loss": 0.4741, "step": 1033 }, { "epoch": 0.5590196431789511, "grad_norm": 0.35695409774780273, "learning_rate": 4.839549560593111e-06, "loss": 0.4628, "step": 1034 }, { "epoch": 0.5595602811317354, "grad_norm": 0.35745665431022644, "learning_rate": 4.830114832108822e-06, "loss": 0.4973, "step": 1035 }, { "epoch": 0.5601009190845198, "grad_norm": 0.37396466732025146, "learning_rate": 4.82068070917234e-06, "loss": 0.463, "step": 1036 }, { "epoch": 0.560641557037304, "grad_norm": 0.3474443554878235, "learning_rate": 4.81124722541116e-06, "loss": 0.4761, "step": 1037 }, { "epoch": 0.5611821949900883, "grad_norm": 0.40390846133232117, "learning_rate": 4.801814414450498e-06, "loss": 0.4875, "step": 1038 }, { "epoch": 0.5617228329428726, "grad_norm": 0.3489498794078827, "learning_rate": 4.7923823099131694e-06, "loss": 0.4772, "step": 1039 }, { "epoch": 0.5622634708956569, "grad_norm": 0.4154996871948242, "learning_rate": 4.782950945419475e-06, "loss": 0.4778, "step": 1040 }, { "epoch": 0.5628041088484411, "grad_norm": 0.36867937445640564, "learning_rate": 4.7735203545870794e-06, "loss": 0.474, "step": 1041 }, { "epoch": 0.5633447468012255, "grad_norm": 0.3696093261241913, "learning_rate": 4.764090571030882e-06, "loss": 0.4813, "step": 1042 }, { "epoch": 0.5638853847540097, "grad_norm": 0.3766731023788452, "learning_rate": 4.75466162836291e-06, "loss": 0.4941, "step": 1043 }, { "epoch": 0.564426022706794, "grad_norm": 0.39763715863227844, "learning_rate": 4.745233560192195e-06, "loss": 0.4716, "step": 1044 }, { "epoch": 0.5649666606595783, "grad_norm": 0.3585834801197052, "learning_rate": 4.735806400124648e-06, "loss": 0.474, "step": 1045 }, { "epoch": 0.5655072986123626, "grad_norm": 0.36759787797927856, "learning_rate": 4.726380181762943e-06, "loss": 0.4849, "step": 1046 }, { "epoch": 0.5660479365651468, "grad_norm": 0.37226635217666626, "learning_rate": 4.716954938706401e-06, "loss": 0.4766, "step": 1047 }, { "epoch": 0.5665885745179312, "grad_norm": 0.392599880695343, "learning_rate": 4.707530704550861e-06, "loss": 0.4906, "step": 1048 }, { "epoch": 0.5671292124707155, "grad_norm": 0.3526148200035095, "learning_rate": 4.69810751288857e-06, "loss": 0.4418, "step": 1049 }, { "epoch": 0.5676698504234997, "grad_norm": 0.37177005410194397, "learning_rate": 4.688685397308061e-06, "loss": 0.5155, "step": 1050 }, { "epoch": 0.568210488376284, "grad_norm": 0.42093127965927124, "learning_rate": 4.679264391394022e-06, "loss": 0.4817, "step": 1051 }, { "epoch": 0.5687511263290683, "grad_norm": 0.3944513499736786, "learning_rate": 4.669844528727197e-06, "loss": 0.4964, "step": 1052 }, { "epoch": 0.5692917642818526, "grad_norm": 0.339263379573822, "learning_rate": 4.660425842884249e-06, "loss": 0.4467, "step": 1053 }, { "epoch": 0.5698324022346368, "grad_norm": 0.3937620520591736, "learning_rate": 4.651008367437646e-06, "loss": 0.4803, "step": 1054 }, { "epoch": 0.5703730401874212, "grad_norm": 0.36690381169319153, "learning_rate": 4.641592135955545e-06, "loss": 0.4529, "step": 1055 }, { "epoch": 0.5709136781402054, "grad_norm": 0.373169481754303, "learning_rate": 4.6321771820016635e-06, "loss": 0.4899, "step": 1056 }, { "epoch": 0.5714543160929897, "grad_norm": 0.36590346693992615, "learning_rate": 4.62276353913517e-06, "loss": 0.4827, "step": 1057 }, { "epoch": 0.571994954045774, "grad_norm": 0.3776438534259796, "learning_rate": 4.6133512409105595e-06, "loss": 0.4724, "step": 1058 }, { "epoch": 0.5725355919985583, "grad_norm": 0.3477265536785126, "learning_rate": 4.603940320877533e-06, "loss": 0.4912, "step": 1059 }, { "epoch": 0.5730762299513426, "grad_norm": 0.340049684047699, "learning_rate": 4.594530812580876e-06, "loss": 0.4825, "step": 1060 }, { "epoch": 0.5736168679041269, "grad_norm": 0.3493627905845642, "learning_rate": 4.585122749560347e-06, "loss": 0.4692, "step": 1061 }, { "epoch": 0.5741575058569112, "grad_norm": 0.34757259488105774, "learning_rate": 4.575716165350549e-06, "loss": 0.4381, "step": 1062 }, { "epoch": 0.5746981438096954, "grad_norm": 0.3675667345523834, "learning_rate": 4.566311093480818e-06, "loss": 0.4431, "step": 1063 }, { "epoch": 0.5752387817624798, "grad_norm": 0.3604763448238373, "learning_rate": 4.556907567475094e-06, "loss": 0.4832, "step": 1064 }, { "epoch": 0.575779419715264, "grad_norm": 0.3719836473464966, "learning_rate": 4.547505620851812e-06, "loss": 0.4634, "step": 1065 }, { "epoch": 0.5763200576680483, "grad_norm": 0.3803561329841614, "learning_rate": 4.538105287123772e-06, "loss": 0.4679, "step": 1066 }, { "epoch": 0.5768606956208325, "grad_norm": 0.35499629378318787, "learning_rate": 4.528706599798033e-06, "loss": 0.4648, "step": 1067 }, { "epoch": 0.5774013335736169, "grad_norm": 0.3680323362350464, "learning_rate": 4.5193095923757745e-06, "loss": 0.4622, "step": 1068 }, { "epoch": 0.5779419715264011, "grad_norm": 0.3444634974002838, "learning_rate": 4.509914298352197e-06, "loss": 0.4707, "step": 1069 }, { "epoch": 0.5784826094791854, "grad_norm": 0.3498629927635193, "learning_rate": 4.5005207512163914e-06, "loss": 0.4618, "step": 1070 }, { "epoch": 0.5790232474319698, "grad_norm": 0.3526155352592468, "learning_rate": 4.491128984451219e-06, "loss": 0.4661, "step": 1071 }, { "epoch": 0.579563885384754, "grad_norm": 0.3570224344730377, "learning_rate": 4.481739031533201e-06, "loss": 0.4846, "step": 1072 }, { "epoch": 0.5801045233375383, "grad_norm": 0.3742653727531433, "learning_rate": 4.472350925932384e-06, "loss": 0.4589, "step": 1073 }, { "epoch": 0.5806451612903226, "grad_norm": 0.3513067662715912, "learning_rate": 4.462964701112237e-06, "loss": 0.4401, "step": 1074 }, { "epoch": 0.5811857992431069, "grad_norm": 0.3835412561893463, "learning_rate": 4.453580390529526e-06, "loss": 0.4607, "step": 1075 }, { "epoch": 0.5817264371958911, "grad_norm": 0.35909005999565125, "learning_rate": 4.444198027634191e-06, "loss": 0.4408, "step": 1076 }, { "epoch": 0.5822670751486755, "grad_norm": 0.3801157474517822, "learning_rate": 4.434817645869226e-06, "loss": 0.4678, "step": 1077 }, { "epoch": 0.5828077131014597, "grad_norm": 0.36517688632011414, "learning_rate": 4.42543927867057e-06, "loss": 0.4884, "step": 1078 }, { "epoch": 0.583348351054244, "grad_norm": 0.41169390082359314, "learning_rate": 4.416062959466978e-06, "loss": 0.491, "step": 1079 }, { "epoch": 0.5838889890070283, "grad_norm": 0.4015640616416931, "learning_rate": 4.4066887216799055e-06, "loss": 0.5044, "step": 1080 }, { "epoch": 0.5844296269598126, "grad_norm": 0.3767825663089752, "learning_rate": 4.397316598723385e-06, "loss": 0.4729, "step": 1081 }, { "epoch": 0.5849702649125968, "grad_norm": 0.37044841051101685, "learning_rate": 4.38794662400392e-06, "loss": 0.4901, "step": 1082 }, { "epoch": 0.5855109028653811, "grad_norm": 0.3824319839477539, "learning_rate": 4.3785788309203466e-06, "loss": 0.454, "step": 1083 }, { "epoch": 0.5860515408181655, "grad_norm": 0.42856699228286743, "learning_rate": 4.369213252863733e-06, "loss": 0.4726, "step": 1084 }, { "epoch": 0.5865921787709497, "grad_norm": 0.3541294038295746, "learning_rate": 4.359849923217246e-06, "loss": 0.4546, "step": 1085 }, { "epoch": 0.587132816723734, "grad_norm": 0.4067661166191101, "learning_rate": 4.350488875356041e-06, "loss": 0.4584, "step": 1086 }, { "epoch": 0.5876734546765183, "grad_norm": 0.39768409729003906, "learning_rate": 4.341130142647136e-06, "loss": 0.4699, "step": 1087 }, { "epoch": 0.5882140926293026, "grad_norm": 0.35365110635757446, "learning_rate": 4.331773758449303e-06, "loss": 0.4863, "step": 1088 }, { "epoch": 0.5887547305820868, "grad_norm": 0.4205717444419861, "learning_rate": 4.322419756112943e-06, "loss": 0.4603, "step": 1089 }, { "epoch": 0.5892953685348712, "grad_norm": 0.3971540033817291, "learning_rate": 4.313068168979957e-06, "loss": 0.4485, "step": 1090 }, { "epoch": 0.5898360064876554, "grad_norm": 0.3554147183895111, "learning_rate": 4.303719030383648e-06, "loss": 0.47, "step": 1091 }, { "epoch": 0.5903766444404397, "grad_norm": 0.4239473044872284, "learning_rate": 4.294372373648587e-06, "loss": 0.5029, "step": 1092 }, { "epoch": 0.590917282393224, "grad_norm": 0.3685072958469391, "learning_rate": 4.285028232090499e-06, "loss": 0.4611, "step": 1093 }, { "epoch": 0.5914579203460083, "grad_norm": 0.39185526967048645, "learning_rate": 4.275686639016142e-06, "loss": 0.454, "step": 1094 }, { "epoch": 0.5919985582987926, "grad_norm": 0.3944680094718933, "learning_rate": 4.266347627723192e-06, "loss": 0.4956, "step": 1095 }, { "epoch": 0.5925391962515769, "grad_norm": 0.3366733491420746, "learning_rate": 4.257011231500122e-06, "loss": 0.4424, "step": 1096 }, { "epoch": 0.5930798342043612, "grad_norm": 0.4068387746810913, "learning_rate": 4.247677483626085e-06, "loss": 0.4794, "step": 1097 }, { "epoch": 0.5936204721571454, "grad_norm": 0.4122190773487091, "learning_rate": 4.238346417370793e-06, "loss": 0.4769, "step": 1098 }, { "epoch": 0.5941611101099297, "grad_norm": 0.38368502259254456, "learning_rate": 4.229018065994396e-06, "loss": 0.4576, "step": 1099 }, { "epoch": 0.594701748062714, "grad_norm": 0.33293166756629944, "learning_rate": 4.2196924627473715e-06, "loss": 0.4749, "step": 1100 }, { "epoch": 0.5952423860154983, "grad_norm": 0.43737560510635376, "learning_rate": 4.210369640870403e-06, "loss": 0.4989, "step": 1101 }, { "epoch": 0.5957830239682825, "grad_norm": 0.4038828909397125, "learning_rate": 4.201049633594254e-06, "loss": 0.4808, "step": 1102 }, { "epoch": 0.5963236619210669, "grad_norm": 0.3759760856628418, "learning_rate": 4.1917324741396595e-06, "loss": 0.482, "step": 1103 }, { "epoch": 0.5968642998738511, "grad_norm": 0.3900810778141022, "learning_rate": 4.1824181957172014e-06, "loss": 0.5048, "step": 1104 }, { "epoch": 0.5974049378266354, "grad_norm": 0.3988158106803894, "learning_rate": 4.173106831527194e-06, "loss": 0.4889, "step": 1105 }, { "epoch": 0.5979455757794198, "grad_norm": 0.393657386302948, "learning_rate": 4.163798414759566e-06, "loss": 0.4676, "step": 1106 }, { "epoch": 0.598486213732204, "grad_norm": 0.3507223427295685, "learning_rate": 4.154492978593733e-06, "loss": 0.4462, "step": 1107 }, { "epoch": 0.5990268516849883, "grad_norm": 0.3565762937068939, "learning_rate": 4.145190556198494e-06, "loss": 0.4725, "step": 1108 }, { "epoch": 0.5995674896377726, "grad_norm": 0.3660755157470703, "learning_rate": 4.135891180731903e-06, "loss": 0.4607, "step": 1109 }, { "epoch": 0.6001081275905569, "grad_norm": 0.3648075461387634, "learning_rate": 4.1265948853411506e-06, "loss": 0.4446, "step": 1110 }, { "epoch": 0.6006487655433411, "grad_norm": 0.3925120532512665, "learning_rate": 4.1173017031624544e-06, "loss": 0.4767, "step": 1111 }, { "epoch": 0.6011894034961254, "grad_norm": 0.35011157393455505, "learning_rate": 4.108011667320926e-06, "loss": 0.4739, "step": 1112 }, { "epoch": 0.6017300414489097, "grad_norm": 0.3578500747680664, "learning_rate": 4.098724810930472e-06, "loss": 0.4899, "step": 1113 }, { "epoch": 0.602270679401694, "grad_norm": 0.40307337045669556, "learning_rate": 4.08944116709366e-06, "loss": 0.4923, "step": 1114 }, { "epoch": 0.6028113173544782, "grad_norm": 0.39985835552215576, "learning_rate": 4.08016076890161e-06, "loss": 0.466, "step": 1115 }, { "epoch": 0.6033519553072626, "grad_norm": 0.40574347972869873, "learning_rate": 4.0708836494338695e-06, "loss": 0.478, "step": 1116 }, { "epoch": 0.6038925932600469, "grad_norm": 0.3304382264614105, "learning_rate": 4.061609841758302e-06, "loss": 0.4487, "step": 1117 }, { "epoch": 0.6044332312128311, "grad_norm": 0.3854810893535614, "learning_rate": 4.0523393789309625e-06, "loss": 0.4704, "step": 1118 }, { "epoch": 0.6049738691656155, "grad_norm": 0.3859521746635437, "learning_rate": 4.04307229399599e-06, "loss": 0.446, "step": 1119 }, { "epoch": 0.6055145071183997, "grad_norm": 0.3449578881263733, "learning_rate": 4.0338086199854765e-06, "loss": 0.4592, "step": 1120 }, { "epoch": 0.606055145071184, "grad_norm": 0.381989985704422, "learning_rate": 4.02454838991936e-06, "loss": 0.4553, "step": 1121 }, { "epoch": 0.6065957830239683, "grad_norm": 0.4080401659011841, "learning_rate": 4.0152916368053e-06, "loss": 0.5008, "step": 1122 }, { "epoch": 0.6071364209767526, "grad_norm": 0.35274338722229004, "learning_rate": 4.006038393638565e-06, "loss": 0.4687, "step": 1123 }, { "epoch": 0.6076770589295368, "grad_norm": 0.3596242070198059, "learning_rate": 3.996788693401914e-06, "loss": 0.4852, "step": 1124 }, { "epoch": 0.6082176968823212, "grad_norm": 0.44486021995544434, "learning_rate": 3.987542569065469e-06, "loss": 0.4747, "step": 1125 }, { "epoch": 0.6087583348351054, "grad_norm": 0.3718114495277405, "learning_rate": 3.978300053586617e-06, "loss": 0.5005, "step": 1126 }, { "epoch": 0.6092989727878897, "grad_norm": 0.4019679129123688, "learning_rate": 3.969061179909872e-06, "loss": 0.4566, "step": 1127 }, { "epoch": 0.6098396107406739, "grad_norm": 0.3548070788383484, "learning_rate": 3.959825980966777e-06, "loss": 0.4627, "step": 1128 }, { "epoch": 0.6103802486934583, "grad_norm": 0.35685357451438904, "learning_rate": 3.9505944896757635e-06, "loss": 0.491, "step": 1129 }, { "epoch": 0.6109208866462426, "grad_norm": 0.3496491312980652, "learning_rate": 3.941366738942058e-06, "loss": 0.4934, "step": 1130 }, { "epoch": 0.6114615245990268, "grad_norm": 0.39106976985931396, "learning_rate": 3.932142761657549e-06, "loss": 0.4896, "step": 1131 }, { "epoch": 0.6120021625518112, "grad_norm": 0.39134979248046875, "learning_rate": 3.922922590700679e-06, "loss": 0.5008, "step": 1132 }, { "epoch": 0.6125428005045954, "grad_norm": 0.3936554193496704, "learning_rate": 3.913706258936317e-06, "loss": 0.481, "step": 1133 }, { "epoch": 0.6130834384573797, "grad_norm": 0.3407648503780365, "learning_rate": 3.904493799215652e-06, "loss": 0.4668, "step": 1134 }, { "epoch": 0.613624076410164, "grad_norm": 0.3679908514022827, "learning_rate": 3.895285244376068e-06, "loss": 0.4934, "step": 1135 }, { "epoch": 0.6141647143629483, "grad_norm": 0.4318563640117645, "learning_rate": 3.886080627241034e-06, "loss": 0.4423, "step": 1136 }, { "epoch": 0.6147053523157325, "grad_norm": 0.3628011643886566, "learning_rate": 3.876879980619982e-06, "loss": 0.4939, "step": 1137 }, { "epoch": 0.6152459902685169, "grad_norm": 0.375052809715271, "learning_rate": 3.8676833373081864e-06, "loss": 0.4655, "step": 1138 }, { "epoch": 0.6157866282213011, "grad_norm": 0.39029353857040405, "learning_rate": 3.8584907300866595e-06, "loss": 0.4855, "step": 1139 }, { "epoch": 0.6163272661740854, "grad_norm": 0.4070568084716797, "learning_rate": 3.8493021917220225e-06, "loss": 0.4526, "step": 1140 }, { "epoch": 0.6168679041268698, "grad_norm": 0.4272588789463043, "learning_rate": 3.840117754966396e-06, "loss": 0.4892, "step": 1141 }, { "epoch": 0.617408542079654, "grad_norm": 0.4317014217376709, "learning_rate": 3.8309374525572765e-06, "loss": 0.4817, "step": 1142 }, { "epoch": 0.6179491800324383, "grad_norm": 0.4254457950592041, "learning_rate": 3.821761317217428e-06, "loss": 0.4631, "step": 1143 }, { "epoch": 0.6184898179852225, "grad_norm": 0.36125367879867554, "learning_rate": 3.81258938165476e-06, "loss": 0.4893, "step": 1144 }, { "epoch": 0.6190304559380069, "grad_norm": 0.3904247581958771, "learning_rate": 3.803421678562213e-06, "loss": 0.4599, "step": 1145 }, { "epoch": 0.6195710938907911, "grad_norm": 0.41387060284614563, "learning_rate": 3.794258240617636e-06, "loss": 0.4982, "step": 1146 }, { "epoch": 0.6201117318435754, "grad_norm": 0.349608838558197, "learning_rate": 3.7850991004836813e-06, "loss": 0.4859, "step": 1147 }, { "epoch": 0.6206523697963597, "grad_norm": 0.3353005647659302, "learning_rate": 3.7759442908076786e-06, "loss": 0.481, "step": 1148 }, { "epoch": 0.621193007749144, "grad_norm": 0.3263114392757416, "learning_rate": 3.7667938442215247e-06, "loss": 0.4775, "step": 1149 }, { "epoch": 0.6217336457019282, "grad_norm": 0.44235607981681824, "learning_rate": 3.7576477933415612e-06, "loss": 0.4519, "step": 1150 }, { "epoch": 0.6222742836547126, "grad_norm": 0.4153715670108795, "learning_rate": 3.748506170768462e-06, "loss": 0.474, "step": 1151 }, { "epoch": 0.6228149216074969, "grad_norm": 0.34854984283447266, "learning_rate": 3.739369009087117e-06, "loss": 0.4795, "step": 1152 }, { "epoch": 0.6233555595602811, "grad_norm": 0.37851929664611816, "learning_rate": 3.7302363408665155e-06, "loss": 0.5126, "step": 1153 }, { "epoch": 0.6238961975130655, "grad_norm": 0.3372579514980316, "learning_rate": 3.721108198659633e-06, "loss": 0.4704, "step": 1154 }, { "epoch": 0.6244368354658497, "grad_norm": 0.39567941427230835, "learning_rate": 3.7119846150033047e-06, "loss": 0.486, "step": 1155 }, { "epoch": 0.624977473418634, "grad_norm": 0.3714936673641205, "learning_rate": 3.702865622418125e-06, "loss": 0.4449, "step": 1156 }, { "epoch": 0.6255181113714183, "grad_norm": 0.35422173142433167, "learning_rate": 3.693751253408319e-06, "loss": 0.4686, "step": 1157 }, { "epoch": 0.6260587493242026, "grad_norm": 0.32026010751724243, "learning_rate": 3.6846415404616344e-06, "loss": 0.4818, "step": 1158 }, { "epoch": 0.6265993872769868, "grad_norm": 0.3986067771911621, "learning_rate": 3.6755365160492187e-06, "loss": 0.4787, "step": 1159 }, { "epoch": 0.6271400252297711, "grad_norm": 0.330572247505188, "learning_rate": 3.6664362126255087e-06, "loss": 0.4725, "step": 1160 }, { "epoch": 0.6276806631825554, "grad_norm": 0.3532167971134186, "learning_rate": 3.657340662628116e-06, "loss": 0.4659, "step": 1161 }, { "epoch": 0.6282213011353397, "grad_norm": 0.33118587732315063, "learning_rate": 3.648249898477707e-06, "loss": 0.4868, "step": 1162 }, { "epoch": 0.628761939088124, "grad_norm": 0.36354872584342957, "learning_rate": 3.6391639525778915e-06, "loss": 0.4936, "step": 1163 }, { "epoch": 0.6293025770409083, "grad_norm": 0.3409719467163086, "learning_rate": 3.6300828573150977e-06, "loss": 0.4667, "step": 1164 }, { "epoch": 0.6298432149936926, "grad_norm": 0.35681483149528503, "learning_rate": 3.621006645058472e-06, "loss": 0.4615, "step": 1165 }, { "epoch": 0.6303838529464768, "grad_norm": 0.3582918643951416, "learning_rate": 3.6119353481597504e-06, "loss": 0.4789, "step": 1166 }, { "epoch": 0.6309244908992612, "grad_norm": 0.39356887340545654, "learning_rate": 3.6028689989531533e-06, "loss": 0.4494, "step": 1167 }, { "epoch": 0.6314651288520454, "grad_norm": 0.36148393154144287, "learning_rate": 3.593807629755258e-06, "loss": 0.4842, "step": 1168 }, { "epoch": 0.6320057668048297, "grad_norm": 0.35367411375045776, "learning_rate": 3.584751272864899e-06, "loss": 0.4716, "step": 1169 }, { "epoch": 0.632546404757614, "grad_norm": 0.3774411976337433, "learning_rate": 3.575699960563038e-06, "loss": 0.4452, "step": 1170 }, { "epoch": 0.6330870427103983, "grad_norm": 0.36172568798065186, "learning_rate": 3.566653725112661e-06, "loss": 0.4657, "step": 1171 }, { "epoch": 0.6336276806631825, "grad_norm": 0.368366539478302, "learning_rate": 3.557612598758652e-06, "loss": 0.4793, "step": 1172 }, { "epoch": 0.6341683186159668, "grad_norm": 0.36106520891189575, "learning_rate": 3.5485766137276894e-06, "loss": 0.4885, "step": 1173 }, { "epoch": 0.6347089565687511, "grad_norm": 0.3237360119819641, "learning_rate": 3.5395458022281205e-06, "loss": 0.4771, "step": 1174 }, { "epoch": 0.6352495945215354, "grad_norm": 0.3604816794395447, "learning_rate": 3.5305201964498557e-06, "loss": 0.4931, "step": 1175 }, { "epoch": 0.6357902324743196, "grad_norm": 0.3982067108154297, "learning_rate": 3.5214998285642517e-06, "loss": 0.4677, "step": 1176 }, { "epoch": 0.636330870427104, "grad_norm": 0.3789684474468231, "learning_rate": 3.5124847307239863e-06, "loss": 0.4994, "step": 1177 }, { "epoch": 0.6368715083798883, "grad_norm": 0.3449534773826599, "learning_rate": 3.5034749350629593e-06, "loss": 0.4724, "step": 1178 }, { "epoch": 0.6374121463326725, "grad_norm": 0.361224889755249, "learning_rate": 3.4944704736961722e-06, "loss": 0.4406, "step": 1179 }, { "epoch": 0.6379527842854569, "grad_norm": 0.41354840993881226, "learning_rate": 3.4854713787196105e-06, "loss": 0.4721, "step": 1180 }, { "epoch": 0.6384934222382411, "grad_norm": 0.351012647151947, "learning_rate": 3.4764776822101275e-06, "loss": 0.4582, "step": 1181 }, { "epoch": 0.6390340601910254, "grad_norm": 0.39335301518440247, "learning_rate": 3.4674894162253404e-06, "loss": 0.4826, "step": 1182 }, { "epoch": 0.6395746981438097, "grad_norm": 0.34970593452453613, "learning_rate": 3.458506612803505e-06, "loss": 0.4828, "step": 1183 }, { "epoch": 0.640115336096594, "grad_norm": 0.38310420513153076, "learning_rate": 3.4495293039634113e-06, "loss": 0.4851, "step": 1184 }, { "epoch": 0.6406559740493782, "grad_norm": 0.4005867838859558, "learning_rate": 3.440557521704256e-06, "loss": 0.4729, "step": 1185 }, { "epoch": 0.6411966120021626, "grad_norm": 0.4233829975128174, "learning_rate": 3.4315912980055433e-06, "loss": 0.4519, "step": 1186 }, { "epoch": 0.6417372499549469, "grad_norm": 0.3797195851802826, "learning_rate": 3.4226306648269616e-06, "loss": 0.4758, "step": 1187 }, { "epoch": 0.6422778879077311, "grad_norm": 0.35736843943595886, "learning_rate": 3.413675654108275e-06, "loss": 0.4627, "step": 1188 }, { "epoch": 0.6428185258605154, "grad_norm": 0.3385578989982605, "learning_rate": 3.4047262977692014e-06, "loss": 0.471, "step": 1189 }, { "epoch": 0.6433591638132997, "grad_norm": 0.3547685146331787, "learning_rate": 3.3957826277093074e-06, "loss": 0.4813, "step": 1190 }, { "epoch": 0.643899801766084, "grad_norm": 0.3947050869464874, "learning_rate": 3.3868446758078897e-06, "loss": 0.4988, "step": 1191 }, { "epoch": 0.6444404397188682, "grad_norm": 0.3719642162322998, "learning_rate": 3.3779124739238657e-06, "loss": 0.4632, "step": 1192 }, { "epoch": 0.6449810776716526, "grad_norm": 0.3777051866054535, "learning_rate": 3.3689860538956547e-06, "loss": 0.4482, "step": 1193 }, { "epoch": 0.6455217156244368, "grad_norm": 0.3597317337989807, "learning_rate": 3.3600654475410643e-06, "loss": 0.4687, "step": 1194 }, { "epoch": 0.6460623535772211, "grad_norm": 0.4228699803352356, "learning_rate": 3.351150686657185e-06, "loss": 0.4585, "step": 1195 }, { "epoch": 0.6466029915300054, "grad_norm": 0.3539244532585144, "learning_rate": 3.3422418030202696e-06, "loss": 0.4474, "step": 1196 }, { "epoch": 0.6471436294827897, "grad_norm": 0.3758021891117096, "learning_rate": 3.3333388283856195e-06, "loss": 0.4838, "step": 1197 }, { "epoch": 0.647684267435574, "grad_norm": 0.3555963635444641, "learning_rate": 3.324441794487475e-06, "loss": 0.4614, "step": 1198 }, { "epoch": 0.6482249053883583, "grad_norm": 0.3526434302330017, "learning_rate": 3.3155507330389004e-06, "loss": 0.4629, "step": 1199 }, { "epoch": 0.6487655433411426, "grad_norm": 0.38898134231567383, "learning_rate": 3.306665675731674e-06, "loss": 0.4907, "step": 1200 }, { "epoch": 0.6493061812939268, "grad_norm": 0.32450518012046814, "learning_rate": 3.297786654236169e-06, "loss": 0.4781, "step": 1201 }, { "epoch": 0.6498468192467112, "grad_norm": 0.36460715532302856, "learning_rate": 3.28891370020125e-06, "loss": 0.4958, "step": 1202 }, { "epoch": 0.6503874571994954, "grad_norm": 0.34277579188346863, "learning_rate": 3.280046845254145e-06, "loss": 0.4979, "step": 1203 }, { "epoch": 0.6509280951522797, "grad_norm": 0.33557090163230896, "learning_rate": 3.2711861210003503e-06, "loss": 0.4811, "step": 1204 }, { "epoch": 0.6514687331050639, "grad_norm": 0.3325314521789551, "learning_rate": 3.2623315590235076e-06, "loss": 0.4755, "step": 1205 }, { "epoch": 0.6520093710578483, "grad_norm": 0.3976009488105774, "learning_rate": 3.2534831908852914e-06, "loss": 0.4736, "step": 1206 }, { "epoch": 0.6525500090106325, "grad_norm": 0.3497442305088043, "learning_rate": 3.244641048125301e-06, "loss": 0.4567, "step": 1207 }, { "epoch": 0.6530906469634168, "grad_norm": 0.3687249422073364, "learning_rate": 3.235805162260942e-06, "loss": 0.4592, "step": 1208 }, { "epoch": 0.6536312849162011, "grad_norm": 0.36228495836257935, "learning_rate": 3.226975564787322e-06, "loss": 0.4593, "step": 1209 }, { "epoch": 0.6541719228689854, "grad_norm": 0.34000781178474426, "learning_rate": 3.218152287177133e-06, "loss": 0.4895, "step": 1210 }, { "epoch": 0.6547125608217697, "grad_norm": 0.37587717175483704, "learning_rate": 3.2093353608805368e-06, "loss": 0.4689, "step": 1211 }, { "epoch": 0.655253198774554, "grad_norm": 0.35287749767303467, "learning_rate": 3.2005248173250593e-06, "loss": 0.4846, "step": 1212 }, { "epoch": 0.6557938367273383, "grad_norm": 0.4108916223049164, "learning_rate": 3.1917206879154762e-06, "loss": 0.4592, "step": 1213 }, { "epoch": 0.6563344746801225, "grad_norm": 0.3818206489086151, "learning_rate": 3.1829230040336967e-06, "loss": 0.4836, "step": 1214 }, { "epoch": 0.6568751126329069, "grad_norm": 0.38580086827278137, "learning_rate": 3.1741317970386597e-06, "loss": 0.4723, "step": 1215 }, { "epoch": 0.6574157505856911, "grad_norm": 0.37795159220695496, "learning_rate": 3.1653470982662114e-06, "loss": 0.472, "step": 1216 }, { "epoch": 0.6579563885384754, "grad_norm": 0.46705180406570435, "learning_rate": 3.1565689390290067e-06, "loss": 0.4617, "step": 1217 }, { "epoch": 0.6584970264912597, "grad_norm": 0.3606800138950348, "learning_rate": 3.147797350616385e-06, "loss": 0.494, "step": 1218 }, { "epoch": 0.659037664444044, "grad_norm": 0.335356742143631, "learning_rate": 3.139032364294271e-06, "loss": 0.4466, "step": 1219 }, { "epoch": 0.6595783023968282, "grad_norm": 0.3441433906555176, "learning_rate": 3.130274011305047e-06, "loss": 0.4745, "step": 1220 }, { "epoch": 0.6601189403496125, "grad_norm": 0.33748725056648254, "learning_rate": 3.1215223228674587e-06, "loss": 0.4989, "step": 1221 }, { "epoch": 0.6606595783023969, "grad_norm": 0.3482097387313843, "learning_rate": 3.1127773301764935e-06, "loss": 0.4774, "step": 1222 }, { "epoch": 0.6612002162551811, "grad_norm": 0.3453892171382904, "learning_rate": 3.1040390644032746e-06, "loss": 0.4759, "step": 1223 }, { "epoch": 0.6617408542079654, "grad_norm": 0.36808252334594727, "learning_rate": 3.095307556694942e-06, "loss": 0.4692, "step": 1224 }, { "epoch": 0.6622814921607497, "grad_norm": 0.4075857102870941, "learning_rate": 3.0865828381745515e-06, "loss": 0.4607, "step": 1225 }, { "epoch": 0.662822130113534, "grad_norm": 0.3461955189704895, "learning_rate": 3.077864939940959e-06, "loss": 0.4696, "step": 1226 }, { "epoch": 0.6633627680663182, "grad_norm": 0.3581470847129822, "learning_rate": 3.0691538930687076e-06, "loss": 0.468, "step": 1227 }, { "epoch": 0.6639034060191026, "grad_norm": 0.34455424547195435, "learning_rate": 3.0604497286079227e-06, "loss": 0.4435, "step": 1228 }, { "epoch": 0.6644440439718868, "grad_norm": 0.35402852296829224, "learning_rate": 3.051752477584191e-06, "loss": 0.4831, "step": 1229 }, { "epoch": 0.6649846819246711, "grad_norm": 0.35135895013809204, "learning_rate": 3.043062170998464e-06, "loss": 0.4827, "step": 1230 }, { "epoch": 0.6655253198774554, "grad_norm": 0.3202992081642151, "learning_rate": 3.0343788398269342e-06, "loss": 0.4442, "step": 1231 }, { "epoch": 0.6660659578302397, "grad_norm": 0.3709985613822937, "learning_rate": 3.025702515020937e-06, "loss": 0.4635, "step": 1232 }, { "epoch": 0.666606595783024, "grad_norm": 0.358951598405838, "learning_rate": 3.0170332275068247e-06, "loss": 0.4464, "step": 1233 }, { "epoch": 0.6671472337358082, "grad_norm": 0.3573668897151947, "learning_rate": 3.0083710081858748e-06, "loss": 0.48, "step": 1234 }, { "epoch": 0.6676878716885926, "grad_norm": 0.34597110748291016, "learning_rate": 2.9997158879341647e-06, "loss": 0.4718, "step": 1235 }, { "epoch": 0.6682285096413768, "grad_norm": 0.3602089583873749, "learning_rate": 2.9910678976024733e-06, "loss": 0.449, "step": 1236 }, { "epoch": 0.6687691475941611, "grad_norm": 0.34547102451324463, "learning_rate": 2.982427068016155e-06, "loss": 0.4584, "step": 1237 }, { "epoch": 0.6693097855469454, "grad_norm": 0.3194965124130249, "learning_rate": 2.9737934299750514e-06, "loss": 0.475, "step": 1238 }, { "epoch": 0.6698504234997297, "grad_norm": 0.37800660729408264, "learning_rate": 2.965167014253363e-06, "loss": 0.4739, "step": 1239 }, { "epoch": 0.6703910614525139, "grad_norm": 0.35336002707481384, "learning_rate": 2.956547851599548e-06, "loss": 0.4768, "step": 1240 }, { "epoch": 0.6709316994052983, "grad_norm": 0.3484102785587311, "learning_rate": 2.947935972736217e-06, "loss": 0.4606, "step": 1241 }, { "epoch": 0.6714723373580825, "grad_norm": 0.33098798990249634, "learning_rate": 2.9393314083600076e-06, "loss": 0.4737, "step": 1242 }, { "epoch": 0.6720129753108668, "grad_norm": 0.33991244435310364, "learning_rate": 2.930734189141492e-06, "loss": 0.4609, "step": 1243 }, { "epoch": 0.6725536132636512, "grad_norm": 0.3231086730957031, "learning_rate": 2.922144345725062e-06, "loss": 0.4668, "step": 1244 }, { "epoch": 0.6730942512164354, "grad_norm": 0.3015232980251312, "learning_rate": 2.9135619087288153e-06, "loss": 0.4378, "step": 1245 }, { "epoch": 0.6736348891692197, "grad_norm": 0.3297620415687561, "learning_rate": 2.9049869087444493e-06, "loss": 0.4843, "step": 1246 }, { "epoch": 0.674175527122004, "grad_norm": 0.3333333134651184, "learning_rate": 2.8964193763371546e-06, "loss": 0.4523, "step": 1247 }, { "epoch": 0.6747161650747883, "grad_norm": 0.3433511257171631, "learning_rate": 2.887859342045506e-06, "loss": 0.4698, "step": 1248 }, { "epoch": 0.6752568030275725, "grad_norm": 0.3787667751312256, "learning_rate": 2.879306836381345e-06, "loss": 0.4556, "step": 1249 }, { "epoch": 0.6757974409803568, "grad_norm": 0.3427095413208008, "learning_rate": 2.8707618898296864e-06, "loss": 0.4689, "step": 1250 }, { "epoch": 0.6763380789331411, "grad_norm": 0.3421780467033386, "learning_rate": 2.862224532848591e-06, "loss": 0.4805, "step": 1251 }, { "epoch": 0.6768787168859254, "grad_norm": 0.37503376603126526, "learning_rate": 2.853694795869074e-06, "loss": 0.4946, "step": 1252 }, { "epoch": 0.6774193548387096, "grad_norm": 0.33137035369873047, "learning_rate": 2.845172709294989e-06, "loss": 0.4447, "step": 1253 }, { "epoch": 0.677959992791494, "grad_norm": 0.38808560371398926, "learning_rate": 2.8366583035029194e-06, "loss": 0.4918, "step": 1254 }, { "epoch": 0.6785006307442782, "grad_norm": 0.31489184498786926, "learning_rate": 2.8281516088420665e-06, "loss": 0.4527, "step": 1255 }, { "epoch": 0.6790412686970625, "grad_norm": 0.33485811948776245, "learning_rate": 2.819652655634151e-06, "loss": 0.4692, "step": 1256 }, { "epoch": 0.6795819066498469, "grad_norm": 0.3656138777732849, "learning_rate": 2.8111614741732975e-06, "loss": 0.4612, "step": 1257 }, { "epoch": 0.6801225446026311, "grad_norm": 0.3511470854282379, "learning_rate": 2.802678094725931e-06, "loss": 0.4532, "step": 1258 }, { "epoch": 0.6806631825554154, "grad_norm": 0.3320247530937195, "learning_rate": 2.794202547530661e-06, "loss": 0.4452, "step": 1259 }, { "epoch": 0.6812038205081997, "grad_norm": 0.3250149190425873, "learning_rate": 2.785734862798184e-06, "loss": 0.4722, "step": 1260 }, { "epoch": 0.681744458460984, "grad_norm": 0.34146052598953247, "learning_rate": 2.77727507071117e-06, "loss": 0.4769, "step": 1261 }, { "epoch": 0.6822850964137682, "grad_norm": 0.3707115054130554, "learning_rate": 2.768823201424158e-06, "loss": 0.4716, "step": 1262 }, { "epoch": 0.6828257343665526, "grad_norm": 0.3220188617706299, "learning_rate": 2.7603792850634402e-06, "loss": 0.4858, "step": 1263 }, { "epoch": 0.6833663723193368, "grad_norm": 0.3371463716030121, "learning_rate": 2.7519433517269665e-06, "loss": 0.4778, "step": 1264 }, { "epoch": 0.6839070102721211, "grad_norm": 0.33046388626098633, "learning_rate": 2.7435154314842337e-06, "loss": 0.449, "step": 1265 }, { "epoch": 0.6844476482249053, "grad_norm": 0.33768966794013977, "learning_rate": 2.7350955543761682e-06, "loss": 0.4591, "step": 1266 }, { "epoch": 0.6849882861776897, "grad_norm": 0.35337385535240173, "learning_rate": 2.7266837504150345e-06, "loss": 0.4649, "step": 1267 }, { "epoch": 0.685528924130474, "grad_norm": 0.4103326201438904, "learning_rate": 2.7182800495843166e-06, "loss": 0.4778, "step": 1268 }, { "epoch": 0.6860695620832582, "grad_norm": 0.3523549437522888, "learning_rate": 2.7098844818386164e-06, "loss": 0.4656, "step": 1269 }, { "epoch": 0.6866102000360426, "grad_norm": 0.3538891077041626, "learning_rate": 2.7014970771035474e-06, "loss": 0.4821, "step": 1270 }, { "epoch": 0.6871508379888268, "grad_norm": 0.3851732313632965, "learning_rate": 2.6931178652756262e-06, "loss": 0.4928, "step": 1271 }, { "epoch": 0.6876914759416111, "grad_norm": 0.3899889290332794, "learning_rate": 2.6847468762221616e-06, "loss": 0.4867, "step": 1272 }, { "epoch": 0.6882321138943954, "grad_norm": 0.4064536690711975, "learning_rate": 2.6763841397811576e-06, "loss": 0.4629, "step": 1273 }, { "epoch": 0.6887727518471797, "grad_norm": 0.34950196743011475, "learning_rate": 2.668029685761201e-06, "loss": 0.4284, "step": 1274 }, { "epoch": 0.6893133897999639, "grad_norm": 0.36954036355018616, "learning_rate": 2.6596835439413584e-06, "loss": 0.4834, "step": 1275 }, { "epoch": 0.6898540277527483, "grad_norm": 0.3687545657157898, "learning_rate": 2.6513457440710612e-06, "loss": 0.4632, "step": 1276 }, { "epoch": 0.6903946657055325, "grad_norm": 0.41740676760673523, "learning_rate": 2.6430163158700116e-06, "loss": 0.4802, "step": 1277 }, { "epoch": 0.6909353036583168, "grad_norm": 0.3302917182445526, "learning_rate": 2.634695289028072e-06, "loss": 0.4417, "step": 1278 }, { "epoch": 0.6914759416111012, "grad_norm": 0.3250352144241333, "learning_rate": 2.6263826932051562e-06, "loss": 0.465, "step": 1279 }, { "epoch": 0.6920165795638854, "grad_norm": 0.35972073674201965, "learning_rate": 2.6180785580311284e-06, "loss": 0.467, "step": 1280 }, { "epoch": 0.6925572175166697, "grad_norm": 0.34688135981559753, "learning_rate": 2.609782913105691e-06, "loss": 0.4528, "step": 1281 }, { "epoch": 0.6930978554694539, "grad_norm": 0.38405537605285645, "learning_rate": 2.601495787998288e-06, "loss": 0.4902, "step": 1282 }, { "epoch": 0.6936384934222383, "grad_norm": 0.38303545117378235, "learning_rate": 2.59321721224799e-06, "loss": 0.4536, "step": 1283 }, { "epoch": 0.6941791313750225, "grad_norm": 0.34185272455215454, "learning_rate": 2.5849472153634003e-06, "loss": 0.4441, "step": 1284 }, { "epoch": 0.6947197693278068, "grad_norm": 0.41703319549560547, "learning_rate": 2.576685826822535e-06, "loss": 0.4781, "step": 1285 }, { "epoch": 0.6952604072805911, "grad_norm": 0.3938962519168854, "learning_rate": 2.568433076072734e-06, "loss": 0.4755, "step": 1286 }, { "epoch": 0.6958010452333754, "grad_norm": 0.36541613936424255, "learning_rate": 2.5601889925305433e-06, "loss": 0.4789, "step": 1287 }, { "epoch": 0.6963416831861596, "grad_norm": 0.3563724458217621, "learning_rate": 2.5519536055816194e-06, "loss": 0.4781, "step": 1288 }, { "epoch": 0.696882321138944, "grad_norm": 0.34606099128723145, "learning_rate": 2.5437269445806146e-06, "loss": 0.4485, "step": 1289 }, { "epoch": 0.6974229590917282, "grad_norm": 0.42106860876083374, "learning_rate": 2.5355090388510806e-06, "loss": 0.4739, "step": 1290 }, { "epoch": 0.6979635970445125, "grad_norm": 0.4748215973377228, "learning_rate": 2.527299917685362e-06, "loss": 0.4659, "step": 1291 }, { "epoch": 0.6985042349972969, "grad_norm": 0.38426297903060913, "learning_rate": 2.519099610344492e-06, "loss": 0.4976, "step": 1292 }, { "epoch": 0.6990448729500811, "grad_norm": 0.3022260069847107, "learning_rate": 2.5109081460580875e-06, "loss": 0.4347, "step": 1293 }, { "epoch": 0.6995855109028654, "grad_norm": 0.35534849762916565, "learning_rate": 2.502725554024239e-06, "loss": 0.4635, "step": 1294 }, { "epoch": 0.7001261488556497, "grad_norm": 0.35701191425323486, "learning_rate": 2.494551863409418e-06, "loss": 0.4827, "step": 1295 }, { "epoch": 0.700666786808434, "grad_norm": 0.37284964323043823, "learning_rate": 2.4863871033483693e-06, "loss": 0.4857, "step": 1296 }, { "epoch": 0.7012074247612182, "grad_norm": 0.35588300228118896, "learning_rate": 2.478231302943997e-06, "loss": 0.4709, "step": 1297 }, { "epoch": 0.7017480627140025, "grad_norm": 0.3689674735069275, "learning_rate": 2.470084491267278e-06, "loss": 0.4734, "step": 1298 }, { "epoch": 0.7022887006667868, "grad_norm": 0.3724137246608734, "learning_rate": 2.46194669735714e-06, "loss": 0.4638, "step": 1299 }, { "epoch": 0.7028293386195711, "grad_norm": 0.42699527740478516, "learning_rate": 2.4538179502203753e-06, "loss": 0.5024, "step": 1300 }, { "epoch": 0.7033699765723553, "grad_norm": 0.35276705026626587, "learning_rate": 2.445698278831528e-06, "loss": 0.4693, "step": 1301 }, { "epoch": 0.7039106145251397, "grad_norm": 0.3660089075565338, "learning_rate": 2.437587712132787e-06, "loss": 0.4544, "step": 1302 }, { "epoch": 0.704451252477924, "grad_norm": 0.3719891905784607, "learning_rate": 2.429486279033892e-06, "loss": 0.4498, "step": 1303 }, { "epoch": 0.7049918904307082, "grad_norm": 0.34804993867874146, "learning_rate": 2.4213940084120274e-06, "loss": 0.4814, "step": 1304 }, { "epoch": 0.7055325283834926, "grad_norm": 0.35139375925064087, "learning_rate": 2.4133109291117156e-06, "loss": 0.472, "step": 1305 }, { "epoch": 0.7060731663362768, "grad_norm": 0.3766683042049408, "learning_rate": 2.405237069944721e-06, "loss": 0.4688, "step": 1306 }, { "epoch": 0.7066138042890611, "grad_norm": 0.37273478507995605, "learning_rate": 2.397172459689936e-06, "loss": 0.5089, "step": 1307 }, { "epoch": 0.7071544422418454, "grad_norm": 0.33368778228759766, "learning_rate": 2.3891171270932923e-06, "loss": 0.4666, "step": 1308 }, { "epoch": 0.7076950801946297, "grad_norm": 0.3467375338077545, "learning_rate": 2.3810711008676495e-06, "loss": 0.467, "step": 1309 }, { "epoch": 0.7082357181474139, "grad_norm": 0.39090535044670105, "learning_rate": 2.3730344096926974e-06, "loss": 0.4934, "step": 1310 }, { "epoch": 0.7087763561001982, "grad_norm": 0.35073044896125793, "learning_rate": 2.3650070822148447e-06, "loss": 0.4396, "step": 1311 }, { "epoch": 0.7093169940529825, "grad_norm": 0.321822851896286, "learning_rate": 2.3569891470471308e-06, "loss": 0.4375, "step": 1312 }, { "epoch": 0.7098576320057668, "grad_norm": 0.3142220377922058, "learning_rate": 2.3489806327691156e-06, "loss": 0.4711, "step": 1313 }, { "epoch": 0.710398269958551, "grad_norm": 0.3423789143562317, "learning_rate": 2.3409815679267733e-06, "loss": 0.4384, "step": 1314 }, { "epoch": 0.7109389079113354, "grad_norm": 0.3237120509147644, "learning_rate": 2.3329919810324036e-06, "loss": 0.4744, "step": 1315 }, { "epoch": 0.7114795458641197, "grad_norm": 0.3413024842739105, "learning_rate": 2.325011900564515e-06, "loss": 0.4676, "step": 1316 }, { "epoch": 0.7120201838169039, "grad_norm": 0.3537076711654663, "learning_rate": 2.3170413549677367e-06, "loss": 0.4839, "step": 1317 }, { "epoch": 0.7125608217696883, "grad_norm": 0.3629612624645233, "learning_rate": 2.3090803726527083e-06, "loss": 0.5076, "step": 1318 }, { "epoch": 0.7131014597224725, "grad_norm": 0.32516616582870483, "learning_rate": 2.301128981995985e-06, "loss": 0.4485, "step": 1319 }, { "epoch": 0.7136420976752568, "grad_norm": 0.38808518648147583, "learning_rate": 2.293187211339926e-06, "loss": 0.4638, "step": 1320 }, { "epoch": 0.7141827356280411, "grad_norm": 0.3397766351699829, "learning_rate": 2.2852550889926067e-06, "loss": 0.4937, "step": 1321 }, { "epoch": 0.7147233735808254, "grad_norm": 0.3628048896789551, "learning_rate": 2.2773326432277097e-06, "loss": 0.4832, "step": 1322 }, { "epoch": 0.7152640115336096, "grad_norm": 0.32281026244163513, "learning_rate": 2.2694199022844284e-06, "loss": 0.4313, "step": 1323 }, { "epoch": 0.715804649486394, "grad_norm": 0.33235985040664673, "learning_rate": 2.261516894367356e-06, "loss": 0.4402, "step": 1324 }, { "epoch": 0.7163452874391782, "grad_norm": 0.3623601198196411, "learning_rate": 2.2536236476464007e-06, "loss": 0.4644, "step": 1325 }, { "epoch": 0.7168859253919625, "grad_norm": 0.32898762822151184, "learning_rate": 2.2457401902566745e-06, "loss": 0.4539, "step": 1326 }, { "epoch": 0.7174265633447467, "grad_norm": 0.34005796909332275, "learning_rate": 2.2378665502983976e-06, "loss": 0.4869, "step": 1327 }, { "epoch": 0.7179672012975311, "grad_norm": 0.3692139983177185, "learning_rate": 2.2300027558367917e-06, "loss": 0.4671, "step": 1328 }, { "epoch": 0.7185078392503154, "grad_norm": 0.3715006113052368, "learning_rate": 2.2221488349019903e-06, "loss": 0.4843, "step": 1329 }, { "epoch": 0.7190484772030996, "grad_norm": 0.30792778730392456, "learning_rate": 2.2143048154889272e-06, "loss": 0.4696, "step": 1330 }, { "epoch": 0.719589115155884, "grad_norm": 0.3531022071838379, "learning_rate": 2.2064707255572494e-06, "loss": 0.4716, "step": 1331 }, { "epoch": 0.7201297531086682, "grad_norm": 0.3335738182067871, "learning_rate": 2.1986465930312067e-06, "loss": 0.4389, "step": 1332 }, { "epoch": 0.7206703910614525, "grad_norm": 0.3675576150417328, "learning_rate": 2.1908324457995556e-06, "loss": 0.4436, "step": 1333 }, { "epoch": 0.7212110290142368, "grad_norm": 0.33168819546699524, "learning_rate": 2.1830283117154616e-06, "loss": 0.4308, "step": 1334 }, { "epoch": 0.7217516669670211, "grad_norm": 0.36723217368125916, "learning_rate": 2.1752342185964003e-06, "loss": 0.4769, "step": 1335 }, { "epoch": 0.7222923049198053, "grad_norm": 0.36966997385025024, "learning_rate": 2.1674501942240567e-06, "loss": 0.4624, "step": 1336 }, { "epoch": 0.7228329428725897, "grad_norm": 0.3729422986507416, "learning_rate": 2.159676266344222e-06, "loss": 0.4774, "step": 1337 }, { "epoch": 0.723373580825374, "grad_norm": 0.3431287407875061, "learning_rate": 2.151912462666703e-06, "loss": 0.4451, "step": 1338 }, { "epoch": 0.7239142187781582, "grad_norm": 0.3624579906463623, "learning_rate": 2.144158810865217e-06, "loss": 0.4711, "step": 1339 }, { "epoch": 0.7244548567309426, "grad_norm": 0.3302533030509949, "learning_rate": 2.1364153385773007e-06, "loss": 0.4802, "step": 1340 }, { "epoch": 0.7249954946837268, "grad_norm": 0.34277042746543884, "learning_rate": 2.128682073404197e-06, "loss": 0.4794, "step": 1341 }, { "epoch": 0.7255361326365111, "grad_norm": 0.3795274794101715, "learning_rate": 2.1209590429107734e-06, "loss": 0.4614, "step": 1342 }, { "epoch": 0.7260767705892953, "grad_norm": 0.3579147458076477, "learning_rate": 2.1132462746254147e-06, "loss": 0.4946, "step": 1343 }, { "epoch": 0.7266174085420797, "grad_norm": 0.36164700984954834, "learning_rate": 2.1055437960399266e-06, "loss": 0.4427, "step": 1344 }, { "epoch": 0.7271580464948639, "grad_norm": 0.3287413418292999, "learning_rate": 2.0978516346094342e-06, "loss": 0.4723, "step": 1345 }, { "epoch": 0.7276986844476482, "grad_norm": 0.33054518699645996, "learning_rate": 2.0901698177522944e-06, "loss": 0.4589, "step": 1346 }, { "epoch": 0.7282393224004325, "grad_norm": 0.3471773862838745, "learning_rate": 2.082498372849983e-06, "loss": 0.4746, "step": 1347 }, { "epoch": 0.7287799603532168, "grad_norm": 0.34974491596221924, "learning_rate": 2.074837327247012e-06, "loss": 0.487, "step": 1348 }, { "epoch": 0.729320598306001, "grad_norm": 0.3569546937942505, "learning_rate": 2.067186708250826e-06, "loss": 0.4852, "step": 1349 }, { "epoch": 0.7298612362587854, "grad_norm": 0.31537672877311707, "learning_rate": 2.059546543131696e-06, "loss": 0.4561, "step": 1350 }, { "epoch": 0.7304018742115697, "grad_norm": 0.2963314950466156, "learning_rate": 2.051916859122641e-06, "loss": 0.4487, "step": 1351 }, { "epoch": 0.7309425121643539, "grad_norm": 0.31170910596847534, "learning_rate": 2.0442976834193146e-06, "loss": 0.4585, "step": 1352 }, { "epoch": 0.7314831501171383, "grad_norm": 0.3321332037448883, "learning_rate": 2.036689043179917e-06, "loss": 0.4635, "step": 1353 }, { "epoch": 0.7320237880699225, "grad_norm": 0.34618934988975525, "learning_rate": 2.0290909655250913e-06, "loss": 0.4654, "step": 1354 }, { "epoch": 0.7325644260227068, "grad_norm": 0.3425121009349823, "learning_rate": 2.0215034775378336e-06, "loss": 0.4692, "step": 1355 }, { "epoch": 0.7331050639754911, "grad_norm": 0.33614981174468994, "learning_rate": 2.013926606263394e-06, "loss": 0.473, "step": 1356 }, { "epoch": 0.7336457019282754, "grad_norm": 0.3264980614185333, "learning_rate": 2.0063603787091788e-06, "loss": 0.452, "step": 1357 }, { "epoch": 0.7341863398810596, "grad_norm": 0.33285725116729736, "learning_rate": 1.9988048218446577e-06, "loss": 0.4593, "step": 1358 }, { "epoch": 0.7347269778338439, "grad_norm": 0.367969274520874, "learning_rate": 1.9912599626012593e-06, "loss": 0.4649, "step": 1359 }, { "epoch": 0.7352676157866282, "grad_norm": 0.33582252264022827, "learning_rate": 1.9837258278722855e-06, "loss": 0.4726, "step": 1360 }, { "epoch": 0.7358082537394125, "grad_norm": 0.3242082893848419, "learning_rate": 1.976202444512813e-06, "loss": 0.4946, "step": 1361 }, { "epoch": 0.7363488916921967, "grad_norm": 0.33312639594078064, "learning_rate": 1.96868983933959e-06, "loss": 0.4802, "step": 1362 }, { "epoch": 0.7368895296449811, "grad_norm": 0.3373234272003174, "learning_rate": 1.9611880391309524e-06, "loss": 0.4847, "step": 1363 }, { "epoch": 0.7374301675977654, "grad_norm": 0.3237764835357666, "learning_rate": 1.9536970706267156e-06, "loss": 0.4641, "step": 1364 }, { "epoch": 0.7379708055505496, "grad_norm": 0.33678045868873596, "learning_rate": 1.946216960528092e-06, "loss": 0.476, "step": 1365 }, { "epoch": 0.738511443503334, "grad_norm": 0.3523315489292145, "learning_rate": 1.9387477354975885e-06, "loss": 0.4559, "step": 1366 }, { "epoch": 0.7390520814561182, "grad_norm": 0.3299519121646881, "learning_rate": 1.9312894221589085e-06, "loss": 0.4554, "step": 1367 }, { "epoch": 0.7395927194089025, "grad_norm": 0.3041582703590393, "learning_rate": 1.9238420470968665e-06, "loss": 0.4544, "step": 1368 }, { "epoch": 0.7401333573616868, "grad_norm": 0.30898869037628174, "learning_rate": 1.9164056368572847e-06, "loss": 0.4635, "step": 1369 }, { "epoch": 0.7406739953144711, "grad_norm": 0.3286183178424835, "learning_rate": 1.9089802179469036e-06, "loss": 0.4673, "step": 1370 }, { "epoch": 0.7412146332672553, "grad_norm": 0.3371999263763428, "learning_rate": 1.9015658168332863e-06, "loss": 0.4848, "step": 1371 }, { "epoch": 0.7417552712200396, "grad_norm": 0.33480530977249146, "learning_rate": 1.8941624599447178e-06, "loss": 0.4687, "step": 1372 }, { "epoch": 0.742295909172824, "grad_norm": 0.3701348304748535, "learning_rate": 1.8867701736701238e-06, "loss": 0.4683, "step": 1373 }, { "epoch": 0.7428365471256082, "grad_norm": 0.33396846055984497, "learning_rate": 1.8793889843589647e-06, "loss": 0.4728, "step": 1374 }, { "epoch": 0.7433771850783925, "grad_norm": 0.3385055363178253, "learning_rate": 1.87201891832115e-06, "loss": 0.4715, "step": 1375 }, { "epoch": 0.7439178230311768, "grad_norm": 0.33360961079597473, "learning_rate": 1.8646600018269356e-06, "loss": 0.4777, "step": 1376 }, { "epoch": 0.7444584609839611, "grad_norm": 0.2998606264591217, "learning_rate": 1.8573122611068406e-06, "loss": 0.4671, "step": 1377 }, { "epoch": 0.7449990989367453, "grad_norm": 0.3257642984390259, "learning_rate": 1.8499757223515442e-06, "loss": 0.4966, "step": 1378 }, { "epoch": 0.7455397368895297, "grad_norm": 0.3353062868118286, "learning_rate": 1.8426504117118011e-06, "loss": 0.4495, "step": 1379 }, { "epoch": 0.7460803748423139, "grad_norm": 0.3439493179321289, "learning_rate": 1.8353363552983382e-06, "loss": 0.4777, "step": 1380 }, { "epoch": 0.7466210127950982, "grad_norm": 0.38241538405418396, "learning_rate": 1.8280335791817733e-06, "loss": 0.4726, "step": 1381 }, { "epoch": 0.7471616507478825, "grad_norm": 0.3326997458934784, "learning_rate": 1.8207421093925127e-06, "loss": 0.4798, "step": 1382 }, { "epoch": 0.7477022887006668, "grad_norm": 0.33782845735549927, "learning_rate": 1.8134619719206624e-06, "loss": 0.4439, "step": 1383 }, { "epoch": 0.748242926653451, "grad_norm": 0.34692955017089844, "learning_rate": 1.8061931927159377e-06, "loss": 0.4354, "step": 1384 }, { "epoch": 0.7487835646062354, "grad_norm": 0.3597027659416199, "learning_rate": 1.7989357976875603e-06, "loss": 0.4989, "step": 1385 }, { "epoch": 0.7493242025590197, "grad_norm": 0.3161109983921051, "learning_rate": 1.7916898127041815e-06, "loss": 0.4341, "step": 1386 }, { "epoch": 0.7498648405118039, "grad_norm": 0.32751956582069397, "learning_rate": 1.7844552635937784e-06, "loss": 0.4657, "step": 1387 }, { "epoch": 0.7504054784645882, "grad_norm": 0.32368284463882446, "learning_rate": 1.7772321761435674e-06, "loss": 0.4705, "step": 1388 }, { "epoch": 0.7509461164173725, "grad_norm": 0.3560972511768341, "learning_rate": 1.7700205760999061e-06, "loss": 0.4442, "step": 1389 }, { "epoch": 0.7514867543701568, "grad_norm": 0.35852131247520447, "learning_rate": 1.76282048916821e-06, "loss": 0.4745, "step": 1390 }, { "epoch": 0.752027392322941, "grad_norm": 0.35611191391944885, "learning_rate": 1.7556319410128557e-06, "loss": 0.4735, "step": 1391 }, { "epoch": 0.7525680302757254, "grad_norm": 0.33755627274513245, "learning_rate": 1.7484549572570913e-06, "loss": 0.4822, "step": 1392 }, { "epoch": 0.7531086682285096, "grad_norm": 0.33882173895835876, "learning_rate": 1.7412895634829391e-06, "loss": 0.4906, "step": 1393 }, { "epoch": 0.7536493061812939, "grad_norm": 0.3680201768875122, "learning_rate": 1.7341357852311175e-06, "loss": 0.5208, "step": 1394 }, { "epoch": 0.7541899441340782, "grad_norm": 0.3476288914680481, "learning_rate": 1.726993648000933e-06, "loss": 0.4545, "step": 1395 }, { "epoch": 0.7547305820868625, "grad_norm": 0.35119006037712097, "learning_rate": 1.7198631772502057e-06, "loss": 0.4769, "step": 1396 }, { "epoch": 0.7552712200396468, "grad_norm": 0.3414047360420227, "learning_rate": 1.7127443983951687e-06, "loss": 0.455, "step": 1397 }, { "epoch": 0.7558118579924311, "grad_norm": 0.3647000193595886, "learning_rate": 1.7056373368103756e-06, "loss": 0.5124, "step": 1398 }, { "epoch": 0.7563524959452154, "grad_norm": 0.34969907999038696, "learning_rate": 1.6985420178286216e-06, "loss": 0.4956, "step": 1399 }, { "epoch": 0.7568931338979996, "grad_norm": 0.3402901887893677, "learning_rate": 1.6914584667408408e-06, "loss": 0.455, "step": 1400 }, { "epoch": 0.757433771850784, "grad_norm": 0.36687296628952026, "learning_rate": 1.6843867087960252e-06, "loss": 0.4694, "step": 1401 }, { "epoch": 0.7579744098035682, "grad_norm": 0.34027478098869324, "learning_rate": 1.6773267692011242e-06, "loss": 0.4763, "step": 1402 }, { "epoch": 0.7585150477563525, "grad_norm": 0.30456939339637756, "learning_rate": 1.6702786731209681e-06, "loss": 0.4545, "step": 1403 }, { "epoch": 0.7590556857091367, "grad_norm": 0.32559987902641296, "learning_rate": 1.6632424456781675e-06, "loss": 0.484, "step": 1404 }, { "epoch": 0.7595963236619211, "grad_norm": 0.3651071786880493, "learning_rate": 1.6562181119530314e-06, "loss": 0.4981, "step": 1405 }, { "epoch": 0.7601369616147053, "grad_norm": 0.36218157410621643, "learning_rate": 1.649205696983468e-06, "loss": 0.4905, "step": 1406 }, { "epoch": 0.7606775995674896, "grad_norm": 0.35047683119773865, "learning_rate": 1.642205225764908e-06, "loss": 0.4697, "step": 1407 }, { "epoch": 0.761218237520274, "grad_norm": 0.3518739342689514, "learning_rate": 1.635216723250206e-06, "loss": 0.4665, "step": 1408 }, { "epoch": 0.7617588754730582, "grad_norm": 0.3698537349700928, "learning_rate": 1.6282402143495568e-06, "loss": 0.4555, "step": 1409 }, { "epoch": 0.7622995134258425, "grad_norm": 0.3216371536254883, "learning_rate": 1.6212757239304e-06, "loss": 0.4591, "step": 1410 }, { "epoch": 0.7628401513786268, "grad_norm": 0.33843740820884705, "learning_rate": 1.6143232768173428e-06, "loss": 0.4754, "step": 1411 }, { "epoch": 0.7633807893314111, "grad_norm": 0.3162449300289154, "learning_rate": 1.6073828977920564e-06, "loss": 0.4683, "step": 1412 }, { "epoch": 0.7639214272841953, "grad_norm": 0.3222675025463104, "learning_rate": 1.6004546115932023e-06, "loss": 0.4774, "step": 1413 }, { "epoch": 0.7644620652369797, "grad_norm": 0.3448139727115631, "learning_rate": 1.5935384429163376e-06, "loss": 0.4532, "step": 1414 }, { "epoch": 0.7650027031897639, "grad_norm": 0.3369153141975403, "learning_rate": 1.5866344164138214e-06, "loss": 0.4624, "step": 1415 }, { "epoch": 0.7655433411425482, "grad_norm": 0.3553886115550995, "learning_rate": 1.5797425566947378e-06, "loss": 0.4782, "step": 1416 }, { "epoch": 0.7660839790953325, "grad_norm": 0.31446409225463867, "learning_rate": 1.572862888324801e-06, "loss": 0.4476, "step": 1417 }, { "epoch": 0.7666246170481168, "grad_norm": 0.3353008031845093, "learning_rate": 1.5659954358262724e-06, "loss": 0.4717, "step": 1418 }, { "epoch": 0.767165255000901, "grad_norm": 0.3172055780887604, "learning_rate": 1.5591402236778647e-06, "loss": 0.4599, "step": 1419 }, { "epoch": 0.7677058929536853, "grad_norm": 0.3034406900405884, "learning_rate": 1.5522972763146653e-06, "loss": 0.439, "step": 1420 }, { "epoch": 0.7682465309064697, "grad_norm": 0.36666351556777954, "learning_rate": 1.5454666181280437e-06, "loss": 0.498, "step": 1421 }, { "epoch": 0.7687871688592539, "grad_norm": 0.36715036630630493, "learning_rate": 1.5386482734655633e-06, "loss": 0.489, "step": 1422 }, { "epoch": 0.7693278068120382, "grad_norm": 0.3549302816390991, "learning_rate": 1.5318422666308997e-06, "loss": 0.4685, "step": 1423 }, { "epoch": 0.7698684447648225, "grad_norm": 0.3297741413116455, "learning_rate": 1.5250486218837458e-06, "loss": 0.493, "step": 1424 }, { "epoch": 0.7704090827176068, "grad_norm": 0.3559105396270752, "learning_rate": 1.5182673634397365e-06, "loss": 0.4517, "step": 1425 }, { "epoch": 0.770949720670391, "grad_norm": 0.3720037639141083, "learning_rate": 1.5114985154703505e-06, "loss": 0.4604, "step": 1426 }, { "epoch": 0.7714903586231754, "grad_norm": 0.384896844625473, "learning_rate": 1.5047421021028353e-06, "loss": 0.4563, "step": 1427 }, { "epoch": 0.7720309965759596, "grad_norm": 0.35539141297340393, "learning_rate": 1.4979981474201106e-06, "loss": 0.458, "step": 1428 }, { "epoch": 0.7725716345287439, "grad_norm": 0.3983554244041443, "learning_rate": 1.4912666754606914e-06, "loss": 0.4836, "step": 1429 }, { "epoch": 0.7731122724815283, "grad_norm": 0.34799396991729736, "learning_rate": 1.4845477102185974e-06, "loss": 0.4661, "step": 1430 }, { "epoch": 0.7736529104343125, "grad_norm": 0.3554874360561371, "learning_rate": 1.4778412756432709e-06, "loss": 0.479, "step": 1431 }, { "epoch": 0.7741935483870968, "grad_norm": 0.37535446882247925, "learning_rate": 1.471147395639484e-06, "loss": 0.4847, "step": 1432 }, { "epoch": 0.7747341863398811, "grad_norm": 0.3827463686466217, "learning_rate": 1.4644660940672628e-06, "loss": 0.4806, "step": 1433 }, { "epoch": 0.7752748242926654, "grad_norm": 0.3442119359970093, "learning_rate": 1.457797394741798e-06, "loss": 0.5032, "step": 1434 }, { "epoch": 0.7758154622454496, "grad_norm": 0.35367828607559204, "learning_rate": 1.451141321433358e-06, "loss": 0.4948, "step": 1435 }, { "epoch": 0.7763561001982339, "grad_norm": 0.3889831304550171, "learning_rate": 1.4444978978672103e-06, "loss": 0.4688, "step": 1436 }, { "epoch": 0.7768967381510182, "grad_norm": 0.3154084086418152, "learning_rate": 1.4378671477235268e-06, "loss": 0.4786, "step": 1437 }, { "epoch": 0.7774373761038025, "grad_norm": 0.34436705708503723, "learning_rate": 1.431249094637311e-06, "loss": 0.472, "step": 1438 }, { "epoch": 0.7779780140565867, "grad_norm": 0.3308955132961273, "learning_rate": 1.4246437621983057e-06, "loss": 0.4739, "step": 1439 }, { "epoch": 0.7785186520093711, "grad_norm": 0.319321870803833, "learning_rate": 1.418051173950914e-06, "loss": 0.4603, "step": 1440 }, { "epoch": 0.7790592899621553, "grad_norm": 0.31957656145095825, "learning_rate": 1.4114713533941082e-06, "loss": 0.4533, "step": 1441 }, { "epoch": 0.7795999279149396, "grad_norm": 0.3338441848754883, "learning_rate": 1.4049043239813575e-06, "loss": 0.4805, "step": 1442 }, { "epoch": 0.780140565867724, "grad_norm": 0.33430007100105286, "learning_rate": 1.3983501091205298e-06, "loss": 0.494, "step": 1443 }, { "epoch": 0.7806812038205082, "grad_norm": 0.3184848129749298, "learning_rate": 1.3918087321738244e-06, "loss": 0.48, "step": 1444 }, { "epoch": 0.7812218417732925, "grad_norm": 0.37600019574165344, "learning_rate": 1.3852802164576717e-06, "loss": 0.4767, "step": 1445 }, { "epoch": 0.7817624797260768, "grad_norm": 0.3498574197292328, "learning_rate": 1.3787645852426663e-06, "loss": 0.4664, "step": 1446 }, { "epoch": 0.7823031176788611, "grad_norm": 0.3013286292552948, "learning_rate": 1.3722618617534727e-06, "loss": 0.4444, "step": 1447 }, { "epoch": 0.7828437556316453, "grad_norm": 0.32576611638069153, "learning_rate": 1.3657720691687481e-06, "loss": 0.4616, "step": 1448 }, { "epoch": 0.7833843935844296, "grad_norm": 0.3226719796657562, "learning_rate": 1.3592952306210589e-06, "loss": 0.4776, "step": 1449 }, { "epoch": 0.7839250315372139, "grad_norm": 0.30378836393356323, "learning_rate": 1.3528313691967926e-06, "loss": 0.4671, "step": 1450 }, { "epoch": 0.7844656694899982, "grad_norm": 0.36785420775413513, "learning_rate": 1.3463805079360854e-06, "loss": 0.4758, "step": 1451 }, { "epoch": 0.7850063074427824, "grad_norm": 0.332956463098526, "learning_rate": 1.3399426698327329e-06, "loss": 0.4767, "step": 1452 }, { "epoch": 0.7855469453955668, "grad_norm": 0.34059152007102966, "learning_rate": 1.3335178778341123e-06, "loss": 0.4863, "step": 1453 }, { "epoch": 0.786087583348351, "grad_norm": 0.3034917712211609, "learning_rate": 1.3271061548410947e-06, "loss": 0.4506, "step": 1454 }, { "epoch": 0.7866282213011353, "grad_norm": 0.30233845114707947, "learning_rate": 1.3207075237079702e-06, "loss": 0.4683, "step": 1455 }, { "epoch": 0.7871688592539197, "grad_norm": 0.311905175447464, "learning_rate": 1.3143220072423647e-06, "loss": 0.44, "step": 1456 }, { "epoch": 0.7877094972067039, "grad_norm": 0.3118400275707245, "learning_rate": 1.307949628205153e-06, "loss": 0.4273, "step": 1457 }, { "epoch": 0.7882501351594882, "grad_norm": 0.3080877363681793, "learning_rate": 1.301590409310387e-06, "loss": 0.4283, "step": 1458 }, { "epoch": 0.7887907731122725, "grad_norm": 0.34099653363227844, "learning_rate": 1.2952443732252058e-06, "loss": 0.4688, "step": 1459 }, { "epoch": 0.7893314110650568, "grad_norm": 0.3410574495792389, "learning_rate": 1.2889115425697612e-06, "loss": 0.4751, "step": 1460 }, { "epoch": 0.789872049017841, "grad_norm": 0.3065154552459717, "learning_rate": 1.282591939917136e-06, "loss": 0.4832, "step": 1461 }, { "epoch": 0.7904126869706254, "grad_norm": 0.3287332355976105, "learning_rate": 1.2762855877932617e-06, "loss": 0.4662, "step": 1462 }, { "epoch": 0.7909533249234096, "grad_norm": 0.3344971239566803, "learning_rate": 1.269992508676835e-06, "loss": 0.4654, "step": 1463 }, { "epoch": 0.7914939628761939, "grad_norm": 0.3270774781703949, "learning_rate": 1.2637127249992465e-06, "loss": 0.456, "step": 1464 }, { "epoch": 0.7920346008289781, "grad_norm": 0.31552377343177795, "learning_rate": 1.257446259144494e-06, "loss": 0.4917, "step": 1465 }, { "epoch": 0.7925752387817625, "grad_norm": 0.3612023591995239, "learning_rate": 1.2511931334491068e-06, "loss": 0.5151, "step": 1466 }, { "epoch": 0.7931158767345468, "grad_norm": 0.326369047164917, "learning_rate": 1.2449533702020578e-06, "loss": 0.4871, "step": 1467 }, { "epoch": 0.793656514687331, "grad_norm": 0.32858172059059143, "learning_rate": 1.238726991644696e-06, "loss": 0.4776, "step": 1468 }, { "epoch": 0.7941971526401154, "grad_norm": 0.30760154128074646, "learning_rate": 1.232514019970658e-06, "loss": 0.4504, "step": 1469 }, { "epoch": 0.7947377905928996, "grad_norm": 0.3285392224788666, "learning_rate": 1.2263144773257967e-06, "loss": 0.4873, "step": 1470 }, { "epoch": 0.7952784285456839, "grad_norm": 0.3165012001991272, "learning_rate": 1.2201283858080903e-06, "loss": 0.4435, "step": 1471 }, { "epoch": 0.7958190664984682, "grad_norm": 0.3612854480743408, "learning_rate": 1.2139557674675773e-06, "loss": 0.4748, "step": 1472 }, { "epoch": 0.7963597044512525, "grad_norm": 0.3201451003551483, "learning_rate": 1.2077966443062706e-06, "loss": 0.499, "step": 1473 }, { "epoch": 0.7969003424040367, "grad_norm": 0.3280227780342102, "learning_rate": 1.2016510382780772e-06, "loss": 0.4836, "step": 1474 }, { "epoch": 0.7974409803568211, "grad_norm": 0.28860074281692505, "learning_rate": 1.1955189712887272e-06, "loss": 0.4607, "step": 1475 }, { "epoch": 0.7979816183096053, "grad_norm": 0.3293115198612213, "learning_rate": 1.189400465195687e-06, "loss": 0.4633, "step": 1476 }, { "epoch": 0.7985222562623896, "grad_norm": 0.3143051862716675, "learning_rate": 1.183295541808089e-06, "loss": 0.4549, "step": 1477 }, { "epoch": 0.799062894215174, "grad_norm": 0.30853116512298584, "learning_rate": 1.1772042228866493e-06, "loss": 0.4824, "step": 1478 }, { "epoch": 0.7996035321679582, "grad_norm": 0.32995665073394775, "learning_rate": 1.1711265301435937e-06, "loss": 0.4844, "step": 1479 }, { "epoch": 0.8001441701207425, "grad_norm": 0.3321036100387573, "learning_rate": 1.165062485242574e-06, "loss": 0.4591, "step": 1480 }, { "epoch": 0.8006848080735267, "grad_norm": 0.29632872343063354, "learning_rate": 1.159012109798598e-06, "loss": 0.4732, "step": 1481 }, { "epoch": 0.8012254460263111, "grad_norm": 0.29381921887397766, "learning_rate": 1.1529754253779486e-06, "loss": 0.4685, "step": 1482 }, { "epoch": 0.8017660839790953, "grad_norm": 0.321435809135437, "learning_rate": 1.1469524534981091e-06, "loss": 0.4575, "step": 1483 }, { "epoch": 0.8023067219318796, "grad_norm": 0.3351998031139374, "learning_rate": 1.1409432156276805e-06, "loss": 0.4951, "step": 1484 }, { "epoch": 0.8028473598846639, "grad_norm": 0.32599401473999023, "learning_rate": 1.134947733186315e-06, "loss": 0.4516, "step": 1485 }, { "epoch": 0.8033879978374482, "grad_norm": 0.31513264775276184, "learning_rate": 1.1289660275446318e-06, "loss": 0.4816, "step": 1486 }, { "epoch": 0.8039286357902324, "grad_norm": 0.3273223042488098, "learning_rate": 1.1229981200241424e-06, "loss": 0.4561, "step": 1487 }, { "epoch": 0.8044692737430168, "grad_norm": 0.2985652685165405, "learning_rate": 1.1170440318971788e-06, "loss": 0.4808, "step": 1488 }, { "epoch": 0.805009911695801, "grad_norm": 0.318036288022995, "learning_rate": 1.1111037843868095e-06, "loss": 0.4812, "step": 1489 }, { "epoch": 0.8055505496485853, "grad_norm": 0.326748788356781, "learning_rate": 1.1051773986667735e-06, "loss": 0.4737, "step": 1490 }, { "epoch": 0.8060911876013697, "grad_norm": 0.3135136663913727, "learning_rate": 1.0992648958613961e-06, "loss": 0.4618, "step": 1491 }, { "epoch": 0.8066318255541539, "grad_norm": 0.2945253551006317, "learning_rate": 1.0933662970455217e-06, "loss": 0.4769, "step": 1492 }, { "epoch": 0.8071724635069382, "grad_norm": 0.3053777813911438, "learning_rate": 1.0874816232444297e-06, "loss": 0.4672, "step": 1493 }, { "epoch": 0.8077131014597225, "grad_norm": 0.3037196695804596, "learning_rate": 1.081610895433769e-06, "loss": 0.4399, "step": 1494 }, { "epoch": 0.8082537394125068, "grad_norm": 0.30045562982559204, "learning_rate": 1.0757541345394768e-06, "loss": 0.4467, "step": 1495 }, { "epoch": 0.808794377365291, "grad_norm": 0.2896043062210083, "learning_rate": 1.0699113614377065e-06, "loss": 0.4441, "step": 1496 }, { "epoch": 0.8093350153180753, "grad_norm": 0.2978215217590332, "learning_rate": 1.0640825969547498e-06, "loss": 0.4531, "step": 1497 }, { "epoch": 0.8098756532708596, "grad_norm": 0.32296404242515564, "learning_rate": 1.058267861866969e-06, "loss": 0.4619, "step": 1498 }, { "epoch": 0.8104162912236439, "grad_norm": 0.32622790336608887, "learning_rate": 1.0524671769007177e-06, "loss": 0.4852, "step": 1499 }, { "epoch": 0.8109569291764281, "grad_norm": 0.31101804971694946, "learning_rate": 1.0466805627322685e-06, "loss": 0.4534, "step": 1500 }, { "epoch": 0.8114975671292125, "grad_norm": 0.29956305027008057, "learning_rate": 1.0409080399877413e-06, "loss": 0.4594, "step": 1501 }, { "epoch": 0.8120382050819968, "grad_norm": 0.320745050907135, "learning_rate": 1.035149629243023e-06, "loss": 0.4689, "step": 1502 }, { "epoch": 0.812578843034781, "grad_norm": 0.30797460675239563, "learning_rate": 1.0294053510237028e-06, "loss": 0.4573, "step": 1503 }, { "epoch": 0.8131194809875654, "grad_norm": 0.3377775549888611, "learning_rate": 1.0236752258049954e-06, "loss": 0.4752, "step": 1504 }, { "epoch": 0.8136601189403496, "grad_norm": 0.3108988106250763, "learning_rate": 1.017959274011665e-06, "loss": 0.4693, "step": 1505 }, { "epoch": 0.8142007568931339, "grad_norm": 0.33605459332466125, "learning_rate": 1.0122575160179582e-06, "loss": 0.5001, "step": 1506 }, { "epoch": 0.8147413948459182, "grad_norm": 0.33161184191703796, "learning_rate": 1.0065699721475253e-06, "loss": 0.4664, "step": 1507 }, { "epoch": 0.8152820327987025, "grad_norm": 0.31771883368492126, "learning_rate": 1.0008966626733541e-06, "loss": 0.4783, "step": 1508 }, { "epoch": 0.8158226707514867, "grad_norm": 0.3252415657043457, "learning_rate": 9.95237607817694e-07, "loss": 0.473, "step": 1509 }, { "epoch": 0.816363308704271, "grad_norm": 0.33228546380996704, "learning_rate": 9.895928277519822e-07, "loss": 0.4814, "step": 1510 }, { "epoch": 0.8169039466570553, "grad_norm": 0.3342330753803253, "learning_rate": 9.83962342596776e-07, "loss": 0.4642, "step": 1511 }, { "epoch": 0.8174445846098396, "grad_norm": 0.3376791477203369, "learning_rate": 9.783461724216793e-07, "loss": 0.4795, "step": 1512 }, { "epoch": 0.8179852225626238, "grad_norm": 0.3592437505722046, "learning_rate": 9.7274433724527e-07, "loss": 0.4655, "step": 1513 }, { "epoch": 0.8185258605154082, "grad_norm": 0.3259372115135193, "learning_rate": 9.671568570350321e-07, "loss": 0.4775, "step": 1514 }, { "epoch": 0.8190664984681925, "grad_norm": 0.3325469493865967, "learning_rate": 9.615837517072758e-07, "loss": 0.476, "step": 1515 }, { "epoch": 0.8196071364209767, "grad_norm": 0.2931354343891144, "learning_rate": 9.560250411270794e-07, "loss": 0.4563, "step": 1516 }, { "epoch": 0.8201477743737611, "grad_norm": 0.320726215839386, "learning_rate": 9.504807451082088e-07, "loss": 0.4857, "step": 1517 }, { "epoch": 0.8206884123265453, "grad_norm": 0.32242387533187866, "learning_rate": 9.449508834130517e-07, "loss": 0.4836, "step": 1518 }, { "epoch": 0.8212290502793296, "grad_norm": 0.3245270252227783, "learning_rate": 9.394354757525404e-07, "loss": 0.4671, "step": 1519 }, { "epoch": 0.8217696882321139, "grad_norm": 0.3327704668045044, "learning_rate": 9.339345417860918e-07, "loss": 0.485, "step": 1520 }, { "epoch": 0.8223103261848982, "grad_norm": 0.30964258313179016, "learning_rate": 9.284481011215318e-07, "loss": 0.4814, "step": 1521 }, { "epoch": 0.8228509641376824, "grad_norm": 0.3185735046863556, "learning_rate": 9.229761733150205e-07, "loss": 0.4372, "step": 1522 }, { "epoch": 0.8233916020904668, "grad_norm": 0.33612483739852905, "learning_rate": 9.175187778709937e-07, "loss": 0.4454, "step": 1523 }, { "epoch": 0.823932240043251, "grad_norm": 0.31606701016426086, "learning_rate": 9.120759342420821e-07, "loss": 0.4578, "step": 1524 }, { "epoch": 0.8244728779960353, "grad_norm": 0.31590044498443604, "learning_rate": 9.066476618290515e-07, "loss": 0.4715, "step": 1525 }, { "epoch": 0.8250135159488196, "grad_norm": 0.2985929548740387, "learning_rate": 9.012339799807263e-07, "loss": 0.4654, "step": 1526 }, { "epoch": 0.8255541539016039, "grad_norm": 0.35035449266433716, "learning_rate": 8.95834907993926e-07, "loss": 0.4557, "step": 1527 }, { "epoch": 0.8260947918543882, "grad_norm": 0.3264734148979187, "learning_rate": 8.904504651133905e-07, "loss": 0.4634, "step": 1528 }, { "epoch": 0.8266354298071724, "grad_norm": 0.32273826003074646, "learning_rate": 8.850806705317183e-07, "loss": 0.4608, "step": 1529 }, { "epoch": 0.8271760677599568, "grad_norm": 0.29928576946258545, "learning_rate": 8.797255433892926e-07, "loss": 0.4553, "step": 1530 }, { "epoch": 0.827716705712741, "grad_norm": 0.30989155173301697, "learning_rate": 8.743851027742172e-07, "loss": 0.4553, "step": 1531 }, { "epoch": 0.8282573436655253, "grad_norm": 0.337877482175827, "learning_rate": 8.690593677222431e-07, "loss": 0.442, "step": 1532 }, { "epoch": 0.8287979816183096, "grad_norm": 0.34234437346458435, "learning_rate": 8.637483572167077e-07, "loss": 0.4639, "step": 1533 }, { "epoch": 0.8293386195710939, "grad_norm": 0.3071967363357544, "learning_rate": 8.584520901884608e-07, "loss": 0.4784, "step": 1534 }, { "epoch": 0.8298792575238781, "grad_norm": 0.32178065180778503, "learning_rate": 8.531705855158024e-07, "loss": 0.4483, "step": 1535 }, { "epoch": 0.8304198954766625, "grad_norm": 0.33801355957984924, "learning_rate": 8.479038620244089e-07, "loss": 0.4655, "step": 1536 }, { "epoch": 0.8309605334294468, "grad_norm": 0.3588986396789551, "learning_rate": 8.426519384872733e-07, "loss": 0.4633, "step": 1537 }, { "epoch": 0.831501171382231, "grad_norm": 0.3052517771720886, "learning_rate": 8.374148336246352e-07, "loss": 0.4667, "step": 1538 }, { "epoch": 0.8320418093350154, "grad_norm": 0.32133039832115173, "learning_rate": 8.321925661039088e-07, "loss": 0.4773, "step": 1539 }, { "epoch": 0.8325824472877996, "grad_norm": 0.3321249783039093, "learning_rate": 8.269851545396279e-07, "loss": 0.4829, "step": 1540 }, { "epoch": 0.8331230852405839, "grad_norm": 0.3377304673194885, "learning_rate": 8.217926174933665e-07, "loss": 0.4632, "step": 1541 }, { "epoch": 0.8336637231933681, "grad_norm": 0.3176397383213043, "learning_rate": 8.166149734736845e-07, "loss": 0.4814, "step": 1542 }, { "epoch": 0.8342043611461525, "grad_norm": 0.33456897735595703, "learning_rate": 8.114522409360531e-07, "loss": 0.4667, "step": 1543 }, { "epoch": 0.8347449990989367, "grad_norm": 0.29794400930404663, "learning_rate": 8.063044382827945e-07, "loss": 0.4635, "step": 1544 }, { "epoch": 0.835285637051721, "grad_norm": 0.3277740478515625, "learning_rate": 8.011715838630107e-07, "loss": 0.4805, "step": 1545 }, { "epoch": 0.8358262750045053, "grad_norm": 0.3014332950115204, "learning_rate": 7.960536959725252e-07, "loss": 0.4587, "step": 1546 }, { "epoch": 0.8363669129572896, "grad_norm": 0.3108069598674774, "learning_rate": 7.909507928538107e-07, "loss": 0.452, "step": 1547 }, { "epoch": 0.8369075509100738, "grad_norm": 0.3274773061275482, "learning_rate": 7.858628926959311e-07, "loss": 0.4805, "step": 1548 }, { "epoch": 0.8374481888628582, "grad_norm": 0.31421011686325073, "learning_rate": 7.807900136344676e-07, "loss": 0.4582, "step": 1549 }, { "epoch": 0.8379888268156425, "grad_norm": 0.30205944180488586, "learning_rate": 7.757321737514645e-07, "loss": 0.4902, "step": 1550 }, { "epoch": 0.8385294647684267, "grad_norm": 0.33638957142829895, "learning_rate": 7.706893910753571e-07, "loss": 0.4946, "step": 1551 }, { "epoch": 0.8390701027212111, "grad_norm": 0.3180740475654602, "learning_rate": 7.656616835809122e-07, "loss": 0.4895, "step": 1552 }, { "epoch": 0.8396107406739953, "grad_norm": 0.3404751121997833, "learning_rate": 7.606490691891577e-07, "loss": 0.4654, "step": 1553 }, { "epoch": 0.8401513786267796, "grad_norm": 0.32554861903190613, "learning_rate": 7.556515657673274e-07, "loss": 0.4784, "step": 1554 }, { "epoch": 0.8406920165795639, "grad_norm": 0.30992451310157776, "learning_rate": 7.506691911287883e-07, "loss": 0.4746, "step": 1555 }, { "epoch": 0.8412326545323482, "grad_norm": 0.2857951521873474, "learning_rate": 7.457019630329848e-07, "loss": 0.435, "step": 1556 }, { "epoch": 0.8417732924851324, "grad_norm": 0.3287476599216461, "learning_rate": 7.407498991853729e-07, "loss": 0.4949, "step": 1557 }, { "epoch": 0.8423139304379167, "grad_norm": 0.3094945251941681, "learning_rate": 7.358130172373523e-07, "loss": 0.4399, "step": 1558 }, { "epoch": 0.842854568390701, "grad_norm": 0.33573803305625916, "learning_rate": 7.308913347862112e-07, "loss": 0.4661, "step": 1559 }, { "epoch": 0.8433952063434853, "grad_norm": 0.32553064823150635, "learning_rate": 7.259848693750582e-07, "loss": 0.4646, "step": 1560 }, { "epoch": 0.8439358442962696, "grad_norm": 0.3170788288116455, "learning_rate": 7.210936384927631e-07, "loss": 0.4743, "step": 1561 }, { "epoch": 0.8444764822490539, "grad_norm": 0.2907802164554596, "learning_rate": 7.162176595738895e-07, "loss": 0.4486, "step": 1562 }, { "epoch": 0.8450171202018382, "grad_norm": 0.31983017921447754, "learning_rate": 7.113569499986401e-07, "loss": 0.4635, "step": 1563 }, { "epoch": 0.8455577581546224, "grad_norm": 0.2787870168685913, "learning_rate": 7.065115270927875e-07, "loss": 0.4483, "step": 1564 }, { "epoch": 0.8460983961074068, "grad_norm": 0.31309157609939575, "learning_rate": 7.01681408127618e-07, "loss": 0.4546, "step": 1565 }, { "epoch": 0.846639034060191, "grad_norm": 0.29454049468040466, "learning_rate": 6.968666103198679e-07, "loss": 0.4706, "step": 1566 }, { "epoch": 0.8471796720129753, "grad_norm": 0.31175103783607483, "learning_rate": 6.920671508316584e-07, "loss": 0.4844, "step": 1567 }, { "epoch": 0.8477203099657596, "grad_norm": 0.3265729546546936, "learning_rate": 6.872830467704417e-07, "loss": 0.4614, "step": 1568 }, { "epoch": 0.8482609479185439, "grad_norm": 0.310225248336792, "learning_rate": 6.825143151889358e-07, "loss": 0.4786, "step": 1569 }, { "epoch": 0.8488015858713281, "grad_norm": 0.31501367688179016, "learning_rate": 6.777609730850615e-07, "loss": 0.4904, "step": 1570 }, { "epoch": 0.8493422238241125, "grad_norm": 0.31551438570022583, "learning_rate": 6.730230374018886e-07, "loss": 0.4804, "step": 1571 }, { "epoch": 0.8498828617768968, "grad_norm": 0.2776893079280853, "learning_rate": 6.683005250275676e-07, "loss": 0.4579, "step": 1572 }, { "epoch": 0.850423499729681, "grad_norm": 0.29054975509643555, "learning_rate": 6.635934527952747e-07, "loss": 0.4565, "step": 1573 }, { "epoch": 0.8509641376824653, "grad_norm": 0.30911266803741455, "learning_rate": 6.589018374831529e-07, "loss": 0.4731, "step": 1574 }, { "epoch": 0.8515047756352496, "grad_norm": 0.3118568956851959, "learning_rate": 6.542256958142456e-07, "loss": 0.4669, "step": 1575 }, { "epoch": 0.8520454135880339, "grad_norm": 0.29929646849632263, "learning_rate": 6.495650444564433e-07, "loss": 0.4549, "step": 1576 }, { "epoch": 0.8525860515408181, "grad_norm": 0.3033123314380646, "learning_rate": 6.449199000224221e-07, "loss": 0.4539, "step": 1577 }, { "epoch": 0.8531266894936025, "grad_norm": 0.30189841985702515, "learning_rate": 6.402902790695842e-07, "loss": 0.4844, "step": 1578 }, { "epoch": 0.8536673274463867, "grad_norm": 0.3111442029476166, "learning_rate": 6.356761980999998e-07, "loss": 0.4744, "step": 1579 }, { "epoch": 0.854207965399171, "grad_norm": 0.2925800681114197, "learning_rate": 6.310776735603452e-07, "loss": 0.4781, "step": 1580 }, { "epoch": 0.8547486033519553, "grad_norm": 0.2876686453819275, "learning_rate": 6.264947218418482e-07, "loss": 0.4422, "step": 1581 }, { "epoch": 0.8552892413047396, "grad_norm": 0.29705557227134705, "learning_rate": 6.219273592802278e-07, "loss": 0.4675, "step": 1582 }, { "epoch": 0.8558298792575239, "grad_norm": 0.2854503095149994, "learning_rate": 6.173756021556377e-07, "loss": 0.4537, "step": 1583 }, { "epoch": 0.8563705172103082, "grad_norm": 0.31599560379981995, "learning_rate": 6.128394666926035e-07, "loss": 0.5045, "step": 1584 }, { "epoch": 0.8569111551630925, "grad_norm": 0.33695438504219055, "learning_rate": 6.083189690599712e-07, "loss": 0.4945, "step": 1585 }, { "epoch": 0.8574517931158767, "grad_norm": 0.3071337342262268, "learning_rate": 6.038141253708429e-07, "loss": 0.4765, "step": 1586 }, { "epoch": 0.857992431068661, "grad_norm": 0.3113190829753876, "learning_rate": 5.993249516825278e-07, "loss": 0.4864, "step": 1587 }, { "epoch": 0.8585330690214453, "grad_norm": 0.26970189809799194, "learning_rate": 5.948514639964748e-07, "loss": 0.4399, "step": 1588 }, { "epoch": 0.8590737069742296, "grad_norm": 0.3173854947090149, "learning_rate": 5.903936782582253e-07, "loss": 0.4575, "step": 1589 }, { "epoch": 0.8596143449270138, "grad_norm": 0.2729853689670563, "learning_rate": 5.859516103573492e-07, "loss": 0.4714, "step": 1590 }, { "epoch": 0.8601549828797982, "grad_norm": 0.3082450330257416, "learning_rate": 5.815252761273927e-07, "loss": 0.4524, "step": 1591 }, { "epoch": 0.8606956208325824, "grad_norm": 0.3227364718914032, "learning_rate": 5.771146913458187e-07, "loss": 0.4739, "step": 1592 }, { "epoch": 0.8612362587853667, "grad_norm": 0.3056773841381073, "learning_rate": 5.727198717339511e-07, "loss": 0.4696, "step": 1593 }, { "epoch": 0.861776896738151, "grad_norm": 0.3208467364311218, "learning_rate": 5.683408329569212e-07, "loss": 0.4797, "step": 1594 }, { "epoch": 0.8623175346909353, "grad_norm": 0.312785267829895, "learning_rate": 5.6397759062361e-07, "loss": 0.4487, "step": 1595 }, { "epoch": 0.8628581726437196, "grad_norm": 0.31125733256340027, "learning_rate": 5.596301602865938e-07, "loss": 0.4564, "step": 1596 }, { "epoch": 0.8633988105965039, "grad_norm": 0.2956276834011078, "learning_rate": 5.55298557442085e-07, "loss": 0.4526, "step": 1597 }, { "epoch": 0.8639394485492882, "grad_norm": 0.30352863669395447, "learning_rate": 5.509827975298809e-07, "loss": 0.4756, "step": 1598 }, { "epoch": 0.8644800865020724, "grad_norm": 0.30768123269081116, "learning_rate": 5.466828959333087e-07, "loss": 0.4793, "step": 1599 }, { "epoch": 0.8650207244548568, "grad_norm": 0.32290348410606384, "learning_rate": 5.423988679791686e-07, "loss": 0.4746, "step": 1600 }, { "epoch": 0.865561362407641, "grad_norm": 0.32862550020217896, "learning_rate": 5.381307289376786e-07, "loss": 0.4604, "step": 1601 }, { "epoch": 0.8661020003604253, "grad_norm": 0.32285216450691223, "learning_rate": 5.338784940224239e-07, "loss": 0.4757, "step": 1602 }, { "epoch": 0.8666426383132095, "grad_norm": 0.31145691871643066, "learning_rate": 5.296421783902972e-07, "loss": 0.4591, "step": 1603 }, { "epoch": 0.8671832762659939, "grad_norm": 0.28722888231277466, "learning_rate": 5.254217971414499e-07, "loss": 0.4738, "step": 1604 }, { "epoch": 0.8677239142187781, "grad_norm": 0.32165801525115967, "learning_rate": 5.212173653192365e-07, "loss": 0.498, "step": 1605 }, { "epoch": 0.8682645521715624, "grad_norm": 0.3288798928260803, "learning_rate": 5.170288979101573e-07, "loss": 0.4666, "step": 1606 }, { "epoch": 0.8688051901243468, "grad_norm": 0.3006567656993866, "learning_rate": 5.128564098438116e-07, "loss": 0.4625, "step": 1607 }, { "epoch": 0.869345828077131, "grad_norm": 0.3048226833343506, "learning_rate": 5.086999159928391e-07, "loss": 0.4746, "step": 1608 }, { "epoch": 0.8698864660299153, "grad_norm": 0.2663028836250305, "learning_rate": 5.045594311728708e-07, "loss": 0.4526, "step": 1609 }, { "epoch": 0.8704271039826996, "grad_norm": 0.35595017671585083, "learning_rate": 5.00434970142471e-07, "loss": 0.4808, "step": 1610 }, { "epoch": 0.8709677419354839, "grad_norm": 0.33726397156715393, "learning_rate": 4.963265476030916e-07, "loss": 0.4678, "step": 1611 }, { "epoch": 0.8715083798882681, "grad_norm": 0.29663407802581787, "learning_rate": 4.922341781990131e-07, "loss": 0.4845, "step": 1612 }, { "epoch": 0.8720490178410525, "grad_norm": 0.31134849786758423, "learning_rate": 4.881578765172979e-07, "loss": 0.5056, "step": 1613 }, { "epoch": 0.8725896557938367, "grad_norm": 0.3079214096069336, "learning_rate": 4.840976570877332e-07, "loss": 0.4556, "step": 1614 }, { "epoch": 0.873130293746621, "grad_norm": 0.3087759017944336, "learning_rate": 4.800535343827834e-07, "loss": 0.4654, "step": 1615 }, { "epoch": 0.8736709316994054, "grad_norm": 0.3534053862094879, "learning_rate": 4.7602552281753647e-07, "loss": 0.4545, "step": 1616 }, { "epoch": 0.8742115696521896, "grad_norm": 0.30727800726890564, "learning_rate": 4.720136367496536e-07, "loss": 0.4578, "step": 1617 }, { "epoch": 0.8747522076049739, "grad_norm": 0.3113386631011963, "learning_rate": 4.6801789047931535e-07, "loss": 0.4606, "step": 1618 }, { "epoch": 0.8752928455577581, "grad_norm": 0.30020859837532043, "learning_rate": 4.6403829824917643e-07, "loss": 0.4627, "step": 1619 }, { "epoch": 0.8758334835105425, "grad_norm": 0.3038908541202545, "learning_rate": 4.6007487424430565e-07, "loss": 0.4521, "step": 1620 }, { "epoch": 0.8763741214633267, "grad_norm": 0.28542560338974, "learning_rate": 4.5612763259214653e-07, "loss": 0.4602, "step": 1621 }, { "epoch": 0.876914759416111, "grad_norm": 0.29688113927841187, "learning_rate": 4.52196587362459e-07, "loss": 0.461, "step": 1622 }, { "epoch": 0.8774553973688953, "grad_norm": 0.30379223823547363, "learning_rate": 4.4828175256727056e-07, "loss": 0.4739, "step": 1623 }, { "epoch": 0.8779960353216796, "grad_norm": 0.27639734745025635, "learning_rate": 4.4438314216082856e-07, "loss": 0.4268, "step": 1624 }, { "epoch": 0.8785366732744638, "grad_norm": 0.2714707553386688, "learning_rate": 4.405007700395497e-07, "loss": 0.4475, "step": 1625 }, { "epoch": 0.8790773112272482, "grad_norm": 0.31498268246650696, "learning_rate": 4.3663465004196995e-07, "loss": 0.4623, "step": 1626 }, { "epoch": 0.8796179491800324, "grad_norm": 0.29611799120903015, "learning_rate": 4.3278479594869307e-07, "loss": 0.4435, "step": 1627 }, { "epoch": 0.8801585871328167, "grad_norm": 0.3016403615474701, "learning_rate": 4.289512214823466e-07, "loss": 0.4643, "step": 1628 }, { "epoch": 0.8806992250856011, "grad_norm": 0.3306654393672943, "learning_rate": 4.251339403075294e-07, "loss": 0.4731, "step": 1629 }, { "epoch": 0.8812398630383853, "grad_norm": 0.2894127368927002, "learning_rate": 4.21332966030763e-07, "loss": 0.4743, "step": 1630 }, { "epoch": 0.8817805009911696, "grad_norm": 0.296880304813385, "learning_rate": 4.175483122004448e-07, "loss": 0.4616, "step": 1631 }, { "epoch": 0.8823211389439539, "grad_norm": 0.2968302369117737, "learning_rate": 4.1377999230679646e-07, "loss": 0.4585, "step": 1632 }, { "epoch": 0.8828617768967382, "grad_norm": 0.3259296119213104, "learning_rate": 4.100280197818207e-07, "loss": 0.4732, "step": 1633 }, { "epoch": 0.8834024148495224, "grad_norm": 0.301943838596344, "learning_rate": 4.062924079992492e-07, "loss": 0.4769, "step": 1634 }, { "epoch": 0.8839430528023067, "grad_norm": 0.31166207790374756, "learning_rate": 4.025731702744978e-07, "loss": 0.4528, "step": 1635 }, { "epoch": 0.884483690755091, "grad_norm": 0.3266367018222809, "learning_rate": 3.9887031986461546e-07, "loss": 0.4766, "step": 1636 }, { "epoch": 0.8850243287078753, "grad_norm": 0.29861709475517273, "learning_rate": 3.9518386996824196e-07, "loss": 0.4517, "step": 1637 }, { "epoch": 0.8855649666606595, "grad_norm": 0.2750012278556824, "learning_rate": 3.9151383372555696e-07, "loss": 0.4419, "step": 1638 }, { "epoch": 0.8861056046134439, "grad_norm": 0.29816916584968567, "learning_rate": 3.8786022421823497e-07, "loss": 0.4679, "step": 1639 }, { "epoch": 0.8866462425662281, "grad_norm": 0.29293686151504517, "learning_rate": 3.84223054469397e-07, "loss": 0.4898, "step": 1640 }, { "epoch": 0.8871868805190124, "grad_norm": 0.3161197900772095, "learning_rate": 3.8060233744356634e-07, "loss": 0.4795, "step": 1641 }, { "epoch": 0.8877275184717968, "grad_norm": 0.2860853970050812, "learning_rate": 3.76998086046621e-07, "loss": 0.4689, "step": 1642 }, { "epoch": 0.888268156424581, "grad_norm": 0.29909902811050415, "learning_rate": 3.7341031312574827e-07, "loss": 0.4548, "step": 1643 }, { "epoch": 0.8888087943773653, "grad_norm": 0.3215901553630829, "learning_rate": 3.6983903146939894e-07, "loss": 0.4874, "step": 1644 }, { "epoch": 0.8893494323301496, "grad_norm": 0.2928643524646759, "learning_rate": 3.6628425380723975e-07, "loss": 0.4469, "step": 1645 }, { "epoch": 0.8898900702829339, "grad_norm": 0.3444850444793701, "learning_rate": 3.627459928101118e-07, "loss": 0.4671, "step": 1646 }, { "epoch": 0.8904307082357181, "grad_norm": 0.30591514706611633, "learning_rate": 3.5922426108998154e-07, "loss": 0.4835, "step": 1647 }, { "epoch": 0.8909713461885024, "grad_norm": 0.33051443099975586, "learning_rate": 3.5571907119990033e-07, "loss": 0.453, "step": 1648 }, { "epoch": 0.8915119841412867, "grad_norm": 0.29916903376579285, "learning_rate": 3.522304356339529e-07, "loss": 0.4541, "step": 1649 }, { "epoch": 0.892052622094071, "grad_norm": 0.31293609738349915, "learning_rate": 3.4875836682722096e-07, "loss": 0.4572, "step": 1650 }, { "epoch": 0.8925932600468552, "grad_norm": 0.3154950439929962, "learning_rate": 3.45302877155731e-07, "loss": 0.4799, "step": 1651 }, { "epoch": 0.8931338979996396, "grad_norm": 0.2948947846889496, "learning_rate": 3.418639789364175e-07, "loss": 0.4914, "step": 1652 }, { "epoch": 0.8936745359524239, "grad_norm": 0.32319676876068115, "learning_rate": 3.3844168442707213e-07, "loss": 0.4846, "step": 1653 }, { "epoch": 0.8942151739052081, "grad_norm": 0.30142778158187866, "learning_rate": 3.350360058263058e-07, "loss": 0.4656, "step": 1654 }, { "epoch": 0.8947558118579925, "grad_norm": 0.31285157799720764, "learning_rate": 3.3164695527350244e-07, "loss": 0.4622, "step": 1655 }, { "epoch": 0.8952964498107767, "grad_norm": 0.3004051744937897, "learning_rate": 3.2827454484877564e-07, "loss": 0.4715, "step": 1656 }, { "epoch": 0.895837087763561, "grad_norm": 0.3092837631702423, "learning_rate": 3.2491878657292643e-07, "loss": 0.486, "step": 1657 }, { "epoch": 0.8963777257163453, "grad_norm": 0.31189343333244324, "learning_rate": 3.215796924073983e-07, "loss": 0.4275, "step": 1658 }, { "epoch": 0.8969183636691296, "grad_norm": 0.2800220549106598, "learning_rate": 3.1825727425423837e-07, "loss": 0.4642, "step": 1659 }, { "epoch": 0.8974590016219138, "grad_norm": 0.31216147541999817, "learning_rate": 3.149515439560524e-07, "loss": 0.4516, "step": 1660 }, { "epoch": 0.8979996395746982, "grad_norm": 0.30997899174690247, "learning_rate": 3.116625132959633e-07, "loss": 0.4794, "step": 1661 }, { "epoch": 0.8985402775274824, "grad_norm": 0.31042930483818054, "learning_rate": 3.083901939975675e-07, "loss": 0.4699, "step": 1662 }, { "epoch": 0.8990809154802667, "grad_norm": 0.3064749836921692, "learning_rate": 3.051345977248954e-07, "loss": 0.4366, "step": 1663 }, { "epoch": 0.899621553433051, "grad_norm": 0.27594202756881714, "learning_rate": 3.018957360823699e-07, "loss": 0.4572, "step": 1664 }, { "epoch": 0.9001621913858353, "grad_norm": 0.2951033115386963, "learning_rate": 2.986736206147628e-07, "loss": 0.4475, "step": 1665 }, { "epoch": 0.9007028293386196, "grad_norm": 0.29796889424324036, "learning_rate": 2.9546826280715536e-07, "loss": 0.4708, "step": 1666 }, { "epoch": 0.9012434672914038, "grad_norm": 0.32082822918891907, "learning_rate": 2.9227967408489653e-07, "loss": 0.4723, "step": 1667 }, { "epoch": 0.9017841052441882, "grad_norm": 0.31180939078330994, "learning_rate": 2.891078658135632e-07, "loss": 0.4659, "step": 1668 }, { "epoch": 0.9023247431969724, "grad_norm": 0.30132412910461426, "learning_rate": 2.859528492989194e-07, "loss": 0.4867, "step": 1669 }, { "epoch": 0.9028653811497567, "grad_norm": 0.3208564221858978, "learning_rate": 2.828146357868755e-07, "loss": 0.4589, "step": 1670 }, { "epoch": 0.903406019102541, "grad_norm": 0.2799170911312103, "learning_rate": 2.796932364634475e-07, "loss": 0.4563, "step": 1671 }, { "epoch": 0.9039466570553253, "grad_norm": 0.2884426712989807, "learning_rate": 2.765886624547182e-07, "loss": 0.4782, "step": 1672 }, { "epoch": 0.9044872950081095, "grad_norm": 0.28514596819877625, "learning_rate": 2.7350092482679836e-07, "loss": 0.4549, "step": 1673 }, { "epoch": 0.9050279329608939, "grad_norm": 0.3076411485671997, "learning_rate": 2.7043003458578685e-07, "loss": 0.4737, "step": 1674 }, { "epoch": 0.9055685709136782, "grad_norm": 0.34531399607658386, "learning_rate": 2.673760026777272e-07, "loss": 0.479, "step": 1675 }, { "epoch": 0.9061092088664624, "grad_norm": 0.32933852076530457, "learning_rate": 2.6433883998857657e-07, "loss": 0.4612, "step": 1676 }, { "epoch": 0.9066498468192468, "grad_norm": 0.2830871045589447, "learning_rate": 2.61318557344159e-07, "loss": 0.4248, "step": 1677 }, { "epoch": 0.907190484772031, "grad_norm": 0.33129066228866577, "learning_rate": 2.5831516551013405e-07, "loss": 0.4706, "step": 1678 }, { "epoch": 0.9077311227248153, "grad_norm": 0.3337078392505646, "learning_rate": 2.553286751919509e-07, "loss": 0.4877, "step": 1679 }, { "epoch": 0.9082717606775995, "grad_norm": 0.3119748830795288, "learning_rate": 2.523590970348166e-07, "loss": 0.4523, "step": 1680 }, { "epoch": 0.9088123986303839, "grad_norm": 0.29456672072410583, "learning_rate": 2.4940644162365523e-07, "loss": 0.4758, "step": 1681 }, { "epoch": 0.9093530365831681, "grad_norm": 0.3045862317085266, "learning_rate": 2.46470719483069e-07, "loss": 0.4765, "step": 1682 }, { "epoch": 0.9098936745359524, "grad_norm": 0.2948693037033081, "learning_rate": 2.435519410773052e-07, "loss": 0.4615, "step": 1683 }, { "epoch": 0.9104343124887367, "grad_norm": 0.30374738574028015, "learning_rate": 2.4065011681021266e-07, "loss": 0.4846, "step": 1684 }, { "epoch": 0.910974950441521, "grad_norm": 0.3169853985309601, "learning_rate": 2.3776525702520925e-07, "loss": 0.4746, "step": 1685 }, { "epoch": 0.9115155883943052, "grad_norm": 0.297050803899765, "learning_rate": 2.3489737200524498e-07, "loss": 0.4785, "step": 1686 }, { "epoch": 0.9120562263470896, "grad_norm": 0.3017227351665497, "learning_rate": 2.3204647197276387e-07, "loss": 0.4769, "step": 1687 }, { "epoch": 0.9125968642998739, "grad_norm": 0.3048754334449768, "learning_rate": 2.29212567089665e-07, "loss": 0.4772, "step": 1688 }, { "epoch": 0.9131375022526581, "grad_norm": 0.3351805508136749, "learning_rate": 2.2639566745727203e-07, "loss": 0.4579, "step": 1689 }, { "epoch": 0.9136781402054425, "grad_norm": 0.32740268111228943, "learning_rate": 2.2359578311629272e-07, "loss": 0.4945, "step": 1690 }, { "epoch": 0.9142187781582267, "grad_norm": 0.3174039125442505, "learning_rate": 2.2081292404678655e-07, "loss": 0.4647, "step": 1691 }, { "epoch": 0.914759416111011, "grad_norm": 0.30359378457069397, "learning_rate": 2.1804710016812337e-07, "loss": 0.469, "step": 1692 }, { "epoch": 0.9153000540637953, "grad_norm": 0.29310646653175354, "learning_rate": 2.152983213389559e-07, "loss": 0.4867, "step": 1693 }, { "epoch": 0.9158406920165796, "grad_norm": 0.3022826910018921, "learning_rate": 2.1256659735717777e-07, "loss": 0.4552, "step": 1694 }, { "epoch": 0.9163813299693638, "grad_norm": 0.2924525737762451, "learning_rate": 2.0985193795989345e-07, "loss": 0.4371, "step": 1695 }, { "epoch": 0.9169219679221481, "grad_norm": 0.3041470944881439, "learning_rate": 2.071543528233805e-07, "loss": 0.5015, "step": 1696 }, { "epoch": 0.9174626058749324, "grad_norm": 0.299441397190094, "learning_rate": 2.0447385156305565e-07, "loss": 0.4557, "step": 1697 }, { "epoch": 0.9180032438277167, "grad_norm": 0.29456791281700134, "learning_rate": 2.0181044373344172e-07, "loss": 0.4744, "step": 1698 }, { "epoch": 0.918543881780501, "grad_norm": 0.31137824058532715, "learning_rate": 1.9916413882813235e-07, "loss": 0.4564, "step": 1699 }, { "epoch": 0.9190845197332853, "grad_norm": 0.29632532596588135, "learning_rate": 1.9653494627975888e-07, "loss": 0.4657, "step": 1700 }, { "epoch": 0.9196251576860696, "grad_norm": 0.28566405177116394, "learning_rate": 1.9392287545995536e-07, "loss": 0.4983, "step": 1701 }, { "epoch": 0.9201657956388538, "grad_norm": 0.31092479825019836, "learning_rate": 1.913279356793285e-07, "loss": 0.4584, "step": 1702 }, { "epoch": 0.9207064335916382, "grad_norm": 0.2855190634727478, "learning_rate": 1.8875013618742e-07, "loss": 0.4444, "step": 1703 }, { "epoch": 0.9212470715444224, "grad_norm": 0.29378846287727356, "learning_rate": 1.8618948617267764e-07, "loss": 0.4697, "step": 1704 }, { "epoch": 0.9217877094972067, "grad_norm": 0.30169007182121277, "learning_rate": 1.8364599476241862e-07, "loss": 0.4616, "step": 1705 }, { "epoch": 0.922328347449991, "grad_norm": 0.3052110970020294, "learning_rate": 1.8111967102280082e-07, "loss": 0.4854, "step": 1706 }, { "epoch": 0.9228689854027753, "grad_norm": 0.3236134350299835, "learning_rate": 1.7861052395878764e-07, "loss": 0.4437, "step": 1707 }, { "epoch": 0.9234096233555595, "grad_norm": 0.29827356338500977, "learning_rate": 1.7611856251411818e-07, "loss": 0.4458, "step": 1708 }, { "epoch": 0.9239502613083439, "grad_norm": 0.2997409701347351, "learning_rate": 1.7364379557127387e-07, "loss": 0.4891, "step": 1709 }, { "epoch": 0.9244908992611282, "grad_norm": 0.2794349491596222, "learning_rate": 1.711862319514457e-07, "loss": 0.4808, "step": 1710 }, { "epoch": 0.9250315372139124, "grad_norm": 0.299027681350708, "learning_rate": 1.6874588041450535e-07, "loss": 0.4743, "step": 1711 }, { "epoch": 0.9255721751666967, "grad_norm": 0.291069895029068, "learning_rate": 1.6632274965897365e-07, "loss": 0.4791, "step": 1712 }, { "epoch": 0.926112813119481, "grad_norm": 0.3018452227115631, "learning_rate": 1.639168483219872e-07, "loss": 0.479, "step": 1713 }, { "epoch": 0.9266534510722653, "grad_norm": 0.2994323670864105, "learning_rate": 1.6152818497926993e-07, "loss": 0.4573, "step": 1714 }, { "epoch": 0.9271940890250495, "grad_norm": 0.27843761444091797, "learning_rate": 1.5915676814510173e-07, "loss": 0.4585, "step": 1715 }, { "epoch": 0.9277347269778339, "grad_norm": 0.30757150053977966, "learning_rate": 1.5680260627228772e-07, "loss": 0.4708, "step": 1716 }, { "epoch": 0.9282753649306181, "grad_norm": 0.31326669454574585, "learning_rate": 1.5446570775212944e-07, "loss": 0.4671, "step": 1717 }, { "epoch": 0.9288160028834024, "grad_norm": 0.31248635053634644, "learning_rate": 1.5214608091439265e-07, "loss": 0.4639, "step": 1718 }, { "epoch": 0.9293566408361867, "grad_norm": 0.29233023524284363, "learning_rate": 1.4984373402728014e-07, "loss": 0.4691, "step": 1719 }, { "epoch": 0.929897278788971, "grad_norm": 0.32531052827835083, "learning_rate": 1.4755867529740064e-07, "loss": 0.5093, "step": 1720 }, { "epoch": 0.9304379167417552, "grad_norm": 0.35300230979919434, "learning_rate": 1.4529091286973994e-07, "loss": 0.488, "step": 1721 }, { "epoch": 0.9309785546945396, "grad_norm": 0.3052767813205719, "learning_rate": 1.4304045482763263e-07, "loss": 0.4674, "step": 1722 }, { "epoch": 0.9315191926473239, "grad_norm": 0.2952291667461395, "learning_rate": 1.408073091927309e-07, "loss": 0.4572, "step": 1723 }, { "epoch": 0.9320598306001081, "grad_norm": 0.2960074841976166, "learning_rate": 1.3859148392498023e-07, "loss": 0.464, "step": 1724 }, { "epoch": 0.9326004685528924, "grad_norm": 0.28832143545150757, "learning_rate": 1.3639298692258606e-07, "loss": 0.4699, "step": 1725 }, { "epoch": 0.9331411065056767, "grad_norm": 0.29948270320892334, "learning_rate": 1.342118260219899e-07, "loss": 0.4722, "step": 1726 }, { "epoch": 0.933681744458461, "grad_norm": 0.28257155418395996, "learning_rate": 1.320480089978382e-07, "loss": 0.4768, "step": 1727 }, { "epoch": 0.9342223824112452, "grad_norm": 0.2848031520843506, "learning_rate": 1.2990154356295636e-07, "loss": 0.453, "step": 1728 }, { "epoch": 0.9347630203640296, "grad_norm": 0.29781103134155273, "learning_rate": 1.2777243736832202e-07, "loss": 0.4798, "step": 1729 }, { "epoch": 0.9353036583168138, "grad_norm": 0.30655109882354736, "learning_rate": 1.2566069800303393e-07, "loss": 0.4732, "step": 1730 }, { "epoch": 0.9358442962695981, "grad_norm": 0.30582815408706665, "learning_rate": 1.2356633299429044e-07, "loss": 0.4601, "step": 1731 }, { "epoch": 0.9363849342223824, "grad_norm": 0.2904635965824127, "learning_rate": 1.2148934980735772e-07, "loss": 0.4922, "step": 1732 }, { "epoch": 0.9369255721751667, "grad_norm": 0.30475184321403503, "learning_rate": 1.1942975584554594e-07, "loss": 0.4747, "step": 1733 }, { "epoch": 0.937466210127951, "grad_norm": 0.29005348682403564, "learning_rate": 1.1738755845018323e-07, "loss": 0.4601, "step": 1734 }, { "epoch": 0.9380068480807353, "grad_norm": 0.29658186435699463, "learning_rate": 1.1536276490058784e-07, "loss": 0.4417, "step": 1735 }, { "epoch": 0.9385474860335196, "grad_norm": 0.2936530113220215, "learning_rate": 1.1335538241404099e-07, "loss": 0.4629, "step": 1736 }, { "epoch": 0.9390881239863038, "grad_norm": 0.3370441198348999, "learning_rate": 1.1136541814576574e-07, "loss": 0.4861, "step": 1737 }, { "epoch": 0.9396287619390882, "grad_norm": 0.2809031009674072, "learning_rate": 1.0939287918889652e-07, "loss": 0.4676, "step": 1738 }, { "epoch": 0.9401693998918724, "grad_norm": 0.29533958435058594, "learning_rate": 1.0743777257445853e-07, "loss": 0.4947, "step": 1739 }, { "epoch": 0.9407100378446567, "grad_norm": 0.2839764654636383, "learning_rate": 1.055001052713378e-07, "loss": 0.4924, "step": 1740 }, { "epoch": 0.9412506757974409, "grad_norm": 0.31136807799339294, "learning_rate": 1.0357988418625897e-07, "loss": 0.467, "step": 1741 }, { "epoch": 0.9417913137502253, "grad_norm": 0.3142279386520386, "learning_rate": 1.0167711616376196e-07, "loss": 0.4629, "step": 1742 }, { "epoch": 0.9423319517030095, "grad_norm": 0.27500444650650024, "learning_rate": 9.979180798617538e-08, "loss": 0.4686, "step": 1743 }, { "epoch": 0.9428725896557938, "grad_norm": 0.3138892650604248, "learning_rate": 9.792396637359203e-08, "loss": 0.4638, "step": 1744 }, { "epoch": 0.9434132276085782, "grad_norm": 0.3079321086406708, "learning_rate": 9.607359798384785e-08, "loss": 0.4491, "step": 1745 }, { "epoch": 0.9439538655613624, "grad_norm": 0.29620155692100525, "learning_rate": 9.424070941249419e-08, "loss": 0.4593, "step": 1746 }, { "epoch": 0.9444945035141467, "grad_norm": 0.32409822940826416, "learning_rate": 9.242530719277776e-08, "loss": 0.4248, "step": 1747 }, { "epoch": 0.945035141466931, "grad_norm": 0.2947947680950165, "learning_rate": 9.062739779561624e-08, "loss": 0.4769, "step": 1748 }, { "epoch": 0.9455757794197153, "grad_norm": 0.28672558069229126, "learning_rate": 8.884698762957334e-08, "loss": 0.4895, "step": 1749 }, { "epoch": 0.9461164173724995, "grad_norm": 0.2779400050640106, "learning_rate": 8.708408304083927e-08, "loss": 0.4694, "step": 1750 }, { "epoch": 0.9466570553252839, "grad_norm": 0.3198831379413605, "learning_rate": 8.53386903132053e-08, "loss": 0.4591, "step": 1751 }, { "epoch": 0.9471976932780681, "grad_norm": 0.3286699652671814, "learning_rate": 8.361081566804318e-08, "loss": 0.4965, "step": 1752 }, { "epoch": 0.9477383312308524, "grad_norm": 0.2861204743385315, "learning_rate": 8.190046526428241e-08, "loss": 0.4464, "step": 1753 }, { "epoch": 0.9482789691836367, "grad_norm": 0.28904879093170166, "learning_rate": 8.020764519838686e-08, "loss": 0.4665, "step": 1754 }, { "epoch": 0.948819607136421, "grad_norm": 0.308715283870697, "learning_rate": 7.853236150433541e-08, "loss": 0.4604, "step": 1755 }, { "epoch": 0.9493602450892052, "grad_norm": 0.2850310206413269, "learning_rate": 7.687462015360026e-08, "loss": 0.4536, "step": 1756 }, { "epoch": 0.9499008830419895, "grad_norm": 0.32719627022743225, "learning_rate": 7.523442705512196e-08, "loss": 0.4873, "step": 1757 }, { "epoch": 0.9504415209947739, "grad_norm": 0.29945439100265503, "learning_rate": 7.36117880552939e-08, "loss": 0.4827, "step": 1758 }, { "epoch": 0.9509821589475581, "grad_norm": 0.31181010603904724, "learning_rate": 7.200670893793727e-08, "loss": 0.4435, "step": 1759 }, { "epoch": 0.9515227969003424, "grad_norm": 0.29942837357521057, "learning_rate": 7.041919542428221e-08, "loss": 0.4747, "step": 1760 }, { "epoch": 0.9520634348531267, "grad_norm": 0.3144790232181549, "learning_rate": 6.884925317294678e-08, "loss": 0.481, "step": 1761 }, { "epoch": 0.952604072805911, "grad_norm": 0.321038156747818, "learning_rate": 6.72968877799185e-08, "loss": 0.4891, "step": 1762 }, { "epoch": 0.9531447107586952, "grad_norm": 0.29290351271629333, "learning_rate": 6.576210477853007e-08, "loss": 0.4755, "step": 1763 }, { "epoch": 0.9536853487114796, "grad_norm": 0.31532248854637146, "learning_rate": 6.424490963944597e-08, "loss": 0.4916, "step": 1764 }, { "epoch": 0.9542259866642638, "grad_norm": 0.29246556758880615, "learning_rate": 6.274530777063747e-08, "loss": 0.4765, "step": 1765 }, { "epoch": 0.9547666246170481, "grad_norm": 0.28569135069847107, "learning_rate": 6.126330451736495e-08, "loss": 0.4446, "step": 1766 }, { "epoch": 0.9553072625698324, "grad_norm": 0.2923862338066101, "learning_rate": 5.97989051621617e-08, "loss": 0.4793, "step": 1767 }, { "epoch": 0.9558479005226167, "grad_norm": 0.27991148829460144, "learning_rate": 5.835211492481063e-08, "loss": 0.4756, "step": 1768 }, { "epoch": 0.956388538475401, "grad_norm": 0.30270445346832275, "learning_rate": 5.6922938962329364e-08, "loss": 0.4498, "step": 1769 }, { "epoch": 0.9569291764281853, "grad_norm": 0.29934385418891907, "learning_rate": 5.551138236894793e-08, "loss": 0.4677, "step": 1770 }, { "epoch": 0.9574698143809696, "grad_norm": 0.2954845428466797, "learning_rate": 5.411745017609493e-08, "loss": 0.4762, "step": 1771 }, { "epoch": 0.9580104523337538, "grad_norm": 0.29608166217803955, "learning_rate": 5.274114735237812e-08, "loss": 0.4584, "step": 1772 }, { "epoch": 0.9585510902865381, "grad_norm": 0.3020345866680145, "learning_rate": 5.138247880356384e-08, "loss": 0.4686, "step": 1773 }, { "epoch": 0.9590917282393224, "grad_norm": 0.3264354169368744, "learning_rate": 5.004144937256372e-08, "loss": 0.4808, "step": 1774 }, { "epoch": 0.9596323661921067, "grad_norm": 0.29560211300849915, "learning_rate": 4.8718063839414683e-08, "loss": 0.4731, "step": 1775 }, { "epoch": 0.9601730041448909, "grad_norm": 0.28680717945098877, "learning_rate": 4.741232692126396e-08, "loss": 0.4502, "step": 1776 }, { "epoch": 0.9607136420976753, "grad_norm": 0.30334657430648804, "learning_rate": 4.612424327234966e-08, "loss": 0.4602, "step": 1777 }, { "epoch": 0.9612542800504595, "grad_norm": 0.28252384066581726, "learning_rate": 4.485381748398576e-08, "loss": 0.4527, "step": 1778 }, { "epoch": 0.9617949180032438, "grad_norm": 0.3101551830768585, "learning_rate": 4.360105408454718e-08, "loss": 0.4967, "step": 1779 }, { "epoch": 0.9623355559560282, "grad_norm": 0.2830714285373688, "learning_rate": 4.236595753944972e-08, "loss": 0.4613, "step": 1780 }, { "epoch": 0.9628761939088124, "grad_norm": 0.298048198223114, "learning_rate": 4.114853225113902e-08, "loss": 0.459, "step": 1781 }, { "epoch": 0.9634168318615967, "grad_norm": 0.30758899450302124, "learning_rate": 3.994878255907053e-08, "loss": 0.4355, "step": 1782 }, { "epoch": 0.963957469814381, "grad_norm": 0.31392624974250793, "learning_rate": 3.8766712739696786e-08, "loss": 0.434, "step": 1783 }, { "epoch": 0.9644981077671653, "grad_norm": 0.3108672797679901, "learning_rate": 3.7602327006450166e-08, "loss": 0.4323, "step": 1784 }, { "epoch": 0.9650387457199495, "grad_norm": 0.3078506290912628, "learning_rate": 3.645562950973014e-08, "loss": 0.4671, "step": 1785 }, { "epoch": 0.9655793836727338, "grad_norm": 0.29575785994529724, "learning_rate": 3.5326624336886604e-08, "loss": 0.4592, "step": 1786 }, { "epoch": 0.9661200216255181, "grad_norm": 0.2994788885116577, "learning_rate": 3.4215315512206584e-08, "loss": 0.4864, "step": 1787 }, { "epoch": 0.9666606595783024, "grad_norm": 0.2929299771785736, "learning_rate": 3.312170699689865e-08, "loss": 0.4609, "step": 1788 }, { "epoch": 0.9672012975310866, "grad_norm": 0.31516411900520325, "learning_rate": 3.204580268907909e-08, "loss": 0.4905, "step": 1789 }, { "epoch": 0.967741935483871, "grad_norm": 0.31366169452667236, "learning_rate": 3.0987606423759644e-08, "loss": 0.4793, "step": 1790 }, { "epoch": 0.9682825734366552, "grad_norm": 0.30144020915031433, "learning_rate": 2.9947121972832e-08, "loss": 0.4619, "step": 1791 }, { "epoch": 0.9688232113894395, "grad_norm": 0.2976379692554474, "learning_rate": 2.8924353045054475e-08, "loss": 0.4555, "step": 1792 }, { "epoch": 0.9693638493422239, "grad_norm": 0.3140285909175873, "learning_rate": 2.7919303286039202e-08, "loss": 0.4592, "step": 1793 }, { "epoch": 0.9699044872950081, "grad_norm": 0.28508275747299194, "learning_rate": 2.693197627823996e-08, "loss": 0.4784, "step": 1794 }, { "epoch": 0.9704451252477924, "grad_norm": 0.29108139872550964, "learning_rate": 2.5962375540937724e-08, "loss": 0.4857, "step": 1795 }, { "epoch": 0.9709857632005767, "grad_norm": 0.3323630094528198, "learning_rate": 2.5010504530229574e-08, "loss": 0.4616, "step": 1796 }, { "epoch": 0.971526401153361, "grad_norm": 0.31090638041496277, "learning_rate": 2.4076366639015914e-08, "loss": 0.4782, "step": 1797 }, { "epoch": 0.9720670391061452, "grad_norm": 0.31540656089782715, "learning_rate": 2.3159965196987156e-08, "loss": 0.4851, "step": 1798 }, { "epoch": 0.9726076770589296, "grad_norm": 0.283552885055542, "learning_rate": 2.2261303470614282e-08, "loss": 0.4473, "step": 1799 }, { "epoch": 0.9731483150117138, "grad_norm": 0.2891945540904999, "learning_rate": 2.1380384663135523e-08, "loss": 0.4574, "step": 1800 }, { "epoch": 0.9736889529644981, "grad_norm": 0.2970227897167206, "learning_rate": 2.0517211914545254e-08, "loss": 0.4785, "step": 1801 }, { "epoch": 0.9742295909172823, "grad_norm": 0.30604010820388794, "learning_rate": 1.967178830158234e-08, "loss": 0.4464, "step": 1802 }, { "epoch": 0.9747702288700667, "grad_norm": 0.29695698618888855, "learning_rate": 1.8844116837719582e-08, "loss": 0.498, "step": 1803 }, { "epoch": 0.975310866822851, "grad_norm": 0.29107484221458435, "learning_rate": 1.803420047315485e-08, "loss": 0.4752, "step": 1804 }, { "epoch": 0.9758515047756352, "grad_norm": 0.2813703119754791, "learning_rate": 1.724204209479663e-08, "loss": 0.4509, "step": 1805 }, { "epoch": 0.9763921427284196, "grad_norm": 0.2851557731628418, "learning_rate": 1.646764452625682e-08, "loss": 0.4704, "step": 1806 }, { "epoch": 0.9769327806812038, "grad_norm": 0.2960273325443268, "learning_rate": 1.5711010527839633e-08, "loss": 0.4505, "step": 1807 }, { "epoch": 0.9774734186339881, "grad_norm": 0.3089202642440796, "learning_rate": 1.4972142796532696e-08, "loss": 0.4747, "step": 1808 }, { "epoch": 0.9780140565867724, "grad_norm": 0.30507782101631165, "learning_rate": 1.4251043965994304e-08, "loss": 0.4496, "step": 1809 }, { "epoch": 0.9785546945395567, "grad_norm": 0.29044288396835327, "learning_rate": 1.3547716606548967e-08, "loss": 0.4645, "step": 1810 }, { "epoch": 0.9790953324923409, "grad_norm": 0.2851908802986145, "learning_rate": 1.2862163225174084e-08, "loss": 0.4587, "step": 1811 }, { "epoch": 0.9796359704451253, "grad_norm": 0.2960631251335144, "learning_rate": 1.2194386265492742e-08, "loss": 0.4429, "step": 1812 }, { "epoch": 0.9801766083979095, "grad_norm": 0.3134407103061676, "learning_rate": 1.1544388107765924e-08, "loss": 0.4452, "step": 1813 }, { "epoch": 0.9807172463506938, "grad_norm": 0.29251259565353394, "learning_rate": 1.0912171068880318e-08, "loss": 0.4647, "step": 1814 }, { "epoch": 0.9812578843034782, "grad_norm": 0.3051975667476654, "learning_rate": 1.029773740234552e-08, "loss": 0.4604, "step": 1815 }, { "epoch": 0.9817985222562624, "grad_norm": 0.31170353293418884, "learning_rate": 9.701089298281285e-09, "loss": 0.453, "step": 1816 }, { "epoch": 0.9823391602090467, "grad_norm": 0.2902314066886902, "learning_rate": 9.12222888341252e-09, "loss": 0.4533, "step": 1817 }, { "epoch": 0.9828797981618309, "grad_norm": 0.3045901656150818, "learning_rate": 8.561158221060406e-09, "loss": 0.4514, "step": 1818 }, { "epoch": 0.9834204361146153, "grad_norm": 0.29368510842323303, "learning_rate": 8.017879311134624e-09, "loss": 0.4808, "step": 1819 }, { "epoch": 0.9839610740673995, "grad_norm": 0.27795591950416565, "learning_rate": 7.492394090128364e-09, "loss": 0.4974, "step": 1820 }, { "epoch": 0.9845017120201838, "grad_norm": 0.29451608657836914, "learning_rate": 6.98470443110888e-09, "loss": 0.4874, "step": 1821 }, { "epoch": 0.9850423499729681, "grad_norm": 0.29039186239242554, "learning_rate": 6.4948121437125035e-09, "loss": 0.4587, "step": 1822 }, { "epoch": 0.9855829879257524, "grad_norm": 0.29998692870140076, "learning_rate": 6.022718974137976e-09, "loss": 0.4895, "step": 1823 }, { "epoch": 0.9861236258785366, "grad_norm": 0.27220648527145386, "learning_rate": 5.568426605139232e-09, "loss": 0.475, "step": 1824 }, { "epoch": 0.986664263831321, "grad_norm": 0.3113320767879486, "learning_rate": 5.131936656020409e-09, "loss": 0.4675, "step": 1825 }, { "epoch": 0.9872049017841052, "grad_norm": 0.2876143157482147, "learning_rate": 4.713250682629733e-09, "loss": 0.445, "step": 1826 }, { "epoch": 0.9877455397368895, "grad_norm": 0.2922397553920746, "learning_rate": 4.312370177353975e-09, "loss": 0.4666, "step": 1827 }, { "epoch": 0.9882861776896739, "grad_norm": 0.3193817436695099, "learning_rate": 3.929296569112895e-09, "loss": 0.4823, "step": 1828 }, { "epoch": 0.9888268156424581, "grad_norm": 0.30332955718040466, "learning_rate": 3.5640312233548024e-09, "loss": 0.4637, "step": 1829 }, { "epoch": 0.9893674535952424, "grad_norm": 0.28622302412986755, "learning_rate": 3.2165754420510063e-09, "loss": 0.4491, "step": 1830 }, { "epoch": 0.9899080915480267, "grad_norm": 0.2920747399330139, "learning_rate": 2.886930463691928e-09, "loss": 0.4712, "step": 1831 }, { "epoch": 0.990448729500811, "grad_norm": 0.2950655221939087, "learning_rate": 2.5750974632809955e-09, "loss": 0.4571, "step": 1832 }, { "epoch": 0.9909893674535952, "grad_norm": 0.30665168166160583, "learning_rate": 2.2810775523329775e-09, "loss": 0.4675, "step": 1833 }, { "epoch": 0.9915300054063795, "grad_norm": 0.30462467670440674, "learning_rate": 2.0048717788684335e-09, "loss": 0.4618, "step": 1834 }, { "epoch": 0.9920706433591638, "grad_norm": 0.28570374846458435, "learning_rate": 1.746481127409827e-09, "loss": 0.4415, "step": 1835 }, { "epoch": 0.9926112813119481, "grad_norm": 0.33271685242652893, "learning_rate": 1.5059065189787502e-09, "loss": 0.4603, "step": 1836 }, { "epoch": 0.9931519192647323, "grad_norm": 0.3160949647426605, "learning_rate": 1.2831488110920386e-09, "loss": 0.4551, "step": 1837 }, { "epoch": 0.9936925572175167, "grad_norm": 0.2587985694408417, "learning_rate": 1.07820879775955e-09, "loss": 0.4744, "step": 1838 }, { "epoch": 0.994233195170301, "grad_norm": 0.32415860891342163, "learning_rate": 8.910872094802792e-10, "loss": 0.4525, "step": 1839 }, { "epoch": 0.9947738331230852, "grad_norm": 0.27735623717308044, "learning_rate": 7.217847132401367e-10, "loss": 0.4508, "step": 1840 }, { "epoch": 0.9953144710758696, "grad_norm": 0.2976240813732147, "learning_rate": 5.703019125102849e-10, "loss": 0.5082, "step": 1841 }, { "epoch": 0.9958551090286538, "grad_norm": 0.28493812680244446, "learning_rate": 4.3663934724436086e-10, "loss": 0.4465, "step": 1842 }, { "epoch": 0.9963957469814381, "grad_norm": 0.3043738603591919, "learning_rate": 3.20797493876257e-10, "loss": 0.4566, "step": 1843 }, { "epoch": 0.9969363849342224, "grad_norm": 0.3110690116882324, "learning_rate": 2.227767653190105e-10, "loss": 0.5034, "step": 1844 }, { "epoch": 0.9974770228870067, "grad_norm": 0.29455140233039856, "learning_rate": 1.4257751096202755e-10, "loss": 0.4548, "step": 1845 }, { "epoch": 0.9980176608397909, "grad_norm": 0.2990035116672516, "learning_rate": 8.020001667330412e-11, "loss": 0.4927, "step": 1846 }, { "epoch": 0.9985582987925753, "grad_norm": 0.29310280084609985, "learning_rate": 3.564450479387471e-11, "loss": 0.4643, "step": 1847 }, { "epoch": 0.9990989367453595, "grad_norm": 0.2967851161956787, "learning_rate": 8.911134139477639e-12, "loss": 0.4731, "step": 1848 }, { "epoch": 0.9996395746981438, "grad_norm": 0.3127877712249756, "learning_rate": 0.0, "loss": 0.4589, "step": 1849 }, { "epoch": 0.9996395746981438, "step": 1849, "total_flos": 2827754950098944.0, "train_loss": 0.4939856140145487, "train_runtime": 65758.0763, "train_samples_per_second": 2.7, "train_steps_per_second": 0.028 } ], "logging_steps": 1.0, "max_steps": 1849, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2827754950098944.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }