{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100.0, "global_step": 3741, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008019246190858059, "grad_norm": 5.836986541748047, "learning_rate": 1.7699115044247788e-07, "loss": 1.6116, "step": 1 }, { "epoch": 0.0016038492381716118, "grad_norm": 5.846986293792725, "learning_rate": 3.5398230088495575e-07, "loss": 1.5618, "step": 2 }, { "epoch": 0.0024057738572574178, "grad_norm": 10.19965648651123, "learning_rate": 5.309734513274336e-07, "loss": 1.6029, "step": 3 }, { "epoch": 0.0032076984763432237, "grad_norm": 4.988247394561768, "learning_rate": 7.079646017699115e-07, "loss": 1.4261, "step": 4 }, { "epoch": 0.00400962309542903, "grad_norm": 2.4512386322021484, "learning_rate": 8.849557522123895e-07, "loss": 1.253, "step": 5 }, { "epoch": 0.0048115477145148355, "grad_norm": 2.286949396133423, "learning_rate": 1.0619469026548673e-06, "loss": 1.3126, "step": 6 }, { "epoch": 0.0056134723336006415, "grad_norm": 1.9182817935943604, "learning_rate": 1.2389380530973452e-06, "loss": 1.2349, "step": 7 }, { "epoch": 0.006415396952686447, "grad_norm": 1.8475348949432373, "learning_rate": 1.415929203539823e-06, "loss": 1.2502, "step": 8 }, { "epoch": 0.007217321571772253, "grad_norm": 1.5276049375534058, "learning_rate": 1.592920353982301e-06, "loss": 1.2345, "step": 9 }, { "epoch": 0.00801924619085806, "grad_norm": 1.4844948053359985, "learning_rate": 1.769911504424779e-06, "loss": 1.1793, "step": 10 }, { "epoch": 0.008821170809943865, "grad_norm": 1.4817630052566528, "learning_rate": 1.9469026548672567e-06, "loss": 1.1696, "step": 11 }, { "epoch": 0.009623095429029671, "grad_norm": 1.3712340593338013, "learning_rate": 2.1238938053097345e-06, "loss": 1.1284, "step": 12 }, { "epoch": 0.010425020048115477, "grad_norm": 1.4231791496276855, "learning_rate": 2.3008849557522127e-06, "loss": 1.1471, "step": 13 }, { "epoch": 0.011226944667201283, "grad_norm": 1.3379884958267212, "learning_rate": 2.4778761061946905e-06, "loss": 1.161, "step": 14 }, { "epoch": 0.012028869286287089, "grad_norm": 1.452820897102356, "learning_rate": 2.6548672566371687e-06, "loss": 1.2179, "step": 15 }, { "epoch": 0.012830793905372895, "grad_norm": 1.2341582775115967, "learning_rate": 2.831858407079646e-06, "loss": 1.145, "step": 16 }, { "epoch": 0.0136327185244587, "grad_norm": 1.3975578546524048, "learning_rate": 3.0088495575221242e-06, "loss": 1.1374, "step": 17 }, { "epoch": 0.014434643143544507, "grad_norm": 1.4167269468307495, "learning_rate": 3.185840707964602e-06, "loss": 1.1401, "step": 18 }, { "epoch": 0.015236567762630313, "grad_norm": 1.321500301361084, "learning_rate": 3.36283185840708e-06, "loss": 1.162, "step": 19 }, { "epoch": 0.01603849238171612, "grad_norm": 1.303618311882019, "learning_rate": 3.539823008849558e-06, "loss": 1.1027, "step": 20 }, { "epoch": 0.016840417000801924, "grad_norm": 1.3029676675796509, "learning_rate": 3.7168141592920357e-06, "loss": 1.1181, "step": 21 }, { "epoch": 0.01764234161988773, "grad_norm": 1.4097836017608643, "learning_rate": 3.8938053097345135e-06, "loss": 1.0703, "step": 22 }, { "epoch": 0.018444266238973536, "grad_norm": 1.1947312355041504, "learning_rate": 4.070796460176992e-06, "loss": 1.0716, "step": 23 }, { "epoch": 0.019246190858059342, "grad_norm": 1.2278361320495605, "learning_rate": 4.247787610619469e-06, "loss": 1.0814, "step": 24 }, { "epoch": 0.020048115477145148, "grad_norm": 1.2241326570510864, "learning_rate": 4.424778761061948e-06, "loss": 1.0598, "step": 25 }, { "epoch": 0.020850040096230954, "grad_norm": 1.271500587463379, "learning_rate": 4.6017699115044254e-06, "loss": 1.0802, "step": 26 }, { "epoch": 0.02165196471531676, "grad_norm": 1.3000115156173706, "learning_rate": 4.778761061946903e-06, "loss": 1.0799, "step": 27 }, { "epoch": 0.022453889334402566, "grad_norm": 1.1881065368652344, "learning_rate": 4.955752212389381e-06, "loss": 1.1282, "step": 28 }, { "epoch": 0.023255813953488372, "grad_norm": 1.1343086957931519, "learning_rate": 5.132743362831859e-06, "loss": 1.0606, "step": 29 }, { "epoch": 0.024057738572574178, "grad_norm": 1.2190392017364502, "learning_rate": 5.309734513274337e-06, "loss": 1.0884, "step": 30 }, { "epoch": 0.024859663191659984, "grad_norm": 1.234042763710022, "learning_rate": 5.486725663716814e-06, "loss": 1.0716, "step": 31 }, { "epoch": 0.02566158781074579, "grad_norm": 1.2421082258224487, "learning_rate": 5.663716814159292e-06, "loss": 1.0962, "step": 32 }, { "epoch": 0.026463512429831595, "grad_norm": 1.2859286069869995, "learning_rate": 5.840707964601771e-06, "loss": 1.0681, "step": 33 }, { "epoch": 0.0272654370489174, "grad_norm": 1.1014221906661987, "learning_rate": 6.0176991150442484e-06, "loss": 1.026, "step": 34 }, { "epoch": 0.028067361668003207, "grad_norm": 1.2788565158843994, "learning_rate": 6.194690265486726e-06, "loss": 1.0742, "step": 35 }, { "epoch": 0.028869286287089013, "grad_norm": 1.2129113674163818, "learning_rate": 6.371681415929204e-06, "loss": 1.0913, "step": 36 }, { "epoch": 0.02967121090617482, "grad_norm": 1.1455950736999512, "learning_rate": 6.548672566371682e-06, "loss": 1.0863, "step": 37 }, { "epoch": 0.030473135525260625, "grad_norm": 1.3164713382720947, "learning_rate": 6.72566371681416e-06, "loss": 1.0514, "step": 38 }, { "epoch": 0.03127506014434643, "grad_norm": 1.119469404220581, "learning_rate": 6.902654867256637e-06, "loss": 1.0738, "step": 39 }, { "epoch": 0.03207698476343224, "grad_norm": 1.3728922605514526, "learning_rate": 7.079646017699116e-06, "loss": 1.0777, "step": 40 }, { "epoch": 0.03287890938251804, "grad_norm": 1.2131261825561523, "learning_rate": 7.256637168141594e-06, "loss": 1.1212, "step": 41 }, { "epoch": 0.03368083400160385, "grad_norm": 1.2123545408248901, "learning_rate": 7.4336283185840714e-06, "loss": 1.0375, "step": 42 }, { "epoch": 0.034482758620689655, "grad_norm": 1.1865596771240234, "learning_rate": 7.610619469026549e-06, "loss": 1.0203, "step": 43 }, { "epoch": 0.03528468323977546, "grad_norm": 1.120460033416748, "learning_rate": 7.787610619469027e-06, "loss": 1.0839, "step": 44 }, { "epoch": 0.03608660785886127, "grad_norm": 1.2285219430923462, "learning_rate": 7.964601769911505e-06, "loss": 1.0964, "step": 45 }, { "epoch": 0.03688853247794707, "grad_norm": 1.398607850074768, "learning_rate": 8.141592920353984e-06, "loss": 1.0015, "step": 46 }, { "epoch": 0.03769045709703288, "grad_norm": 1.0944195985794067, "learning_rate": 8.31858407079646e-06, "loss": 1.0503, "step": 47 }, { "epoch": 0.038492381716118684, "grad_norm": 1.28694486618042, "learning_rate": 8.495575221238938e-06, "loss": 1.0536, "step": 48 }, { "epoch": 0.03929430633520449, "grad_norm": 1.1280242204666138, "learning_rate": 8.672566371681418e-06, "loss": 1.0503, "step": 49 }, { "epoch": 0.040096230954290296, "grad_norm": 1.1579207181930542, "learning_rate": 8.849557522123895e-06, "loss": 1.0481, "step": 50 }, { "epoch": 0.0408981555733761, "grad_norm": 1.3872171640396118, "learning_rate": 9.026548672566371e-06, "loss": 1.0827, "step": 51 }, { "epoch": 0.04170008019246191, "grad_norm": 1.05665123462677, "learning_rate": 9.203539823008851e-06, "loss": 1.0334, "step": 52 }, { "epoch": 0.042502004811547714, "grad_norm": 1.2471959590911865, "learning_rate": 9.380530973451329e-06, "loss": 1.0903, "step": 53 }, { "epoch": 0.04330392943063352, "grad_norm": 1.1287788152694702, "learning_rate": 9.557522123893806e-06, "loss": 1.0782, "step": 54 }, { "epoch": 0.044105854049719326, "grad_norm": 1.2654865980148315, "learning_rate": 9.734513274336284e-06, "loss": 1.0912, "step": 55 }, { "epoch": 0.04490777866880513, "grad_norm": 1.0894954204559326, "learning_rate": 9.911504424778762e-06, "loss": 1.0549, "step": 56 }, { "epoch": 0.04570970328789094, "grad_norm": 1.3154832124710083, "learning_rate": 1.008849557522124e-05, "loss": 1.0503, "step": 57 }, { "epoch": 0.046511627906976744, "grad_norm": 1.2271111011505127, "learning_rate": 1.0265486725663717e-05, "loss": 1.0748, "step": 58 }, { "epoch": 0.04731355252606255, "grad_norm": 1.06815767288208, "learning_rate": 1.0442477876106197e-05, "loss": 1.0622, "step": 59 }, { "epoch": 0.048115477145148355, "grad_norm": 1.261401891708374, "learning_rate": 1.0619469026548675e-05, "loss": 1.0666, "step": 60 }, { "epoch": 0.04891740176423416, "grad_norm": 1.1487725973129272, "learning_rate": 1.079646017699115e-05, "loss": 1.0687, "step": 61 }, { "epoch": 0.04971932638331997, "grad_norm": 1.0832433700561523, "learning_rate": 1.0973451327433629e-05, "loss": 1.0784, "step": 62 }, { "epoch": 0.05052125100240577, "grad_norm": 1.0512681007385254, "learning_rate": 1.1150442477876106e-05, "loss": 1.0862, "step": 63 }, { "epoch": 0.05132317562149158, "grad_norm": 1.0937095880508423, "learning_rate": 1.1327433628318584e-05, "loss": 1.098, "step": 64 }, { "epoch": 0.052125100240577385, "grad_norm": 1.1490569114685059, "learning_rate": 1.1504424778761064e-05, "loss": 1.068, "step": 65 }, { "epoch": 0.05292702485966319, "grad_norm": 1.1199543476104736, "learning_rate": 1.1681415929203541e-05, "loss": 1.0263, "step": 66 }, { "epoch": 0.053728949478749, "grad_norm": 1.1037191152572632, "learning_rate": 1.1858407079646019e-05, "loss": 1.031, "step": 67 }, { "epoch": 0.0545308740978348, "grad_norm": 1.168930172920227, "learning_rate": 1.2035398230088497e-05, "loss": 1.0939, "step": 68 }, { "epoch": 0.05533279871692061, "grad_norm": 1.1001696586608887, "learning_rate": 1.2212389380530973e-05, "loss": 1.0531, "step": 69 }, { "epoch": 0.056134723336006415, "grad_norm": 1.248002052307129, "learning_rate": 1.2389380530973452e-05, "loss": 1.0443, "step": 70 }, { "epoch": 0.05693664795509222, "grad_norm": 1.1349272727966309, "learning_rate": 1.256637168141593e-05, "loss": 1.0606, "step": 71 }, { "epoch": 0.057738572574178026, "grad_norm": 1.0885883569717407, "learning_rate": 1.2743362831858408e-05, "loss": 1.104, "step": 72 }, { "epoch": 0.05854049719326383, "grad_norm": 1.1218723058700562, "learning_rate": 1.2920353982300886e-05, "loss": 1.0647, "step": 73 }, { "epoch": 0.05934242181234964, "grad_norm": 0.999465823173523, "learning_rate": 1.3097345132743363e-05, "loss": 1.0358, "step": 74 }, { "epoch": 0.060144346431435444, "grad_norm": 1.1456373929977417, "learning_rate": 1.3274336283185843e-05, "loss": 1.0156, "step": 75 }, { "epoch": 0.06094627105052125, "grad_norm": 1.0239201784133911, "learning_rate": 1.345132743362832e-05, "loss": 1.0935, "step": 76 }, { "epoch": 0.061748195669607056, "grad_norm": 1.1573493480682373, "learning_rate": 1.3628318584070797e-05, "loss": 1.0202, "step": 77 }, { "epoch": 0.06255012028869286, "grad_norm": 1.12896728515625, "learning_rate": 1.3805309734513275e-05, "loss": 1.0681, "step": 78 }, { "epoch": 0.06335204490777867, "grad_norm": 1.1681842803955078, "learning_rate": 1.3982300884955752e-05, "loss": 1.1181, "step": 79 }, { "epoch": 0.06415396952686447, "grad_norm": 1.1300599575042725, "learning_rate": 1.4159292035398232e-05, "loss": 1.0787, "step": 80 }, { "epoch": 0.06495589414595028, "grad_norm": 0.9626098275184631, "learning_rate": 1.433628318584071e-05, "loss": 1.0674, "step": 81 }, { "epoch": 0.06575781876503609, "grad_norm": 1.2202208042144775, "learning_rate": 1.4513274336283187e-05, "loss": 1.076, "step": 82 }, { "epoch": 0.06655974338412189, "grad_norm": 1.2100845575332642, "learning_rate": 1.4690265486725665e-05, "loss": 1.0548, "step": 83 }, { "epoch": 0.0673616680032077, "grad_norm": 1.066315770149231, "learning_rate": 1.4867256637168143e-05, "loss": 1.0481, "step": 84 }, { "epoch": 0.0681635926222935, "grad_norm": 1.0019817352294922, "learning_rate": 1.5044247787610619e-05, "loss": 1.019, "step": 85 }, { "epoch": 0.06896551724137931, "grad_norm": 1.112805724143982, "learning_rate": 1.5221238938053098e-05, "loss": 1.0574, "step": 86 }, { "epoch": 0.06976744186046512, "grad_norm": 1.1630192995071411, "learning_rate": 1.5398230088495576e-05, "loss": 1.0499, "step": 87 }, { "epoch": 0.07056936647955092, "grad_norm": 1.2215139865875244, "learning_rate": 1.5575221238938054e-05, "loss": 1.1022, "step": 88 }, { "epoch": 0.07137129109863673, "grad_norm": 1.0800572633743286, "learning_rate": 1.5752212389380532e-05, "loss": 1.112, "step": 89 }, { "epoch": 0.07217321571772253, "grad_norm": 1.1478803157806396, "learning_rate": 1.592920353982301e-05, "loss": 1.0589, "step": 90 }, { "epoch": 0.07297514033680834, "grad_norm": 1.1359870433807373, "learning_rate": 1.6106194690265487e-05, "loss": 1.026, "step": 91 }, { "epoch": 0.07377706495589414, "grad_norm": 1.0617109537124634, "learning_rate": 1.628318584070797e-05, "loss": 1.112, "step": 92 }, { "epoch": 0.07457898957497995, "grad_norm": 1.0477237701416016, "learning_rate": 1.6460176991150443e-05, "loss": 1.1037, "step": 93 }, { "epoch": 0.07538091419406576, "grad_norm": 1.1191864013671875, "learning_rate": 1.663716814159292e-05, "loss": 1.0756, "step": 94 }, { "epoch": 0.07618283881315156, "grad_norm": 0.9961270093917847, "learning_rate": 1.68141592920354e-05, "loss": 1.0885, "step": 95 }, { "epoch": 0.07698476343223737, "grad_norm": 1.0917061567306519, "learning_rate": 1.6991150442477876e-05, "loss": 1.0989, "step": 96 }, { "epoch": 0.07778668805132317, "grad_norm": 0.9411718845367432, "learning_rate": 1.7168141592920354e-05, "loss": 1.0161, "step": 97 }, { "epoch": 0.07858861267040898, "grad_norm": 1.052199363708496, "learning_rate": 1.7345132743362835e-05, "loss": 1.0793, "step": 98 }, { "epoch": 0.07939053728949479, "grad_norm": 1.0183230638504028, "learning_rate": 1.7522123893805313e-05, "loss": 1.0708, "step": 99 }, { "epoch": 0.08019246190858059, "grad_norm": 0.964535653591156, "learning_rate": 1.769911504424779e-05, "loss": 1.0328, "step": 100 }, { "epoch": 0.0809943865276664, "grad_norm": 0.971592903137207, "learning_rate": 1.7876106194690265e-05, "loss": 1.0246, "step": 101 }, { "epoch": 0.0817963111467522, "grad_norm": 0.9727990627288818, "learning_rate": 1.8053097345132743e-05, "loss": 1.0664, "step": 102 }, { "epoch": 0.08259823576583801, "grad_norm": 1.0569050312042236, "learning_rate": 1.823008849557522e-05, "loss": 1.1197, "step": 103 }, { "epoch": 0.08340016038492382, "grad_norm": 0.9434515237808228, "learning_rate": 1.8407079646017702e-05, "loss": 0.9806, "step": 104 }, { "epoch": 0.08420208500400962, "grad_norm": 0.9337801337242126, "learning_rate": 1.858407079646018e-05, "loss": 1.0387, "step": 105 }, { "epoch": 0.08500400962309543, "grad_norm": 1.0005512237548828, "learning_rate": 1.8761061946902657e-05, "loss": 1.0928, "step": 106 }, { "epoch": 0.08580593424218123, "grad_norm": 0.9581108093261719, "learning_rate": 1.8938053097345135e-05, "loss": 1.081, "step": 107 }, { "epoch": 0.08660785886126704, "grad_norm": 0.947929322719574, "learning_rate": 1.9115044247787613e-05, "loss": 1.078, "step": 108 }, { "epoch": 0.08740978348035285, "grad_norm": 0.989020824432373, "learning_rate": 1.929203539823009e-05, "loss": 1.0446, "step": 109 }, { "epoch": 0.08821170809943865, "grad_norm": 0.9517629742622375, "learning_rate": 1.946902654867257e-05, "loss": 1.0393, "step": 110 }, { "epoch": 0.08901363271852446, "grad_norm": 0.9815518260002136, "learning_rate": 1.9646017699115046e-05, "loss": 1.1276, "step": 111 }, { "epoch": 0.08981555733761026, "grad_norm": 0.9295392632484436, "learning_rate": 1.9823008849557524e-05, "loss": 1.0595, "step": 112 }, { "epoch": 0.09061748195669607, "grad_norm": 0.9898239970207214, "learning_rate": 2e-05, "loss": 1.0555, "step": 113 }, { "epoch": 0.09141940657578188, "grad_norm": 0.9700495004653931, "learning_rate": 1.999999625082972e-05, "loss": 1.0749, "step": 114 }, { "epoch": 0.09222133119486768, "grad_norm": 1.0816751718521118, "learning_rate": 1.9999985003321688e-05, "loss": 1.0688, "step": 115 }, { "epoch": 0.09302325581395349, "grad_norm": 1.0361632108688354, "learning_rate": 1.999996625748434e-05, "loss": 1.0596, "step": 116 }, { "epoch": 0.09382518043303929, "grad_norm": 0.9109675884246826, "learning_rate": 1.999994001333173e-05, "loss": 1.0544, "step": 117 }, { "epoch": 0.0946271050521251, "grad_norm": 0.9128645658493042, "learning_rate": 1.9999906270883536e-05, "loss": 1.0718, "step": 118 }, { "epoch": 0.0954290296712109, "grad_norm": 0.8604353070259094, "learning_rate": 1.999986503016506e-05, "loss": 1.0443, "step": 119 }, { "epoch": 0.09623095429029671, "grad_norm": 0.9511041641235352, "learning_rate": 1.999981629120723e-05, "loss": 1.0433, "step": 120 }, { "epoch": 0.09703287890938252, "grad_norm": 0.9402782320976257, "learning_rate": 1.999976005404659e-05, "loss": 1.0483, "step": 121 }, { "epoch": 0.09783480352846832, "grad_norm": 1.1139960289001465, "learning_rate": 1.9999696318725305e-05, "loss": 1.0563, "step": 122 }, { "epoch": 0.09863672814755413, "grad_norm": 0.9203987717628479, "learning_rate": 1.999962508529117e-05, "loss": 1.0902, "step": 123 }, { "epoch": 0.09943865276663993, "grad_norm": 0.9191240072250366, "learning_rate": 1.999954635379759e-05, "loss": 1.0501, "step": 124 }, { "epoch": 0.10024057738572574, "grad_norm": 0.920760452747345, "learning_rate": 1.9999460124303614e-05, "loss": 1.075, "step": 125 }, { "epoch": 0.10104250200481155, "grad_norm": 0.8883569836616516, "learning_rate": 1.999936639687389e-05, "loss": 1.0414, "step": 126 }, { "epoch": 0.10184442662389735, "grad_norm": 0.9747478365898132, "learning_rate": 1.9999265171578705e-05, "loss": 1.0719, "step": 127 }, { "epoch": 0.10264635124298316, "grad_norm": 0.9460424780845642, "learning_rate": 1.999915644849395e-05, "loss": 1.0065, "step": 128 }, { "epoch": 0.10344827586206896, "grad_norm": 0.8998356461524963, "learning_rate": 1.999904022770116e-05, "loss": 1.0219, "step": 129 }, { "epoch": 0.10425020048115477, "grad_norm": 0.8906877636909485, "learning_rate": 1.9998916509287477e-05, "loss": 1.0518, "step": 130 }, { "epoch": 0.10505212510024058, "grad_norm": 1.0315589904785156, "learning_rate": 1.999878529334567e-05, "loss": 1.0851, "step": 131 }, { "epoch": 0.10585404971932638, "grad_norm": 0.8850352764129639, "learning_rate": 1.9998646579974133e-05, "loss": 1.0648, "step": 132 }, { "epoch": 0.10665597433841219, "grad_norm": 0.9172976613044739, "learning_rate": 1.9998500369276873e-05, "loss": 1.1327, "step": 133 }, { "epoch": 0.107457898957498, "grad_norm": 0.8636643886566162, "learning_rate": 1.999834666136352e-05, "loss": 1.0097, "step": 134 }, { "epoch": 0.1082598235765838, "grad_norm": 0.8971249461174011, "learning_rate": 1.9998185456349338e-05, "loss": 1.1087, "step": 135 }, { "epoch": 0.1090617481956696, "grad_norm": 0.9222013354301453, "learning_rate": 1.9998016754355198e-05, "loss": 1.0297, "step": 136 }, { "epoch": 0.10986367281475541, "grad_norm": 0.9246983528137207, "learning_rate": 1.9997840555507605e-05, "loss": 1.073, "step": 137 }, { "epoch": 0.11066559743384122, "grad_norm": 0.7840830087661743, "learning_rate": 1.9997656859938673e-05, "loss": 1.0303, "step": 138 }, { "epoch": 0.11146752205292702, "grad_norm": 0.919251024723053, "learning_rate": 1.9997465667786143e-05, "loss": 1.0788, "step": 139 }, { "epoch": 0.11226944667201283, "grad_norm": 0.9178858995437622, "learning_rate": 1.999726697919338e-05, "loss": 1.092, "step": 140 }, { "epoch": 0.11307137129109864, "grad_norm": 0.8240802884101868, "learning_rate": 1.9997060794309367e-05, "loss": 1.0582, "step": 141 }, { "epoch": 0.11387329591018444, "grad_norm": 0.8621988892555237, "learning_rate": 1.999684711328871e-05, "loss": 1.0451, "step": 142 }, { "epoch": 0.11467522052927025, "grad_norm": 0.8760347962379456, "learning_rate": 1.999662593629163e-05, "loss": 1.0468, "step": 143 }, { "epoch": 0.11547714514835605, "grad_norm": 0.8880921006202698, "learning_rate": 1.9996397263483973e-05, "loss": 1.0447, "step": 144 }, { "epoch": 0.11627906976744186, "grad_norm": 0.8683561682701111, "learning_rate": 1.9996161095037215e-05, "loss": 1.0719, "step": 145 }, { "epoch": 0.11708099438652766, "grad_norm": 0.8298245668411255, "learning_rate": 1.999591743112843e-05, "loss": 0.9996, "step": 146 }, { "epoch": 0.11788291900561347, "grad_norm": 0.8646122813224792, "learning_rate": 1.9995666271940334e-05, "loss": 0.9995, "step": 147 }, { "epoch": 0.11868484362469928, "grad_norm": 0.8935620188713074, "learning_rate": 1.9995407617661254e-05, "loss": 1.05, "step": 148 }, { "epoch": 0.11948676824378508, "grad_norm": 0.8892669081687927, "learning_rate": 1.9995141468485138e-05, "loss": 1.0375, "step": 149 }, { "epoch": 0.12028869286287089, "grad_norm": 0.8888627886772156, "learning_rate": 1.9994867824611552e-05, "loss": 1.0586, "step": 150 }, { "epoch": 0.1210906174819567, "grad_norm": 0.8920398354530334, "learning_rate": 1.9994586686245682e-05, "loss": 1.0305, "step": 151 }, { "epoch": 0.1218925421010425, "grad_norm": 0.8657960295677185, "learning_rate": 1.9994298053598335e-05, "loss": 1.069, "step": 152 }, { "epoch": 0.1226944667201283, "grad_norm": 0.8122588396072388, "learning_rate": 1.9994001926885936e-05, "loss": 1.0708, "step": 153 }, { "epoch": 0.12349639133921411, "grad_norm": 0.8348774313926697, "learning_rate": 1.9993698306330542e-05, "loss": 1.0157, "step": 154 }, { "epoch": 0.12429831595829992, "grad_norm": 0.9911849498748779, "learning_rate": 1.9993387192159807e-05, "loss": 1.0413, "step": 155 }, { "epoch": 0.12510024057738572, "grad_norm": 0.8572826385498047, "learning_rate": 1.9993068584607018e-05, "loss": 1.0975, "step": 156 }, { "epoch": 0.12590216519647154, "grad_norm": 0.8724331259727478, "learning_rate": 1.999274248391108e-05, "loss": 1.01, "step": 157 }, { "epoch": 0.12670408981555734, "grad_norm": 0.9225433468818665, "learning_rate": 1.999240889031651e-05, "loss": 1.0625, "step": 158 }, { "epoch": 0.12750601443464316, "grad_norm": 0.8641144633293152, "learning_rate": 1.999206780407345e-05, "loss": 1.0393, "step": 159 }, { "epoch": 0.12830793905372895, "grad_norm": 0.8191494941711426, "learning_rate": 1.999171922543766e-05, "loss": 1.002, "step": 160 }, { "epoch": 0.12910986367281477, "grad_norm": 0.769394040107727, "learning_rate": 1.9991363154670512e-05, "loss": 0.9976, "step": 161 }, { "epoch": 0.12991178829190056, "grad_norm": 0.9122303128242493, "learning_rate": 1.9990999592039007e-05, "loss": 1.0378, "step": 162 }, { "epoch": 0.13071371291098638, "grad_norm": 1.0107554197311401, "learning_rate": 1.9990628537815748e-05, "loss": 0.9659, "step": 163 }, { "epoch": 0.13151563753007217, "grad_norm": 0.8204858303070068, "learning_rate": 1.999024999227897e-05, "loss": 1.0627, "step": 164 }, { "epoch": 0.132317562149158, "grad_norm": 0.7805432081222534, "learning_rate": 1.9989863955712518e-05, "loss": 1.0367, "step": 165 }, { "epoch": 0.13311948676824378, "grad_norm": 1.51397705078125, "learning_rate": 1.9989470428405853e-05, "loss": 1.0363, "step": 166 }, { "epoch": 0.1339214113873296, "grad_norm": 0.8241011500358582, "learning_rate": 1.9989069410654055e-05, "loss": 1.0326, "step": 167 }, { "epoch": 0.1347233360064154, "grad_norm": 0.7944259643554688, "learning_rate": 1.998866090275783e-05, "loss": 1.0861, "step": 168 }, { "epoch": 0.13552526062550121, "grad_norm": 1.0405583381652832, "learning_rate": 1.9988244905023476e-05, "loss": 1.0366, "step": 169 }, { "epoch": 0.136327185244587, "grad_norm": 0.7776370048522949, "learning_rate": 1.9987821417762937e-05, "loss": 1.0174, "step": 170 }, { "epoch": 0.13712910986367283, "grad_norm": 0.8183755874633789, "learning_rate": 1.9987390441293747e-05, "loss": 1.0243, "step": 171 }, { "epoch": 0.13793103448275862, "grad_norm": 0.776903510093689, "learning_rate": 1.9986951975939073e-05, "loss": 1.058, "step": 172 }, { "epoch": 0.13873295910184444, "grad_norm": 0.7709075212478638, "learning_rate": 1.998650602202769e-05, "loss": 1.0626, "step": 173 }, { "epoch": 0.13953488372093023, "grad_norm": 0.8181372284889221, "learning_rate": 1.998605257989399e-05, "loss": 0.9818, "step": 174 }, { "epoch": 0.14033680834001605, "grad_norm": 0.8568731546401978, "learning_rate": 1.9985591649877974e-05, "loss": 1.0647, "step": 175 }, { "epoch": 0.14113873295910184, "grad_norm": 0.7709632515907288, "learning_rate": 1.998512323232527e-05, "loss": 1.0027, "step": 176 }, { "epoch": 0.14194065757818766, "grad_norm": 0.8511577248573303, "learning_rate": 1.998464732758711e-05, "loss": 1.0465, "step": 177 }, { "epoch": 0.14274258219727345, "grad_norm": 0.8334391117095947, "learning_rate": 1.9984163936020348e-05, "loss": 0.9812, "step": 178 }, { "epoch": 0.14354450681635927, "grad_norm": 0.7931066155433655, "learning_rate": 1.9983673057987438e-05, "loss": 1.0139, "step": 179 }, { "epoch": 0.14434643143544507, "grad_norm": 0.8163535594940186, "learning_rate": 1.9983174693856465e-05, "loss": 1.0131, "step": 180 }, { "epoch": 0.14514835605453089, "grad_norm": 0.7544008493423462, "learning_rate": 1.998266884400112e-05, "loss": 1.0507, "step": 181 }, { "epoch": 0.14595028067361668, "grad_norm": 0.8450154066085815, "learning_rate": 1.99821555088007e-05, "loss": 0.9851, "step": 182 }, { "epoch": 0.1467522052927025, "grad_norm": 0.9036790132522583, "learning_rate": 1.9981634688640126e-05, "loss": 1.0312, "step": 183 }, { "epoch": 0.1475541299117883, "grad_norm": 0.903751015663147, "learning_rate": 1.998110638390993e-05, "loss": 1.0143, "step": 184 }, { "epoch": 0.1483560545308741, "grad_norm": 0.8345926403999329, "learning_rate": 1.9980570595006243e-05, "loss": 1.0127, "step": 185 }, { "epoch": 0.1491579791499599, "grad_norm": 0.9414835572242737, "learning_rate": 1.9980027322330825e-05, "loss": 1.0239, "step": 186 }, { "epoch": 0.14995990376904572, "grad_norm": 0.801048219203949, "learning_rate": 1.9979476566291038e-05, "loss": 1.0562, "step": 187 }, { "epoch": 0.1507618283881315, "grad_norm": 0.8513798117637634, "learning_rate": 1.9978918327299855e-05, "loss": 1.0221, "step": 188 }, { "epoch": 0.15156375300721733, "grad_norm": 0.8290636539459229, "learning_rate": 1.9978352605775874e-05, "loss": 1.0666, "step": 189 }, { "epoch": 0.15236567762630313, "grad_norm": 0.7645179629325867, "learning_rate": 1.9977779402143277e-05, "loss": 0.9785, "step": 190 }, { "epoch": 0.15316760224538895, "grad_norm": 0.7943917512893677, "learning_rate": 1.997719871683188e-05, "loss": 1.0015, "step": 191 }, { "epoch": 0.15396952686447474, "grad_norm": 0.8007308840751648, "learning_rate": 1.9976610550277104e-05, "loss": 0.9936, "step": 192 }, { "epoch": 0.15477145148356056, "grad_norm": 0.8672998547554016, "learning_rate": 1.997601490291997e-05, "loss": 1.0385, "step": 193 }, { "epoch": 0.15557337610264635, "grad_norm": 0.9103288650512695, "learning_rate": 1.9975411775207113e-05, "loss": 1.0314, "step": 194 }, { "epoch": 0.15637530072173217, "grad_norm": 0.8059409856796265, "learning_rate": 1.997480116759078e-05, "loss": 0.9887, "step": 195 }, { "epoch": 0.15717722534081796, "grad_norm": 0.8400903940200806, "learning_rate": 1.9974183080528835e-05, "loss": 1.0218, "step": 196 }, { "epoch": 0.15797914995990378, "grad_norm": 0.7969584465026855, "learning_rate": 1.9973557514484726e-05, "loss": 0.9769, "step": 197 }, { "epoch": 0.15878107457898957, "grad_norm": 0.8063225150108337, "learning_rate": 1.997292446992754e-05, "loss": 1.0702, "step": 198 }, { "epoch": 0.1595829991980754, "grad_norm": 0.7932840585708618, "learning_rate": 1.9972283947331937e-05, "loss": 1.0466, "step": 199 }, { "epoch": 0.16038492381716118, "grad_norm": 0.8667788505554199, "learning_rate": 1.9971635947178214e-05, "loss": 1.0006, "step": 200 }, { "epoch": 0.161186848436247, "grad_norm": 0.8451849222183228, "learning_rate": 1.9970980469952264e-05, "loss": 0.9824, "step": 201 }, { "epoch": 0.1619887730553328, "grad_norm": 0.7452207207679749, "learning_rate": 1.9970317516145582e-05, "loss": 1.0637, "step": 202 }, { "epoch": 0.16279069767441862, "grad_norm": 0.7727819085121155, "learning_rate": 1.9969647086255274e-05, "loss": 0.9842, "step": 203 }, { "epoch": 0.1635926222935044, "grad_norm": 0.8708699941635132, "learning_rate": 1.9968969180784055e-05, "loss": 0.9804, "step": 204 }, { "epoch": 0.16439454691259023, "grad_norm": 0.7909506559371948, "learning_rate": 1.996828380024024e-05, "loss": 1.0178, "step": 205 }, { "epoch": 0.16519647153167602, "grad_norm": 0.8187145590782166, "learning_rate": 1.9967590945137744e-05, "loss": 1.0755, "step": 206 }, { "epoch": 0.16599839615076184, "grad_norm": 0.8718328475952148, "learning_rate": 1.99668906159961e-05, "loss": 1.07, "step": 207 }, { "epoch": 0.16680032076984763, "grad_norm": 0.7883650660514832, "learning_rate": 1.996618281334044e-05, "loss": 1.0534, "step": 208 }, { "epoch": 0.16760224538893345, "grad_norm": 0.7847347259521484, "learning_rate": 1.9965467537701496e-05, "loss": 1.0075, "step": 209 }, { "epoch": 0.16840417000801924, "grad_norm": 0.7670832872390747, "learning_rate": 1.9964744789615605e-05, "loss": 1.0417, "step": 210 }, { "epoch": 0.16920609462710506, "grad_norm": 0.7731572389602661, "learning_rate": 1.996401456962471e-05, "loss": 1.0227, "step": 211 }, { "epoch": 0.17000801924619086, "grad_norm": 0.7575690150260925, "learning_rate": 1.996327687827635e-05, "loss": 0.9834, "step": 212 }, { "epoch": 0.17080994386527668, "grad_norm": 0.8696389198303223, "learning_rate": 1.996253171612368e-05, "loss": 1.029, "step": 213 }, { "epoch": 0.17161186848436247, "grad_norm": 0.8823282718658447, "learning_rate": 1.9961779083725438e-05, "loss": 1.0698, "step": 214 }, { "epoch": 0.1724137931034483, "grad_norm": 0.7903878688812256, "learning_rate": 1.9961018981645985e-05, "loss": 1.0445, "step": 215 }, { "epoch": 0.17321571772253408, "grad_norm": 0.9023079872131348, "learning_rate": 1.996025141045526e-05, "loss": 0.9562, "step": 216 }, { "epoch": 0.1740176423416199, "grad_norm": 0.7859614491462708, "learning_rate": 1.995947637072882e-05, "loss": 0.9911, "step": 217 }, { "epoch": 0.1748195669607057, "grad_norm": 0.7968483567237854, "learning_rate": 1.9958693863047816e-05, "loss": 0.9593, "step": 218 }, { "epoch": 0.1756214915797915, "grad_norm": 0.744019091129303, "learning_rate": 1.9957903887998993e-05, "loss": 1.038, "step": 219 }, { "epoch": 0.1764234161988773, "grad_norm": 0.7552821636199951, "learning_rate": 1.9957106446174712e-05, "loss": 1.005, "step": 220 }, { "epoch": 0.17722534081796312, "grad_norm": 0.7813109755516052, "learning_rate": 1.9956301538172913e-05, "loss": 1.0407, "step": 221 }, { "epoch": 0.17802726543704891, "grad_norm": 0.734769880771637, "learning_rate": 1.995548916459715e-05, "loss": 1.0074, "step": 222 }, { "epoch": 0.17882919005613473, "grad_norm": 0.737060010433197, "learning_rate": 1.995466932605656e-05, "loss": 0.9774, "step": 223 }, { "epoch": 0.17963111467522053, "grad_norm": 0.7727075219154358, "learning_rate": 1.9953842023165894e-05, "loss": 0.9878, "step": 224 }, { "epoch": 0.18043303929430635, "grad_norm": 0.7828347086906433, "learning_rate": 1.995300725654549e-05, "loss": 0.9883, "step": 225 }, { "epoch": 0.18123496391339214, "grad_norm": 0.7647070288658142, "learning_rate": 1.995216502682128e-05, "loss": 1.0475, "step": 226 }, { "epoch": 0.18203688853247796, "grad_norm": 0.8048544526100159, "learning_rate": 1.99513153346248e-05, "loss": 1.0019, "step": 227 }, { "epoch": 0.18283881315156375, "grad_norm": 0.7421616911888123, "learning_rate": 1.995045818059318e-05, "loss": 0.9968, "step": 228 }, { "epoch": 0.18364073777064957, "grad_norm": 0.694659948348999, "learning_rate": 1.994959356536914e-05, "loss": 0.9692, "step": 229 }, { "epoch": 0.18444266238973536, "grad_norm": 0.7353253364562988, "learning_rate": 1.9948721489601e-05, "loss": 1.0244, "step": 230 }, { "epoch": 0.18524458700882118, "grad_norm": 0.8362675309181213, "learning_rate": 1.994784195394267e-05, "loss": 1.0315, "step": 231 }, { "epoch": 0.18604651162790697, "grad_norm": 0.6850599646568298, "learning_rate": 1.9946954959053656e-05, "loss": 0.9722, "step": 232 }, { "epoch": 0.1868484362469928, "grad_norm": 0.777386486530304, "learning_rate": 1.9946060505599058e-05, "loss": 1.0284, "step": 233 }, { "epoch": 0.18765036086607859, "grad_norm": 0.7967085242271423, "learning_rate": 1.994515859424957e-05, "loss": 0.9779, "step": 234 }, { "epoch": 0.1884522854851644, "grad_norm": 0.7552576661109924, "learning_rate": 1.9944249225681468e-05, "loss": 1.033, "step": 235 }, { "epoch": 0.1892542101042502, "grad_norm": 0.8289223313331604, "learning_rate": 1.994333240057664e-05, "loss": 0.9745, "step": 236 }, { "epoch": 0.19005613472333602, "grad_norm": 0.754082977771759, "learning_rate": 1.994240811962254e-05, "loss": 1.023, "step": 237 }, { "epoch": 0.1908580593424218, "grad_norm": 0.7701601386070251, "learning_rate": 1.9941476383512236e-05, "loss": 1.0095, "step": 238 }, { "epoch": 0.19165998396150763, "grad_norm": 0.7313913702964783, "learning_rate": 1.9940537192944366e-05, "loss": 1.0282, "step": 239 }, { "epoch": 0.19246190858059342, "grad_norm": 0.7340826392173767, "learning_rate": 1.9939590548623173e-05, "loss": 0.9947, "step": 240 }, { "epoch": 0.19326383319967924, "grad_norm": 0.7086189389228821, "learning_rate": 1.993863645125848e-05, "loss": 1.0004, "step": 241 }, { "epoch": 0.19406575781876503, "grad_norm": 0.7488327622413635, "learning_rate": 1.993767490156571e-05, "loss": 0.9864, "step": 242 }, { "epoch": 0.19486768243785085, "grad_norm": 0.7784487009048462, "learning_rate": 1.9936705900265853e-05, "loss": 1.0219, "step": 243 }, { "epoch": 0.19566960705693665, "grad_norm": 0.784127414226532, "learning_rate": 1.9935729448085507e-05, "loss": 1.0036, "step": 244 }, { "epoch": 0.19647153167602247, "grad_norm": 0.7619081139564514, "learning_rate": 1.9934745545756847e-05, "loss": 0.9849, "step": 245 }, { "epoch": 0.19727345629510826, "grad_norm": 0.7807731628417969, "learning_rate": 1.9933754194017636e-05, "loss": 1.0147, "step": 246 }, { "epoch": 0.19807538091419408, "grad_norm": 0.7649911642074585, "learning_rate": 1.9932755393611223e-05, "loss": 0.9643, "step": 247 }, { "epoch": 0.19887730553327987, "grad_norm": 0.7342953085899353, "learning_rate": 1.993174914528655e-05, "loss": 0.9754, "step": 248 }, { "epoch": 0.1996792301523657, "grad_norm": 0.7341395020484924, "learning_rate": 1.9930735449798125e-05, "loss": 0.9807, "step": 249 }, { "epoch": 0.20048115477145148, "grad_norm": 0.8070521354675293, "learning_rate": 1.9929714307906053e-05, "loss": 0.9697, "step": 250 }, { "epoch": 0.2012830793905373, "grad_norm": 0.7484433650970459, "learning_rate": 1.992868572037603e-05, "loss": 0.9969, "step": 251 }, { "epoch": 0.2020850040096231, "grad_norm": 0.7424092292785645, "learning_rate": 1.992764968797932e-05, "loss": 1.0005, "step": 252 }, { "epoch": 0.2028869286287089, "grad_norm": 0.7775983810424805, "learning_rate": 1.9926606211492773e-05, "loss": 1.0033, "step": 253 }, { "epoch": 0.2036888532477947, "grad_norm": 0.7295064926147461, "learning_rate": 1.9925555291698826e-05, "loss": 1.0001, "step": 254 }, { "epoch": 0.20449077786688052, "grad_norm": 0.7483360171318054, "learning_rate": 1.9924496929385496e-05, "loss": 0.998, "step": 255 }, { "epoch": 0.20529270248596632, "grad_norm": 0.7402914762496948, "learning_rate": 1.9923431125346376e-05, "loss": 0.9109, "step": 256 }, { "epoch": 0.20609462710505214, "grad_norm": 0.784766674041748, "learning_rate": 1.9922357880380644e-05, "loss": 1.0262, "step": 257 }, { "epoch": 0.20689655172413793, "grad_norm": 0.8008718490600586, "learning_rate": 1.9921277195293057e-05, "loss": 1.0357, "step": 258 }, { "epoch": 0.20769847634322375, "grad_norm": 0.7571882009506226, "learning_rate": 1.9920189070893947e-05, "loss": 1.0125, "step": 259 }, { "epoch": 0.20850040096230954, "grad_norm": 0.7246387600898743, "learning_rate": 1.9919093507999226e-05, "loss": 1.0514, "step": 260 }, { "epoch": 0.20930232558139536, "grad_norm": 0.7044005990028381, "learning_rate": 1.9917990507430385e-05, "loss": 0.9613, "step": 261 }, { "epoch": 0.21010425020048115, "grad_norm": 0.7408387064933777, "learning_rate": 1.9916880070014494e-05, "loss": 0.997, "step": 262 }, { "epoch": 0.21090617481956697, "grad_norm": 0.7599897384643555, "learning_rate": 1.9915762196584193e-05, "loss": 0.9701, "step": 263 }, { "epoch": 0.21170809943865276, "grad_norm": 0.7892153859138489, "learning_rate": 1.9914636887977706e-05, "loss": 1.0608, "step": 264 }, { "epoch": 0.21251002405773858, "grad_norm": 0.9341477155685425, "learning_rate": 1.9913504145038823e-05, "loss": 0.9908, "step": 265 }, { "epoch": 0.21331194867682438, "grad_norm": 0.8016869425773621, "learning_rate": 1.991236396861692e-05, "loss": 1.0324, "step": 266 }, { "epoch": 0.2141138732959102, "grad_norm": 0.7510752081871033, "learning_rate": 1.991121635956693e-05, "loss": 0.981, "step": 267 }, { "epoch": 0.214915797914996, "grad_norm": 0.791854739189148, "learning_rate": 1.9910061318749375e-05, "loss": 0.9654, "step": 268 }, { "epoch": 0.2157177225340818, "grad_norm": 0.7618932127952576, "learning_rate": 1.9908898847030348e-05, "loss": 1.0033, "step": 269 }, { "epoch": 0.2165196471531676, "grad_norm": 0.7162818312644958, "learning_rate": 1.9907728945281504e-05, "loss": 0.9684, "step": 270 }, { "epoch": 0.21732157177225342, "grad_norm": 0.8062979578971863, "learning_rate": 1.9906551614380077e-05, "loss": 0.964, "step": 271 }, { "epoch": 0.2181234963913392, "grad_norm": 0.7345139384269714, "learning_rate": 1.990536685520887e-05, "loss": 0.9878, "step": 272 }, { "epoch": 0.21892542101042503, "grad_norm": 0.8036599159240723, "learning_rate": 1.9904174668656252e-05, "loss": 0.9344, "step": 273 }, { "epoch": 0.21972734562951082, "grad_norm": 0.7124471664428711, "learning_rate": 1.990297505561617e-05, "loss": 0.9884, "step": 274 }, { "epoch": 0.22052927024859664, "grad_norm": 0.7641183733940125, "learning_rate": 1.9901768016988136e-05, "loss": 1.0405, "step": 275 }, { "epoch": 0.22133119486768243, "grad_norm": 0.7073462605476379, "learning_rate": 1.9900553553677227e-05, "loss": 0.9733, "step": 276 }, { "epoch": 0.22213311948676825, "grad_norm": 0.7479486465454102, "learning_rate": 1.9899331666594085e-05, "loss": 1.0005, "step": 277 }, { "epoch": 0.22293504410585405, "grad_norm": 0.7818754315376282, "learning_rate": 1.9898102356654926e-05, "loss": 1.0504, "step": 278 }, { "epoch": 0.22373696872493987, "grad_norm": 0.7143021821975708, "learning_rate": 1.989686562478153e-05, "loss": 1.0098, "step": 279 }, { "epoch": 0.22453889334402566, "grad_norm": 0.8301097750663757, "learning_rate": 1.9895621471901236e-05, "loss": 1.0214, "step": 280 }, { "epoch": 0.22534081796311148, "grad_norm": 0.7973410487174988, "learning_rate": 1.9894369898946955e-05, "loss": 0.9639, "step": 281 }, { "epoch": 0.22614274258219727, "grad_norm": 0.7219634652137756, "learning_rate": 1.9893110906857158e-05, "loss": 0.9978, "step": 282 }, { "epoch": 0.2269446672012831, "grad_norm": 0.7005908489227295, "learning_rate": 1.9891844496575883e-05, "loss": 0.9704, "step": 283 }, { "epoch": 0.22774659182036888, "grad_norm": 0.8081759214401245, "learning_rate": 1.9890570669052724e-05, "loss": 0.9644, "step": 284 }, { "epoch": 0.2285485164394547, "grad_norm": 0.7537055015563965, "learning_rate": 1.9889289425242845e-05, "loss": 1.0097, "step": 285 }, { "epoch": 0.2293504410585405, "grad_norm": 0.7490015029907227, "learning_rate": 1.9888000766106962e-05, "loss": 1.0267, "step": 286 }, { "epoch": 0.2301523656776263, "grad_norm": 0.7152953743934631, "learning_rate": 1.9886704692611355e-05, "loss": 1.0233, "step": 287 }, { "epoch": 0.2309542902967121, "grad_norm": 0.6974421143531799, "learning_rate": 1.9885401205727864e-05, "loss": 0.9456, "step": 288 }, { "epoch": 0.23175621491579793, "grad_norm": 0.7040266990661621, "learning_rate": 1.9884090306433892e-05, "loss": 0.9826, "step": 289 }, { "epoch": 0.23255813953488372, "grad_norm": 0.8025934100151062, "learning_rate": 1.9882771995712393e-05, "loss": 0.9948, "step": 290 }, { "epoch": 0.23336006415396954, "grad_norm": 0.736258864402771, "learning_rate": 1.988144627455188e-05, "loss": 1.0012, "step": 291 }, { "epoch": 0.23416198877305533, "grad_norm": 0.7188357710838318, "learning_rate": 1.9880113143946428e-05, "loss": 1.0366, "step": 292 }, { "epoch": 0.23496391339214115, "grad_norm": 0.7842757701873779, "learning_rate": 1.9878772604895657e-05, "loss": 1.02, "step": 293 }, { "epoch": 0.23576583801122694, "grad_norm": 0.694830060005188, "learning_rate": 1.9877424658404757e-05, "loss": 0.9438, "step": 294 }, { "epoch": 0.23656776263031276, "grad_norm": 0.8712981343269348, "learning_rate": 1.987606930548446e-05, "loss": 1.0039, "step": 295 }, { "epoch": 0.23736968724939855, "grad_norm": 0.7846871614456177, "learning_rate": 1.9874706547151054e-05, "loss": 0.9635, "step": 296 }, { "epoch": 0.23817161186848437, "grad_norm": 0.7604832053184509, "learning_rate": 1.9873336384426388e-05, "loss": 0.9759, "step": 297 }, { "epoch": 0.23897353648757017, "grad_norm": 0.7759199142456055, "learning_rate": 1.987195881833785e-05, "loss": 1.0331, "step": 298 }, { "epoch": 0.23977546110665598, "grad_norm": 0.7688100934028625, "learning_rate": 1.9870573849918387e-05, "loss": 1.0101, "step": 299 }, { "epoch": 0.24057738572574178, "grad_norm": 0.7552735805511475, "learning_rate": 1.98691814802065e-05, "loss": 1.0091, "step": 300 }, { "epoch": 0.2413793103448276, "grad_norm": 0.7121011018753052, "learning_rate": 1.9867781710246228e-05, "loss": 0.9895, "step": 301 }, { "epoch": 0.2421812349639134, "grad_norm": 0.721831202507019, "learning_rate": 1.986637454108717e-05, "loss": 0.9863, "step": 302 }, { "epoch": 0.2429831595829992, "grad_norm": 0.7194176316261292, "learning_rate": 1.9864959973784474e-05, "loss": 0.9869, "step": 303 }, { "epoch": 0.243785084202085, "grad_norm": 0.7214140892028809, "learning_rate": 1.9863538009398824e-05, "loss": 1.0065, "step": 304 }, { "epoch": 0.24458700882117082, "grad_norm": 0.7053404450416565, "learning_rate": 1.9862108648996457e-05, "loss": 0.9836, "step": 305 }, { "epoch": 0.2453889334402566, "grad_norm": 0.7068779468536377, "learning_rate": 1.986067189364916e-05, "loss": 0.9545, "step": 306 }, { "epoch": 0.24619085805934243, "grad_norm": 0.7764230370521545, "learning_rate": 1.9859227744434264e-05, "loss": 0.9764, "step": 307 }, { "epoch": 0.24699278267842822, "grad_norm": 0.6795384287834167, "learning_rate": 1.9857776202434633e-05, "loss": 1.026, "step": 308 }, { "epoch": 0.24779470729751404, "grad_norm": 0.7431665062904358, "learning_rate": 1.985631726873869e-05, "loss": 1.008, "step": 309 }, { "epoch": 0.24859663191659984, "grad_norm": 0.7341127991676331, "learning_rate": 1.9854850944440386e-05, "loss": 0.9737, "step": 310 }, { "epoch": 0.24939855653568566, "grad_norm": 0.6869488954544067, "learning_rate": 1.9853377230639227e-05, "loss": 0.9644, "step": 311 }, { "epoch": 0.25020048115477145, "grad_norm": 0.6559352874755859, "learning_rate": 1.9851896128440252e-05, "loss": 0.9908, "step": 312 }, { "epoch": 0.25100240577385724, "grad_norm": 0.7657687067985535, "learning_rate": 1.985040763895404e-05, "loss": 1.008, "step": 313 }, { "epoch": 0.2518043303929431, "grad_norm": 0.7205827832221985, "learning_rate": 1.9848911763296712e-05, "loss": 1.0024, "step": 314 }, { "epoch": 0.2526062550120289, "grad_norm": 0.6896849274635315, "learning_rate": 1.9847408502589928e-05, "loss": 1.0026, "step": 315 }, { "epoch": 0.25340817963111467, "grad_norm": 0.706331193447113, "learning_rate": 1.9845897857960886e-05, "loss": 1.0175, "step": 316 }, { "epoch": 0.25421010425020046, "grad_norm": 0.7068176865577698, "learning_rate": 1.9844379830542312e-05, "loss": 0.9796, "step": 317 }, { "epoch": 0.2550120288692863, "grad_norm": 0.705200731754303, "learning_rate": 1.9842854421472478e-05, "loss": 1.0286, "step": 318 }, { "epoch": 0.2558139534883721, "grad_norm": 0.7059184312820435, "learning_rate": 1.984132163189519e-05, "loss": 1.0374, "step": 319 }, { "epoch": 0.2566158781074579, "grad_norm": 0.7251558303833008, "learning_rate": 1.9839781462959787e-05, "loss": 0.9749, "step": 320 }, { "epoch": 0.2574178027265437, "grad_norm": 0.7353153824806213, "learning_rate": 1.9838233915821133e-05, "loss": 0.9462, "step": 321 }, { "epoch": 0.25821972734562953, "grad_norm": 0.714249849319458, "learning_rate": 1.9836678991639638e-05, "loss": 0.9907, "step": 322 }, { "epoch": 0.2590216519647153, "grad_norm": 0.7212216258049011, "learning_rate": 1.9835116691581232e-05, "loss": 1.0044, "step": 323 }, { "epoch": 0.2598235765838011, "grad_norm": 0.6976492404937744, "learning_rate": 1.9833547016817386e-05, "loss": 0.9828, "step": 324 }, { "epoch": 0.2606255012028869, "grad_norm": 0.7598298788070679, "learning_rate": 1.9831969968525096e-05, "loss": 0.9409, "step": 325 }, { "epoch": 0.26142742582197276, "grad_norm": 0.6750239729881287, "learning_rate": 1.983038554788688e-05, "loss": 0.9493, "step": 326 }, { "epoch": 0.26222935044105855, "grad_norm": 0.7285559773445129, "learning_rate": 1.9828793756090794e-05, "loss": 1.0369, "step": 327 }, { "epoch": 0.26303127506014434, "grad_norm": 0.7011874914169312, "learning_rate": 1.9827194594330418e-05, "loss": 0.9345, "step": 328 }, { "epoch": 0.26383319967923013, "grad_norm": 0.6847050786018372, "learning_rate": 1.982558806380486e-05, "loss": 1.0196, "step": 329 }, { "epoch": 0.264635124298316, "grad_norm": 0.728759765625, "learning_rate": 1.9823974165718748e-05, "loss": 0.9736, "step": 330 }, { "epoch": 0.2654370489174018, "grad_norm": 0.6663811802864075, "learning_rate": 1.982235290128224e-05, "loss": 0.9749, "step": 331 }, { "epoch": 0.26623897353648757, "grad_norm": 0.6871486902236938, "learning_rate": 1.9820724271711012e-05, "loss": 0.9692, "step": 332 }, { "epoch": 0.26704089815557336, "grad_norm": 0.6922380328178406, "learning_rate": 1.9819088278226273e-05, "loss": 0.9636, "step": 333 }, { "epoch": 0.2678428227746592, "grad_norm": 0.7264043092727661, "learning_rate": 1.9817444922054738e-05, "loss": 0.9986, "step": 334 }, { "epoch": 0.268644747393745, "grad_norm": 0.701865553855896, "learning_rate": 1.9815794204428655e-05, "loss": 1.0301, "step": 335 }, { "epoch": 0.2694466720128308, "grad_norm": 0.6781471371650696, "learning_rate": 1.981413612658579e-05, "loss": 0.9804, "step": 336 }, { "epoch": 0.2702485966319166, "grad_norm": 0.6568560004234314, "learning_rate": 1.9812470689769424e-05, "loss": 1.0257, "step": 337 }, { "epoch": 0.27105052125100243, "grad_norm": 0.6593422889709473, "learning_rate": 1.9810797895228358e-05, "loss": 0.9611, "step": 338 }, { "epoch": 0.2718524458700882, "grad_norm": 0.6801155805587769, "learning_rate": 1.9809117744216916e-05, "loss": 0.975, "step": 339 }, { "epoch": 0.272654370489174, "grad_norm": 0.7064613699913025, "learning_rate": 1.9807430237994925e-05, "loss": 1.013, "step": 340 }, { "epoch": 0.2734562951082598, "grad_norm": 0.7264713048934937, "learning_rate": 1.9805735377827738e-05, "loss": 0.945, "step": 341 }, { "epoch": 0.27425821972734565, "grad_norm": 0.6898838877677917, "learning_rate": 1.9804033164986215e-05, "loss": 1.0042, "step": 342 }, { "epoch": 0.27506014434643145, "grad_norm": 0.69865483045578, "learning_rate": 1.980232360074674e-05, "loss": 1.0004, "step": 343 }, { "epoch": 0.27586206896551724, "grad_norm": 0.6840596199035645, "learning_rate": 1.98006066863912e-05, "loss": 0.9489, "step": 344 }, { "epoch": 0.27666399358460303, "grad_norm": 0.7413778305053711, "learning_rate": 1.979888242320699e-05, "loss": 0.9722, "step": 345 }, { "epoch": 0.2774659182036889, "grad_norm": 0.692329466342926, "learning_rate": 1.9797150812487028e-05, "loss": 0.997, "step": 346 }, { "epoch": 0.27826784282277467, "grad_norm": 0.6711648106575012, "learning_rate": 1.9795411855529735e-05, "loss": 1.0132, "step": 347 }, { "epoch": 0.27906976744186046, "grad_norm": 0.7313749194145203, "learning_rate": 1.9793665553639038e-05, "loss": 0.9723, "step": 348 }, { "epoch": 0.27987169206094625, "grad_norm": 0.7389296293258667, "learning_rate": 1.979191190812437e-05, "loss": 1.0407, "step": 349 }, { "epoch": 0.2806736166800321, "grad_norm": 0.7041956186294556, "learning_rate": 1.9790150920300683e-05, "loss": 1.0093, "step": 350 }, { "epoch": 0.2814755412991179, "grad_norm": 0.7673702836036682, "learning_rate": 1.9788382591488412e-05, "loss": 0.9872, "step": 351 }, { "epoch": 0.2822774659182037, "grad_norm": 0.7097181081771851, "learning_rate": 1.9786606923013525e-05, "loss": 0.9479, "step": 352 }, { "epoch": 0.2830793905372895, "grad_norm": 0.6917240619659424, "learning_rate": 1.9784823916207472e-05, "loss": 1.0284, "step": 353 }, { "epoch": 0.2838813151563753, "grad_norm": 0.7378222942352295, "learning_rate": 1.978303357240721e-05, "loss": 0.9602, "step": 354 }, { "epoch": 0.2846832397754611, "grad_norm": 0.6886555552482605, "learning_rate": 1.9781235892955206e-05, "loss": 0.9283, "step": 355 }, { "epoch": 0.2854851643945469, "grad_norm": 0.7055453658103943, "learning_rate": 1.9779430879199414e-05, "loss": 1.0428, "step": 356 }, { "epoch": 0.2862870890136327, "grad_norm": 0.6854853630065918, "learning_rate": 1.9777618532493298e-05, "loss": 0.964, "step": 357 }, { "epoch": 0.28708901363271855, "grad_norm": 0.6832833886146545, "learning_rate": 1.977579885419582e-05, "loss": 0.9413, "step": 358 }, { "epoch": 0.28789093825180434, "grad_norm": 0.7093089818954468, "learning_rate": 1.9773971845671435e-05, "loss": 0.9909, "step": 359 }, { "epoch": 0.28869286287089013, "grad_norm": 0.7339062690734863, "learning_rate": 1.977213750829009e-05, "loss": 0.9815, "step": 360 }, { "epoch": 0.2894947874899759, "grad_norm": 0.6688426733016968, "learning_rate": 1.9770295843427242e-05, "loss": 0.9744, "step": 361 }, { "epoch": 0.29029671210906177, "grad_norm": 0.675520122051239, "learning_rate": 1.9768446852463832e-05, "loss": 1.0187, "step": 362 }, { "epoch": 0.29109863672814756, "grad_norm": 0.6580377817153931, "learning_rate": 1.9766590536786294e-05, "loss": 0.9624, "step": 363 }, { "epoch": 0.29190056134723336, "grad_norm": 0.7211337089538574, "learning_rate": 1.976472689778656e-05, "loss": 0.9689, "step": 364 }, { "epoch": 0.29270248596631915, "grad_norm": 0.6154484748840332, "learning_rate": 1.976285593686205e-05, "loss": 0.9385, "step": 365 }, { "epoch": 0.293504410585405, "grad_norm": 0.6870887279510498, "learning_rate": 1.976097765541567e-05, "loss": 0.9684, "step": 366 }, { "epoch": 0.2943063352044908, "grad_norm": 0.6625701785087585, "learning_rate": 1.9759092054855822e-05, "loss": 0.9666, "step": 367 }, { "epoch": 0.2951082598235766, "grad_norm": 0.7698168158531189, "learning_rate": 1.975719913659639e-05, "loss": 0.9957, "step": 368 }, { "epoch": 0.29591018444266237, "grad_norm": 0.6859940886497498, "learning_rate": 1.9755298902056758e-05, "loss": 0.967, "step": 369 }, { "epoch": 0.2967121090617482, "grad_norm": 0.6606349349021912, "learning_rate": 1.975339135266178e-05, "loss": 0.9968, "step": 370 }, { "epoch": 0.297514033680834, "grad_norm": 0.7765418291091919, "learning_rate": 1.9751476489841796e-05, "loss": 0.9744, "step": 371 }, { "epoch": 0.2983159582999198, "grad_norm": 0.7239444851875305, "learning_rate": 1.974955431503265e-05, "loss": 1.0206, "step": 372 }, { "epoch": 0.2991178829190056, "grad_norm": 0.7017782330513, "learning_rate": 1.974762482967564e-05, "loss": 0.9825, "step": 373 }, { "epoch": 0.29991980753809144, "grad_norm": 0.730739951133728, "learning_rate": 1.9745688035217563e-05, "loss": 1.012, "step": 374 }, { "epoch": 0.30072173215717724, "grad_norm": 0.7359748482704163, "learning_rate": 1.97437439331107e-05, "loss": 0.9267, "step": 375 }, { "epoch": 0.301523656776263, "grad_norm": 0.7002710103988647, "learning_rate": 1.97417925248128e-05, "loss": 0.967, "step": 376 }, { "epoch": 0.3023255813953488, "grad_norm": 0.7859190702438354, "learning_rate": 1.9739833811787097e-05, "loss": 0.9648, "step": 377 }, { "epoch": 0.30312750601443467, "grad_norm": 0.7745339870452881, "learning_rate": 1.9737867795502298e-05, "loss": 0.9899, "step": 378 }, { "epoch": 0.30392943063352046, "grad_norm": 0.6724187731742859, "learning_rate": 1.973589447743259e-05, "loss": 0.9445, "step": 379 }, { "epoch": 0.30473135525260625, "grad_norm": 0.7949398159980774, "learning_rate": 1.9733913859057637e-05, "loss": 0.9603, "step": 380 }, { "epoch": 0.30553327987169204, "grad_norm": 0.7890524864196777, "learning_rate": 1.9731925941862573e-05, "loss": 0.9573, "step": 381 }, { "epoch": 0.3063352044907779, "grad_norm": 0.6943206787109375, "learning_rate": 1.9729930727338004e-05, "loss": 1.0042, "step": 382 }, { "epoch": 0.3071371291098637, "grad_norm": 0.6898202300071716, "learning_rate": 1.972792821698001e-05, "loss": 0.9527, "step": 383 }, { "epoch": 0.3079390537289495, "grad_norm": 0.7329158782958984, "learning_rate": 1.9725918412290142e-05, "loss": 0.9755, "step": 384 }, { "epoch": 0.30874097834803527, "grad_norm": 0.7760060429573059, "learning_rate": 1.9723901314775423e-05, "loss": 1.0134, "step": 385 }, { "epoch": 0.3095429029671211, "grad_norm": 0.6890391111373901, "learning_rate": 1.9721876925948336e-05, "loss": 0.973, "step": 386 }, { "epoch": 0.3103448275862069, "grad_norm": 0.6739248037338257, "learning_rate": 1.971984524732684e-05, "loss": 1.0187, "step": 387 }, { "epoch": 0.3111467522052927, "grad_norm": 0.729393482208252, "learning_rate": 1.971780628043436e-05, "loss": 0.9873, "step": 388 }, { "epoch": 0.3119486768243785, "grad_norm": 0.7092537879943848, "learning_rate": 1.9715760026799776e-05, "loss": 0.9992, "step": 389 }, { "epoch": 0.31275060144346434, "grad_norm": 0.657490074634552, "learning_rate": 1.971370648795744e-05, "loss": 0.9793, "step": 390 }, { "epoch": 0.31355252606255013, "grad_norm": 0.7291781902313232, "learning_rate": 1.971164566544717e-05, "loss": 0.9412, "step": 391 }, { "epoch": 0.3143544506816359, "grad_norm": 0.8005679845809937, "learning_rate": 1.970957756081424e-05, "loss": 0.994, "step": 392 }, { "epoch": 0.3151563753007217, "grad_norm": 0.6840459704399109, "learning_rate": 1.9707502175609377e-05, "loss": 1.0069, "step": 393 }, { "epoch": 0.31595829991980756, "grad_norm": 0.6707590222358704, "learning_rate": 1.9705419511388784e-05, "loss": 0.949, "step": 394 }, { "epoch": 0.31676022453889335, "grad_norm": 0.66581130027771, "learning_rate": 1.9703329569714114e-05, "loss": 1.0329, "step": 395 }, { "epoch": 0.31756214915797915, "grad_norm": 0.7102177739143372, "learning_rate": 1.9701232352152472e-05, "loss": 0.9578, "step": 396 }, { "epoch": 0.31836407377706494, "grad_norm": 0.7258641719818115, "learning_rate": 1.9699127860276426e-05, "loss": 0.989, "step": 397 }, { "epoch": 0.3191659983961508, "grad_norm": 0.646165668964386, "learning_rate": 1.969701609566399e-05, "loss": 0.9845, "step": 398 }, { "epoch": 0.3199679230152366, "grad_norm": 0.6825928688049316, "learning_rate": 1.9694897059898648e-05, "loss": 1.009, "step": 399 }, { "epoch": 0.32076984763432237, "grad_norm": 0.7049286365509033, "learning_rate": 1.9692770754569316e-05, "loss": 0.966, "step": 400 }, { "epoch": 0.32157177225340816, "grad_norm": 0.6968984603881836, "learning_rate": 1.9690637181270372e-05, "loss": 0.9642, "step": 401 }, { "epoch": 0.322373696872494, "grad_norm": 0.7258249521255493, "learning_rate": 1.9688496341601647e-05, "loss": 0.9723, "step": 402 }, { "epoch": 0.3231756214915798, "grad_norm": 0.6794790029525757, "learning_rate": 1.9686348237168408e-05, "loss": 0.9803, "step": 403 }, { "epoch": 0.3239775461106656, "grad_norm": 0.666203498840332, "learning_rate": 1.9684192869581376e-05, "loss": 0.9987, "step": 404 }, { "epoch": 0.3247794707297514, "grad_norm": 0.6866022348403931, "learning_rate": 1.968203024045673e-05, "loss": 0.9196, "step": 405 }, { "epoch": 0.32558139534883723, "grad_norm": 0.6703433990478516, "learning_rate": 1.9679860351416075e-05, "loss": 0.9677, "step": 406 }, { "epoch": 0.326383319967923, "grad_norm": 0.7272356152534485, "learning_rate": 1.967768320408647e-05, "loss": 0.9897, "step": 407 }, { "epoch": 0.3271852445870088, "grad_norm": 0.7687215805053711, "learning_rate": 1.967549880010041e-05, "loss": 0.9674, "step": 408 }, { "epoch": 0.3279871692060946, "grad_norm": 0.6770045757293701, "learning_rate": 1.967330714109584e-05, "loss": 1.0214, "step": 409 }, { "epoch": 0.32878909382518046, "grad_norm": 0.7255867123603821, "learning_rate": 1.9671108228716142e-05, "loss": 1.0004, "step": 410 }, { "epoch": 0.32959101844426625, "grad_norm": 0.6760930418968201, "learning_rate": 1.9668902064610128e-05, "loss": 0.9386, "step": 411 }, { "epoch": 0.33039294306335204, "grad_norm": 0.708378255367279, "learning_rate": 1.9666688650432063e-05, "loss": 0.9547, "step": 412 }, { "epoch": 0.33119486768243783, "grad_norm": 0.7338705658912659, "learning_rate": 1.9664467987841632e-05, "loss": 0.9848, "step": 413 }, { "epoch": 0.3319967923015237, "grad_norm": 0.6751573085784912, "learning_rate": 1.9662240078503975e-05, "loss": 1.0165, "step": 414 }, { "epoch": 0.33279871692060947, "grad_norm": 0.6512075066566467, "learning_rate": 1.9660004924089644e-05, "loss": 0.9326, "step": 415 }, { "epoch": 0.33360064153969526, "grad_norm": 0.6776466369628906, "learning_rate": 1.965776252627464e-05, "loss": 0.9494, "step": 416 }, { "epoch": 0.33440256615878106, "grad_norm": 0.7271110415458679, "learning_rate": 1.9655512886740383e-05, "loss": 0.9866, "step": 417 }, { "epoch": 0.3352044907778669, "grad_norm": 0.6701963543891907, "learning_rate": 1.9653256007173735e-05, "loss": 0.9433, "step": 418 }, { "epoch": 0.3360064153969527, "grad_norm": 0.7227078080177307, "learning_rate": 1.965099188926698e-05, "loss": 1.0014, "step": 419 }, { "epoch": 0.3368083400160385, "grad_norm": 0.6832866668701172, "learning_rate": 1.964872053471783e-05, "loss": 0.9595, "step": 420 }, { "epoch": 0.3376102646351243, "grad_norm": 0.6797085404396057, "learning_rate": 1.9646441945229424e-05, "loss": 0.9574, "step": 421 }, { "epoch": 0.3384121892542101, "grad_norm": 0.6965078711509705, "learning_rate": 1.9644156122510326e-05, "loss": 0.98, "step": 422 }, { "epoch": 0.3392141138732959, "grad_norm": 0.6841316223144531, "learning_rate": 1.9641863068274523e-05, "loss": 0.9619, "step": 423 }, { "epoch": 0.3400160384923817, "grad_norm": 0.7349586486816406, "learning_rate": 1.9639562784241426e-05, "loss": 0.979, "step": 424 }, { "epoch": 0.3408179631114675, "grad_norm": 0.6306387782096863, "learning_rate": 1.9637255272135863e-05, "loss": 0.9508, "step": 425 }, { "epoch": 0.34161988773055335, "grad_norm": 0.6524477601051331, "learning_rate": 1.9634940533688094e-05, "loss": 0.9172, "step": 426 }, { "epoch": 0.34242181234963914, "grad_norm": 0.6789130568504333, "learning_rate": 1.9632618570633782e-05, "loss": 0.986, "step": 427 }, { "epoch": 0.34322373696872494, "grad_norm": 0.6674696803092957, "learning_rate": 1.9630289384714014e-05, "loss": 0.9511, "step": 428 }, { "epoch": 0.3440256615878107, "grad_norm": 0.703323245048523, "learning_rate": 1.9627952977675292e-05, "loss": 0.9889, "step": 429 }, { "epoch": 0.3448275862068966, "grad_norm": 0.670559823513031, "learning_rate": 1.962560935126954e-05, "loss": 0.9957, "step": 430 }, { "epoch": 0.34562951082598237, "grad_norm": 0.6248535513877869, "learning_rate": 1.962325850725408e-05, "loss": 0.9794, "step": 431 }, { "epoch": 0.34643143544506816, "grad_norm": 0.6565462946891785, "learning_rate": 1.9620900447391663e-05, "loss": 0.951, "step": 432 }, { "epoch": 0.34723336006415395, "grad_norm": 0.6794214844703674, "learning_rate": 1.9618535173450434e-05, "loss": 1.0089, "step": 433 }, { "epoch": 0.3480352846832398, "grad_norm": 0.6835659742355347, "learning_rate": 1.9616162687203966e-05, "loss": 1.0291, "step": 434 }, { "epoch": 0.3488372093023256, "grad_norm": 0.6657028794288635, "learning_rate": 1.9613782990431223e-05, "loss": 0.9599, "step": 435 }, { "epoch": 0.3496391339214114, "grad_norm": 0.6608729362487793, "learning_rate": 1.9611396084916587e-05, "loss": 0.9688, "step": 436 }, { "epoch": 0.3504410585404972, "grad_norm": 0.6747941374778748, "learning_rate": 1.9609001972449834e-05, "loss": 0.9353, "step": 437 }, { "epoch": 0.351242983159583, "grad_norm": 0.6459916234016418, "learning_rate": 1.960660065482616e-05, "loss": 0.9698, "step": 438 }, { "epoch": 0.3520449077786688, "grad_norm": 0.6540493369102478, "learning_rate": 1.9604192133846147e-05, "loss": 0.9672, "step": 439 }, { "epoch": 0.3528468323977546, "grad_norm": 0.6698585152626038, "learning_rate": 1.960177641131579e-05, "loss": 0.9312, "step": 440 }, { "epoch": 0.3536487570168404, "grad_norm": 0.6463971734046936, "learning_rate": 1.959935348904648e-05, "loss": 0.939, "step": 441 }, { "epoch": 0.35445068163592625, "grad_norm": 0.7033871412277222, "learning_rate": 1.9596923368855006e-05, "loss": 0.9928, "step": 442 }, { "epoch": 0.35525260625501204, "grad_norm": 0.6210078597068787, "learning_rate": 1.9594486052563556e-05, "loss": 0.8954, "step": 443 }, { "epoch": 0.35605453087409783, "grad_norm": 0.658398449420929, "learning_rate": 1.959204154199971e-05, "loss": 1.0045, "step": 444 }, { "epoch": 0.3568564554931836, "grad_norm": 0.6751113533973694, "learning_rate": 1.958958983899645e-05, "loss": 0.9873, "step": 445 }, { "epoch": 0.35765838011226947, "grad_norm": 0.6434077024459839, "learning_rate": 1.958713094539214e-05, "loss": 0.9433, "step": 446 }, { "epoch": 0.35846030473135526, "grad_norm": 0.7159045338630676, "learning_rate": 1.958466486303055e-05, "loss": 0.9705, "step": 447 }, { "epoch": 0.35926222935044105, "grad_norm": 0.6778410077095032, "learning_rate": 1.9582191593760825e-05, "loss": 0.9326, "step": 448 }, { "epoch": 0.36006415396952685, "grad_norm": 0.6995593905448914, "learning_rate": 1.957971113943751e-05, "loss": 0.9582, "step": 449 }, { "epoch": 0.3608660785886127, "grad_norm": 0.6641433835029602, "learning_rate": 1.9577223501920532e-05, "loss": 0.9635, "step": 450 }, { "epoch": 0.3616680032076985, "grad_norm": 0.6719247698783875, "learning_rate": 1.957472868307521e-05, "loss": 1.0151, "step": 451 }, { "epoch": 0.3624699278267843, "grad_norm": 0.6560412049293518, "learning_rate": 1.9572226684772243e-05, "loss": 0.9371, "step": 452 }, { "epoch": 0.36327185244587007, "grad_norm": 0.6818994879722595, "learning_rate": 1.956971750888771e-05, "loss": 0.9462, "step": 453 }, { "epoch": 0.3640737770649559, "grad_norm": 0.7130508422851562, "learning_rate": 1.9567201157303086e-05, "loss": 0.9549, "step": 454 }, { "epoch": 0.3648757016840417, "grad_norm": 0.6851775050163269, "learning_rate": 1.956467763190521e-05, "loss": 0.9852, "step": 455 }, { "epoch": 0.3656776263031275, "grad_norm": 0.6840097308158875, "learning_rate": 1.9562146934586307e-05, "loss": 0.9623, "step": 456 }, { "epoch": 0.3664795509222133, "grad_norm": 0.6426949501037598, "learning_rate": 1.955960906724398e-05, "loss": 0.9375, "step": 457 }, { "epoch": 0.36728147554129914, "grad_norm": 0.6146557927131653, "learning_rate": 1.9557064031781216e-05, "loss": 0.9336, "step": 458 }, { "epoch": 0.36808340016038493, "grad_norm": 0.6573811769485474, "learning_rate": 1.9554511830106356e-05, "loss": 0.95, "step": 459 }, { "epoch": 0.3688853247794707, "grad_norm": 0.6667237877845764, "learning_rate": 1.955195246413314e-05, "loss": 0.946, "step": 460 }, { "epoch": 0.3696872493985565, "grad_norm": 0.6584280729293823, "learning_rate": 1.9549385935780664e-05, "loss": 0.9359, "step": 461 }, { "epoch": 0.37048917401764236, "grad_norm": 0.6643354296684265, "learning_rate": 1.9546812246973395e-05, "loss": 0.9396, "step": 462 }, { "epoch": 0.37129109863672816, "grad_norm": 0.6772244572639465, "learning_rate": 1.9544231399641176e-05, "loss": 0.9443, "step": 463 }, { "epoch": 0.37209302325581395, "grad_norm": 0.6740161180496216, "learning_rate": 1.954164339571921e-05, "loss": 0.9429, "step": 464 }, { "epoch": 0.37289494787489974, "grad_norm": 0.70747309923172, "learning_rate": 1.9539048237148078e-05, "loss": 0.923, "step": 465 }, { "epoch": 0.3736968724939856, "grad_norm": 0.6899964809417725, "learning_rate": 1.953644592587371e-05, "loss": 0.9421, "step": 466 }, { "epoch": 0.3744987971130714, "grad_norm": 0.6563026905059814, "learning_rate": 1.953383646384741e-05, "loss": 0.9893, "step": 467 }, { "epoch": 0.37530072173215717, "grad_norm": 0.624575674533844, "learning_rate": 1.953121985302585e-05, "loss": 0.902, "step": 468 }, { "epoch": 0.37610264635124296, "grad_norm": 0.6469770669937134, "learning_rate": 1.952859609537104e-05, "loss": 0.9884, "step": 469 }, { "epoch": 0.3769045709703288, "grad_norm": 0.6481389999389648, "learning_rate": 1.952596519285037e-05, "loss": 0.9554, "step": 470 }, { "epoch": 0.3777064955894146, "grad_norm": 0.65255206823349, "learning_rate": 1.9523327147436585e-05, "loss": 0.9758, "step": 471 }, { "epoch": 0.3785084202085004, "grad_norm": 0.6691866517066956, "learning_rate": 1.9520681961107772e-05, "loss": 0.9768, "step": 472 }, { "epoch": 0.3793103448275862, "grad_norm": 0.6792327165603638, "learning_rate": 1.9518029635847387e-05, "loss": 0.9436, "step": 473 }, { "epoch": 0.38011226944667204, "grad_norm": 0.6820612549781799, "learning_rate": 1.9515370173644235e-05, "loss": 0.9722, "step": 474 }, { "epoch": 0.3809141940657578, "grad_norm": 0.6797659397125244, "learning_rate": 1.9512703576492466e-05, "loss": 1.0122, "step": 475 }, { "epoch": 0.3817161186848436, "grad_norm": 0.6471715569496155, "learning_rate": 1.9510029846391588e-05, "loss": 0.954, "step": 476 }, { "epoch": 0.3825180433039294, "grad_norm": 0.7427453398704529, "learning_rate": 1.9507348985346458e-05, "loss": 0.9461, "step": 477 }, { "epoch": 0.38331996792301526, "grad_norm": 0.7047792077064514, "learning_rate": 1.9504660995367275e-05, "loss": 0.9503, "step": 478 }, { "epoch": 0.38412189254210105, "grad_norm": 0.6744017601013184, "learning_rate": 1.950196587846958e-05, "loss": 0.9848, "step": 479 }, { "epoch": 0.38492381716118684, "grad_norm": 0.7120094895362854, "learning_rate": 1.9499263636674273e-05, "loss": 0.9156, "step": 480 }, { "epoch": 0.38572574178027264, "grad_norm": 0.6583890914916992, "learning_rate": 1.949655427200758e-05, "loss": 0.9404, "step": 481 }, { "epoch": 0.3865276663993585, "grad_norm": 0.7101068496704102, "learning_rate": 1.9493837786501077e-05, "loss": 0.9957, "step": 482 }, { "epoch": 0.3873295910184443, "grad_norm": 0.7440847754478455, "learning_rate": 1.949111418219168e-05, "loss": 1.0279, "step": 483 }, { "epoch": 0.38813151563753007, "grad_norm": 0.7091655135154724, "learning_rate": 1.9488383461121634e-05, "loss": 0.9855, "step": 484 }, { "epoch": 0.38893344025661586, "grad_norm": 0.6298947334289551, "learning_rate": 1.948564562533853e-05, "loss": 0.9564, "step": 485 }, { "epoch": 0.3897353648757017, "grad_norm": 0.6431513428688049, "learning_rate": 1.9482900676895297e-05, "loss": 0.9372, "step": 486 }, { "epoch": 0.3905372894947875, "grad_norm": 0.7604116201400757, "learning_rate": 1.948014861785018e-05, "loss": 0.9654, "step": 487 }, { "epoch": 0.3913392141138733, "grad_norm": 0.652585506439209, "learning_rate": 1.9477389450266768e-05, "loss": 0.9184, "step": 488 }, { "epoch": 0.3921411387329591, "grad_norm": 0.6592057943344116, "learning_rate": 1.9474623176213988e-05, "loss": 0.9951, "step": 489 }, { "epoch": 0.39294306335204493, "grad_norm": 0.7231782674789429, "learning_rate": 1.9471849797766075e-05, "loss": 0.9337, "step": 490 }, { "epoch": 0.3937449879711307, "grad_norm": 0.6437721848487854, "learning_rate": 1.9469069317002614e-05, "loss": 0.9529, "step": 491 }, { "epoch": 0.3945469125902165, "grad_norm": 0.6871363520622253, "learning_rate": 1.9466281736008495e-05, "loss": 1.0073, "step": 492 }, { "epoch": 0.3953488372093023, "grad_norm": 0.6335092782974243, "learning_rate": 1.9463487056873945e-05, "loss": 0.89, "step": 493 }, { "epoch": 0.39615076182838815, "grad_norm": 0.6468705534934998, "learning_rate": 1.946068528169451e-05, "loss": 0.9542, "step": 494 }, { "epoch": 0.39695268644747395, "grad_norm": 0.6464216709136963, "learning_rate": 1.9457876412571053e-05, "loss": 0.926, "step": 495 }, { "epoch": 0.39775461106655974, "grad_norm": 0.6910549998283386, "learning_rate": 1.9455060451609765e-05, "loss": 0.9718, "step": 496 }, { "epoch": 0.39855653568564553, "grad_norm": 0.6526033878326416, "learning_rate": 1.9452237400922142e-05, "loss": 0.9153, "step": 497 }, { "epoch": 0.3993584603047314, "grad_norm": 0.6653629541397095, "learning_rate": 1.9449407262625015e-05, "loss": 0.9803, "step": 498 }, { "epoch": 0.40016038492381717, "grad_norm": 0.6513515710830688, "learning_rate": 1.9446570038840505e-05, "loss": 0.9739, "step": 499 }, { "epoch": 0.40096230954290296, "grad_norm": 0.7147772908210754, "learning_rate": 1.944372573169607e-05, "loss": 1.0026, "step": 500 }, { "epoch": 0.40176423416198875, "grad_norm": 0.6582165360450745, "learning_rate": 1.9440874343324464e-05, "loss": 1.0261, "step": 501 }, { "epoch": 0.4025661587810746, "grad_norm": 0.6714770197868347, "learning_rate": 1.943801587586375e-05, "loss": 0.9979, "step": 502 }, { "epoch": 0.4033680834001604, "grad_norm": 0.6295056939125061, "learning_rate": 1.9435150331457314e-05, "loss": 1.0059, "step": 503 }, { "epoch": 0.4041700080192462, "grad_norm": 0.6907420754432678, "learning_rate": 1.943227771225383e-05, "loss": 0.9456, "step": 504 }, { "epoch": 0.404971932638332, "grad_norm": 0.6090110540390015, "learning_rate": 1.9429398020407292e-05, "loss": 0.9187, "step": 505 }, { "epoch": 0.4057738572574178, "grad_norm": 0.6557995080947876, "learning_rate": 1.9426511258076988e-05, "loss": 0.952, "step": 506 }, { "epoch": 0.4065757818765036, "grad_norm": 0.6791728138923645, "learning_rate": 1.942361742742751e-05, "loss": 0.9657, "step": 507 }, { "epoch": 0.4073777064955894, "grad_norm": 0.6913565993309021, "learning_rate": 1.9420716530628752e-05, "loss": 1.0223, "step": 508 }, { "epoch": 0.4081796311146752, "grad_norm": 0.6940714716911316, "learning_rate": 1.9417808569855907e-05, "loss": 0.9489, "step": 509 }, { "epoch": 0.40898155573376105, "grad_norm": 0.733680009841919, "learning_rate": 1.9414893547289458e-05, "loss": 0.9388, "step": 510 }, { "epoch": 0.40978348035284684, "grad_norm": 0.6628260016441345, "learning_rate": 1.9411971465115197e-05, "loss": 0.9455, "step": 511 }, { "epoch": 0.41058540497193263, "grad_norm": 0.6788282990455627, "learning_rate": 1.940904232552419e-05, "loss": 0.9224, "step": 512 }, { "epoch": 0.4113873295910184, "grad_norm": 0.6449699997901917, "learning_rate": 1.9406106130712813e-05, "loss": 0.9927, "step": 513 }, { "epoch": 0.41218925421010427, "grad_norm": 0.6500270962715149, "learning_rate": 1.9403162882882722e-05, "loss": 0.9647, "step": 514 }, { "epoch": 0.41299117882919006, "grad_norm": 0.6693797707557678, "learning_rate": 1.9400212584240867e-05, "loss": 0.967, "step": 515 }, { "epoch": 0.41379310344827586, "grad_norm": 0.714789628982544, "learning_rate": 1.9397255236999478e-05, "loss": 0.9768, "step": 516 }, { "epoch": 0.41459502806736165, "grad_norm": 0.6399978399276733, "learning_rate": 1.939429084337608e-05, "loss": 0.9542, "step": 517 }, { "epoch": 0.4153969526864475, "grad_norm": 0.644829273223877, "learning_rate": 1.939131940559347e-05, "loss": 0.9968, "step": 518 }, { "epoch": 0.4161988773055333, "grad_norm": 0.7262901067733765, "learning_rate": 1.938834092587974e-05, "loss": 0.9511, "step": 519 }, { "epoch": 0.4170008019246191, "grad_norm": 0.6648424863815308, "learning_rate": 1.938535540646825e-05, "loss": 0.9986, "step": 520 }, { "epoch": 0.41780272654370487, "grad_norm": 0.7087076902389526, "learning_rate": 1.938236284959765e-05, "loss": 0.9664, "step": 521 }, { "epoch": 0.4186046511627907, "grad_norm": 0.7221333384513855, "learning_rate": 1.9379363257511855e-05, "loss": 0.9482, "step": 522 }, { "epoch": 0.4194065757818765, "grad_norm": 0.6906344294548035, "learning_rate": 1.9376356632460063e-05, "loss": 1.0003, "step": 523 }, { "epoch": 0.4202085004009623, "grad_norm": 0.7014548778533936, "learning_rate": 1.9373342976696742e-05, "loss": 0.9728, "step": 524 }, { "epoch": 0.4210104250200481, "grad_norm": 0.6935135722160339, "learning_rate": 1.9370322292481642e-05, "loss": 0.9788, "step": 525 }, { "epoch": 0.42181234963913394, "grad_norm": 0.6556846499443054, "learning_rate": 1.9367294582079768e-05, "loss": 0.982, "step": 526 }, { "epoch": 0.42261427425821974, "grad_norm": 0.6862344145774841, "learning_rate": 1.93642598477614e-05, "loss": 0.9361, "step": 527 }, { "epoch": 0.4234161988773055, "grad_norm": 0.6807497143745422, "learning_rate": 1.9361218091802088e-05, "loss": 0.9717, "step": 528 }, { "epoch": 0.4242181234963913, "grad_norm": 0.646615743637085, "learning_rate": 1.935816931648264e-05, "loss": 0.9416, "step": 529 }, { "epoch": 0.42502004811547717, "grad_norm": 0.646940290927887, "learning_rate": 1.9355113524089137e-05, "loss": 0.952, "step": 530 }, { "epoch": 0.42582197273456296, "grad_norm": 0.7170730233192444, "learning_rate": 1.9352050716912915e-05, "loss": 0.9744, "step": 531 }, { "epoch": 0.42662389735364875, "grad_norm": 0.6803928017616272, "learning_rate": 1.934898089725057e-05, "loss": 0.9409, "step": 532 }, { "epoch": 0.42742582197273454, "grad_norm": 0.6328278183937073, "learning_rate": 1.9345904067403953e-05, "loss": 0.9368, "step": 533 }, { "epoch": 0.4282277465918204, "grad_norm": 0.6864063143730164, "learning_rate": 1.9342820229680185e-05, "loss": 0.9771, "step": 534 }, { "epoch": 0.4290296712109062, "grad_norm": 0.6935616135597229, "learning_rate": 1.9339729386391622e-05, "loss": 0.9774, "step": 535 }, { "epoch": 0.429831595829992, "grad_norm": 0.6815831065177917, "learning_rate": 1.9336631539855895e-05, "loss": 0.9468, "step": 536 }, { "epoch": 0.43063352044907777, "grad_norm": 0.6866287589073181, "learning_rate": 1.9333526692395863e-05, "loss": 0.9433, "step": 537 }, { "epoch": 0.4314354450681636, "grad_norm": 0.7279961109161377, "learning_rate": 1.9330414846339656e-05, "loss": 0.9595, "step": 538 }, { "epoch": 0.4322373696872494, "grad_norm": 0.659054160118103, "learning_rate": 1.9327296004020638e-05, "loss": 0.9593, "step": 539 }, { "epoch": 0.4330392943063352, "grad_norm": 0.6249253749847412, "learning_rate": 1.9324170167777425e-05, "loss": 0.9569, "step": 540 }, { "epoch": 0.433841218925421, "grad_norm": 0.6949421167373657, "learning_rate": 1.9321037339953873e-05, "loss": 0.9529, "step": 541 }, { "epoch": 0.43464314354450684, "grad_norm": 0.7360992431640625, "learning_rate": 1.9317897522899082e-05, "loss": 1.0171, "step": 542 }, { "epoch": 0.43544506816359263, "grad_norm": 0.6973049640655518, "learning_rate": 1.93147507189674e-05, "loss": 0.9597, "step": 543 }, { "epoch": 0.4362469927826784, "grad_norm": 0.6927620768547058, "learning_rate": 1.93115969305184e-05, "loss": 0.9106, "step": 544 }, { "epoch": 0.4370489174017642, "grad_norm": 0.6799963712692261, "learning_rate": 1.9308436159916905e-05, "loss": 0.9958, "step": 545 }, { "epoch": 0.43785084202085006, "grad_norm": 0.6450375914573669, "learning_rate": 1.9305268409532968e-05, "loss": 0.9605, "step": 546 }, { "epoch": 0.43865276663993585, "grad_norm": 0.6617172360420227, "learning_rate": 1.9302093681741874e-05, "loss": 0.9424, "step": 547 }, { "epoch": 0.43945469125902165, "grad_norm": 0.7010754346847534, "learning_rate": 1.9298911978924142e-05, "loss": 0.9857, "step": 548 }, { "epoch": 0.44025661587810744, "grad_norm": 0.665642499923706, "learning_rate": 1.9295723303465523e-05, "loss": 0.9495, "step": 549 }, { "epoch": 0.4410585404971933, "grad_norm": 0.6675366759300232, "learning_rate": 1.9292527657756994e-05, "loss": 0.9411, "step": 550 }, { "epoch": 0.4418604651162791, "grad_norm": 0.6773011684417725, "learning_rate": 1.928932504419476e-05, "loss": 0.9939, "step": 551 }, { "epoch": 0.44266238973536487, "grad_norm": 0.691259503364563, "learning_rate": 1.9286115465180248e-05, "loss": 0.9641, "step": 552 }, { "epoch": 0.44346431435445066, "grad_norm": 0.6108399033546448, "learning_rate": 1.928289892312011e-05, "loss": 0.9077, "step": 553 }, { "epoch": 0.4442662389735365, "grad_norm": 0.6582357287406921, "learning_rate": 1.927967542042622e-05, "loss": 0.9379, "step": 554 }, { "epoch": 0.4450681635926223, "grad_norm": 0.7069655060768127, "learning_rate": 1.9276444959515664e-05, "loss": 0.9621, "step": 555 }, { "epoch": 0.4458700882117081, "grad_norm": 0.6511080265045166, "learning_rate": 1.9273207542810764e-05, "loss": 0.9675, "step": 556 }, { "epoch": 0.4466720128307939, "grad_norm": 0.6380482912063599, "learning_rate": 1.9269963172739033e-05, "loss": 0.9744, "step": 557 }, { "epoch": 0.44747393744987973, "grad_norm": 0.6568742394447327, "learning_rate": 1.9266711851733214e-05, "loss": 0.9644, "step": 558 }, { "epoch": 0.4482758620689655, "grad_norm": 0.6376577019691467, "learning_rate": 1.9263453582231265e-05, "loss": 0.9969, "step": 559 }, { "epoch": 0.4490777866880513, "grad_norm": 0.6453221440315247, "learning_rate": 1.9260188366676337e-05, "loss": 0.9894, "step": 560 }, { "epoch": 0.4498797113071371, "grad_norm": 0.6480368375778198, "learning_rate": 1.9256916207516806e-05, "loss": 0.9315, "step": 561 }, { "epoch": 0.45068163592622296, "grad_norm": 0.6618868708610535, "learning_rate": 1.9253637107206246e-05, "loss": 0.9886, "step": 562 }, { "epoch": 0.45148356054530875, "grad_norm": 0.646225094795227, "learning_rate": 1.9250351068203442e-05, "loss": 0.9983, "step": 563 }, { "epoch": 0.45228548516439454, "grad_norm": 0.6107761859893799, "learning_rate": 1.9247058092972372e-05, "loss": 0.9496, "step": 564 }, { "epoch": 0.45308740978348033, "grad_norm": 0.6536424160003662, "learning_rate": 1.9243758183982226e-05, "loss": 0.9751, "step": 565 }, { "epoch": 0.4538893344025662, "grad_norm": 0.5984099507331848, "learning_rate": 1.9240451343707382e-05, "loss": 0.9534, "step": 566 }, { "epoch": 0.454691259021652, "grad_norm": 0.622818112373352, "learning_rate": 1.9237137574627433e-05, "loss": 0.9064, "step": 567 }, { "epoch": 0.45549318364073776, "grad_norm": 0.6724113821983337, "learning_rate": 1.923381687922714e-05, "loss": 0.9416, "step": 568 }, { "epoch": 0.45629510825982356, "grad_norm": 0.6443886160850525, "learning_rate": 1.9230489259996487e-05, "loss": 0.9413, "step": 569 }, { "epoch": 0.4570970328789094, "grad_norm": 0.6603150963783264, "learning_rate": 1.922715471943063e-05, "loss": 0.9813, "step": 570 }, { "epoch": 0.4578989574979952, "grad_norm": 0.642634928226471, "learning_rate": 1.9223813260029922e-05, "loss": 0.9405, "step": 571 }, { "epoch": 0.458700882117081, "grad_norm": 0.668830931186676, "learning_rate": 1.92204648842999e-05, "loss": 0.9891, "step": 572 }, { "epoch": 0.4595028067361668, "grad_norm": 0.617743968963623, "learning_rate": 1.9217109594751303e-05, "loss": 0.971, "step": 573 }, { "epoch": 0.4603047313552526, "grad_norm": 0.6333216428756714, "learning_rate": 1.9213747393900025e-05, "loss": 0.9542, "step": 574 }, { "epoch": 0.4611066559743384, "grad_norm": 0.6373317241668701, "learning_rate": 1.9210378284267166e-05, "loss": 0.9329, "step": 575 }, { "epoch": 0.4619085805934242, "grad_norm": 0.617574155330658, "learning_rate": 1.9207002268378998e-05, "loss": 0.9708, "step": 576 }, { "epoch": 0.46271050521251, "grad_norm": 0.6191926002502441, "learning_rate": 1.9203619348766974e-05, "loss": 0.9154, "step": 577 }, { "epoch": 0.46351242983159585, "grad_norm": 0.6222400069236755, "learning_rate": 1.9200229527967724e-05, "loss": 0.9354, "step": 578 }, { "epoch": 0.46431435445068164, "grad_norm": 0.6831260919570923, "learning_rate": 1.9196832808523048e-05, "loss": 0.9424, "step": 579 }, { "epoch": 0.46511627906976744, "grad_norm": 0.6363519430160522, "learning_rate": 1.919342919297992e-05, "loss": 0.9589, "step": 580 }, { "epoch": 0.4659182036888532, "grad_norm": 0.6219954490661621, "learning_rate": 1.9190018683890492e-05, "loss": 0.9204, "step": 581 }, { "epoch": 0.4667201283079391, "grad_norm": 0.6711027026176453, "learning_rate": 1.9186601283812077e-05, "loss": 0.9249, "step": 582 }, { "epoch": 0.46752205292702487, "grad_norm": 0.656484067440033, "learning_rate": 1.9183176995307156e-05, "loss": 0.9821, "step": 583 }, { "epoch": 0.46832397754611066, "grad_norm": 0.6418080925941467, "learning_rate": 1.9179745820943382e-05, "loss": 0.9759, "step": 584 }, { "epoch": 0.46912590216519645, "grad_norm": 0.7414655089378357, "learning_rate": 1.9176307763293563e-05, "loss": 0.9328, "step": 585 }, { "epoch": 0.4699278267842823, "grad_norm": 0.634429931640625, "learning_rate": 1.9172862824935677e-05, "loss": 0.918, "step": 586 }, { "epoch": 0.4707297514033681, "grad_norm": 0.6168124675750732, "learning_rate": 1.9169411008452847e-05, "loss": 0.9247, "step": 587 }, { "epoch": 0.4715316760224539, "grad_norm": 0.6918452978134155, "learning_rate": 1.9165952316433367e-05, "loss": 0.9379, "step": 588 }, { "epoch": 0.4723336006415397, "grad_norm": 0.6637231111526489, "learning_rate": 1.9162486751470687e-05, "loss": 0.9685, "step": 589 }, { "epoch": 0.4731355252606255, "grad_norm": 0.6197507381439209, "learning_rate": 1.9159014316163395e-05, "loss": 0.9876, "step": 590 }, { "epoch": 0.4739374498797113, "grad_norm": 0.6182752251625061, "learning_rate": 1.915553501311525e-05, "loss": 0.9288, "step": 591 }, { "epoch": 0.4747393744987971, "grad_norm": 0.6498412489891052, "learning_rate": 1.9152048844935152e-05, "loss": 0.9284, "step": 592 }, { "epoch": 0.4755412991178829, "grad_norm": 0.6597406268119812, "learning_rate": 1.914855581423714e-05, "loss": 0.9159, "step": 593 }, { "epoch": 0.47634322373696875, "grad_norm": 0.6668150424957275, "learning_rate": 1.9145055923640417e-05, "loss": 0.9473, "step": 594 }, { "epoch": 0.47714514835605454, "grad_norm": 0.7026738524436951, "learning_rate": 1.9141549175769315e-05, "loss": 0.9343, "step": 595 }, { "epoch": 0.47794707297514033, "grad_norm": 0.7704558372497559, "learning_rate": 1.9138035573253316e-05, "loss": 0.9569, "step": 596 }, { "epoch": 0.4787489975942261, "grad_norm": 0.6594985723495483, "learning_rate": 1.9134515118727035e-05, "loss": 0.9666, "step": 597 }, { "epoch": 0.47955092221331197, "grad_norm": 0.6233870387077332, "learning_rate": 1.913098781483023e-05, "loss": 0.9473, "step": 598 }, { "epoch": 0.48035284683239776, "grad_norm": 0.6997066736221313, "learning_rate": 1.9127453664207798e-05, "loss": 0.8946, "step": 599 }, { "epoch": 0.48115477145148355, "grad_norm": 0.6761658191680908, "learning_rate": 1.912391266950976e-05, "loss": 0.9911, "step": 600 }, { "epoch": 0.48195669607056935, "grad_norm": 0.6300480365753174, "learning_rate": 1.9120364833391277e-05, "loss": 0.9955, "step": 601 }, { "epoch": 0.4827586206896552, "grad_norm": 0.6605967283248901, "learning_rate": 1.9116810158512635e-05, "loss": 0.9853, "step": 602 }, { "epoch": 0.483560545308741, "grad_norm": 0.6040114164352417, "learning_rate": 1.9113248647539253e-05, "loss": 0.9011, "step": 603 }, { "epoch": 0.4843624699278268, "grad_norm": 0.6693778038024902, "learning_rate": 1.9109680303141673e-05, "loss": 0.9038, "step": 604 }, { "epoch": 0.48516439454691257, "grad_norm": 0.6784869432449341, "learning_rate": 1.910610512799556e-05, "loss": 0.9332, "step": 605 }, { "epoch": 0.4859663191659984, "grad_norm": 0.6835043430328369, "learning_rate": 1.91025231247817e-05, "loss": 0.985, "step": 606 }, { "epoch": 0.4867682437850842, "grad_norm": 0.6370753645896912, "learning_rate": 1.9098934296186006e-05, "loss": 1.0014, "step": 607 }, { "epoch": 0.48757016840417, "grad_norm": 0.7216833233833313, "learning_rate": 1.9095338644899502e-05, "loss": 0.948, "step": 608 }, { "epoch": 0.4883720930232558, "grad_norm": 0.6614647507667542, "learning_rate": 1.9091736173618326e-05, "loss": 0.9399, "step": 609 }, { "epoch": 0.48917401764234164, "grad_norm": 0.6034402251243591, "learning_rate": 1.908812688504374e-05, "loss": 0.9501, "step": 610 }, { "epoch": 0.48997594226142743, "grad_norm": 0.628848135471344, "learning_rate": 1.9084510781882108e-05, "loss": 0.9393, "step": 611 }, { "epoch": 0.4907778668805132, "grad_norm": 0.5977146625518799, "learning_rate": 1.9080887866844902e-05, "loss": 0.9689, "step": 612 }, { "epoch": 0.491579791499599, "grad_norm": 0.6800901889801025, "learning_rate": 1.907725814264872e-05, "loss": 0.9777, "step": 613 }, { "epoch": 0.49238171611868486, "grad_norm": 0.6149044036865234, "learning_rate": 1.9073621612015244e-05, "loss": 0.9549, "step": 614 }, { "epoch": 0.49318364073777066, "grad_norm": 0.7120502591133118, "learning_rate": 1.9069978277671266e-05, "loss": 0.9653, "step": 615 }, { "epoch": 0.49398556535685645, "grad_norm": 0.59898442029953, "learning_rate": 1.906632814234869e-05, "loss": 0.9387, "step": 616 }, { "epoch": 0.49478748997594224, "grad_norm": 0.6274296045303345, "learning_rate": 1.9062671208784508e-05, "loss": 0.9482, "step": 617 }, { "epoch": 0.4955894145950281, "grad_norm": 0.6537023186683655, "learning_rate": 1.9059007479720807e-05, "loss": 0.9233, "step": 618 }, { "epoch": 0.4963913392141139, "grad_norm": 0.6578821539878845, "learning_rate": 1.905533695790479e-05, "loss": 0.9676, "step": 619 }, { "epoch": 0.4971932638331997, "grad_norm": 0.6332679986953735, "learning_rate": 1.9051659646088726e-05, "loss": 0.9104, "step": 620 }, { "epoch": 0.49799518845228546, "grad_norm": 0.66425621509552, "learning_rate": 1.9047975547029998e-05, "loss": 0.9788, "step": 621 }, { "epoch": 0.4987971130713713, "grad_norm": 0.6680029630661011, "learning_rate": 1.9044284663491065e-05, "loss": 0.9555, "step": 622 }, { "epoch": 0.4995990376904571, "grad_norm": 0.6043557524681091, "learning_rate": 1.9040586998239472e-05, "loss": 0.988, "step": 623 }, { "epoch": 0.5004009623095429, "grad_norm": 0.6627247929573059, "learning_rate": 1.903688255404786e-05, "loss": 0.953, "step": 624 }, { "epoch": 0.5012028869286287, "grad_norm": 0.6448099613189697, "learning_rate": 1.9033171333693952e-05, "loss": 0.9308, "step": 625 }, { "epoch": 0.5020048115477145, "grad_norm": 0.5838706493377686, "learning_rate": 1.902945333996054e-05, "loss": 0.9421, "step": 626 }, { "epoch": 0.5028067361668003, "grad_norm": 0.6396023631095886, "learning_rate": 1.9025728575635503e-05, "loss": 0.9472, "step": 627 }, { "epoch": 0.5036086607858862, "grad_norm": 0.5953710675239563, "learning_rate": 1.9021997043511798e-05, "loss": 0.9113, "step": 628 }, { "epoch": 0.504410585404972, "grad_norm": 0.7014410495758057, "learning_rate": 1.9018258746387458e-05, "loss": 0.9839, "step": 629 }, { "epoch": 0.5052125100240578, "grad_norm": 0.6346995830535889, "learning_rate": 1.901451368706558e-05, "loss": 0.9552, "step": 630 }, { "epoch": 0.5060144346431436, "grad_norm": 0.6501613855361938, "learning_rate": 1.9010761868354336e-05, "loss": 0.9407, "step": 631 }, { "epoch": 0.5068163592622293, "grad_norm": 0.7061483860015869, "learning_rate": 1.9007003293066973e-05, "loss": 0.9881, "step": 632 }, { "epoch": 0.5076182838813151, "grad_norm": 0.6285912394523621, "learning_rate": 1.9003237964021796e-05, "loss": 0.9514, "step": 633 }, { "epoch": 0.5084202085004009, "grad_norm": 0.7684087753295898, "learning_rate": 1.899946588404218e-05, "loss": 0.9336, "step": 634 }, { "epoch": 0.5092221331194867, "grad_norm": 0.7490344047546387, "learning_rate": 1.8995687055956555e-05, "loss": 0.8914, "step": 635 }, { "epoch": 0.5100240577385726, "grad_norm": 0.8029311299324036, "learning_rate": 1.8991901482598414e-05, "loss": 0.9701, "step": 636 }, { "epoch": 0.5108259823576584, "grad_norm": 0.6485514044761658, "learning_rate": 1.8988109166806313e-05, "loss": 0.9437, "step": 637 }, { "epoch": 0.5116279069767442, "grad_norm": 0.6395050883293152, "learning_rate": 1.8984310111423855e-05, "loss": 0.9561, "step": 638 }, { "epoch": 0.51242983159583, "grad_norm": 0.6431874632835388, "learning_rate": 1.8980504319299705e-05, "loss": 0.9247, "step": 639 }, { "epoch": 0.5132317562149158, "grad_norm": 0.675888180732727, "learning_rate": 1.8976691793287575e-05, "loss": 0.9203, "step": 640 }, { "epoch": 0.5140336808340016, "grad_norm": 0.6630160212516785, "learning_rate": 1.8972872536246224e-05, "loss": 0.9709, "step": 641 }, { "epoch": 0.5148356054530874, "grad_norm": 0.6319396495819092, "learning_rate": 1.8969046551039466e-05, "loss": 0.987, "step": 642 }, { "epoch": 0.5156375300721732, "grad_norm": 0.6689966320991516, "learning_rate": 1.8965213840536152e-05, "loss": 0.9802, "step": 643 }, { "epoch": 0.5164394546912591, "grad_norm": 0.6527170538902283, "learning_rate": 1.8961374407610177e-05, "loss": 0.9682, "step": 644 }, { "epoch": 0.5172413793103449, "grad_norm": 0.5882049202919006, "learning_rate": 1.8957528255140482e-05, "loss": 0.9256, "step": 645 }, { "epoch": 0.5180433039294307, "grad_norm": 0.6243289709091187, "learning_rate": 1.895367538601104e-05, "loss": 0.9512, "step": 646 }, { "epoch": 0.5188452285485164, "grad_norm": 0.6396244764328003, "learning_rate": 1.894981580311087e-05, "loss": 0.9402, "step": 647 }, { "epoch": 0.5196471531676022, "grad_norm": 0.6135784387588501, "learning_rate": 1.8945949509334008e-05, "loss": 0.9745, "step": 648 }, { "epoch": 0.520449077786688, "grad_norm": 0.6294798851013184, "learning_rate": 1.894207650757954e-05, "loss": 0.929, "step": 649 }, { "epoch": 0.5212510024057738, "grad_norm": 0.6499471664428711, "learning_rate": 1.8938196800751575e-05, "loss": 0.9595, "step": 650 }, { "epoch": 0.5220529270248596, "grad_norm": 0.6707350611686707, "learning_rate": 1.8934310391759247e-05, "loss": 0.9328, "step": 651 }, { "epoch": 0.5228548516439455, "grad_norm": 0.6414554715156555, "learning_rate": 1.8930417283516717e-05, "loss": 0.8878, "step": 652 }, { "epoch": 0.5236567762630313, "grad_norm": 0.6393246650695801, "learning_rate": 1.892651747894317e-05, "loss": 0.9511, "step": 653 }, { "epoch": 0.5244587008821171, "grad_norm": 0.658134937286377, "learning_rate": 1.892261098096282e-05, "loss": 0.9845, "step": 654 }, { "epoch": 0.5252606255012029, "grad_norm": 0.6066871881484985, "learning_rate": 1.891869779250488e-05, "loss": 0.9655, "step": 655 }, { "epoch": 0.5260625501202887, "grad_norm": 0.6272629499435425, "learning_rate": 1.8914777916503602e-05, "loss": 0.9605, "step": 656 }, { "epoch": 0.5268644747393745, "grad_norm": 0.6052728295326233, "learning_rate": 1.8910851355898238e-05, "loss": 0.8884, "step": 657 }, { "epoch": 0.5276663993584603, "grad_norm": 0.6381072998046875, "learning_rate": 1.8906918113633054e-05, "loss": 0.9684, "step": 658 }, { "epoch": 0.5284683239775461, "grad_norm": 0.6366999745368958, "learning_rate": 1.8902978192657334e-05, "loss": 0.8999, "step": 659 }, { "epoch": 0.529270248596632, "grad_norm": 0.6535215377807617, "learning_rate": 1.8899031595925362e-05, "loss": 0.9436, "step": 660 }, { "epoch": 0.5300721732157178, "grad_norm": 0.6399298310279846, "learning_rate": 1.8895078326396436e-05, "loss": 0.9122, "step": 661 }, { "epoch": 0.5308740978348035, "grad_norm": 0.6174817681312561, "learning_rate": 1.8891118387034845e-05, "loss": 0.9312, "step": 662 }, { "epoch": 0.5316760224538893, "grad_norm": 0.6312207579612732, "learning_rate": 1.888715178080989e-05, "loss": 0.9274, "step": 663 }, { "epoch": 0.5324779470729751, "grad_norm": 0.6061504483222961, "learning_rate": 1.8883178510695868e-05, "loss": 0.9038, "step": 664 }, { "epoch": 0.5332798716920609, "grad_norm": 0.62549889087677, "learning_rate": 1.8879198579672068e-05, "loss": 0.9193, "step": 665 }, { "epoch": 0.5340817963111467, "grad_norm": 0.6522451043128967, "learning_rate": 1.8875211990722785e-05, "loss": 0.931, "step": 666 }, { "epoch": 0.5348837209302325, "grad_norm": 0.6099725365638733, "learning_rate": 1.8871218746837294e-05, "loss": 0.9345, "step": 667 }, { "epoch": 0.5356856455493184, "grad_norm": 0.6159772872924805, "learning_rate": 1.8867218851009862e-05, "loss": 0.9469, "step": 668 }, { "epoch": 0.5364875701684042, "grad_norm": 0.6051928400993347, "learning_rate": 1.8863212306239753e-05, "loss": 0.8725, "step": 669 }, { "epoch": 0.53728949478749, "grad_norm": 0.5804814100265503, "learning_rate": 1.8859199115531213e-05, "loss": 0.9943, "step": 670 }, { "epoch": 0.5380914194065758, "grad_norm": 0.6454379558563232, "learning_rate": 1.8855179281893464e-05, "loss": 1.008, "step": 671 }, { "epoch": 0.5388933440256616, "grad_norm": 0.5961251258850098, "learning_rate": 1.8851152808340715e-05, "loss": 0.9135, "step": 672 }, { "epoch": 0.5396952686447474, "grad_norm": 0.644010066986084, "learning_rate": 1.884711969789215e-05, "loss": 0.9762, "step": 673 }, { "epoch": 0.5404971932638332, "grad_norm": 0.6359036564826965, "learning_rate": 1.884307995357194e-05, "loss": 0.9054, "step": 674 }, { "epoch": 0.541299117882919, "grad_norm": 0.5981766581535339, "learning_rate": 1.883903357840922e-05, "loss": 0.9705, "step": 675 }, { "epoch": 0.5421010425020049, "grad_norm": 0.6233227849006653, "learning_rate": 1.8834980575438094e-05, "loss": 0.9594, "step": 676 }, { "epoch": 0.5429029671210907, "grad_norm": 0.6139412522315979, "learning_rate": 1.883092094769765e-05, "loss": 0.9626, "step": 677 }, { "epoch": 0.5437048917401764, "grad_norm": 0.6309959292411804, "learning_rate": 1.882685469823193e-05, "loss": 0.9812, "step": 678 }, { "epoch": 0.5445068163592622, "grad_norm": 0.6182360649108887, "learning_rate": 1.882278183008995e-05, "loss": 0.9537, "step": 679 }, { "epoch": 0.545308740978348, "grad_norm": 0.6408948302268982, "learning_rate": 1.881870234632568e-05, "loss": 0.9611, "step": 680 }, { "epoch": 0.5461106655974338, "grad_norm": 0.6162562966346741, "learning_rate": 1.8814616249998063e-05, "loss": 0.9661, "step": 681 }, { "epoch": 0.5469125902165196, "grad_norm": 0.6035715341567993, "learning_rate": 1.8810523544170986e-05, "loss": 0.9394, "step": 682 }, { "epoch": 0.5477145148356054, "grad_norm": 0.644282877445221, "learning_rate": 1.88064242319133e-05, "loss": 0.9357, "step": 683 }, { "epoch": 0.5485164394546913, "grad_norm": 0.6356081962585449, "learning_rate": 1.8802318316298817e-05, "loss": 0.9142, "step": 684 }, { "epoch": 0.5493183640737771, "grad_norm": 0.6344892978668213, "learning_rate": 1.8798205800406283e-05, "loss": 0.928, "step": 685 }, { "epoch": 0.5501202886928629, "grad_norm": 0.6985346674919128, "learning_rate": 1.8794086687319405e-05, "loss": 0.9173, "step": 686 }, { "epoch": 0.5509222133119487, "grad_norm": 0.6084068417549133, "learning_rate": 1.8789960980126836e-05, "loss": 0.9559, "step": 687 }, { "epoch": 0.5517241379310345, "grad_norm": 0.6394224166870117, "learning_rate": 1.8785828681922176e-05, "loss": 0.9761, "step": 688 }, { "epoch": 0.5525260625501203, "grad_norm": 0.637833833694458, "learning_rate": 1.8781689795803954e-05, "loss": 0.9265, "step": 689 }, { "epoch": 0.5533279871692061, "grad_norm": 0.631105899810791, "learning_rate": 1.8777544324875653e-05, "loss": 0.9381, "step": 690 }, { "epoch": 0.5541299117882919, "grad_norm": 0.6532770991325378, "learning_rate": 1.8773392272245687e-05, "loss": 0.938, "step": 691 }, { "epoch": 0.5549318364073778, "grad_norm": 0.653390109539032, "learning_rate": 1.8769233641027406e-05, "loss": 0.9557, "step": 692 }, { "epoch": 0.5557337610264635, "grad_norm": 0.6561689376831055, "learning_rate": 1.8765068434339095e-05, "loss": 0.8861, "step": 693 }, { "epoch": 0.5565356856455493, "grad_norm": 0.6327289342880249, "learning_rate": 1.8760896655303968e-05, "loss": 0.9646, "step": 694 }, { "epoch": 0.5573376102646351, "grad_norm": 0.6778370141983032, "learning_rate": 1.875671830705016e-05, "loss": 0.9892, "step": 695 }, { "epoch": 0.5581395348837209, "grad_norm": 0.6418762803077698, "learning_rate": 1.875253339271075e-05, "loss": 0.9706, "step": 696 }, { "epoch": 0.5589414595028067, "grad_norm": 0.6594840884208679, "learning_rate": 1.8748341915423723e-05, "loss": 0.9193, "step": 697 }, { "epoch": 0.5597433841218925, "grad_norm": 0.6546462178230286, "learning_rate": 1.874414387833199e-05, "loss": 0.9528, "step": 698 }, { "epoch": 0.5605453087409783, "grad_norm": 0.666907548904419, "learning_rate": 1.8739939284583385e-05, "loss": 0.9301, "step": 699 }, { "epoch": 0.5613472333600642, "grad_norm": 0.6767510771751404, "learning_rate": 1.873572813733066e-05, "loss": 0.9624, "step": 700 }, { "epoch": 0.56214915797915, "grad_norm": 0.6574323773384094, "learning_rate": 1.8731510439731465e-05, "loss": 0.9672, "step": 701 }, { "epoch": 0.5629510825982358, "grad_norm": 0.6924127340316772, "learning_rate": 1.872728619494838e-05, "loss": 0.9405, "step": 702 }, { "epoch": 0.5637530072173216, "grad_norm": 0.6515429615974426, "learning_rate": 1.8723055406148894e-05, "loss": 0.9477, "step": 703 }, { "epoch": 0.5645549318364074, "grad_norm": 0.7073892951011658, "learning_rate": 1.8718818076505385e-05, "loss": 0.9403, "step": 704 }, { "epoch": 0.5653568564554932, "grad_norm": 0.706851065158844, "learning_rate": 1.8714574209195153e-05, "loss": 0.9704, "step": 705 }, { "epoch": 0.566158781074579, "grad_norm": 0.624336838722229, "learning_rate": 1.8710323807400393e-05, "loss": 0.9558, "step": 706 }, { "epoch": 0.5669607056936647, "grad_norm": 0.6605740785598755, "learning_rate": 1.8706066874308205e-05, "loss": 0.9467, "step": 707 }, { "epoch": 0.5677626303127506, "grad_norm": 0.7018135190010071, "learning_rate": 1.870180341311057e-05, "loss": 0.9277, "step": 708 }, { "epoch": 0.5685645549318364, "grad_norm": 0.6792058348655701, "learning_rate": 1.8697533427004395e-05, "loss": 0.9706, "step": 709 }, { "epoch": 0.5693664795509222, "grad_norm": 0.6452786326408386, "learning_rate": 1.8693256919191446e-05, "loss": 0.9426, "step": 710 }, { "epoch": 0.570168404170008, "grad_norm": 0.7065607309341431, "learning_rate": 1.8688973892878405e-05, "loss": 0.9299, "step": 711 }, { "epoch": 0.5709703287890938, "grad_norm": 0.6309828758239746, "learning_rate": 1.8684684351276822e-05, "loss": 0.9521, "step": 712 }, { "epoch": 0.5717722534081796, "grad_norm": 0.6651354432106018, "learning_rate": 1.868038829760314e-05, "loss": 0.9899, "step": 713 }, { "epoch": 0.5725741780272654, "grad_norm": 0.6422202587127686, "learning_rate": 1.8676085735078696e-05, "loss": 0.9125, "step": 714 }, { "epoch": 0.5733761026463512, "grad_norm": 0.6563206315040588, "learning_rate": 1.8671776666929694e-05, "loss": 0.9854, "step": 715 }, { "epoch": 0.5741780272654371, "grad_norm": 0.5996401906013489, "learning_rate": 1.8667461096387217e-05, "loss": 0.9754, "step": 716 }, { "epoch": 0.5749799518845229, "grad_norm": 0.5983526706695557, "learning_rate": 1.866313902668723e-05, "loss": 0.9524, "step": 717 }, { "epoch": 0.5757818765036087, "grad_norm": 0.5966793298721313, "learning_rate": 1.8658810461070566e-05, "loss": 0.8934, "step": 718 }, { "epoch": 0.5765838011226945, "grad_norm": 0.636679470539093, "learning_rate": 1.865447540278293e-05, "loss": 0.9368, "step": 719 }, { "epoch": 0.5773857257417803, "grad_norm": 0.6102825403213501, "learning_rate": 1.8650133855074905e-05, "loss": 0.9498, "step": 720 }, { "epoch": 0.5781876503608661, "grad_norm": 0.6652585864067078, "learning_rate": 1.8645785821201918e-05, "loss": 0.9235, "step": 721 }, { "epoch": 0.5789895749799518, "grad_norm": 0.6623063087463379, "learning_rate": 1.864143130442428e-05, "loss": 0.965, "step": 722 }, { "epoch": 0.5797914995990376, "grad_norm": 0.6263592839241028, "learning_rate": 1.8637070308007156e-05, "loss": 0.9354, "step": 723 }, { "epoch": 0.5805934242181235, "grad_norm": 0.6858724355697632, "learning_rate": 1.8632702835220572e-05, "loss": 0.9467, "step": 724 }, { "epoch": 0.5813953488372093, "grad_norm": 0.63621586561203, "learning_rate": 1.8628328889339403e-05, "loss": 0.8885, "step": 725 }, { "epoch": 0.5821972734562951, "grad_norm": 0.629024088382721, "learning_rate": 1.8623948473643383e-05, "loss": 0.9344, "step": 726 }, { "epoch": 0.5829991980753809, "grad_norm": 0.6625981330871582, "learning_rate": 1.86195615914171e-05, "loss": 0.9756, "step": 727 }, { "epoch": 0.5838011226944667, "grad_norm": 0.6435332894325256, "learning_rate": 1.8615168245949982e-05, "loss": 0.9895, "step": 728 }, { "epoch": 0.5846030473135525, "grad_norm": 0.6450731158256531, "learning_rate": 1.8610768440536317e-05, "loss": 0.9327, "step": 729 }, { "epoch": 0.5854049719326383, "grad_norm": 0.6825403571128845, "learning_rate": 1.8606362178475227e-05, "loss": 0.961, "step": 730 }, { "epoch": 0.5862068965517241, "grad_norm": 0.6117799878120422, "learning_rate": 1.860194946307067e-05, "loss": 0.9043, "step": 731 }, { "epoch": 0.58700882117081, "grad_norm": 0.6143025159835815, "learning_rate": 1.859753029763146e-05, "loss": 0.993, "step": 732 }, { "epoch": 0.5878107457898958, "grad_norm": 0.5972070693969727, "learning_rate": 1.859310468547123e-05, "loss": 0.9069, "step": 733 }, { "epoch": 0.5886126704089816, "grad_norm": 0.6459053158760071, "learning_rate": 1.8588672629908462e-05, "loss": 0.9822, "step": 734 }, { "epoch": 0.5894145950280674, "grad_norm": 0.674164891242981, "learning_rate": 1.8584234134266456e-05, "loss": 0.9833, "step": 735 }, { "epoch": 0.5902165196471532, "grad_norm": 0.6549596190452576, "learning_rate": 1.857978920187335e-05, "loss": 0.9851, "step": 736 }, { "epoch": 0.591018444266239, "grad_norm": 0.621340811252594, "learning_rate": 1.85753378360621e-05, "loss": 0.9541, "step": 737 }, { "epoch": 0.5918203688853247, "grad_norm": 0.652487576007843, "learning_rate": 1.8570880040170504e-05, "loss": 0.9206, "step": 738 }, { "epoch": 0.5926222935044105, "grad_norm": 0.6780267953872681, "learning_rate": 1.8566415817541157e-05, "loss": 0.9676, "step": 739 }, { "epoch": 0.5934242181234964, "grad_norm": 0.6120235323905945, "learning_rate": 1.8561945171521498e-05, "loss": 0.9223, "step": 740 }, { "epoch": 0.5942261427425822, "grad_norm": 0.6822912096977234, "learning_rate": 1.8557468105463753e-05, "loss": 0.9164, "step": 741 }, { "epoch": 0.595028067361668, "grad_norm": 0.6549542546272278, "learning_rate": 1.855298462272499e-05, "loss": 0.9028, "step": 742 }, { "epoch": 0.5958299919807538, "grad_norm": 0.6103249788284302, "learning_rate": 1.8548494726667076e-05, "loss": 0.9741, "step": 743 }, { "epoch": 0.5966319165998396, "grad_norm": 0.6277962923049927, "learning_rate": 1.8543998420656686e-05, "loss": 0.9629, "step": 744 }, { "epoch": 0.5974338412189254, "grad_norm": 0.6683188676834106, "learning_rate": 1.8539495708065304e-05, "loss": 1.0021, "step": 745 }, { "epoch": 0.5982357658380112, "grad_norm": 0.621095597743988, "learning_rate": 1.8534986592269218e-05, "loss": 0.9854, "step": 746 }, { "epoch": 0.599037690457097, "grad_norm": 0.6299651861190796, "learning_rate": 1.853047107664951e-05, "loss": 0.966, "step": 747 }, { "epoch": 0.5998396150761829, "grad_norm": 0.7200894355773926, "learning_rate": 1.852594916459208e-05, "loss": 0.9201, "step": 748 }, { "epoch": 0.6006415396952687, "grad_norm": 0.6269078850746155, "learning_rate": 1.85214208594876e-05, "loss": 1.0063, "step": 749 }, { "epoch": 0.6014434643143545, "grad_norm": 0.5880782008171082, "learning_rate": 1.8516886164731554e-05, "loss": 0.9167, "step": 750 }, { "epoch": 0.6022453889334403, "grad_norm": 0.6221625208854675, "learning_rate": 1.851234508372421e-05, "loss": 0.9314, "step": 751 }, { "epoch": 0.603047313552526, "grad_norm": 0.6242570281028748, "learning_rate": 1.850779761987062e-05, "loss": 0.9383, "step": 752 }, { "epoch": 0.6038492381716118, "grad_norm": 0.6036713719367981, "learning_rate": 1.8503243776580637e-05, "loss": 0.9046, "step": 753 }, { "epoch": 0.6046511627906976, "grad_norm": 0.6600368022918701, "learning_rate": 1.8498683557268878e-05, "loss": 0.9427, "step": 754 }, { "epoch": 0.6054530874097834, "grad_norm": 0.6118487119674683, "learning_rate": 1.8494116965354756e-05, "loss": 0.9301, "step": 755 }, { "epoch": 0.6062550120288693, "grad_norm": 0.6600939035415649, "learning_rate": 1.8489544004262456e-05, "loss": 0.9867, "step": 756 }, { "epoch": 0.6070569366479551, "grad_norm": 0.6410656571388245, "learning_rate": 1.8484964677420937e-05, "loss": 0.904, "step": 757 }, { "epoch": 0.6078588612670409, "grad_norm": 0.6048609614372253, "learning_rate": 1.848037898826394e-05, "loss": 0.9244, "step": 758 }, { "epoch": 0.6086607858861267, "grad_norm": 0.600308895111084, "learning_rate": 1.8475786940229965e-05, "loss": 0.9042, "step": 759 }, { "epoch": 0.6094627105052125, "grad_norm": 0.6293653249740601, "learning_rate": 1.847118853676229e-05, "loss": 1.0067, "step": 760 }, { "epoch": 0.6102646351242983, "grad_norm": 0.6423448324203491, "learning_rate": 1.8466583781308954e-05, "loss": 0.9437, "step": 761 }, { "epoch": 0.6110665597433841, "grad_norm": 0.591410756111145, "learning_rate": 1.846197267732276e-05, "loss": 0.8932, "step": 762 }, { "epoch": 0.6118684843624699, "grad_norm": 0.602726936340332, "learning_rate": 1.845735522826127e-05, "loss": 0.8843, "step": 763 }, { "epoch": 0.6126704089815558, "grad_norm": 0.6235020756721497, "learning_rate": 1.84527314375868e-05, "loss": 0.9544, "step": 764 }, { "epoch": 0.6134723336006416, "grad_norm": 0.6325739622116089, "learning_rate": 1.8448101308766433e-05, "loss": 0.8938, "step": 765 }, { "epoch": 0.6142742582197274, "grad_norm": 0.6697767972946167, "learning_rate": 1.8443464845271995e-05, "loss": 0.9345, "step": 766 }, { "epoch": 0.6150761828388132, "grad_norm": 0.6331246495246887, "learning_rate": 1.843882205058006e-05, "loss": 0.9425, "step": 767 }, { "epoch": 0.615878107457899, "grad_norm": 0.7046418190002441, "learning_rate": 1.8434172928171962e-05, "loss": 0.9709, "step": 768 }, { "epoch": 0.6166800320769847, "grad_norm": 0.7394378185272217, "learning_rate": 1.8429517481533762e-05, "loss": 0.9588, "step": 769 }, { "epoch": 0.6174819566960705, "grad_norm": 0.6277191638946533, "learning_rate": 1.8424855714156277e-05, "loss": 0.9141, "step": 770 }, { "epoch": 0.6182838813151563, "grad_norm": 0.6583722233772278, "learning_rate": 1.842018762953506e-05, "loss": 0.9488, "step": 771 }, { "epoch": 0.6190858059342422, "grad_norm": 0.6868898272514343, "learning_rate": 1.8415513231170398e-05, "loss": 0.9369, "step": 772 }, { "epoch": 0.619887730553328, "grad_norm": 0.6717788577079773, "learning_rate": 1.8410832522567318e-05, "loss": 0.9142, "step": 773 }, { "epoch": 0.6206896551724138, "grad_norm": 0.5902653932571411, "learning_rate": 1.8406145507235566e-05, "loss": 0.8938, "step": 774 }, { "epoch": 0.6214915797914996, "grad_norm": 0.644224226474762, "learning_rate": 1.8401452188689635e-05, "loss": 0.9601, "step": 775 }, { "epoch": 0.6222935044105854, "grad_norm": 0.7199499607086182, "learning_rate": 1.839675257044873e-05, "loss": 0.9192, "step": 776 }, { "epoch": 0.6230954290296712, "grad_norm": 0.7300478219985962, "learning_rate": 1.8392046656036788e-05, "loss": 0.9351, "step": 777 }, { "epoch": 0.623897353648757, "grad_norm": 0.7216119170188904, "learning_rate": 1.8387334448982454e-05, "loss": 0.9561, "step": 778 }, { "epoch": 0.6246992782678428, "grad_norm": 0.6239175200462341, "learning_rate": 1.8382615952819116e-05, "loss": 0.9391, "step": 779 }, { "epoch": 0.6255012028869287, "grad_norm": 0.6322103142738342, "learning_rate": 1.8377891171084858e-05, "loss": 0.998, "step": 780 }, { "epoch": 0.6263031275060145, "grad_norm": 0.681839644908905, "learning_rate": 1.8373160107322476e-05, "loss": 0.9308, "step": 781 }, { "epoch": 0.6271050521251003, "grad_norm": 0.6046080589294434, "learning_rate": 1.8368422765079486e-05, "loss": 0.9486, "step": 782 }, { "epoch": 0.627906976744186, "grad_norm": 0.675331711769104, "learning_rate": 1.8363679147908115e-05, "loss": 0.907, "step": 783 }, { "epoch": 0.6287089013632718, "grad_norm": 0.6665434241294861, "learning_rate": 1.835892925936528e-05, "loss": 0.9345, "step": 784 }, { "epoch": 0.6295108259823576, "grad_norm": 0.6315925717353821, "learning_rate": 1.8354173103012614e-05, "loss": 0.9132, "step": 785 }, { "epoch": 0.6303127506014434, "grad_norm": 0.6950697302818298, "learning_rate": 1.8349410682416442e-05, "loss": 0.8736, "step": 786 }, { "epoch": 0.6311146752205292, "grad_norm": 0.6428248286247253, "learning_rate": 1.8344642001147793e-05, "loss": 0.9271, "step": 787 }, { "epoch": 0.6319165998396151, "grad_norm": 0.6300097107887268, "learning_rate": 1.8339867062782384e-05, "loss": 0.9271, "step": 788 }, { "epoch": 0.6327185244587009, "grad_norm": 0.6257496476173401, "learning_rate": 1.8335085870900627e-05, "loss": 0.9489, "step": 789 }, { "epoch": 0.6335204490777867, "grad_norm": 0.5959362983703613, "learning_rate": 1.8330298429087624e-05, "loss": 0.926, "step": 790 }, { "epoch": 0.6343223736968725, "grad_norm": 0.6299023032188416, "learning_rate": 1.8325504740933157e-05, "loss": 0.948, "step": 791 }, { "epoch": 0.6351242983159583, "grad_norm": 0.632050633430481, "learning_rate": 1.8320704810031702e-05, "loss": 0.9001, "step": 792 }, { "epoch": 0.6359262229350441, "grad_norm": 0.635412335395813, "learning_rate": 1.8315898639982404e-05, "loss": 0.8965, "step": 793 }, { "epoch": 0.6367281475541299, "grad_norm": 0.5949950218200684, "learning_rate": 1.8311086234389104e-05, "loss": 0.9294, "step": 794 }, { "epoch": 0.6375300721732157, "grad_norm": 0.6535398364067078, "learning_rate": 1.83062675968603e-05, "loss": 0.9333, "step": 795 }, { "epoch": 0.6383319967923016, "grad_norm": 0.6044979095458984, "learning_rate": 1.8301442731009168e-05, "loss": 0.91, "step": 796 }, { "epoch": 0.6391339214113874, "grad_norm": 0.607458770275116, "learning_rate": 1.8296611640453562e-05, "loss": 0.9109, "step": 797 }, { "epoch": 0.6399358460304732, "grad_norm": 0.6543724536895752, "learning_rate": 1.8291774328816e-05, "loss": 0.9502, "step": 798 }, { "epoch": 0.640737770649559, "grad_norm": 0.5994077920913696, "learning_rate": 1.8286930799723658e-05, "loss": 0.8956, "step": 799 }, { "epoch": 0.6415396952686447, "grad_norm": 0.5721734166145325, "learning_rate": 1.828208105680838e-05, "loss": 0.9113, "step": 800 }, { "epoch": 0.6423416198877305, "grad_norm": 0.611034631729126, "learning_rate": 1.827722510370667e-05, "loss": 0.9111, "step": 801 }, { "epoch": 0.6431435445068163, "grad_norm": 0.6357942819595337, "learning_rate": 1.8272362944059684e-05, "loss": 0.9313, "step": 802 }, { "epoch": 0.6439454691259021, "grad_norm": 0.6018952131271362, "learning_rate": 1.8267494581513236e-05, "loss": 0.9279, "step": 803 }, { "epoch": 0.644747393744988, "grad_norm": 0.5941787958145142, "learning_rate": 1.8262620019717794e-05, "loss": 0.9433, "step": 804 }, { "epoch": 0.6455493183640738, "grad_norm": 0.601996123790741, "learning_rate": 1.825773926232847e-05, "loss": 0.9269, "step": 805 }, { "epoch": 0.6463512429831596, "grad_norm": 0.6564491987228394, "learning_rate": 1.8252852313005015e-05, "loss": 0.9359, "step": 806 }, { "epoch": 0.6471531676022454, "grad_norm": 0.6465602517127991, "learning_rate": 1.8247959175411836e-05, "loss": 0.9534, "step": 807 }, { "epoch": 0.6479550922213312, "grad_norm": 0.6189476251602173, "learning_rate": 1.824305985321797e-05, "loss": 0.939, "step": 808 }, { "epoch": 0.648757016840417, "grad_norm": 0.5999793410301208, "learning_rate": 1.8238154350097103e-05, "loss": 0.9447, "step": 809 }, { "epoch": 0.6495589414595028, "grad_norm": 0.6540852785110474, "learning_rate": 1.8233242669727544e-05, "loss": 0.917, "step": 810 }, { "epoch": 0.6503608660785886, "grad_norm": 0.6192000508308411, "learning_rate": 1.8228324815792236e-05, "loss": 0.921, "step": 811 }, { "epoch": 0.6511627906976745, "grad_norm": 0.6083493232727051, "learning_rate": 1.8223400791978756e-05, "loss": 0.9884, "step": 812 }, { "epoch": 0.6519647153167603, "grad_norm": 0.6045847535133362, "learning_rate": 1.8218470601979302e-05, "loss": 0.9191, "step": 813 }, { "epoch": 0.652766639935846, "grad_norm": 0.5809303522109985, "learning_rate": 1.8213534249490706e-05, "loss": 0.9332, "step": 814 }, { "epoch": 0.6535685645549318, "grad_norm": 0.5929029583930969, "learning_rate": 1.8208591738214403e-05, "loss": 0.9094, "step": 815 }, { "epoch": 0.6543704891740176, "grad_norm": 0.6310725212097168, "learning_rate": 1.8203643071856462e-05, "loss": 0.9628, "step": 816 }, { "epoch": 0.6551724137931034, "grad_norm": 0.664486825466156, "learning_rate": 1.819868825412756e-05, "loss": 0.9297, "step": 817 }, { "epoch": 0.6559743384121892, "grad_norm": 0.6123178601264954, "learning_rate": 1.8193727288742987e-05, "loss": 0.9559, "step": 818 }, { "epoch": 0.656776263031275, "grad_norm": 0.6100270748138428, "learning_rate": 1.818876017942265e-05, "loss": 0.921, "step": 819 }, { "epoch": 0.6575781876503609, "grad_norm": 0.6273778080940247, "learning_rate": 1.818378692989105e-05, "loss": 0.9472, "step": 820 }, { "epoch": 0.6583801122694467, "grad_norm": 0.6654192805290222, "learning_rate": 1.8178807543877303e-05, "loss": 0.9388, "step": 821 }, { "epoch": 0.6591820368885325, "grad_norm": 0.6100279688835144, "learning_rate": 1.817382202511512e-05, "loss": 0.9477, "step": 822 }, { "epoch": 0.6599839615076183, "grad_norm": 0.6125680208206177, "learning_rate": 1.816883037734281e-05, "loss": 0.9419, "step": 823 }, { "epoch": 0.6607858861267041, "grad_norm": 0.6193715333938599, "learning_rate": 1.8163832604303284e-05, "loss": 1.0237, "step": 824 }, { "epoch": 0.6615878107457899, "grad_norm": 0.6256586313247681, "learning_rate": 1.815882870974404e-05, "loss": 0.9228, "step": 825 }, { "epoch": 0.6623897353648757, "grad_norm": 0.6021474599838257, "learning_rate": 1.8153818697417176e-05, "loss": 0.9198, "step": 826 }, { "epoch": 0.6631916599839615, "grad_norm": 0.5720776319503784, "learning_rate": 1.814880257107936e-05, "loss": 0.8507, "step": 827 }, { "epoch": 0.6639935846030474, "grad_norm": 0.5865132808685303, "learning_rate": 1.8143780334491863e-05, "loss": 0.9298, "step": 828 }, { "epoch": 0.6647955092221332, "grad_norm": 0.585963785648346, "learning_rate": 1.8138751991420524e-05, "loss": 0.8927, "step": 829 }, { "epoch": 0.6655974338412189, "grad_norm": 0.6248182058334351, "learning_rate": 1.8133717545635764e-05, "loss": 0.972, "step": 830 }, { "epoch": 0.6663993584603047, "grad_norm": 0.6154810190200806, "learning_rate": 1.812867700091258e-05, "loss": 0.9437, "step": 831 }, { "epoch": 0.6672012830793905, "grad_norm": 0.603408932685852, "learning_rate": 1.8123630361030557e-05, "loss": 0.8818, "step": 832 }, { "epoch": 0.6680032076984763, "grad_norm": 0.5872328877449036, "learning_rate": 1.8118577629773824e-05, "loss": 0.9342, "step": 833 }, { "epoch": 0.6688051323175621, "grad_norm": 0.5850470066070557, "learning_rate": 1.81135188109311e-05, "loss": 0.9535, "step": 834 }, { "epoch": 0.6696070569366479, "grad_norm": 0.6239657402038574, "learning_rate": 1.8108453908295655e-05, "loss": 0.9408, "step": 835 }, { "epoch": 0.6704089815557338, "grad_norm": 0.6208472847938538, "learning_rate": 1.8103382925665324e-05, "loss": 0.9907, "step": 836 }, { "epoch": 0.6712109061748196, "grad_norm": 0.5864999890327454, "learning_rate": 1.8098305866842506e-05, "loss": 0.964, "step": 837 }, { "epoch": 0.6720128307939054, "grad_norm": 0.6111268997192383, "learning_rate": 1.809322273563415e-05, "loss": 0.969, "step": 838 }, { "epoch": 0.6728147554129912, "grad_norm": 0.6360272169113159, "learning_rate": 1.8088133535851763e-05, "loss": 0.9177, "step": 839 }, { "epoch": 0.673616680032077, "grad_norm": 0.6175538897514343, "learning_rate": 1.80830382713114e-05, "loss": 0.9047, "step": 840 }, { "epoch": 0.6744186046511628, "grad_norm": 0.6100848317146301, "learning_rate": 1.8077936945833662e-05, "loss": 0.9443, "step": 841 }, { "epoch": 0.6752205292702486, "grad_norm": 0.6124653220176697, "learning_rate": 1.80728295632437e-05, "loss": 0.9368, "step": 842 }, { "epoch": 0.6760224538893344, "grad_norm": 0.6022012829780579, "learning_rate": 1.8067716127371197e-05, "loss": 0.9087, "step": 843 }, { "epoch": 0.6768243785084203, "grad_norm": 0.6640161275863647, "learning_rate": 1.806259664205039e-05, "loss": 0.9418, "step": 844 }, { "epoch": 0.677626303127506, "grad_norm": 0.5954174995422363, "learning_rate": 1.805747111112004e-05, "loss": 0.9169, "step": 845 }, { "epoch": 0.6784282277465918, "grad_norm": 0.6202585101127625, "learning_rate": 1.805233953842344e-05, "loss": 0.9537, "step": 846 }, { "epoch": 0.6792301523656776, "grad_norm": 0.5560839176177979, "learning_rate": 1.8047201927808423e-05, "loss": 0.9279, "step": 847 }, { "epoch": 0.6800320769847634, "grad_norm": 0.6648291945457458, "learning_rate": 1.8042058283127345e-05, "loss": 0.934, "step": 848 }, { "epoch": 0.6808340016038492, "grad_norm": 0.7005195021629333, "learning_rate": 1.8036908608237085e-05, "loss": 0.9258, "step": 849 }, { "epoch": 0.681635926222935, "grad_norm": 0.6536465883255005, "learning_rate": 1.803175290699904e-05, "loss": 0.9609, "step": 850 }, { "epoch": 0.6824378508420208, "grad_norm": 0.6565441489219666, "learning_rate": 1.8026591183279136e-05, "loss": 0.9085, "step": 851 }, { "epoch": 0.6832397754611067, "grad_norm": 0.6199874877929688, "learning_rate": 1.8021423440947808e-05, "loss": 0.9386, "step": 852 }, { "epoch": 0.6840417000801925, "grad_norm": 0.6430292725563049, "learning_rate": 1.801624968388e-05, "loss": 0.9389, "step": 853 }, { "epoch": 0.6848436246992783, "grad_norm": 0.601648211479187, "learning_rate": 1.801106991595518e-05, "loss": 0.9225, "step": 854 }, { "epoch": 0.6856455493183641, "grad_norm": 0.591111421585083, "learning_rate": 1.800588414105731e-05, "loss": 0.9545, "step": 855 }, { "epoch": 0.6864474739374499, "grad_norm": 0.6806792616844177, "learning_rate": 1.8000692363074862e-05, "loss": 0.942, "step": 856 }, { "epoch": 0.6872493985565357, "grad_norm": 0.5764021277427673, "learning_rate": 1.7995494585900802e-05, "loss": 0.9303, "step": 857 }, { "epoch": 0.6880513231756215, "grad_norm": 0.6204013228416443, "learning_rate": 1.7990290813432613e-05, "loss": 0.955, "step": 858 }, { "epoch": 0.6888532477947072, "grad_norm": 0.618166446685791, "learning_rate": 1.7985081049572244e-05, "loss": 0.9287, "step": 859 }, { "epoch": 0.6896551724137931, "grad_norm": 0.5855494141578674, "learning_rate": 1.797986529822617e-05, "loss": 0.9297, "step": 860 }, { "epoch": 0.6904570970328789, "grad_norm": 0.6061149835586548, "learning_rate": 1.7974643563305326e-05, "loss": 0.9884, "step": 861 }, { "epoch": 0.6912590216519647, "grad_norm": 0.5847954750061035, "learning_rate": 1.7969415848725155e-05, "loss": 0.9607, "step": 862 }, { "epoch": 0.6920609462710505, "grad_norm": 0.652940034866333, "learning_rate": 1.7964182158405567e-05, "loss": 0.9519, "step": 863 }, { "epoch": 0.6928628708901363, "grad_norm": 0.6230655908584595, "learning_rate": 1.795894249627097e-05, "loss": 0.9627, "step": 864 }, { "epoch": 0.6936647955092221, "grad_norm": 0.5886598825454712, "learning_rate": 1.795369686625024e-05, "loss": 0.8989, "step": 865 }, { "epoch": 0.6944667201283079, "grad_norm": 0.6408997178077698, "learning_rate": 1.7948445272276727e-05, "loss": 0.9438, "step": 866 }, { "epoch": 0.6952686447473937, "grad_norm": 0.6148324012756348, "learning_rate": 1.794318771828825e-05, "loss": 0.9283, "step": 867 }, { "epoch": 0.6960705693664796, "grad_norm": 0.6214705109596252, "learning_rate": 1.793792420822711e-05, "loss": 0.955, "step": 868 }, { "epoch": 0.6968724939855654, "grad_norm": 0.6310122013092041, "learning_rate": 1.7932654746040063e-05, "loss": 0.9252, "step": 869 }, { "epoch": 0.6976744186046512, "grad_norm": 0.6389340758323669, "learning_rate": 1.7927379335678333e-05, "loss": 0.9219, "step": 870 }, { "epoch": 0.698476343223737, "grad_norm": 0.597550630569458, "learning_rate": 1.7922097981097596e-05, "loss": 0.9396, "step": 871 }, { "epoch": 0.6992782678428228, "grad_norm": 0.5725171566009521, "learning_rate": 1.7916810686257998e-05, "loss": 0.9493, "step": 872 }, { "epoch": 0.7000801924619086, "grad_norm": 0.5874449014663696, "learning_rate": 1.791151745512413e-05, "loss": 0.9032, "step": 873 }, { "epoch": 0.7008821170809943, "grad_norm": 0.6453227400779724, "learning_rate": 1.790621829166504e-05, "loss": 0.8794, "step": 874 }, { "epoch": 0.7016840417000801, "grad_norm": 0.5927412509918213, "learning_rate": 1.7900913199854218e-05, "loss": 0.946, "step": 875 }, { "epoch": 0.702485966319166, "grad_norm": 0.6199756860733032, "learning_rate": 1.7895602183669602e-05, "loss": 0.9298, "step": 876 }, { "epoch": 0.7032878909382518, "grad_norm": 0.6697978377342224, "learning_rate": 1.7890285247093574e-05, "loss": 0.9928, "step": 877 }, { "epoch": 0.7040898155573376, "grad_norm": 0.6035749912261963, "learning_rate": 1.7884962394112953e-05, "loss": 0.9256, "step": 878 }, { "epoch": 0.7048917401764234, "grad_norm": 0.6426158547401428, "learning_rate": 1.7879633628719e-05, "loss": 0.9228, "step": 879 }, { "epoch": 0.7056936647955092, "grad_norm": 0.5659704804420471, "learning_rate": 1.7874298954907405e-05, "loss": 0.9229, "step": 880 }, { "epoch": 0.706495589414595, "grad_norm": 0.598106324672699, "learning_rate": 1.786895837667828e-05, "loss": 0.9421, "step": 881 }, { "epoch": 0.7072975140336808, "grad_norm": 0.5607869029045105, "learning_rate": 1.7863611898036175e-05, "loss": 0.9289, "step": 882 }, { "epoch": 0.7080994386527666, "grad_norm": 0.6277954578399658, "learning_rate": 1.7858259522990067e-05, "loss": 0.9785, "step": 883 }, { "epoch": 0.7089013632718525, "grad_norm": 0.7224546670913696, "learning_rate": 1.7852901255553346e-05, "loss": 0.9637, "step": 884 }, { "epoch": 0.7097032878909383, "grad_norm": 0.5827512145042419, "learning_rate": 1.7847537099743824e-05, "loss": 0.8912, "step": 885 }, { "epoch": 0.7105052125100241, "grad_norm": 0.6022170186042786, "learning_rate": 1.7842167059583723e-05, "loss": 0.9232, "step": 886 }, { "epoch": 0.7113071371291099, "grad_norm": 0.6745474934577942, "learning_rate": 1.783679113909969e-05, "loss": 0.9659, "step": 887 }, { "epoch": 0.7121090617481957, "grad_norm": 0.6338194608688354, "learning_rate": 1.7831409342322766e-05, "loss": 0.9329, "step": 888 }, { "epoch": 0.7129109863672815, "grad_norm": 0.638043224811554, "learning_rate": 1.7826021673288413e-05, "loss": 0.9881, "step": 889 }, { "epoch": 0.7137129109863672, "grad_norm": 0.5955981016159058, "learning_rate": 1.7820628136036483e-05, "loss": 0.908, "step": 890 }, { "epoch": 0.714514835605453, "grad_norm": 0.6151586771011353, "learning_rate": 1.7815228734611233e-05, "loss": 0.9438, "step": 891 }, { "epoch": 0.7153167602245389, "grad_norm": 0.6584967970848083, "learning_rate": 1.7809823473061324e-05, "loss": 0.9605, "step": 892 }, { "epoch": 0.7161186848436247, "grad_norm": 0.6425672769546509, "learning_rate": 1.7804412355439803e-05, "loss": 0.9248, "step": 893 }, { "epoch": 0.7169206094627105, "grad_norm": 0.6150190234184265, "learning_rate": 1.7798995385804107e-05, "loss": 0.874, "step": 894 }, { "epoch": 0.7177225340817963, "grad_norm": 0.6137731075286865, "learning_rate": 1.7793572568216063e-05, "loss": 0.9333, "step": 895 }, { "epoch": 0.7185244587008821, "grad_norm": 0.6165148019790649, "learning_rate": 1.778814390674189e-05, "loss": 0.9447, "step": 896 }, { "epoch": 0.7193263833199679, "grad_norm": 0.6286599040031433, "learning_rate": 1.7782709405452184e-05, "loss": 0.8696, "step": 897 }, { "epoch": 0.7201283079390537, "grad_norm": 0.6361931562423706, "learning_rate": 1.777726906842191e-05, "loss": 0.9514, "step": 898 }, { "epoch": 0.7209302325581395, "grad_norm": 0.7451531887054443, "learning_rate": 1.777182289973043e-05, "loss": 0.9249, "step": 899 }, { "epoch": 0.7217321571772254, "grad_norm": 0.6656950116157532, "learning_rate": 1.776637090346146e-05, "loss": 0.8892, "step": 900 }, { "epoch": 0.7225340817963112, "grad_norm": 0.7215845584869385, "learning_rate": 1.7760913083703088e-05, "loss": 0.8965, "step": 901 }, { "epoch": 0.723336006415397, "grad_norm": 0.6370206475257874, "learning_rate": 1.7755449444547783e-05, "loss": 0.954, "step": 902 }, { "epoch": 0.7241379310344828, "grad_norm": 0.6504797339439392, "learning_rate": 1.7749979990092364e-05, "loss": 0.8996, "step": 903 }, { "epoch": 0.7249398556535686, "grad_norm": 1.0392930507659912, "learning_rate": 1.774450472443801e-05, "loss": 0.9311, "step": 904 }, { "epoch": 0.7257417802726543, "grad_norm": 0.6109988689422607, "learning_rate": 1.7739023651690267e-05, "loss": 0.9424, "step": 905 }, { "epoch": 0.7265437048917401, "grad_norm": 0.677939772605896, "learning_rate": 1.7733536775959027e-05, "loss": 0.9875, "step": 906 }, { "epoch": 0.7273456295108259, "grad_norm": 0.5968044996261597, "learning_rate": 1.7728044101358538e-05, "loss": 0.9088, "step": 907 }, { "epoch": 0.7281475541299118, "grad_norm": 0.5922111868858337, "learning_rate": 1.7722545632007394e-05, "loss": 0.9306, "step": 908 }, { "epoch": 0.7289494787489976, "grad_norm": 0.6236370801925659, "learning_rate": 1.771704137202853e-05, "loss": 0.9302, "step": 909 }, { "epoch": 0.7297514033680834, "grad_norm": 0.5695316195487976, "learning_rate": 1.771153132554924e-05, "loss": 0.8739, "step": 910 }, { "epoch": 0.7305533279871692, "grad_norm": 0.6526502966880798, "learning_rate": 1.770601549670113e-05, "loss": 0.8775, "step": 911 }, { "epoch": 0.731355252606255, "grad_norm": 0.5602655410766602, "learning_rate": 1.7700493889620163e-05, "loss": 0.9219, "step": 912 }, { "epoch": 0.7321571772253408, "grad_norm": 0.6553357839584351, "learning_rate": 1.769496650844663e-05, "loss": 0.9495, "step": 913 }, { "epoch": 0.7329591018444266, "grad_norm": 0.6299150586128235, "learning_rate": 1.768943335732515e-05, "loss": 0.8947, "step": 914 }, { "epoch": 0.7337610264635124, "grad_norm": 0.6235426664352417, "learning_rate": 1.7683894440404663e-05, "loss": 0.9501, "step": 915 }, { "epoch": 0.7345629510825983, "grad_norm": 0.6619741916656494, "learning_rate": 1.7678349761838438e-05, "loss": 0.9679, "step": 916 }, { "epoch": 0.7353648757016841, "grad_norm": 0.6198428869247437, "learning_rate": 1.7672799325784066e-05, "loss": 0.9105, "step": 917 }, { "epoch": 0.7361668003207699, "grad_norm": 2.144275426864624, "learning_rate": 1.7667243136403455e-05, "loss": 0.8585, "step": 918 }, { "epoch": 0.7369687249398557, "grad_norm": 0.6071990728378296, "learning_rate": 1.7661681197862823e-05, "loss": 0.9773, "step": 919 }, { "epoch": 0.7377706495589414, "grad_norm": 0.5878254175186157, "learning_rate": 1.76561135143327e-05, "loss": 0.9344, "step": 920 }, { "epoch": 0.7385725741780272, "grad_norm": 0.616235077381134, "learning_rate": 1.7650540089987926e-05, "loss": 0.8986, "step": 921 }, { "epoch": 0.739374498797113, "grad_norm": 0.6328748464584351, "learning_rate": 1.7644960929007642e-05, "loss": 0.9162, "step": 922 }, { "epoch": 0.7401764234161988, "grad_norm": 0.610571563243866, "learning_rate": 1.7639376035575296e-05, "loss": 0.9292, "step": 923 }, { "epoch": 0.7409783480352847, "grad_norm": 0.6044664978981018, "learning_rate": 1.7633785413878634e-05, "loss": 0.9503, "step": 924 }, { "epoch": 0.7417802726543705, "grad_norm": 0.5878413319587708, "learning_rate": 1.762818906810969e-05, "loss": 0.8839, "step": 925 }, { "epoch": 0.7425821972734563, "grad_norm": 0.99688321352005, "learning_rate": 1.7622587002464792e-05, "loss": 0.9361, "step": 926 }, { "epoch": 0.7433841218925421, "grad_norm": 6.020138263702393, "learning_rate": 1.7616979221144565e-05, "loss": 0.9249, "step": 927 }, { "epoch": 0.7441860465116279, "grad_norm": 0.65313321352005, "learning_rate": 1.7611365728353907e-05, "loss": 0.8932, "step": 928 }, { "epoch": 0.7449879711307137, "grad_norm": 0.6319687962532043, "learning_rate": 1.7605746528302017e-05, "loss": 0.9224, "step": 929 }, { "epoch": 0.7457898957497995, "grad_norm": 0.6352254152297974, "learning_rate": 1.760012162520236e-05, "loss": 0.9589, "step": 930 }, { "epoch": 0.7465918203688853, "grad_norm": 0.6238382458686829, "learning_rate": 1.759449102327267e-05, "loss": 0.9495, "step": 931 }, { "epoch": 0.7473937449879712, "grad_norm": 0.6095400452613831, "learning_rate": 1.7588854726734974e-05, "loss": 0.9395, "step": 932 }, { "epoch": 0.748195669607057, "grad_norm": 0.5706982016563416, "learning_rate": 1.7583212739815555e-05, "loss": 0.9041, "step": 933 }, { "epoch": 0.7489975942261428, "grad_norm": 0.5789833664894104, "learning_rate": 1.757756506674497e-05, "loss": 0.8945, "step": 934 }, { "epoch": 0.7497995188452286, "grad_norm": 0.5853317975997925, "learning_rate": 1.7571911711758032e-05, "loss": 0.9189, "step": 935 }, { "epoch": 0.7506014434643143, "grad_norm": 0.6032062768936157, "learning_rate": 1.7566252679093826e-05, "loss": 0.9125, "step": 936 }, { "epoch": 0.7514033680834001, "grad_norm": 0.6213047504425049, "learning_rate": 1.7560587972995678e-05, "loss": 0.9299, "step": 937 }, { "epoch": 0.7522052927024859, "grad_norm": 0.68639075756073, "learning_rate": 1.7554917597711188e-05, "loss": 0.9627, "step": 938 }, { "epoch": 0.7530072173215717, "grad_norm": 0.5955672264099121, "learning_rate": 1.7549241557492187e-05, "loss": 0.9761, "step": 939 }, { "epoch": 0.7538091419406576, "grad_norm": 0.6289668679237366, "learning_rate": 1.754355985659477e-05, "loss": 0.9432, "step": 940 }, { "epoch": 0.7546110665597434, "grad_norm": 0.621410071849823, "learning_rate": 1.7537872499279265e-05, "loss": 0.9221, "step": 941 }, { "epoch": 0.7554129911788292, "grad_norm": 1.4276875257492065, "learning_rate": 1.753217948981025e-05, "loss": 0.9208, "step": 942 }, { "epoch": 0.756214915797915, "grad_norm": 0.6688864231109619, "learning_rate": 1.7526480832456538e-05, "loss": 0.9107, "step": 943 }, { "epoch": 0.7570168404170008, "grad_norm": 0.6159544587135315, "learning_rate": 1.752077653149117e-05, "loss": 0.9473, "step": 944 }, { "epoch": 0.7578187650360866, "grad_norm": 0.6279981732368469, "learning_rate": 1.751506659119143e-05, "loss": 0.908, "step": 945 }, { "epoch": 0.7586206896551724, "grad_norm": 0.6103554964065552, "learning_rate": 1.750935101583883e-05, "loss": 0.9023, "step": 946 }, { "epoch": 0.7594226142742582, "grad_norm": 0.6202152371406555, "learning_rate": 1.7503629809719095e-05, "loss": 0.9256, "step": 947 }, { "epoch": 0.7602245388933441, "grad_norm": 0.7202157378196716, "learning_rate": 1.749790297712218e-05, "loss": 0.93, "step": 948 }, { "epoch": 0.7610264635124299, "grad_norm": 0.6290937066078186, "learning_rate": 1.7492170522342267e-05, "loss": 0.9029, "step": 949 }, { "epoch": 0.7618283881315157, "grad_norm": 0.6004471778869629, "learning_rate": 1.748643244967774e-05, "loss": 0.9173, "step": 950 }, { "epoch": 0.7626303127506014, "grad_norm": 0.6735373139381409, "learning_rate": 1.7480688763431203e-05, "loss": 0.9121, "step": 951 }, { "epoch": 0.7634322373696872, "grad_norm": 0.6806999444961548, "learning_rate": 1.7474939467909468e-05, "loss": 0.9696, "step": 952 }, { "epoch": 0.764234161988773, "grad_norm": 0.6270620822906494, "learning_rate": 1.7469184567423548e-05, "loss": 0.8985, "step": 953 }, { "epoch": 0.7650360866078588, "grad_norm": 0.660423994064331, "learning_rate": 1.7463424066288668e-05, "loss": 0.9334, "step": 954 }, { "epoch": 0.7658380112269446, "grad_norm": 0.6471710205078125, "learning_rate": 1.745765796882425e-05, "loss": 0.9471, "step": 955 }, { "epoch": 0.7666399358460305, "grad_norm": 0.5963034629821777, "learning_rate": 1.7451886279353905e-05, "loss": 0.8939, "step": 956 }, { "epoch": 0.7674418604651163, "grad_norm": 0.6383598446846008, "learning_rate": 1.7446109002205444e-05, "loss": 0.9114, "step": 957 }, { "epoch": 0.7682437850842021, "grad_norm": 0.6523898839950562, "learning_rate": 1.744032614171087e-05, "loss": 0.9375, "step": 958 }, { "epoch": 0.7690457097032879, "grad_norm": 0.6452939510345459, "learning_rate": 1.743453770220636e-05, "loss": 0.9317, "step": 959 }, { "epoch": 0.7698476343223737, "grad_norm": 0.6215782165527344, "learning_rate": 1.7428743688032292e-05, "loss": 0.9467, "step": 960 }, { "epoch": 0.7706495589414595, "grad_norm": 0.6118282675743103, "learning_rate": 1.7422944103533212e-05, "loss": 0.9916, "step": 961 }, { "epoch": 0.7714514835605453, "grad_norm": 0.6718006730079651, "learning_rate": 1.7417138953057847e-05, "loss": 0.9415, "step": 962 }, { "epoch": 0.7722534081796311, "grad_norm": 0.6651148200035095, "learning_rate": 1.7411328240959095e-05, "loss": 0.9109, "step": 963 }, { "epoch": 0.773055332798717, "grad_norm": 0.6115936636924744, "learning_rate": 1.7405511971594022e-05, "loss": 0.9311, "step": 964 }, { "epoch": 0.7738572574178028, "grad_norm": 0.6545907855033875, "learning_rate": 1.739969014932387e-05, "loss": 0.8825, "step": 965 }, { "epoch": 0.7746591820368885, "grad_norm": 0.6140168905258179, "learning_rate": 1.7393862778514042e-05, "loss": 0.9522, "step": 966 }, { "epoch": 0.7754611066559743, "grad_norm": 0.5883581638336182, "learning_rate": 1.738802986353409e-05, "loss": 0.8981, "step": 967 }, { "epoch": 0.7762630312750601, "grad_norm": 0.6271137595176697, "learning_rate": 1.7382191408757744e-05, "loss": 0.9418, "step": 968 }, { "epoch": 0.7770649558941459, "grad_norm": 0.6357349157333374, "learning_rate": 1.7376347418562866e-05, "loss": 0.894, "step": 969 }, { "epoch": 0.7778668805132317, "grad_norm": 0.6148669123649597, "learning_rate": 1.7370497897331486e-05, "loss": 0.9197, "step": 970 }, { "epoch": 0.7786688051323175, "grad_norm": 0.589157223701477, "learning_rate": 1.7364642849449767e-05, "loss": 0.952, "step": 971 }, { "epoch": 0.7794707297514034, "grad_norm": 0.6091080904006958, "learning_rate": 1.735878227930803e-05, "loss": 0.9722, "step": 972 }, { "epoch": 0.7802726543704892, "grad_norm": 0.6389529705047607, "learning_rate": 1.735291619130073e-05, "loss": 0.924, "step": 973 }, { "epoch": 0.781074578989575, "grad_norm": 0.6408534049987793, "learning_rate": 1.7347044589826455e-05, "loss": 0.9491, "step": 974 }, { "epoch": 0.7818765036086608, "grad_norm": 0.5990006327629089, "learning_rate": 1.7341167479287934e-05, "loss": 0.9298, "step": 975 }, { "epoch": 0.7826784282277466, "grad_norm": 0.5884609222412109, "learning_rate": 1.7335284864092024e-05, "loss": 0.8903, "step": 976 }, { "epoch": 0.7834803528468324, "grad_norm": 0.5926967859268188, "learning_rate": 1.732939674864971e-05, "loss": 0.891, "step": 977 }, { "epoch": 0.7842822774659182, "grad_norm": 0.697799801826477, "learning_rate": 1.7323503137376102e-05, "loss": 0.968, "step": 978 }, { "epoch": 0.785084202085004, "grad_norm": 0.6278639435768127, "learning_rate": 1.7317604034690434e-05, "loss": 0.9672, "step": 979 }, { "epoch": 0.7858861267040899, "grad_norm": 0.6392386555671692, "learning_rate": 1.7311699445016046e-05, "loss": 0.8997, "step": 980 }, { "epoch": 0.7866880513231757, "grad_norm": 0.5999894738197327, "learning_rate": 1.730578937278041e-05, "loss": 0.9535, "step": 981 }, { "epoch": 0.7874899759422614, "grad_norm": 0.6001031398773193, "learning_rate": 1.7299873822415093e-05, "loss": 0.892, "step": 982 }, { "epoch": 0.7882919005613472, "grad_norm": 0.6019670963287354, "learning_rate": 1.7293952798355776e-05, "loss": 0.8658, "step": 983 }, { "epoch": 0.789093825180433, "grad_norm": 0.6335600018501282, "learning_rate": 1.728802630504225e-05, "loss": 0.9091, "step": 984 }, { "epoch": 0.7898957497995188, "grad_norm": 0.5757085680961609, "learning_rate": 1.7282094346918395e-05, "loss": 0.9317, "step": 985 }, { "epoch": 0.7906976744186046, "grad_norm": 0.6094053387641907, "learning_rate": 1.72761569284322e-05, "loss": 0.9256, "step": 986 }, { "epoch": 0.7914995990376904, "grad_norm": 0.6195594668388367, "learning_rate": 1.7270214054035736e-05, "loss": 0.9395, "step": 987 }, { "epoch": 0.7923015236567763, "grad_norm": 0.6129851937294006, "learning_rate": 1.7264265728185186e-05, "loss": 0.8758, "step": 988 }, { "epoch": 0.7931034482758621, "grad_norm": 0.6170161962509155, "learning_rate": 1.7258311955340794e-05, "loss": 0.9307, "step": 989 }, { "epoch": 0.7939053728949479, "grad_norm": 0.6150994300842285, "learning_rate": 1.725235273996691e-05, "loss": 0.9174, "step": 990 }, { "epoch": 0.7947072975140337, "grad_norm": 0.6161699891090393, "learning_rate": 1.7246388086531953e-05, "loss": 0.9244, "step": 991 }, { "epoch": 0.7955092221331195, "grad_norm": 0.6051169037818909, "learning_rate": 1.7240417999508424e-05, "loss": 0.9147, "step": 992 }, { "epoch": 0.7963111467522053, "grad_norm": 0.6212258338928223, "learning_rate": 1.7234442483372894e-05, "loss": 0.9861, "step": 993 }, { "epoch": 0.7971130713712911, "grad_norm": 0.6092191934585571, "learning_rate": 1.722846154260602e-05, "loss": 0.9064, "step": 994 }, { "epoch": 0.7979149959903769, "grad_norm": 0.6202679872512817, "learning_rate": 1.72224751816925e-05, "loss": 0.9129, "step": 995 }, { "epoch": 0.7987169206094628, "grad_norm": 0.6190642714500427, "learning_rate": 1.721648340512112e-05, "loss": 0.9291, "step": 996 }, { "epoch": 0.7995188452285485, "grad_norm": 0.5609696507453918, "learning_rate": 1.721048621738472e-05, "loss": 0.8931, "step": 997 }, { "epoch": 0.8003207698476343, "grad_norm": 0.6554841995239258, "learning_rate": 1.720448362298019e-05, "loss": 0.9463, "step": 998 }, { "epoch": 0.8011226944667201, "grad_norm": 0.661469042301178, "learning_rate": 1.719847562640848e-05, "loss": 0.9057, "step": 999 }, { "epoch": 0.8019246190858059, "grad_norm": 0.581844687461853, "learning_rate": 1.7192462232174595e-05, "loss": 0.9095, "step": 1000 }, { "epoch": 0.8027265437048917, "grad_norm": 0.6142575144767761, "learning_rate": 1.7186443444787578e-05, "loss": 0.8885, "step": 1001 }, { "epoch": 0.8035284683239775, "grad_norm": 0.5919050574302673, "learning_rate": 1.718041926876053e-05, "loss": 0.8893, "step": 1002 }, { "epoch": 0.8043303929430633, "grad_norm": 0.6128547787666321, "learning_rate": 1.7174389708610565e-05, "loss": 0.923, "step": 1003 }, { "epoch": 0.8051323175621492, "grad_norm": 0.5759849548339844, "learning_rate": 1.716835476885887e-05, "loss": 0.9256, "step": 1004 }, { "epoch": 0.805934242181235, "grad_norm": 0.5811640620231628, "learning_rate": 1.7162314454030644e-05, "loss": 0.9334, "step": 1005 }, { "epoch": 0.8067361668003208, "grad_norm": 0.6075664758682251, "learning_rate": 1.7156268768655118e-05, "loss": 0.8993, "step": 1006 }, { "epoch": 0.8075380914194066, "grad_norm": 0.6393078565597534, "learning_rate": 1.715021771726555e-05, "loss": 0.9181, "step": 1007 }, { "epoch": 0.8083400160384924, "grad_norm": 0.6739677786827087, "learning_rate": 1.714416130439923e-05, "loss": 0.9329, "step": 1008 }, { "epoch": 0.8091419406575782, "grad_norm": 0.5906496047973633, "learning_rate": 1.7138099534597464e-05, "loss": 0.9393, "step": 1009 }, { "epoch": 0.809943865276664, "grad_norm": 0.6302242875099182, "learning_rate": 1.7132032412405565e-05, "loss": 0.9145, "step": 1010 }, { "epoch": 0.8107457898957497, "grad_norm": 0.6030935645103455, "learning_rate": 1.7125959942372875e-05, "loss": 0.8723, "step": 1011 }, { "epoch": 0.8115477145148356, "grad_norm": 0.6145809292793274, "learning_rate": 1.711988212905274e-05, "loss": 0.8957, "step": 1012 }, { "epoch": 0.8123496391339214, "grad_norm": 0.5869849324226379, "learning_rate": 1.7113798977002506e-05, "loss": 0.9221, "step": 1013 }, { "epoch": 0.8131515637530072, "grad_norm": 0.7540897130966187, "learning_rate": 1.710771049078353e-05, "loss": 0.9337, "step": 1014 }, { "epoch": 0.813953488372093, "grad_norm": 0.6184853911399841, "learning_rate": 1.7101616674961165e-05, "loss": 0.8933, "step": 1015 }, { "epoch": 0.8147554129911788, "grad_norm": 0.592350959777832, "learning_rate": 1.7095517534104762e-05, "loss": 0.8933, "step": 1016 }, { "epoch": 0.8155573376102646, "grad_norm": 0.5875340104103088, "learning_rate": 1.7089413072787667e-05, "loss": 0.9336, "step": 1017 }, { "epoch": 0.8163592622293504, "grad_norm": 0.6324250102043152, "learning_rate": 1.7083303295587212e-05, "loss": 0.8972, "step": 1018 }, { "epoch": 0.8171611868484362, "grad_norm": 0.6096128225326538, "learning_rate": 1.7077188207084712e-05, "loss": 0.9375, "step": 1019 }, { "epoch": 0.8179631114675221, "grad_norm": 0.6442949771881104, "learning_rate": 1.7071067811865477e-05, "loss": 0.9045, "step": 1020 }, { "epoch": 0.8187650360866079, "grad_norm": 0.6120867133140564, "learning_rate": 1.706494211451878e-05, "loss": 0.9377, "step": 1021 }, { "epoch": 0.8195669607056937, "grad_norm": 0.630803644657135, "learning_rate": 1.7058811119637878e-05, "loss": 0.9255, "step": 1022 }, { "epoch": 0.8203688853247795, "grad_norm": 0.5878363251686096, "learning_rate": 1.7052674831820008e-05, "loss": 0.9195, "step": 1023 }, { "epoch": 0.8211708099438653, "grad_norm": 0.6276422739028931, "learning_rate": 1.704653325566636e-05, "loss": 0.9699, "step": 1024 }, { "epoch": 0.8219727345629511, "grad_norm": 0.5793137550354004, "learning_rate": 1.7040386395782093e-05, "loss": 0.8794, "step": 1025 }, { "epoch": 0.8227746591820368, "grad_norm": 0.6176061630249023, "learning_rate": 1.703423425677634e-05, "loss": 0.8908, "step": 1026 }, { "epoch": 0.8235765838011226, "grad_norm": 0.616875946521759, "learning_rate": 1.7028076843262185e-05, "loss": 0.9506, "step": 1027 }, { "epoch": 0.8243785084202085, "grad_norm": 0.5971503257751465, "learning_rate": 1.7021914159856664e-05, "loss": 0.9218, "step": 1028 }, { "epoch": 0.8251804330392943, "grad_norm": 0.6316090226173401, "learning_rate": 1.701574621118076e-05, "loss": 0.9296, "step": 1029 }, { "epoch": 0.8259823576583801, "grad_norm": 0.6042530536651611, "learning_rate": 1.700957300185942e-05, "loss": 0.895, "step": 1030 }, { "epoch": 0.8267842822774659, "grad_norm": 0.6263911128044128, "learning_rate": 1.7003394536521525e-05, "loss": 0.9031, "step": 1031 }, { "epoch": 0.8275862068965517, "grad_norm": 0.5868535041809082, "learning_rate": 1.6997210819799894e-05, "loss": 0.8886, "step": 1032 }, { "epoch": 0.8283881315156375, "grad_norm": 0.681711733341217, "learning_rate": 1.6991021856331297e-05, "loss": 0.9142, "step": 1033 }, { "epoch": 0.8291900561347233, "grad_norm": 0.6533603072166443, "learning_rate": 1.698482765075642e-05, "loss": 0.886, "step": 1034 }, { "epoch": 0.8299919807538091, "grad_norm": 0.6320748925209045, "learning_rate": 1.6978628207719892e-05, "loss": 0.8767, "step": 1035 }, { "epoch": 0.830793905372895, "grad_norm": 0.6173900365829468, "learning_rate": 1.6972423531870273e-05, "loss": 0.9081, "step": 1036 }, { "epoch": 0.8315958299919808, "grad_norm": 0.6053596138954163, "learning_rate": 1.696621362786003e-05, "loss": 0.9403, "step": 1037 }, { "epoch": 0.8323977546110666, "grad_norm": 0.5761358141899109, "learning_rate": 1.6959998500345572e-05, "loss": 0.9318, "step": 1038 }, { "epoch": 0.8331996792301524, "grad_norm": 0.6473420262336731, "learning_rate": 1.6953778153987205e-05, "loss": 0.9407, "step": 1039 }, { "epoch": 0.8340016038492382, "grad_norm": 0.5871930122375488, "learning_rate": 1.6947552593449154e-05, "loss": 0.8952, "step": 1040 }, { "epoch": 0.834803528468324, "grad_norm": 0.5863208770751953, "learning_rate": 1.6941321823399567e-05, "loss": 0.8676, "step": 1041 }, { "epoch": 0.8356054530874097, "grad_norm": 0.6157333254814148, "learning_rate": 1.6935085848510476e-05, "loss": 0.884, "step": 1042 }, { "epoch": 0.8364073777064955, "grad_norm": 0.6380476951599121, "learning_rate": 1.6928844673457838e-05, "loss": 0.9337, "step": 1043 }, { "epoch": 0.8372093023255814, "grad_norm": 0.6176585555076599, "learning_rate": 1.692259830292149e-05, "loss": 0.9369, "step": 1044 }, { "epoch": 0.8380112269446672, "grad_norm": 0.6162835359573364, "learning_rate": 1.691634674158518e-05, "loss": 0.9504, "step": 1045 }, { "epoch": 0.838813151563753, "grad_norm": 0.6078632473945618, "learning_rate": 1.6910089994136535e-05, "loss": 0.9074, "step": 1046 }, { "epoch": 0.8396150761828388, "grad_norm": 0.5939008593559265, "learning_rate": 1.6903828065267083e-05, "loss": 0.9469, "step": 1047 }, { "epoch": 0.8404170008019246, "grad_norm": 0.6359356641769409, "learning_rate": 1.6897560959672232e-05, "loss": 0.8914, "step": 1048 }, { "epoch": 0.8412189254210104, "grad_norm": 0.6040184497833252, "learning_rate": 1.6891288682051264e-05, "loss": 0.938, "step": 1049 }, { "epoch": 0.8420208500400962, "grad_norm": 0.6027700901031494, "learning_rate": 1.6885011237107353e-05, "loss": 0.8751, "step": 1050 }, { "epoch": 0.842822774659182, "grad_norm": 0.5934613943099976, "learning_rate": 1.6878728629547536e-05, "loss": 0.9169, "step": 1051 }, { "epoch": 0.8436246992782679, "grad_norm": 0.6678500771522522, "learning_rate": 1.6872440864082732e-05, "loss": 0.9461, "step": 1052 }, { "epoch": 0.8444266238973537, "grad_norm": 0.6098446249961853, "learning_rate": 1.686614794542772e-05, "loss": 0.9198, "step": 1053 }, { "epoch": 0.8452285485164395, "grad_norm": 0.5894660949707031, "learning_rate": 1.685984987830114e-05, "loss": 0.9057, "step": 1054 }, { "epoch": 0.8460304731355253, "grad_norm": 0.6063706874847412, "learning_rate": 1.68535466674255e-05, "loss": 0.9392, "step": 1055 }, { "epoch": 0.846832397754611, "grad_norm": 0.6084437966346741, "learning_rate": 1.6847238317527167e-05, "loss": 0.9146, "step": 1056 }, { "epoch": 0.8476343223736968, "grad_norm": 0.5813028812408447, "learning_rate": 1.684092483333635e-05, "loss": 0.9152, "step": 1057 }, { "epoch": 0.8484362469927826, "grad_norm": 0.6176558136940002, "learning_rate": 1.6834606219587114e-05, "loss": 0.8822, "step": 1058 }, { "epoch": 0.8492381716118684, "grad_norm": 0.5906162858009338, "learning_rate": 1.682828248101738e-05, "loss": 0.9067, "step": 1059 }, { "epoch": 0.8500400962309543, "grad_norm": 0.5896495580673218, "learning_rate": 1.682195362236889e-05, "loss": 0.931, "step": 1060 }, { "epoch": 0.8508420208500401, "grad_norm": 0.5951011776924133, "learning_rate": 1.681561964838725e-05, "loss": 0.9665, "step": 1061 }, { "epoch": 0.8516439454691259, "grad_norm": 0.6564264297485352, "learning_rate": 1.6809280563821878e-05, "loss": 0.8821, "step": 1062 }, { "epoch": 0.8524458700882117, "grad_norm": 0.5982756018638611, "learning_rate": 1.6802936373426045e-05, "loss": 0.8951, "step": 1063 }, { "epoch": 0.8532477947072975, "grad_norm": 0.6046779155731201, "learning_rate": 1.6796587081956833e-05, "loss": 0.9748, "step": 1064 }, { "epoch": 0.8540497193263833, "grad_norm": 0.5632441639900208, "learning_rate": 1.6790232694175164e-05, "loss": 0.8921, "step": 1065 }, { "epoch": 0.8548516439454691, "grad_norm": 0.5854066610336304, "learning_rate": 1.678387321484577e-05, "loss": 0.9368, "step": 1066 }, { "epoch": 0.8556535685645549, "grad_norm": 0.6365918517112732, "learning_rate": 1.6777508648737203e-05, "loss": 0.9264, "step": 1067 }, { "epoch": 0.8564554931836408, "grad_norm": 0.5902692675590515, "learning_rate": 1.677113900062184e-05, "loss": 0.9111, "step": 1068 }, { "epoch": 0.8572574178027266, "grad_norm": 0.6386597752571106, "learning_rate": 1.6764764275275852e-05, "loss": 0.9626, "step": 1069 }, { "epoch": 0.8580593424218124, "grad_norm": 0.6048006415367126, "learning_rate": 1.675838447747923e-05, "loss": 0.9684, "step": 1070 }, { "epoch": 0.8588612670408982, "grad_norm": 0.5801950693130493, "learning_rate": 1.675199961201576e-05, "loss": 0.9235, "step": 1071 }, { "epoch": 0.859663191659984, "grad_norm": 0.599275529384613, "learning_rate": 1.6745609683673034e-05, "loss": 0.9174, "step": 1072 }, { "epoch": 0.8604651162790697, "grad_norm": 0.6032297015190125, "learning_rate": 1.6739214697242437e-05, "loss": 0.9221, "step": 1073 }, { "epoch": 0.8612670408981555, "grad_norm": 0.6280431151390076, "learning_rate": 1.6732814657519146e-05, "loss": 0.9157, "step": 1074 }, { "epoch": 0.8620689655172413, "grad_norm": 0.7165436744689941, "learning_rate": 1.6726409569302134e-05, "loss": 0.9094, "step": 1075 }, { "epoch": 0.8628708901363272, "grad_norm": 0.6161721348762512, "learning_rate": 1.6719999437394146e-05, "loss": 0.8972, "step": 1076 }, { "epoch": 0.863672814755413, "grad_norm": 0.5874865055084229, "learning_rate": 1.6713584266601728e-05, "loss": 0.9607, "step": 1077 }, { "epoch": 0.8644747393744988, "grad_norm": 0.5720422863960266, "learning_rate": 1.6707164061735183e-05, "loss": 0.8646, "step": 1078 }, { "epoch": 0.8652766639935846, "grad_norm": 0.6087173223495483, "learning_rate": 1.6700738827608606e-05, "loss": 0.8971, "step": 1079 }, { "epoch": 0.8660785886126704, "grad_norm": 0.5989866256713867, "learning_rate": 1.6694308569039853e-05, "loss": 0.9118, "step": 1080 }, { "epoch": 0.8668805132317562, "grad_norm": 0.6564970016479492, "learning_rate": 1.6687873290850554e-05, "loss": 0.9619, "step": 1081 }, { "epoch": 0.867682437850842, "grad_norm": 0.6022130250930786, "learning_rate": 1.6681432997866097e-05, "loss": 0.9252, "step": 1082 }, { "epoch": 0.8684843624699278, "grad_norm": 0.5839381217956543, "learning_rate": 1.667498769491563e-05, "loss": 0.9207, "step": 1083 }, { "epoch": 0.8692862870890137, "grad_norm": 0.6378865242004395, "learning_rate": 1.666853738683207e-05, "loss": 0.9339, "step": 1084 }, { "epoch": 0.8700882117080995, "grad_norm": 0.667452335357666, "learning_rate": 1.6662082078452068e-05, "loss": 0.9323, "step": 1085 }, { "epoch": 0.8708901363271853, "grad_norm": 0.5752742886543274, "learning_rate": 1.665562177461604e-05, "loss": 0.8857, "step": 1086 }, { "epoch": 0.871692060946271, "grad_norm": 0.6381446719169617, "learning_rate": 1.6649156480168137e-05, "loss": 0.9146, "step": 1087 }, { "epoch": 0.8724939855653568, "grad_norm": 0.6204044818878174, "learning_rate": 1.6642686199956263e-05, "loss": 0.9048, "step": 1088 }, { "epoch": 0.8732959101844426, "grad_norm": 0.6212306618690491, "learning_rate": 1.6636210938832053e-05, "loss": 0.9792, "step": 1089 }, { "epoch": 0.8740978348035284, "grad_norm": 0.5908262133598328, "learning_rate": 1.662973070165088e-05, "loss": 0.9181, "step": 1090 }, { "epoch": 0.8748997594226142, "grad_norm": 0.6047478318214417, "learning_rate": 1.6623245493271832e-05, "loss": 0.953, "step": 1091 }, { "epoch": 0.8757016840417001, "grad_norm": 0.5832977294921875, "learning_rate": 1.6616755318557758e-05, "loss": 0.9327, "step": 1092 }, { "epoch": 0.8765036086607859, "grad_norm": 0.6228951811790466, "learning_rate": 1.6610260182375202e-05, "loss": 0.9074, "step": 1093 }, { "epoch": 0.8773055332798717, "grad_norm": 0.5714309811592102, "learning_rate": 1.660376008959444e-05, "loss": 0.9113, "step": 1094 }, { "epoch": 0.8781074578989575, "grad_norm": 0.57155442237854, "learning_rate": 1.6597255045089466e-05, "loss": 0.8875, "step": 1095 }, { "epoch": 0.8789093825180433, "grad_norm": 0.6274538040161133, "learning_rate": 1.6590745053737986e-05, "loss": 0.9663, "step": 1096 }, { "epoch": 0.8797113071371291, "grad_norm": 0.5771580934524536, "learning_rate": 1.65842301204214e-05, "loss": 0.9048, "step": 1097 }, { "epoch": 0.8805132317562149, "grad_norm": 0.5920909643173218, "learning_rate": 1.657771025002484e-05, "loss": 0.9001, "step": 1098 }, { "epoch": 0.8813151563753007, "grad_norm": 0.5597774386405945, "learning_rate": 1.657118544743712e-05, "loss": 0.921, "step": 1099 }, { "epoch": 0.8821170809943866, "grad_norm": 0.5892897248268127, "learning_rate": 1.6564655717550766e-05, "loss": 0.9508, "step": 1100 }, { "epoch": 0.8829190056134724, "grad_norm": 0.641369640827179, "learning_rate": 1.6558121065261982e-05, "loss": 0.9015, "step": 1101 }, { "epoch": 0.8837209302325582, "grad_norm": 0.5892067551612854, "learning_rate": 1.6551581495470683e-05, "loss": 0.8589, "step": 1102 }, { "epoch": 0.884522854851644, "grad_norm": 0.5978425145149231, "learning_rate": 1.6545037013080455e-05, "loss": 0.9548, "step": 1103 }, { "epoch": 0.8853247794707297, "grad_norm": 0.6504059433937073, "learning_rate": 1.6538487622998576e-05, "loss": 0.9711, "step": 1104 }, { "epoch": 0.8861267040898155, "grad_norm": 0.6230421662330627, "learning_rate": 1.6531933330136e-05, "loss": 0.9147, "step": 1105 }, { "epoch": 0.8869286287089013, "grad_norm": 0.6024095416069031, "learning_rate": 1.652537413940736e-05, "loss": 0.963, "step": 1106 }, { "epoch": 0.8877305533279871, "grad_norm": 0.643290102481842, "learning_rate": 1.6518810055730962e-05, "loss": 0.9197, "step": 1107 }, { "epoch": 0.888532477947073, "grad_norm": 0.6359246373176575, "learning_rate": 1.6512241084028775e-05, "loss": 0.9211, "step": 1108 }, { "epoch": 0.8893344025661588, "grad_norm": 0.5819621682167053, "learning_rate": 1.6505667229226445e-05, "loss": 0.8995, "step": 1109 }, { "epoch": 0.8901363271852446, "grad_norm": 0.624454140663147, "learning_rate": 1.6499088496253266e-05, "loss": 0.901, "step": 1110 }, { "epoch": 0.8909382518043304, "grad_norm": 0.608256459236145, "learning_rate": 1.6492504890042196e-05, "loss": 0.8551, "step": 1111 }, { "epoch": 0.8917401764234162, "grad_norm": 0.6560264825820923, "learning_rate": 1.6485916415529852e-05, "loss": 0.9358, "step": 1112 }, { "epoch": 0.892542101042502, "grad_norm": 0.5924005508422852, "learning_rate": 1.6479323077656492e-05, "loss": 0.9347, "step": 1113 }, { "epoch": 0.8933440256615878, "grad_norm": 0.6272872686386108, "learning_rate": 1.647272488136603e-05, "loss": 0.9396, "step": 1114 }, { "epoch": 0.8941459502806736, "grad_norm": 0.5873216986656189, "learning_rate": 1.6466121831606013e-05, "loss": 0.9505, "step": 1115 }, { "epoch": 0.8949478748997595, "grad_norm": 0.5705021023750305, "learning_rate": 1.6459513933327637e-05, "loss": 0.9651, "step": 1116 }, { "epoch": 0.8957497995188453, "grad_norm": 0.6147488951683044, "learning_rate": 1.6452901191485725e-05, "loss": 0.8757, "step": 1117 }, { "epoch": 0.896551724137931, "grad_norm": 0.589171826839447, "learning_rate": 1.6446283611038735e-05, "loss": 0.9019, "step": 1118 }, { "epoch": 0.8973536487570168, "grad_norm": 0.5974717736244202, "learning_rate": 1.643966119694876e-05, "loss": 0.9234, "step": 1119 }, { "epoch": 0.8981555733761026, "grad_norm": 0.5791064500808716, "learning_rate": 1.643303395418151e-05, "loss": 0.9127, "step": 1120 }, { "epoch": 0.8989574979951884, "grad_norm": 0.6018791198730469, "learning_rate": 1.642640188770632e-05, "loss": 0.8784, "step": 1121 }, { "epoch": 0.8997594226142742, "grad_norm": 0.5726858973503113, "learning_rate": 1.641976500249613e-05, "loss": 0.9173, "step": 1122 }, { "epoch": 0.90056134723336, "grad_norm": 0.6228300333023071, "learning_rate": 1.641312330352751e-05, "loss": 0.9295, "step": 1123 }, { "epoch": 0.9013632718524459, "grad_norm": 0.5762906670570374, "learning_rate": 1.6406476795780634e-05, "loss": 0.9149, "step": 1124 }, { "epoch": 0.9021651964715317, "grad_norm": 0.6083019375801086, "learning_rate": 1.639982548423927e-05, "loss": 0.962, "step": 1125 }, { "epoch": 0.9029671210906175, "grad_norm": 0.6070680022239685, "learning_rate": 1.6393169373890805e-05, "loss": 0.9129, "step": 1126 }, { "epoch": 0.9037690457097033, "grad_norm": 0.5900879502296448, "learning_rate": 1.6386508469726215e-05, "loss": 0.9209, "step": 1127 }, { "epoch": 0.9045709703287891, "grad_norm": 0.5943062901496887, "learning_rate": 1.637984277674008e-05, "loss": 0.9232, "step": 1128 }, { "epoch": 0.9053728949478749, "grad_norm": 0.6167227029800415, "learning_rate": 1.6373172299930553e-05, "loss": 0.9191, "step": 1129 }, { "epoch": 0.9061748195669607, "grad_norm": 0.5882411003112793, "learning_rate": 1.636649704429939e-05, "loss": 0.8991, "step": 1130 }, { "epoch": 0.9069767441860465, "grad_norm": 0.5918989777565002, "learning_rate": 1.6359817014851925e-05, "loss": 0.9584, "step": 1131 }, { "epoch": 0.9077786688051324, "grad_norm": 0.5983660817146301, "learning_rate": 1.635313221659707e-05, "loss": 0.9231, "step": 1132 }, { "epoch": 0.9085805934242182, "grad_norm": 0.5728667378425598, "learning_rate": 1.6346442654547314e-05, "loss": 0.9037, "step": 1133 }, { "epoch": 0.909382518043304, "grad_norm": 0.6043873429298401, "learning_rate": 1.633974833371872e-05, "loss": 0.8928, "step": 1134 }, { "epoch": 0.9101844426623897, "grad_norm": 0.604079008102417, "learning_rate": 1.633304925913092e-05, "loss": 0.9516, "step": 1135 }, { "epoch": 0.9109863672814755, "grad_norm": 0.611089825630188, "learning_rate": 1.6326345435807104e-05, "loss": 0.942, "step": 1136 }, { "epoch": 0.9117882919005613, "grad_norm": 0.61098313331604, "learning_rate": 1.631963686877403e-05, "loss": 0.9315, "step": 1137 }, { "epoch": 0.9125902165196471, "grad_norm": 0.597474217414856, "learning_rate": 1.6312923563062008e-05, "loss": 0.8947, "step": 1138 }, { "epoch": 0.9133921411387329, "grad_norm": 0.6015665531158447, "learning_rate": 1.6306205523704903e-05, "loss": 0.9241, "step": 1139 }, { "epoch": 0.9141940657578188, "grad_norm": 0.559998095035553, "learning_rate": 1.6299482755740132e-05, "loss": 0.9079, "step": 1140 }, { "epoch": 0.9149959903769046, "grad_norm": 0.5764912962913513, "learning_rate": 1.6292755264208656e-05, "loss": 0.9465, "step": 1141 }, { "epoch": 0.9157979149959904, "grad_norm": 0.6615179181098938, "learning_rate": 1.6286023054154973e-05, "loss": 0.9198, "step": 1142 }, { "epoch": 0.9165998396150762, "grad_norm": 0.6102979183197021, "learning_rate": 1.6279286130627124e-05, "loss": 0.9332, "step": 1143 }, { "epoch": 0.917401764234162, "grad_norm": 0.5873243808746338, "learning_rate": 1.627254449867669e-05, "loss": 0.9494, "step": 1144 }, { "epoch": 0.9182036888532478, "grad_norm": 0.5706033110618591, "learning_rate": 1.626579816335877e-05, "loss": 0.8697, "step": 1145 }, { "epoch": 0.9190056134723336, "grad_norm": 0.6418749094009399, "learning_rate": 1.6259047129731996e-05, "loss": 0.9287, "step": 1146 }, { "epoch": 0.9198075380914194, "grad_norm": 0.6301258206367493, "learning_rate": 1.6252291402858525e-05, "loss": 0.9095, "step": 1147 }, { "epoch": 0.9206094627105053, "grad_norm": 0.6077032685279846, "learning_rate": 1.6245530987804034e-05, "loss": 0.9062, "step": 1148 }, { "epoch": 0.921411387329591, "grad_norm": 0.6020398139953613, "learning_rate": 1.6238765889637704e-05, "loss": 0.9294, "step": 1149 }, { "epoch": 0.9222133119486768, "grad_norm": 0.6611399054527283, "learning_rate": 1.6231996113432242e-05, "loss": 0.9235, "step": 1150 }, { "epoch": 0.9230152365677626, "grad_norm": 0.6157788634300232, "learning_rate": 1.6225221664263857e-05, "loss": 0.9033, "step": 1151 }, { "epoch": 0.9238171611868484, "grad_norm": 0.59830242395401, "learning_rate": 1.6218442547212265e-05, "loss": 0.8995, "step": 1152 }, { "epoch": 0.9246190858059342, "grad_norm": 0.626473069190979, "learning_rate": 1.6211658767360667e-05, "loss": 0.9215, "step": 1153 }, { "epoch": 0.92542101042502, "grad_norm": 0.5951080322265625, "learning_rate": 1.620487032979578e-05, "loss": 0.9305, "step": 1154 }, { "epoch": 0.9262229350441058, "grad_norm": 0.6206769943237305, "learning_rate": 1.619807723960781e-05, "loss": 0.9093, "step": 1155 }, { "epoch": 0.9270248596631917, "grad_norm": 0.6188283562660217, "learning_rate": 1.619127950189044e-05, "loss": 0.9339, "step": 1156 }, { "epoch": 0.9278267842822775, "grad_norm": 0.5791252851486206, "learning_rate": 1.6184477121740848e-05, "loss": 0.8635, "step": 1157 }, { "epoch": 0.9286287089013633, "grad_norm": 0.5923981666564941, "learning_rate": 1.6177670104259694e-05, "loss": 0.8821, "step": 1158 }, { "epoch": 0.9294306335204491, "grad_norm": 0.5693655610084534, "learning_rate": 1.61708584545511e-05, "loss": 0.8967, "step": 1159 }, { "epoch": 0.9302325581395349, "grad_norm": 0.6008737087249756, "learning_rate": 1.616404217772269e-05, "loss": 0.9091, "step": 1160 }, { "epoch": 0.9310344827586207, "grad_norm": 0.5927824974060059, "learning_rate": 1.6157221278885523e-05, "loss": 0.9188, "step": 1161 }, { "epoch": 0.9318364073777065, "grad_norm": 0.6398462653160095, "learning_rate": 1.615039576315415e-05, "loss": 0.901, "step": 1162 }, { "epoch": 0.9326383319967922, "grad_norm": 0.6090993285179138, "learning_rate": 1.6143565635646575e-05, "loss": 0.9274, "step": 1163 }, { "epoch": 0.9334402566158782, "grad_norm": 0.6457433700561523, "learning_rate": 1.6136730901484267e-05, "loss": 0.9281, "step": 1164 }, { "epoch": 0.9342421812349639, "grad_norm": 0.627136766910553, "learning_rate": 1.612989156579213e-05, "loss": 0.9133, "step": 1165 }, { "epoch": 0.9350441058540497, "grad_norm": 0.6137925982475281, "learning_rate": 1.612304763369853e-05, "loss": 0.8857, "step": 1166 }, { "epoch": 0.9358460304731355, "grad_norm": 0.6183207035064697, "learning_rate": 1.6116199110335295e-05, "loss": 0.9099, "step": 1167 }, { "epoch": 0.9366479550922213, "grad_norm": 0.6730118989944458, "learning_rate": 1.610934600083767e-05, "loss": 0.9584, "step": 1168 }, { "epoch": 0.9374498797113071, "grad_norm": 0.6072790622711182, "learning_rate": 1.610248831034435e-05, "loss": 0.9138, "step": 1169 }, { "epoch": 0.9382518043303929, "grad_norm": 0.6239385008811951, "learning_rate": 1.609562604399747e-05, "loss": 0.938, "step": 1170 }, { "epoch": 0.9390537289494787, "grad_norm": 0.6454656720161438, "learning_rate": 1.6088759206942586e-05, "loss": 0.8756, "step": 1171 }, { "epoch": 0.9398556535685646, "grad_norm": 0.6884939074516296, "learning_rate": 1.6081887804328687e-05, "loss": 0.9057, "step": 1172 }, { "epoch": 0.9406575781876504, "grad_norm": 0.6258487105369568, "learning_rate": 1.607501184130819e-05, "loss": 0.9183, "step": 1173 }, { "epoch": 0.9414595028067362, "grad_norm": 0.576998770236969, "learning_rate": 1.606813132303692e-05, "loss": 0.9145, "step": 1174 }, { "epoch": 0.942261427425822, "grad_norm": 0.6313689351081848, "learning_rate": 1.606124625467413e-05, "loss": 0.9295, "step": 1175 }, { "epoch": 0.9430633520449078, "grad_norm": 0.6496961116790771, "learning_rate": 1.605435664138247e-05, "loss": 0.8613, "step": 1176 }, { "epoch": 0.9438652766639936, "grad_norm": 0.6497421264648438, "learning_rate": 1.6047462488328017e-05, "loss": 0.942, "step": 1177 }, { "epoch": 0.9446672012830793, "grad_norm": 0.6210437417030334, "learning_rate": 1.604056380068023e-05, "loss": 0.9454, "step": 1178 }, { "epoch": 0.9454691259021651, "grad_norm": 0.5845626592636108, "learning_rate": 1.6033660583611988e-05, "loss": 0.8651, "step": 1179 }, { "epoch": 0.946271050521251, "grad_norm": 0.643485426902771, "learning_rate": 1.6026752842299564e-05, "loss": 0.963, "step": 1180 }, { "epoch": 0.9470729751403368, "grad_norm": 0.6154069900512695, "learning_rate": 1.6019840581922604e-05, "loss": 0.9274, "step": 1181 }, { "epoch": 0.9478748997594226, "grad_norm": 0.6406455039978027, "learning_rate": 1.6012923807664164e-05, "loss": 0.936, "step": 1182 }, { "epoch": 0.9486768243785084, "grad_norm": 0.6149894595146179, "learning_rate": 1.6006002524710674e-05, "loss": 0.8924, "step": 1183 }, { "epoch": 0.9494787489975942, "grad_norm": 0.5921556353569031, "learning_rate": 1.599907673825195e-05, "loss": 0.9235, "step": 1184 }, { "epoch": 0.95028067361668, "grad_norm": 0.6019387245178223, "learning_rate": 1.599214645348118e-05, "loss": 0.8885, "step": 1185 }, { "epoch": 0.9510825982357658, "grad_norm": 0.6550159454345703, "learning_rate": 1.5985211675594933e-05, "loss": 0.9241, "step": 1186 }, { "epoch": 0.9518845228548516, "grad_norm": 0.639525294303894, "learning_rate": 1.5978272409793136e-05, "loss": 0.965, "step": 1187 }, { "epoch": 0.9526864474739375, "grad_norm": 0.6293081045150757, "learning_rate": 1.597132866127909e-05, "loss": 0.8969, "step": 1188 }, { "epoch": 0.9534883720930233, "grad_norm": 0.6015990376472473, "learning_rate": 1.5964380435259448e-05, "loss": 0.869, "step": 1189 }, { "epoch": 0.9542902967121091, "grad_norm": 0.5990379452705383, "learning_rate": 1.595742773694424e-05, "loss": 0.9152, "step": 1190 }, { "epoch": 0.9550922213311949, "grad_norm": 0.6142351031303406, "learning_rate": 1.5950470571546818e-05, "loss": 0.9237, "step": 1191 }, { "epoch": 0.9558941459502807, "grad_norm": 0.6138330101966858, "learning_rate": 1.5943508944283916e-05, "loss": 0.8922, "step": 1192 }, { "epoch": 0.9566960705693665, "grad_norm": 0.6090849041938782, "learning_rate": 1.5936542860375594e-05, "loss": 0.9292, "step": 1193 }, { "epoch": 0.9574979951884522, "grad_norm": 0.6265794634819031, "learning_rate": 1.592957232504526e-05, "loss": 0.8901, "step": 1194 }, { "epoch": 0.958299919807538, "grad_norm": 0.6023232936859131, "learning_rate": 1.5922597343519654e-05, "loss": 0.8742, "step": 1195 }, { "epoch": 0.9591018444266239, "grad_norm": 0.5652976632118225, "learning_rate": 1.591561792102886e-05, "loss": 0.8904, "step": 1196 }, { "epoch": 0.9599037690457097, "grad_norm": 0.6332113742828369, "learning_rate": 1.5908634062806285e-05, "loss": 0.9088, "step": 1197 }, { "epoch": 0.9607056936647955, "grad_norm": 0.6024392247200012, "learning_rate": 1.5901645774088662e-05, "loss": 0.8891, "step": 1198 }, { "epoch": 0.9615076182838813, "grad_norm": 0.6040472984313965, "learning_rate": 1.5894653060116053e-05, "loss": 0.9047, "step": 1199 }, { "epoch": 0.9623095429029671, "grad_norm": 0.5790461301803589, "learning_rate": 1.5887655926131832e-05, "loss": 0.9191, "step": 1200 }, { "epoch": 0.9631114675220529, "grad_norm": 0.6125912666320801, "learning_rate": 1.588065437738268e-05, "loss": 0.9197, "step": 1201 }, { "epoch": 0.9639133921411387, "grad_norm": 0.5846207141876221, "learning_rate": 1.587364841911861e-05, "loss": 0.879, "step": 1202 }, { "epoch": 0.9647153167602245, "grad_norm": 0.5989664196968079, "learning_rate": 1.5866638056592916e-05, "loss": 0.9328, "step": 1203 }, { "epoch": 0.9655172413793104, "grad_norm": 0.5732477307319641, "learning_rate": 1.5859623295062215e-05, "loss": 0.8551, "step": 1204 }, { "epoch": 0.9663191659983962, "grad_norm": 0.6074816584587097, "learning_rate": 1.585260413978641e-05, "loss": 0.9435, "step": 1205 }, { "epoch": 0.967121090617482, "grad_norm": 0.6296406388282776, "learning_rate": 1.5845580596028697e-05, "loss": 0.9607, "step": 1206 }, { "epoch": 0.9679230152365678, "grad_norm": 3.621976375579834, "learning_rate": 1.583855266905558e-05, "loss": 0.9418, "step": 1207 }, { "epoch": 0.9687249398556536, "grad_norm": 0.6303392648696899, "learning_rate": 1.5831520364136835e-05, "loss": 0.9094, "step": 1208 }, { "epoch": 0.9695268644747393, "grad_norm": 0.5891981720924377, "learning_rate": 1.5824483686545517e-05, "loss": 0.9088, "step": 1209 }, { "epoch": 0.9703287890938251, "grad_norm": 0.7623486518859863, "learning_rate": 1.581744264155797e-05, "loss": 0.9175, "step": 1210 }, { "epoch": 0.9711307137129109, "grad_norm": 0.5808781385421753, "learning_rate": 1.5810397234453816e-05, "loss": 0.8938, "step": 1211 }, { "epoch": 0.9719326383319968, "grad_norm": 0.5807702541351318, "learning_rate": 1.5803347470515933e-05, "loss": 0.9222, "step": 1212 }, { "epoch": 0.9727345629510826, "grad_norm": 0.5763218998908997, "learning_rate": 1.5796293355030476e-05, "loss": 0.909, "step": 1213 }, { "epoch": 0.9735364875701684, "grad_norm": 0.5519773364067078, "learning_rate": 1.578923489328686e-05, "loss": 0.8921, "step": 1214 }, { "epoch": 0.9743384121892542, "grad_norm": 0.579440176486969, "learning_rate": 1.5782172090577762e-05, "loss": 0.887, "step": 1215 }, { "epoch": 0.97514033680834, "grad_norm": 0.5852498412132263, "learning_rate": 1.5775104952199113e-05, "loss": 0.8632, "step": 1216 }, { "epoch": 0.9759422614274258, "grad_norm": 0.606121301651001, "learning_rate": 1.5768033483450088e-05, "loss": 0.9183, "step": 1217 }, { "epoch": 0.9767441860465116, "grad_norm": 0.6091791987419128, "learning_rate": 1.5760957689633127e-05, "loss": 0.9547, "step": 1218 }, { "epoch": 0.9775461106655974, "grad_norm": 0.6013908386230469, "learning_rate": 1.575387757605389e-05, "loss": 0.8725, "step": 1219 }, { "epoch": 0.9783480352846833, "grad_norm": 0.5744641423225403, "learning_rate": 1.5746793148021292e-05, "loss": 0.9157, "step": 1220 }, { "epoch": 0.9791499599037691, "grad_norm": 0.6412164568901062, "learning_rate": 1.5739704410847475e-05, "loss": 0.9291, "step": 1221 }, { "epoch": 0.9799518845228549, "grad_norm": 0.5992948412895203, "learning_rate": 1.5732611369847818e-05, "loss": 0.941, "step": 1222 }, { "epoch": 0.9807538091419407, "grad_norm": 0.613304078578949, "learning_rate": 1.5725514030340926e-05, "loss": 0.8843, "step": 1223 }, { "epoch": 0.9815557337610264, "grad_norm": 0.5996186137199402, "learning_rate": 1.5718412397648627e-05, "loss": 0.9606, "step": 1224 }, { "epoch": 0.9823576583801122, "grad_norm": 0.8090897798538208, "learning_rate": 1.5711306477095962e-05, "loss": 0.8808, "step": 1225 }, { "epoch": 0.983159582999198, "grad_norm": 0.5831454396247864, "learning_rate": 1.5704196274011198e-05, "loss": 0.9475, "step": 1226 }, { "epoch": 0.9839615076182838, "grad_norm": 0.6127690672874451, "learning_rate": 1.56970817937258e-05, "loss": 0.9014, "step": 1227 }, { "epoch": 0.9847634322373697, "grad_norm": 0.6199975609779358, "learning_rate": 1.5689963041574453e-05, "loss": 0.9017, "step": 1228 }, { "epoch": 0.9855653568564555, "grad_norm": 0.618943452835083, "learning_rate": 1.568284002289504e-05, "loss": 0.9638, "step": 1229 }, { "epoch": 0.9863672814755413, "grad_norm": 0.5724090337753296, "learning_rate": 1.567571274302864e-05, "loss": 0.9033, "step": 1230 }, { "epoch": 0.9871692060946271, "grad_norm": 0.6071799397468567, "learning_rate": 1.5668581207319536e-05, "loss": 0.8814, "step": 1231 }, { "epoch": 0.9879711307137129, "grad_norm": 0.6167645454406738, "learning_rate": 1.5661445421115188e-05, "loss": 0.9195, "step": 1232 }, { "epoch": 0.9887730553327987, "grad_norm": 0.5625812411308289, "learning_rate": 1.5654305389766257e-05, "loss": 0.8856, "step": 1233 }, { "epoch": 0.9895749799518845, "grad_norm": 0.6261424422264099, "learning_rate": 1.5647161118626583e-05, "loss": 0.8532, "step": 1234 }, { "epoch": 0.9903769045709703, "grad_norm": 0.5534703135490417, "learning_rate": 1.5640012613053176e-05, "loss": 0.9229, "step": 1235 }, { "epoch": 0.9911788291900562, "grad_norm": 0.5943836569786072, "learning_rate": 1.563285987840624e-05, "loss": 0.9122, "step": 1236 }, { "epoch": 0.991980753809142, "grad_norm": 0.5869540572166443, "learning_rate": 1.562570292004913e-05, "loss": 0.8596, "step": 1237 }, { "epoch": 0.9927826784282278, "grad_norm": 0.5831838846206665, "learning_rate": 1.561854174334838e-05, "loss": 0.8861, "step": 1238 }, { "epoch": 0.9935846030473136, "grad_norm": 0.6431090831756592, "learning_rate": 1.5611376353673686e-05, "loss": 0.9125, "step": 1239 }, { "epoch": 0.9943865276663993, "grad_norm": 0.5620553493499756, "learning_rate": 1.56042067563979e-05, "loss": 0.9392, "step": 1240 }, { "epoch": 0.9951884522854851, "grad_norm": 0.5939339399337769, "learning_rate": 1.5597032956897028e-05, "loss": 0.892, "step": 1241 }, { "epoch": 0.9959903769045709, "grad_norm": 0.6007006764411926, "learning_rate": 1.558985496055023e-05, "loss": 0.9498, "step": 1242 }, { "epoch": 0.9967923015236567, "grad_norm": 0.5932491421699524, "learning_rate": 1.5582672772739815e-05, "loss": 0.8872, "step": 1243 }, { "epoch": 0.9975942261427426, "grad_norm": 0.6062937378883362, "learning_rate": 1.5575486398851232e-05, "loss": 0.9013, "step": 1244 }, { "epoch": 0.9983961507618284, "grad_norm": 0.6182659268379211, "learning_rate": 1.5568295844273064e-05, "loss": 0.8867, "step": 1245 }, { "epoch": 0.9991980753809142, "grad_norm": 0.6514543294906616, "learning_rate": 1.5561101114397043e-05, "loss": 0.9485, "step": 1246 }, { "epoch": 1.0, "grad_norm": 0.610464870929718, "learning_rate": 1.555390221461801e-05, "loss": 0.9416, "step": 1247 }, { "epoch": 1.0008019246190858, "grad_norm": 0.5633330941200256, "learning_rate": 1.554669915033395e-05, "loss": 0.7783, "step": 1248 }, { "epoch": 1.0016038492381716, "grad_norm": 0.5994012951850891, "learning_rate": 1.553949192694597e-05, "loss": 0.7874, "step": 1249 }, { "epoch": 1.0024057738572574, "grad_norm": 0.6167377829551697, "learning_rate": 1.553228054985829e-05, "loss": 0.772, "step": 1250 }, { "epoch": 1.0032076984763432, "grad_norm": 0.6523487567901611, "learning_rate": 1.5525065024478245e-05, "loss": 0.7683, "step": 1251 }, { "epoch": 1.004009623095429, "grad_norm": 0.6528876423835754, "learning_rate": 1.5517845356216283e-05, "loss": 0.779, "step": 1252 }, { "epoch": 1.0048115477145148, "grad_norm": 0.6447154879570007, "learning_rate": 1.551062155048595e-05, "loss": 0.7917, "step": 1253 }, { "epoch": 1.0056134723336005, "grad_norm": 0.6762365698814392, "learning_rate": 1.550339361270391e-05, "loss": 0.7961, "step": 1254 }, { "epoch": 1.0064153969526863, "grad_norm": 0.6918103098869324, "learning_rate": 1.5496161548289918e-05, "loss": 0.764, "step": 1255 }, { "epoch": 1.0072173215717724, "grad_norm": 0.6805532574653625, "learning_rate": 1.5488925362666818e-05, "loss": 0.7675, "step": 1256 }, { "epoch": 1.0080192461908581, "grad_norm": 0.698422908782959, "learning_rate": 1.5481685061260547e-05, "loss": 0.7496, "step": 1257 }, { "epoch": 1.008821170809944, "grad_norm": 0.6384891271591187, "learning_rate": 1.5474440649500132e-05, "loss": 0.8026, "step": 1258 }, { "epoch": 1.0096230954290297, "grad_norm": 0.6521022319793701, "learning_rate": 1.5467192132817678e-05, "loss": 0.7986, "step": 1259 }, { "epoch": 1.0104250200481155, "grad_norm": 0.6222298741340637, "learning_rate": 1.5459939516648374e-05, "loss": 0.7312, "step": 1260 }, { "epoch": 1.0112269446672013, "grad_norm": 0.6429084539413452, "learning_rate": 1.5452682806430473e-05, "loss": 0.7311, "step": 1261 }, { "epoch": 1.012028869286287, "grad_norm": 0.7085431814193726, "learning_rate": 1.544542200760531e-05, "loss": 0.8077, "step": 1262 }, { "epoch": 1.012830793905373, "grad_norm": 0.6494969725608826, "learning_rate": 1.543815712561727e-05, "loss": 0.7795, "step": 1263 }, { "epoch": 1.0136327185244587, "grad_norm": 0.650427520275116, "learning_rate": 1.5430888165913814e-05, "loss": 0.7784, "step": 1264 }, { "epoch": 1.0144346431435445, "grad_norm": 0.674248456954956, "learning_rate": 1.5423615133945457e-05, "loss": 0.7681, "step": 1265 }, { "epoch": 1.0152365677626303, "grad_norm": 0.6563466191291809, "learning_rate": 1.5416338035165766e-05, "loss": 0.7758, "step": 1266 }, { "epoch": 1.016038492381716, "grad_norm": 0.6918492317199707, "learning_rate": 1.5409056875031355e-05, "loss": 0.7597, "step": 1267 }, { "epoch": 1.0168404170008019, "grad_norm": 0.6418508291244507, "learning_rate": 1.5401771659001885e-05, "loss": 0.7596, "step": 1268 }, { "epoch": 1.0176423416198876, "grad_norm": 0.6073652505874634, "learning_rate": 1.5394482392540066e-05, "loss": 0.7344, "step": 1269 }, { "epoch": 1.0184442662389734, "grad_norm": 0.7088474631309509, "learning_rate": 1.5387189081111628e-05, "loss": 0.7876, "step": 1270 }, { "epoch": 1.0192461908580595, "grad_norm": 0.6955075263977051, "learning_rate": 1.5379891730185352e-05, "loss": 0.7867, "step": 1271 }, { "epoch": 1.0200481154771452, "grad_norm": 0.6917376518249512, "learning_rate": 1.537259034523304e-05, "loss": 0.8059, "step": 1272 }, { "epoch": 1.020850040096231, "grad_norm": 0.6557261943817139, "learning_rate": 1.5365284931729513e-05, "loss": 0.7737, "step": 1273 }, { "epoch": 1.0216519647153168, "grad_norm": 0.6700888872146606, "learning_rate": 1.5357975495152628e-05, "loss": 0.7509, "step": 1274 }, { "epoch": 1.0224538893344026, "grad_norm": 0.6783604025840759, "learning_rate": 1.5350662040983236e-05, "loss": 0.8075, "step": 1275 }, { "epoch": 1.0232558139534884, "grad_norm": 0.6425763368606567, "learning_rate": 1.5343344574705234e-05, "loss": 0.7346, "step": 1276 }, { "epoch": 1.0240577385725742, "grad_norm": 0.6592726707458496, "learning_rate": 1.5336023101805486e-05, "loss": 0.785, "step": 1277 }, { "epoch": 1.02485966319166, "grad_norm": 0.6982232332229614, "learning_rate": 1.5328697627773898e-05, "loss": 0.7834, "step": 1278 }, { "epoch": 1.0256615878107458, "grad_norm": 0.6378937363624573, "learning_rate": 1.5321368158103346e-05, "loss": 0.7505, "step": 1279 }, { "epoch": 1.0264635124298316, "grad_norm": 0.6628844141960144, "learning_rate": 1.531403469828973e-05, "loss": 0.7627, "step": 1280 }, { "epoch": 1.0272654370489174, "grad_norm": 0.6596646904945374, "learning_rate": 1.5306697253831914e-05, "loss": 0.7615, "step": 1281 }, { "epoch": 1.0280673616680032, "grad_norm": 0.6652581095695496, "learning_rate": 1.5299355830231776e-05, "loss": 0.7921, "step": 1282 }, { "epoch": 1.028869286287089, "grad_norm": 0.6460443735122681, "learning_rate": 1.5292010432994162e-05, "loss": 0.7812, "step": 1283 }, { "epoch": 1.0296712109061747, "grad_norm": 0.6455625295639038, "learning_rate": 1.5284661067626897e-05, "loss": 0.7718, "step": 1284 }, { "epoch": 1.0304731355252605, "grad_norm": 0.653901994228363, "learning_rate": 1.5277307739640787e-05, "loss": 0.7546, "step": 1285 }, { "epoch": 1.0312750601443463, "grad_norm": 0.6660287976264954, "learning_rate": 1.526995045454961e-05, "loss": 0.7652, "step": 1286 }, { "epoch": 1.0320769847634321, "grad_norm": 0.6647891998291016, "learning_rate": 1.5262589217870106e-05, "loss": 0.7771, "step": 1287 }, { "epoch": 1.0328789093825181, "grad_norm": 0.6715826988220215, "learning_rate": 1.5255224035121986e-05, "loss": 0.7632, "step": 1288 }, { "epoch": 1.033680834001604, "grad_norm": 0.6524284482002258, "learning_rate": 1.524785491182791e-05, "loss": 0.8042, "step": 1289 }, { "epoch": 1.0344827586206897, "grad_norm": 0.7093574404716492, "learning_rate": 1.5240481853513495e-05, "loss": 0.8175, "step": 1290 }, { "epoch": 1.0352846832397755, "grad_norm": 0.6630702018737793, "learning_rate": 1.523310486570732e-05, "loss": 0.8186, "step": 1291 }, { "epoch": 1.0360866078588613, "grad_norm": 0.669937014579773, "learning_rate": 1.5225723953940896e-05, "loss": 0.7712, "step": 1292 }, { "epoch": 1.036888532477947, "grad_norm": 0.6852511167526245, "learning_rate": 1.5218339123748682e-05, "loss": 0.7704, "step": 1293 }, { "epoch": 1.037690457097033, "grad_norm": 0.6196748614311218, "learning_rate": 1.5210950380668074e-05, "loss": 0.7617, "step": 1294 }, { "epoch": 1.0384923817161187, "grad_norm": 0.6314553618431091, "learning_rate": 1.5203557730239408e-05, "loss": 0.7316, "step": 1295 }, { "epoch": 1.0392943063352045, "grad_norm": 0.6329060196876526, "learning_rate": 1.5196161178005941e-05, "loss": 0.7706, "step": 1296 }, { "epoch": 1.0400962309542903, "grad_norm": 0.642294704914093, "learning_rate": 1.5188760729513865e-05, "loss": 0.7561, "step": 1297 }, { "epoch": 1.040898155573376, "grad_norm": 0.6721711158752441, "learning_rate": 1.5181356390312279e-05, "loss": 0.8194, "step": 1298 }, { "epoch": 1.0417000801924619, "grad_norm": 0.6798752546310425, "learning_rate": 1.5173948165953216e-05, "loss": 0.7759, "step": 1299 }, { "epoch": 1.0425020048115476, "grad_norm": 0.6321367025375366, "learning_rate": 1.5166536061991615e-05, "loss": 0.7913, "step": 1300 }, { "epoch": 1.0433039294306334, "grad_norm": 0.6367747783660889, "learning_rate": 1.5159120083985319e-05, "loss": 0.751, "step": 1301 }, { "epoch": 1.0441058540497192, "grad_norm": 0.6426526308059692, "learning_rate": 1.5151700237495087e-05, "loss": 0.7406, "step": 1302 }, { "epoch": 1.0449077786688052, "grad_norm": 0.6288602352142334, "learning_rate": 1.5144276528084566e-05, "loss": 0.7382, "step": 1303 }, { "epoch": 1.045709703287891, "grad_norm": 0.6340166330337524, "learning_rate": 1.513684896132031e-05, "loss": 0.7271, "step": 1304 }, { "epoch": 1.0465116279069768, "grad_norm": 0.6427846550941467, "learning_rate": 1.5129417542771761e-05, "loss": 0.7534, "step": 1305 }, { "epoch": 1.0473135525260626, "grad_norm": 0.6341578960418701, "learning_rate": 1.512198227801125e-05, "loss": 0.73, "step": 1306 }, { "epoch": 1.0481154771451484, "grad_norm": 0.6635767817497253, "learning_rate": 1.5114543172613995e-05, "loss": 0.7734, "step": 1307 }, { "epoch": 1.0489174017642342, "grad_norm": 0.6806950569152832, "learning_rate": 1.5107100232158085e-05, "loss": 0.7465, "step": 1308 }, { "epoch": 1.04971932638332, "grad_norm": 0.639504075050354, "learning_rate": 1.5099653462224492e-05, "loss": 0.7822, "step": 1309 }, { "epoch": 1.0505212510024058, "grad_norm": 0.6781004667282104, "learning_rate": 1.5092202868397056e-05, "loss": 0.7742, "step": 1310 }, { "epoch": 1.0513231756214916, "grad_norm": 0.6971407532691956, "learning_rate": 1.5084748456262487e-05, "loss": 0.7638, "step": 1311 }, { "epoch": 1.0521251002405774, "grad_norm": 0.6818044781684875, "learning_rate": 1.5077290231410367e-05, "loss": 0.8214, "step": 1312 }, { "epoch": 1.0529270248596632, "grad_norm": 0.6158934831619263, "learning_rate": 1.506982819943311e-05, "loss": 0.7426, "step": 1313 }, { "epoch": 1.053728949478749, "grad_norm": 0.6084417700767517, "learning_rate": 1.5062362365926012e-05, "loss": 0.7396, "step": 1314 }, { "epoch": 1.0545308740978347, "grad_norm": 0.6691953539848328, "learning_rate": 1.5054892736487206e-05, "loss": 0.7497, "step": 1315 }, { "epoch": 1.0553327987169205, "grad_norm": 0.6629313826560974, "learning_rate": 1.504741931671768e-05, "loss": 0.773, "step": 1316 }, { "epoch": 1.0561347233360063, "grad_norm": 0.641639232635498, "learning_rate": 1.503994211222125e-05, "loss": 0.7542, "step": 1317 }, { "epoch": 1.0569366479550921, "grad_norm": 0.6214974522590637, "learning_rate": 1.5032461128604583e-05, "loss": 0.7645, "step": 1318 }, { "epoch": 1.057738572574178, "grad_norm": 0.6951003670692444, "learning_rate": 1.5024976371477175e-05, "loss": 0.7688, "step": 1319 }, { "epoch": 1.058540497193264, "grad_norm": 0.641646683216095, "learning_rate": 1.5017487846451353e-05, "loss": 0.7435, "step": 1320 }, { "epoch": 1.0593424218123497, "grad_norm": 0.6781443953514099, "learning_rate": 1.5009995559142268e-05, "loss": 0.7606, "step": 1321 }, { "epoch": 1.0601443464314355, "grad_norm": 0.6722328066825867, "learning_rate": 1.5002499515167891e-05, "loss": 0.7608, "step": 1322 }, { "epoch": 1.0609462710505213, "grad_norm": 0.6786977052688599, "learning_rate": 1.4994999720149008e-05, "loss": 0.7563, "step": 1323 }, { "epoch": 1.061748195669607, "grad_norm": 0.6650587320327759, "learning_rate": 1.4987496179709226e-05, "loss": 0.7366, "step": 1324 }, { "epoch": 1.062550120288693, "grad_norm": 0.6645624041557312, "learning_rate": 1.4979988899474955e-05, "loss": 0.7738, "step": 1325 }, { "epoch": 1.0633520449077787, "grad_norm": 0.622387170791626, "learning_rate": 1.4972477885075404e-05, "loss": 0.7404, "step": 1326 }, { "epoch": 1.0641539695268645, "grad_norm": 0.6579604148864746, "learning_rate": 1.4964963142142597e-05, "loss": 0.7977, "step": 1327 }, { "epoch": 1.0649558941459503, "grad_norm": 0.6148852109909058, "learning_rate": 1.4957444676311333e-05, "loss": 0.7356, "step": 1328 }, { "epoch": 1.065757818765036, "grad_norm": 0.7013448476791382, "learning_rate": 1.494992249321922e-05, "loss": 0.792, "step": 1329 }, { "epoch": 1.0665597433841218, "grad_norm": 0.6262637376785278, "learning_rate": 1.4942396598506643e-05, "loss": 0.7947, "step": 1330 }, { "epoch": 1.0673616680032076, "grad_norm": 0.6252999901771545, "learning_rate": 1.4934866997816779e-05, "loss": 0.756, "step": 1331 }, { "epoch": 1.0681635926222934, "grad_norm": 0.6794742345809937, "learning_rate": 1.4927333696795581e-05, "loss": 0.7121, "step": 1332 }, { "epoch": 1.0689655172413792, "grad_norm": 0.6507592797279358, "learning_rate": 1.4919796701091767e-05, "loss": 0.7567, "step": 1333 }, { "epoch": 1.069767441860465, "grad_norm": 0.6619201898574829, "learning_rate": 1.4912256016356837e-05, "loss": 0.7232, "step": 1334 }, { "epoch": 1.070569366479551, "grad_norm": 0.628643274307251, "learning_rate": 1.4904711648245053e-05, "loss": 0.7477, "step": 1335 }, { "epoch": 1.0713712910986368, "grad_norm": 0.6922639608383179, "learning_rate": 1.4897163602413438e-05, "loss": 0.8047, "step": 1336 }, { "epoch": 1.0721732157177226, "grad_norm": 0.6198031306266785, "learning_rate": 1.4889611884521777e-05, "loss": 0.7624, "step": 1337 }, { "epoch": 1.0729751403368084, "grad_norm": 0.6809404492378235, "learning_rate": 1.4882056500232604e-05, "loss": 0.8046, "step": 1338 }, { "epoch": 1.0737770649558942, "grad_norm": 0.6459743976593018, "learning_rate": 1.4874497455211203e-05, "loss": 0.7846, "step": 1339 }, { "epoch": 1.07457898957498, "grad_norm": 0.6424413323402405, "learning_rate": 1.48669347551256e-05, "loss": 0.7692, "step": 1340 }, { "epoch": 1.0753809141940658, "grad_norm": 0.7114162445068359, "learning_rate": 1.4859368405646568e-05, "loss": 0.774, "step": 1341 }, { "epoch": 1.0761828388131516, "grad_norm": 0.658915638923645, "learning_rate": 1.485179841244762e-05, "loss": 0.7763, "step": 1342 }, { "epoch": 1.0769847634322374, "grad_norm": 0.6556907296180725, "learning_rate": 1.484422478120498e-05, "loss": 0.7907, "step": 1343 }, { "epoch": 1.0777866880513232, "grad_norm": 0.6900550723075867, "learning_rate": 1.4836647517597627e-05, "loss": 0.7479, "step": 1344 }, { "epoch": 1.078588612670409, "grad_norm": 0.7201621532440186, "learning_rate": 1.4829066627307246e-05, "loss": 0.7893, "step": 1345 }, { "epoch": 1.0793905372894947, "grad_norm": 0.6671075820922852, "learning_rate": 1.4821482116018251e-05, "loss": 0.7821, "step": 1346 }, { "epoch": 1.0801924619085805, "grad_norm": 0.7838239669799805, "learning_rate": 1.4813893989417762e-05, "loss": 0.7846, "step": 1347 }, { "epoch": 1.0809943865276663, "grad_norm": 0.6680654287338257, "learning_rate": 1.4806302253195617e-05, "loss": 0.7694, "step": 1348 }, { "epoch": 1.0817963111467521, "grad_norm": 0.6512035131454468, "learning_rate": 1.4798706913044357e-05, "loss": 0.7297, "step": 1349 }, { "epoch": 1.082598235765838, "grad_norm": 0.6682960391044617, "learning_rate": 1.4791107974659229e-05, "loss": 0.7998, "step": 1350 }, { "epoch": 1.0834001603849237, "grad_norm": 0.7098135948181152, "learning_rate": 1.4783505443738173e-05, "loss": 0.7683, "step": 1351 }, { "epoch": 1.0842020850040097, "grad_norm": 0.6800927519798279, "learning_rate": 1.4775899325981828e-05, "loss": 0.7553, "step": 1352 }, { "epoch": 1.0850040096230955, "grad_norm": 0.6061440110206604, "learning_rate": 1.476828962709352e-05, "loss": 0.772, "step": 1353 }, { "epoch": 1.0858059342421813, "grad_norm": 0.6747270226478577, "learning_rate": 1.4760676352779258e-05, "loss": 0.8075, "step": 1354 }, { "epoch": 1.086607858861267, "grad_norm": 0.6570102572441101, "learning_rate": 1.4753059508747738e-05, "loss": 0.8008, "step": 1355 }, { "epoch": 1.0874097834803529, "grad_norm": 0.6908283233642578, "learning_rate": 1.4745439100710326e-05, "loss": 0.7605, "step": 1356 }, { "epoch": 1.0882117080994387, "grad_norm": 0.6615950465202332, "learning_rate": 1.4737815134381066e-05, "loss": 0.746, "step": 1357 }, { "epoch": 1.0890136327185245, "grad_norm": 0.6627095937728882, "learning_rate": 1.4730187615476663e-05, "loss": 0.7629, "step": 1358 }, { "epoch": 1.0898155573376103, "grad_norm": 0.7005937099456787, "learning_rate": 1.4722556549716495e-05, "loss": 0.7637, "step": 1359 }, { "epoch": 1.090617481956696, "grad_norm": 0.7017346620559692, "learning_rate": 1.4714921942822593e-05, "loss": 0.7745, "step": 1360 }, { "epoch": 1.0914194065757818, "grad_norm": 0.6601778268814087, "learning_rate": 1.4707283800519647e-05, "loss": 0.7665, "step": 1361 }, { "epoch": 1.0922213311948676, "grad_norm": 0.6817474961280823, "learning_rate": 1.4699642128534994e-05, "loss": 0.8088, "step": 1362 }, { "epoch": 1.0930232558139534, "grad_norm": 0.6907531023025513, "learning_rate": 1.4691996932598621e-05, "loss": 0.7555, "step": 1363 }, { "epoch": 1.0938251804330392, "grad_norm": 0.7029712796211243, "learning_rate": 1.4684348218443159e-05, "loss": 0.7749, "step": 1364 }, { "epoch": 1.094627105052125, "grad_norm": 0.7028645873069763, "learning_rate": 1.4676695991803869e-05, "loss": 0.7931, "step": 1365 }, { "epoch": 1.0954290296712108, "grad_norm": 0.6735509634017944, "learning_rate": 1.4669040258418652e-05, "loss": 0.7675, "step": 1366 }, { "epoch": 1.0962309542902968, "grad_norm": 0.6408675909042358, "learning_rate": 1.4661381024028042e-05, "loss": 0.7434, "step": 1367 }, { "epoch": 1.0970328789093826, "grad_norm": 0.6668729186058044, "learning_rate": 1.4653718294375192e-05, "loss": 0.782, "step": 1368 }, { "epoch": 1.0978348035284684, "grad_norm": 0.7412964701652527, "learning_rate": 1.4646052075205874e-05, "loss": 0.7711, "step": 1369 }, { "epoch": 1.0986367281475542, "grad_norm": 0.6989220976829529, "learning_rate": 1.4638382372268484e-05, "loss": 0.7949, "step": 1370 }, { "epoch": 1.09943865276664, "grad_norm": 0.6390843987464905, "learning_rate": 1.4630709191314026e-05, "loss": 0.7403, "step": 1371 }, { "epoch": 1.1002405773857258, "grad_norm": 0.6512402892112732, "learning_rate": 1.462303253809611e-05, "loss": 0.7627, "step": 1372 }, { "epoch": 1.1010425020048116, "grad_norm": 0.6433535218238831, "learning_rate": 1.4615352418370958e-05, "loss": 0.7596, "step": 1373 }, { "epoch": 1.1018444266238974, "grad_norm": 0.6682513356208801, "learning_rate": 1.460766883789738e-05, "loss": 0.7647, "step": 1374 }, { "epoch": 1.1026463512429832, "grad_norm": 0.6825112104415894, "learning_rate": 1.4599981802436785e-05, "loss": 0.7692, "step": 1375 }, { "epoch": 1.103448275862069, "grad_norm": 0.6553147435188293, "learning_rate": 1.4592291317753178e-05, "loss": 0.7661, "step": 1376 }, { "epoch": 1.1042502004811547, "grad_norm": 0.688605010509491, "learning_rate": 1.4584597389613144e-05, "loss": 0.7896, "step": 1377 }, { "epoch": 1.1050521251002405, "grad_norm": 0.6833084225654602, "learning_rate": 1.4576900023785853e-05, "loss": 0.776, "step": 1378 }, { "epoch": 1.1058540497193263, "grad_norm": 0.6186316013336182, "learning_rate": 1.4569199226043051e-05, "loss": 0.7468, "step": 1379 }, { "epoch": 1.1066559743384121, "grad_norm": 0.6914650201797485, "learning_rate": 1.4561495002159066e-05, "loss": 0.7954, "step": 1380 }, { "epoch": 1.107457898957498, "grad_norm": 0.6579850912094116, "learning_rate": 1.4553787357910774e-05, "loss": 0.7775, "step": 1381 }, { "epoch": 1.1082598235765837, "grad_norm": 0.6452553868293762, "learning_rate": 1.4546076299077639e-05, "loss": 0.7601, "step": 1382 }, { "epoch": 1.1090617481956695, "grad_norm": 0.654435396194458, "learning_rate": 1.4538361831441672e-05, "loss": 0.7614, "step": 1383 }, { "epoch": 1.1098636728147555, "grad_norm": 0.6667703986167908, "learning_rate": 1.4530643960787445e-05, "loss": 0.7705, "step": 1384 }, { "epoch": 1.1106655974338413, "grad_norm": 0.6765471696853638, "learning_rate": 1.452292269290208e-05, "loss": 0.8051, "step": 1385 }, { "epoch": 1.111467522052927, "grad_norm": 0.633200466632843, "learning_rate": 1.4515198033575243e-05, "loss": 0.7119, "step": 1386 }, { "epoch": 1.1122694466720129, "grad_norm": 0.6916564702987671, "learning_rate": 1.4507469988599153e-05, "loss": 0.758, "step": 1387 }, { "epoch": 1.1130713712910987, "grad_norm": 0.6819466352462769, "learning_rate": 1.4499738563768557e-05, "loss": 0.7795, "step": 1388 }, { "epoch": 1.1138732959101845, "grad_norm": 0.6802613735198975, "learning_rate": 1.4492003764880744e-05, "loss": 0.8001, "step": 1389 }, { "epoch": 1.1146752205292703, "grad_norm": 0.6491445302963257, "learning_rate": 1.4484265597735525e-05, "loss": 0.7703, "step": 1390 }, { "epoch": 1.115477145148356, "grad_norm": 0.634710431098938, "learning_rate": 1.4476524068135246e-05, "loss": 0.7764, "step": 1391 }, { "epoch": 1.1162790697674418, "grad_norm": 0.7030678391456604, "learning_rate": 1.4468779181884762e-05, "loss": 0.7844, "step": 1392 }, { "epoch": 1.1170809943865276, "grad_norm": 0.6353664398193359, "learning_rate": 1.4461030944791464e-05, "loss": 0.7452, "step": 1393 }, { "epoch": 1.1178829190056134, "grad_norm": 0.696847677230835, "learning_rate": 1.4453279362665234e-05, "loss": 0.7598, "step": 1394 }, { "epoch": 1.1186848436246992, "grad_norm": 0.6439919471740723, "learning_rate": 1.4445524441318477e-05, "loss": 0.7681, "step": 1395 }, { "epoch": 1.119486768243785, "grad_norm": 0.6072260737419128, "learning_rate": 1.4437766186566094e-05, "loss": 0.7165, "step": 1396 }, { "epoch": 1.1202886928628708, "grad_norm": 0.6615963578224182, "learning_rate": 1.4430004604225493e-05, "loss": 0.757, "step": 1397 }, { "epoch": 1.1210906174819566, "grad_norm": 0.6312723159790039, "learning_rate": 1.4422239700116572e-05, "loss": 0.7481, "step": 1398 }, { "epoch": 1.1218925421010426, "grad_norm": 0.6664157509803772, "learning_rate": 1.4414471480061716e-05, "loss": 0.766, "step": 1399 }, { "epoch": 1.1226944667201284, "grad_norm": 0.6936686038970947, "learning_rate": 1.4406699949885803e-05, "loss": 0.8061, "step": 1400 }, { "epoch": 1.1234963913392142, "grad_norm": 0.6664496660232544, "learning_rate": 1.4398925115416196e-05, "loss": 0.7682, "step": 1401 }, { "epoch": 1.1242983159583, "grad_norm": 0.6195146441459656, "learning_rate": 1.4391146982482724e-05, "loss": 0.7158, "step": 1402 }, { "epoch": 1.1251002405773858, "grad_norm": 0.627631425857544, "learning_rate": 1.4383365556917701e-05, "loss": 0.7568, "step": 1403 }, { "epoch": 1.1259021651964716, "grad_norm": 0.6510641574859619, "learning_rate": 1.4375580844555898e-05, "loss": 0.7522, "step": 1404 }, { "epoch": 1.1267040898155574, "grad_norm": 0.6601302027702332, "learning_rate": 1.4367792851234566e-05, "loss": 0.7652, "step": 1405 }, { "epoch": 1.1275060144346432, "grad_norm": 0.629599928855896, "learning_rate": 1.4360001582793404e-05, "loss": 0.7619, "step": 1406 }, { "epoch": 1.128307939053729, "grad_norm": 0.7037693858146667, "learning_rate": 1.4352207045074567e-05, "loss": 0.7956, "step": 1407 }, { "epoch": 1.1291098636728147, "grad_norm": 0.6922396421432495, "learning_rate": 1.4344409243922667e-05, "loss": 0.7827, "step": 1408 }, { "epoch": 1.1299117882919005, "grad_norm": 0.6473610997200012, "learning_rate": 1.4336608185184765e-05, "loss": 0.7751, "step": 1409 }, { "epoch": 1.1307137129109863, "grad_norm": 0.7747618556022644, "learning_rate": 1.4328803874710358e-05, "loss": 0.7786, "step": 1410 }, { "epoch": 1.1315156375300721, "grad_norm": 0.6290801763534546, "learning_rate": 1.4320996318351378e-05, "loss": 0.7315, "step": 1411 }, { "epoch": 1.132317562149158, "grad_norm": 0.6735879778862, "learning_rate": 1.4313185521962205e-05, "loss": 0.796, "step": 1412 }, { "epoch": 1.1331194867682437, "grad_norm": 0.6589605212211609, "learning_rate": 1.4305371491399638e-05, "loss": 0.7771, "step": 1413 }, { "epoch": 1.1339214113873295, "grad_norm": 0.6696829199790955, "learning_rate": 1.4297554232522898e-05, "loss": 0.7968, "step": 1414 }, { "epoch": 1.1347233360064153, "grad_norm": 0.6309067010879517, "learning_rate": 1.4289733751193643e-05, "loss": 0.7734, "step": 1415 }, { "epoch": 1.1355252606255013, "grad_norm": 0.6822018623352051, "learning_rate": 1.4281910053275923e-05, "loss": 0.7691, "step": 1416 }, { "epoch": 1.136327185244587, "grad_norm": 0.6693670153617859, "learning_rate": 1.427408314463622e-05, "loss": 0.7564, "step": 1417 }, { "epoch": 1.1371291098636729, "grad_norm": 0.6806270480155945, "learning_rate": 1.4266253031143418e-05, "loss": 0.7953, "step": 1418 }, { "epoch": 1.1379310344827587, "grad_norm": 0.7900277376174927, "learning_rate": 1.4258419718668801e-05, "loss": 0.7782, "step": 1419 }, { "epoch": 1.1387329591018445, "grad_norm": 0.6651455760002136, "learning_rate": 1.4250583213086051e-05, "loss": 0.7406, "step": 1420 }, { "epoch": 1.1395348837209303, "grad_norm": 0.6853930950164795, "learning_rate": 1.4242743520271249e-05, "loss": 0.7845, "step": 1421 }, { "epoch": 1.140336808340016, "grad_norm": 0.6740282773971558, "learning_rate": 1.4234900646102864e-05, "loss": 0.7476, "step": 1422 }, { "epoch": 1.1411387329591018, "grad_norm": 0.6734980344772339, "learning_rate": 1.4227054596461754e-05, "loss": 0.7855, "step": 1423 }, { "epoch": 1.1419406575781876, "grad_norm": 0.6694862842559814, "learning_rate": 1.4219205377231147e-05, "loss": 0.7757, "step": 1424 }, { "epoch": 1.1427425821972734, "grad_norm": 0.68555748462677, "learning_rate": 1.4211352994296655e-05, "loss": 0.7891, "step": 1425 }, { "epoch": 1.1435445068163592, "grad_norm": 0.6966123580932617, "learning_rate": 1.4203497453546267e-05, "loss": 0.766, "step": 1426 }, { "epoch": 1.144346431435445, "grad_norm": 0.6659271121025085, "learning_rate": 1.4195638760870334e-05, "loss": 0.7537, "step": 1427 }, { "epoch": 1.1451483560545308, "grad_norm": 0.6558569073677063, "learning_rate": 1.418777692216157e-05, "loss": 0.7547, "step": 1428 }, { "epoch": 1.1459502806736166, "grad_norm": 0.6753950119018555, "learning_rate": 1.417991194331505e-05, "loss": 0.7408, "step": 1429 }, { "epoch": 1.1467522052927026, "grad_norm": 0.720521092414856, "learning_rate": 1.4172043830228202e-05, "loss": 0.7769, "step": 1430 }, { "epoch": 1.1475541299117884, "grad_norm": 0.685820996761322, "learning_rate": 1.4164172588800809e-05, "loss": 0.7925, "step": 1431 }, { "epoch": 1.1483560545308742, "grad_norm": 0.6265881061553955, "learning_rate": 1.415629822493499e-05, "loss": 0.7714, "step": 1432 }, { "epoch": 1.14915797914996, "grad_norm": 0.6419846415519714, "learning_rate": 1.4148420744535214e-05, "loss": 0.7476, "step": 1433 }, { "epoch": 1.1499599037690458, "grad_norm": 0.6394557952880859, "learning_rate": 1.4140540153508285e-05, "loss": 0.7862, "step": 1434 }, { "epoch": 1.1507618283881316, "grad_norm": 0.6818154454231262, "learning_rate": 1.4132656457763338e-05, "loss": 0.8058, "step": 1435 }, { "epoch": 1.1515637530072174, "grad_norm": 0.6973996758460999, "learning_rate": 1.4124769663211837e-05, "loss": 0.75, "step": 1436 }, { "epoch": 1.1523656776263032, "grad_norm": 0.6343415379524231, "learning_rate": 1.4116879775767567e-05, "loss": 0.7878, "step": 1437 }, { "epoch": 1.153167602245389, "grad_norm": 0.6887206435203552, "learning_rate": 1.4108986801346633e-05, "loss": 0.7894, "step": 1438 }, { "epoch": 1.1539695268644747, "grad_norm": 0.6938029527664185, "learning_rate": 1.4101090745867464e-05, "loss": 0.7608, "step": 1439 }, { "epoch": 1.1547714514835605, "grad_norm": 0.6808055639266968, "learning_rate": 1.4093191615250785e-05, "loss": 0.7765, "step": 1440 }, { "epoch": 1.1555733761026463, "grad_norm": 0.6570947766304016, "learning_rate": 1.4085289415419632e-05, "loss": 0.7583, "step": 1441 }, { "epoch": 1.1563753007217321, "grad_norm": 0.6893501877784729, "learning_rate": 1.4077384152299348e-05, "loss": 0.7418, "step": 1442 }, { "epoch": 1.157177225340818, "grad_norm": 0.689246654510498, "learning_rate": 1.4069475831817564e-05, "loss": 0.7751, "step": 1443 }, { "epoch": 1.1579791499599037, "grad_norm": 0.6660308241844177, "learning_rate": 1.4061564459904214e-05, "loss": 0.7582, "step": 1444 }, { "epoch": 1.1587810745789895, "grad_norm": 0.6339597702026367, "learning_rate": 1.4053650042491507e-05, "loss": 0.7172, "step": 1445 }, { "epoch": 1.1595829991980753, "grad_norm": 0.6734780073165894, "learning_rate": 1.4045732585513945e-05, "loss": 0.7813, "step": 1446 }, { "epoch": 1.160384923817161, "grad_norm": 0.6459300518035889, "learning_rate": 1.403781209490831e-05, "loss": 0.755, "step": 1447 }, { "epoch": 1.161186848436247, "grad_norm": 0.6735323667526245, "learning_rate": 1.4029888576613654e-05, "loss": 0.7667, "step": 1448 }, { "epoch": 1.1619887730553329, "grad_norm": 0.7017342448234558, "learning_rate": 1.4021962036571301e-05, "loss": 0.7973, "step": 1449 }, { "epoch": 1.1627906976744187, "grad_norm": 0.6533816456794739, "learning_rate": 1.4014032480724838e-05, "loss": 0.7825, "step": 1450 }, { "epoch": 1.1635926222935045, "grad_norm": 0.674637496471405, "learning_rate": 1.400609991502012e-05, "loss": 0.7378, "step": 1451 }, { "epoch": 1.1643945469125903, "grad_norm": 0.6480211019515991, "learning_rate": 1.3998164345405253e-05, "loss": 0.7617, "step": 1452 }, { "epoch": 1.165196471531676, "grad_norm": 0.6655580997467041, "learning_rate": 1.3990225777830595e-05, "loss": 0.788, "step": 1453 }, { "epoch": 1.1659983961507618, "grad_norm": 0.6576639413833618, "learning_rate": 1.3982284218248758e-05, "loss": 0.7567, "step": 1454 }, { "epoch": 1.1668003207698476, "grad_norm": 0.6448426246643066, "learning_rate": 1.3974339672614594e-05, "loss": 0.7859, "step": 1455 }, { "epoch": 1.1676022453889334, "grad_norm": 0.6625378131866455, "learning_rate": 1.396639214688519e-05, "loss": 0.7589, "step": 1456 }, { "epoch": 1.1684041700080192, "grad_norm": 0.679095983505249, "learning_rate": 1.3958441647019877e-05, "loss": 0.7464, "step": 1457 }, { "epoch": 1.169206094627105, "grad_norm": 0.6716442704200745, "learning_rate": 1.3950488178980203e-05, "loss": 0.7687, "step": 1458 }, { "epoch": 1.1700080192461908, "grad_norm": 0.6463879346847534, "learning_rate": 1.394253174872996e-05, "loss": 0.7424, "step": 1459 }, { "epoch": 1.1708099438652766, "grad_norm": 0.6612167358398438, "learning_rate": 1.393457236223514e-05, "loss": 0.7628, "step": 1460 }, { "epoch": 1.1716118684843624, "grad_norm": 0.7058016657829285, "learning_rate": 1.3926610025463967e-05, "loss": 0.7804, "step": 1461 }, { "epoch": 1.1724137931034484, "grad_norm": 0.697573721408844, "learning_rate": 1.3918644744386868e-05, "loss": 0.7949, "step": 1462 }, { "epoch": 1.1732157177225342, "grad_norm": 0.6801334023475647, "learning_rate": 1.3910676524976489e-05, "loss": 0.7611, "step": 1463 }, { "epoch": 1.17401764234162, "grad_norm": 0.6564053893089294, "learning_rate": 1.3902705373207669e-05, "loss": 0.7559, "step": 1464 }, { "epoch": 1.1748195669607058, "grad_norm": 0.6379392147064209, "learning_rate": 1.3894731295057446e-05, "loss": 0.7549, "step": 1465 }, { "epoch": 1.1756214915797916, "grad_norm": 0.6612043380737305, "learning_rate": 1.388675429650506e-05, "loss": 0.7533, "step": 1466 }, { "epoch": 1.1764234161988774, "grad_norm": 0.6841898560523987, "learning_rate": 1.3878774383531935e-05, "loss": 0.7963, "step": 1467 }, { "epoch": 1.1772253408179632, "grad_norm": 0.6732792854309082, "learning_rate": 1.3870791562121679e-05, "loss": 0.7894, "step": 1468 }, { "epoch": 1.178027265437049, "grad_norm": 0.6291318535804749, "learning_rate": 1.3862805838260087e-05, "loss": 0.7317, "step": 1469 }, { "epoch": 1.1788291900561347, "grad_norm": 0.7076729536056519, "learning_rate": 1.3854817217935126e-05, "loss": 0.7777, "step": 1470 }, { "epoch": 1.1796311146752205, "grad_norm": 0.6925358176231384, "learning_rate": 1.384682570713693e-05, "loss": 0.7755, "step": 1471 }, { "epoch": 1.1804330392943063, "grad_norm": 0.6519325971603394, "learning_rate": 1.3838831311857812e-05, "loss": 0.7508, "step": 1472 }, { "epoch": 1.181234963913392, "grad_norm": 0.6759724020957947, "learning_rate": 1.383083403809224e-05, "loss": 0.7513, "step": 1473 }, { "epoch": 1.182036888532478, "grad_norm": 0.6461741328239441, "learning_rate": 1.3822833891836846e-05, "loss": 0.7574, "step": 1474 }, { "epoch": 1.1828388131515637, "grad_norm": 0.6571494936943054, "learning_rate": 1.3814830879090409e-05, "loss": 0.7941, "step": 1475 }, { "epoch": 1.1836407377706495, "grad_norm": 0.7067133784294128, "learning_rate": 1.3806825005853855e-05, "loss": 0.7657, "step": 1476 }, { "epoch": 1.1844426623897353, "grad_norm": 0.6409063935279846, "learning_rate": 1.3798816278130268e-05, "loss": 0.7547, "step": 1477 }, { "epoch": 1.185244587008821, "grad_norm": 0.6490313410758972, "learning_rate": 1.3790804701924861e-05, "loss": 0.7466, "step": 1478 }, { "epoch": 1.1860465116279069, "grad_norm": 0.6448349952697754, "learning_rate": 1.378279028324499e-05, "loss": 0.7466, "step": 1479 }, { "epoch": 1.1868484362469929, "grad_norm": 0.6649291515350342, "learning_rate": 1.3774773028100135e-05, "loss": 0.7569, "step": 1480 }, { "epoch": 1.1876503608660787, "grad_norm": 0.6493022441864014, "learning_rate": 1.3766752942501911e-05, "loss": 0.7479, "step": 1481 }, { "epoch": 1.1884522854851645, "grad_norm": 0.6873480081558228, "learning_rate": 1.375873003246405e-05, "loss": 0.7851, "step": 1482 }, { "epoch": 1.1892542101042503, "grad_norm": 0.6878486275672913, "learning_rate": 1.3750704304002398e-05, "loss": 0.7799, "step": 1483 }, { "epoch": 1.190056134723336, "grad_norm": 0.6725685596466064, "learning_rate": 1.3742675763134926e-05, "loss": 0.7607, "step": 1484 }, { "epoch": 1.1908580593424218, "grad_norm": 0.715043842792511, "learning_rate": 1.3734644415881708e-05, "loss": 0.7748, "step": 1485 }, { "epoch": 1.1916599839615076, "grad_norm": 0.7091044783592224, "learning_rate": 1.3726610268264917e-05, "loss": 0.7979, "step": 1486 }, { "epoch": 1.1924619085805934, "grad_norm": 0.7154737710952759, "learning_rate": 1.3718573326308834e-05, "loss": 0.7526, "step": 1487 }, { "epoch": 1.1932638331996792, "grad_norm": 0.6581177711486816, "learning_rate": 1.3710533596039828e-05, "loss": 0.7468, "step": 1488 }, { "epoch": 1.194065757818765, "grad_norm": 0.7028568387031555, "learning_rate": 1.3702491083486366e-05, "loss": 0.7795, "step": 1489 }, { "epoch": 1.1948676824378508, "grad_norm": 0.6829168200492859, "learning_rate": 1.3694445794678996e-05, "loss": 0.8018, "step": 1490 }, { "epoch": 1.1956696070569366, "grad_norm": 0.6602156162261963, "learning_rate": 1.3686397735650353e-05, "loss": 0.7477, "step": 1491 }, { "epoch": 1.1964715316760224, "grad_norm": 0.6569467782974243, "learning_rate": 1.3678346912435141e-05, "loss": 0.7608, "step": 1492 }, { "epoch": 1.1972734562951082, "grad_norm": 0.6565800905227661, "learning_rate": 1.3670293331070142e-05, "loss": 0.7646, "step": 1493 }, { "epoch": 1.1980753809141942, "grad_norm": 0.6818427443504333, "learning_rate": 1.3662236997594209e-05, "loss": 0.8143, "step": 1494 }, { "epoch": 1.19887730553328, "grad_norm": 0.7073442935943604, "learning_rate": 1.3654177918048253e-05, "loss": 0.7981, "step": 1495 }, { "epoch": 1.1996792301523658, "grad_norm": 0.6537404656410217, "learning_rate": 1.3646116098475246e-05, "loss": 0.7556, "step": 1496 }, { "epoch": 1.2004811547714516, "grad_norm": 0.6920551061630249, "learning_rate": 1.3638051544920217e-05, "loss": 0.7843, "step": 1497 }, { "epoch": 1.2012830793905374, "grad_norm": 0.9153415560722351, "learning_rate": 1.3629984263430238e-05, "loss": 0.7822, "step": 1498 }, { "epoch": 1.2020850040096231, "grad_norm": 0.6674759387969971, "learning_rate": 1.3621914260054437e-05, "loss": 0.75, "step": 1499 }, { "epoch": 1.202886928628709, "grad_norm": 0.6499200463294983, "learning_rate": 1.3613841540843978e-05, "loss": 0.7385, "step": 1500 }, { "epoch": 1.2036888532477947, "grad_norm": 0.724064290523529, "learning_rate": 1.3605766111852052e-05, "loss": 0.7836, "step": 1501 }, { "epoch": 1.2044907778668805, "grad_norm": 0.6528597474098206, "learning_rate": 1.3597687979133898e-05, "loss": 0.7304, "step": 1502 }, { "epoch": 1.2052927024859663, "grad_norm": 0.6762754917144775, "learning_rate": 1.3589607148746775e-05, "loss": 0.7487, "step": 1503 }, { "epoch": 1.206094627105052, "grad_norm": 0.693828284740448, "learning_rate": 1.3581523626749966e-05, "loss": 0.762, "step": 1504 }, { "epoch": 1.206896551724138, "grad_norm": 0.6931832432746887, "learning_rate": 1.3573437419204765e-05, "loss": 0.8055, "step": 1505 }, { "epoch": 1.2076984763432237, "grad_norm": 0.691015362739563, "learning_rate": 1.3565348532174487e-05, "loss": 0.7511, "step": 1506 }, { "epoch": 1.2085004009623095, "grad_norm": 0.6388340592384338, "learning_rate": 1.355725697172446e-05, "loss": 0.7281, "step": 1507 }, { "epoch": 1.2093023255813953, "grad_norm": 0.6492217779159546, "learning_rate": 1.354916274392201e-05, "loss": 0.7788, "step": 1508 }, { "epoch": 1.210104250200481, "grad_norm": 0.6737982034683228, "learning_rate": 1.3541065854836464e-05, "loss": 0.7426, "step": 1509 }, { "epoch": 1.2109061748195669, "grad_norm": 0.6400448083877563, "learning_rate": 1.3532966310539142e-05, "loss": 0.7495, "step": 1510 }, { "epoch": 1.2117080994386527, "grad_norm": 0.6762053370475769, "learning_rate": 1.352486411710336e-05, "loss": 0.7913, "step": 1511 }, { "epoch": 1.2125100240577387, "grad_norm": 0.6663560271263123, "learning_rate": 1.3516759280604423e-05, "loss": 0.7498, "step": 1512 }, { "epoch": 1.2133119486768245, "grad_norm": 0.6616519689559937, "learning_rate": 1.3508651807119609e-05, "loss": 0.7328, "step": 1513 }, { "epoch": 1.2141138732959103, "grad_norm": 0.636685311794281, "learning_rate": 1.3500541702728175e-05, "loss": 0.7758, "step": 1514 }, { "epoch": 1.214915797914996, "grad_norm": 0.6234886050224304, "learning_rate": 1.3492428973511363e-05, "loss": 0.7013, "step": 1515 }, { "epoch": 1.2157177225340818, "grad_norm": 0.6537496447563171, "learning_rate": 1.3484313625552362e-05, "loss": 0.7369, "step": 1516 }, { "epoch": 1.2165196471531676, "grad_norm": 0.6941004991531372, "learning_rate": 1.3476195664936347e-05, "loss": 0.7798, "step": 1517 }, { "epoch": 1.2173215717722534, "grad_norm": 0.670886754989624, "learning_rate": 1.3468075097750432e-05, "loss": 0.7566, "step": 1518 }, { "epoch": 1.2181234963913392, "grad_norm": 0.751348614692688, "learning_rate": 1.3459951930083698e-05, "loss": 0.7695, "step": 1519 }, { "epoch": 1.218925421010425, "grad_norm": 0.6668398976325989, "learning_rate": 1.345182616802718e-05, "loss": 0.7587, "step": 1520 }, { "epoch": 1.2197273456295108, "grad_norm": 0.6931249499320984, "learning_rate": 1.3443697817673842e-05, "loss": 0.7838, "step": 1521 }, { "epoch": 1.2205292702485966, "grad_norm": 0.6422427892684937, "learning_rate": 1.34355668851186e-05, "loss": 0.7395, "step": 1522 }, { "epoch": 1.2213311948676824, "grad_norm": 0.6797451376914978, "learning_rate": 1.3427433376458306e-05, "loss": 0.8113, "step": 1523 }, { "epoch": 1.2221331194867682, "grad_norm": 0.6311590075492859, "learning_rate": 1.341929729779174e-05, "loss": 0.774, "step": 1524 }, { "epoch": 1.222935044105854, "grad_norm": 0.6638842821121216, "learning_rate": 1.3411158655219615e-05, "loss": 0.7781, "step": 1525 }, { "epoch": 1.22373696872494, "grad_norm": 0.6489601135253906, "learning_rate": 1.3403017454844556e-05, "loss": 0.7779, "step": 1526 }, { "epoch": 1.2245388933440258, "grad_norm": 0.6765308976173401, "learning_rate": 1.3394873702771114e-05, "loss": 0.7736, "step": 1527 }, { "epoch": 1.2253408179631116, "grad_norm": 0.6653009653091431, "learning_rate": 1.3386727405105756e-05, "loss": 0.7436, "step": 1528 }, { "epoch": 1.2261427425821974, "grad_norm": 0.6361008286476135, "learning_rate": 1.337857856795685e-05, "loss": 0.7256, "step": 1529 }, { "epoch": 1.2269446672012831, "grad_norm": 0.6522552967071533, "learning_rate": 1.3370427197434673e-05, "loss": 0.7511, "step": 1530 }, { "epoch": 1.227746591820369, "grad_norm": 0.6462847590446472, "learning_rate": 1.3362273299651395e-05, "loss": 0.749, "step": 1531 }, { "epoch": 1.2285485164394547, "grad_norm": 0.6825747489929199, "learning_rate": 1.3354116880721093e-05, "loss": 0.7598, "step": 1532 }, { "epoch": 1.2293504410585405, "grad_norm": 0.6631978154182434, "learning_rate": 1.334595794675973e-05, "loss": 0.7954, "step": 1533 }, { "epoch": 1.2301523656776263, "grad_norm": 0.6733466386795044, "learning_rate": 1.333779650388514e-05, "loss": 0.7623, "step": 1534 }, { "epoch": 1.230954290296712, "grad_norm": 0.6870176792144775, "learning_rate": 1.3329632558217065e-05, "loss": 0.7626, "step": 1535 }, { "epoch": 1.231756214915798, "grad_norm": 0.7062235474586487, "learning_rate": 1.33214661158771e-05, "loss": 0.7815, "step": 1536 }, { "epoch": 1.2325581395348837, "grad_norm": 0.6793636679649353, "learning_rate": 1.3313297182988722e-05, "loss": 0.7597, "step": 1537 }, { "epoch": 1.2333600641539695, "grad_norm": 0.697195291519165, "learning_rate": 1.3305125765677283e-05, "loss": 0.7883, "step": 1538 }, { "epoch": 1.2341619887730553, "grad_norm": 0.7242034673690796, "learning_rate": 1.3296951870069981e-05, "loss": 0.7931, "step": 1539 }, { "epoch": 1.234963913392141, "grad_norm": 0.6695935130119324, "learning_rate": 1.328877550229589e-05, "loss": 0.737, "step": 1540 }, { "epoch": 1.2357658380112269, "grad_norm": 0.6315851807594299, "learning_rate": 1.3280596668485919e-05, "loss": 0.7701, "step": 1541 }, { "epoch": 1.2365677626303127, "grad_norm": 0.6564438343048096, "learning_rate": 1.3272415374772844e-05, "loss": 0.7729, "step": 1542 }, { "epoch": 1.2373696872493984, "grad_norm": 0.6894364953041077, "learning_rate": 1.3264231627291273e-05, "loss": 0.8072, "step": 1543 }, { "epoch": 1.2381716118684845, "grad_norm": 0.7009897828102112, "learning_rate": 1.325604543217766e-05, "loss": 0.752, "step": 1544 }, { "epoch": 1.2389735364875702, "grad_norm": 0.7217838764190674, "learning_rate": 1.3247856795570295e-05, "loss": 0.7707, "step": 1545 }, { "epoch": 1.239775461106656, "grad_norm": 0.6767851710319519, "learning_rate": 1.3239665723609294e-05, "loss": 0.7444, "step": 1546 }, { "epoch": 1.2405773857257418, "grad_norm": 0.68946373462677, "learning_rate": 1.3231472222436605e-05, "loss": 0.7341, "step": 1547 }, { "epoch": 1.2413793103448276, "grad_norm": 0.7371004223823547, "learning_rate": 1.3223276298195988e-05, "loss": 0.7759, "step": 1548 }, { "epoch": 1.2421812349639134, "grad_norm": 0.6482488512992859, "learning_rate": 1.3215077957033032e-05, "loss": 0.7692, "step": 1549 }, { "epoch": 1.2429831595829992, "grad_norm": 0.6388265490531921, "learning_rate": 1.3206877205095133e-05, "loss": 0.7739, "step": 1550 }, { "epoch": 1.243785084202085, "grad_norm": 0.7499321699142456, "learning_rate": 1.3198674048531488e-05, "loss": 0.8046, "step": 1551 }, { "epoch": 1.2445870088211708, "grad_norm": 0.6427833437919617, "learning_rate": 1.3190468493493107e-05, "loss": 0.7477, "step": 1552 }, { "epoch": 1.2453889334402566, "grad_norm": 0.6992602944374084, "learning_rate": 1.3182260546132795e-05, "loss": 0.7773, "step": 1553 }, { "epoch": 1.2461908580593424, "grad_norm": 0.6634381413459778, "learning_rate": 1.3174050212605147e-05, "loss": 0.7649, "step": 1554 }, { "epoch": 1.2469927826784282, "grad_norm": 0.6472319960594177, "learning_rate": 1.316583749906656e-05, "loss": 0.7866, "step": 1555 }, { "epoch": 1.247794707297514, "grad_norm": 0.6691563725471497, "learning_rate": 1.3157622411675195e-05, "loss": 0.7582, "step": 1556 }, { "epoch": 1.2485966319165998, "grad_norm": 0.7087457776069641, "learning_rate": 1.3149404956591008e-05, "loss": 0.7923, "step": 1557 }, { "epoch": 1.2493985565356858, "grad_norm": 0.6310862898826599, "learning_rate": 1.3141185139975728e-05, "loss": 0.7327, "step": 1558 }, { "epoch": 1.2502004811547716, "grad_norm": 0.6904287338256836, "learning_rate": 1.3132962967992854e-05, "loss": 0.8019, "step": 1559 }, { "epoch": 1.2510024057738574, "grad_norm": 0.6679447889328003, "learning_rate": 1.3124738446807652e-05, "loss": 0.7593, "step": 1560 }, { "epoch": 1.2518043303929431, "grad_norm": 0.6390910744667053, "learning_rate": 1.3116511582587144e-05, "loss": 0.7633, "step": 1561 }, { "epoch": 1.252606255012029, "grad_norm": 0.7109375, "learning_rate": 1.3108282381500113e-05, "loss": 0.7773, "step": 1562 }, { "epoch": 1.2534081796311147, "grad_norm": 0.7146701812744141, "learning_rate": 1.3100050849717102e-05, "loss": 0.7661, "step": 1563 }, { "epoch": 1.2542101042502005, "grad_norm": 0.6369656920433044, "learning_rate": 1.309181699341038e-05, "loss": 0.7478, "step": 1564 }, { "epoch": 1.2550120288692863, "grad_norm": 0.6547572016716003, "learning_rate": 1.3083580818753985e-05, "loss": 0.7648, "step": 1565 }, { "epoch": 1.255813953488372, "grad_norm": 0.6653978228569031, "learning_rate": 1.3075342331923675e-05, "loss": 0.7727, "step": 1566 }, { "epoch": 1.256615878107458, "grad_norm": 0.6517826914787292, "learning_rate": 1.3067101539096952e-05, "loss": 0.7463, "step": 1567 }, { "epoch": 1.2574178027265437, "grad_norm": 0.673172116279602, "learning_rate": 1.305885844645304e-05, "loss": 0.7474, "step": 1568 }, { "epoch": 1.2582197273456295, "grad_norm": 0.6843745112419128, "learning_rate": 1.3050613060172893e-05, "loss": 0.7819, "step": 1569 }, { "epoch": 1.2590216519647153, "grad_norm": 0.6570084095001221, "learning_rate": 1.304236538643918e-05, "loss": 0.7617, "step": 1570 }, { "epoch": 1.259823576583801, "grad_norm": 0.6739295125007629, "learning_rate": 1.3034115431436286e-05, "loss": 0.7473, "step": 1571 }, { "epoch": 1.2606255012028869, "grad_norm": 0.6281715035438538, "learning_rate": 1.3025863201350315e-05, "loss": 0.7476, "step": 1572 }, { "epoch": 1.2614274258219726, "grad_norm": 0.6386378407478333, "learning_rate": 1.3017608702369065e-05, "loss": 0.7579, "step": 1573 }, { "epoch": 1.2622293504410584, "grad_norm": 0.6800333857536316, "learning_rate": 1.300935194068204e-05, "loss": 0.7787, "step": 1574 }, { "epoch": 1.2630312750601442, "grad_norm": 0.6754662394523621, "learning_rate": 1.3001092922480445e-05, "loss": 0.7985, "step": 1575 }, { "epoch": 1.26383319967923, "grad_norm": 0.666191041469574, "learning_rate": 1.2992831653957173e-05, "loss": 0.7681, "step": 1576 }, { "epoch": 1.264635124298316, "grad_norm": 0.636799693107605, "learning_rate": 1.2984568141306797e-05, "loss": 0.7185, "step": 1577 }, { "epoch": 1.2654370489174018, "grad_norm": 0.6819969415664673, "learning_rate": 1.2976302390725586e-05, "loss": 0.8076, "step": 1578 }, { "epoch": 1.2662389735364876, "grad_norm": 0.658902645111084, "learning_rate": 1.296803440841148e-05, "loss": 0.77, "step": 1579 }, { "epoch": 1.2670408981555734, "grad_norm": 0.6477823853492737, "learning_rate": 1.29597642005641e-05, "loss": 0.7159, "step": 1580 }, { "epoch": 1.2678428227746592, "grad_norm": 0.6573794484138489, "learning_rate": 1.2951491773384722e-05, "loss": 0.7573, "step": 1581 }, { "epoch": 1.268644747393745, "grad_norm": 0.6093468070030212, "learning_rate": 1.2943217133076294e-05, "loss": 0.7136, "step": 1582 }, { "epoch": 1.2694466720128308, "grad_norm": 0.6893898844718933, "learning_rate": 1.2934940285843425e-05, "loss": 0.7904, "step": 1583 }, { "epoch": 1.2702485966319166, "grad_norm": 0.6822935938835144, "learning_rate": 1.2926661237892377e-05, "loss": 0.7664, "step": 1584 }, { "epoch": 1.2710505212510024, "grad_norm": 0.6702585220336914, "learning_rate": 1.2918379995431062e-05, "loss": 0.7504, "step": 1585 }, { "epoch": 1.2718524458700882, "grad_norm": 0.7012314200401306, "learning_rate": 1.2910096564669037e-05, "loss": 0.7665, "step": 1586 }, { "epoch": 1.272654370489174, "grad_norm": 0.6500260233879089, "learning_rate": 1.2901810951817499e-05, "loss": 0.7426, "step": 1587 }, { "epoch": 1.2734562951082598, "grad_norm": 0.6530648469924927, "learning_rate": 1.2893523163089285e-05, "loss": 0.7817, "step": 1588 }, { "epoch": 1.2742582197273458, "grad_norm": 0.6491802334785461, "learning_rate": 1.2885233204698866e-05, "loss": 0.75, "step": 1589 }, { "epoch": 1.2750601443464316, "grad_norm": 0.6376941204071045, "learning_rate": 1.2876941082862324e-05, "loss": 0.7574, "step": 1590 }, { "epoch": 1.2758620689655173, "grad_norm": 0.6957758665084839, "learning_rate": 1.2868646803797384e-05, "loss": 0.7617, "step": 1591 }, { "epoch": 1.2766639935846031, "grad_norm": 0.6394297480583191, "learning_rate": 1.2860350373723374e-05, "loss": 0.7365, "step": 1592 }, { "epoch": 1.277465918203689, "grad_norm": 0.6853082180023193, "learning_rate": 1.2852051798861243e-05, "loss": 0.7461, "step": 1593 }, { "epoch": 1.2782678428227747, "grad_norm": 0.6423301100730896, "learning_rate": 1.2843751085433539e-05, "loss": 0.7031, "step": 1594 }, { "epoch": 1.2790697674418605, "grad_norm": 0.6504702568054199, "learning_rate": 1.2835448239664425e-05, "loss": 0.7431, "step": 1595 }, { "epoch": 1.2798716920609463, "grad_norm": 0.6869426369667053, "learning_rate": 1.2827143267779658e-05, "loss": 0.7593, "step": 1596 }, { "epoch": 1.280673616680032, "grad_norm": 0.708145022392273, "learning_rate": 1.2818836176006586e-05, "loss": 0.7978, "step": 1597 }, { "epoch": 1.281475541299118, "grad_norm": 0.6440792679786682, "learning_rate": 1.2810526970574151e-05, "loss": 0.7948, "step": 1598 }, { "epoch": 1.2822774659182037, "grad_norm": 0.6792011260986328, "learning_rate": 1.2802215657712876e-05, "loss": 0.7789, "step": 1599 }, { "epoch": 1.2830793905372895, "grad_norm": 0.6505358815193176, "learning_rate": 1.2793902243654868e-05, "loss": 0.7309, "step": 1600 }, { "epoch": 1.2838813151563753, "grad_norm": 0.6658748388290405, "learning_rate": 1.278558673463381e-05, "loss": 0.7607, "step": 1601 }, { "epoch": 1.284683239775461, "grad_norm": 0.6865249276161194, "learning_rate": 1.2777269136884952e-05, "loss": 0.758, "step": 1602 }, { "epoch": 1.2854851643945469, "grad_norm": 0.6584889888763428, "learning_rate": 1.2768949456645108e-05, "loss": 0.7318, "step": 1603 }, { "epoch": 1.2862870890136326, "grad_norm": 0.6798214912414551, "learning_rate": 1.2760627700152664e-05, "loss": 0.7606, "step": 1604 }, { "epoch": 1.2870890136327184, "grad_norm": 0.6833958625793457, "learning_rate": 1.275230387364755e-05, "loss": 0.8089, "step": 1605 }, { "epoch": 1.2878909382518042, "grad_norm": 0.6360597014427185, "learning_rate": 1.2743977983371268e-05, "loss": 0.7443, "step": 1606 }, { "epoch": 1.28869286287089, "grad_norm": 0.7116873860359192, "learning_rate": 1.2735650035566836e-05, "loss": 0.7916, "step": 1607 }, { "epoch": 1.2894947874899758, "grad_norm": 0.6559532880783081, "learning_rate": 1.2727320036478843e-05, "loss": 0.7767, "step": 1608 }, { "epoch": 1.2902967121090618, "grad_norm": 0.6576822400093079, "learning_rate": 1.2718987992353403e-05, "loss": 0.7425, "step": 1609 }, { "epoch": 1.2910986367281476, "grad_norm": 0.6480294466018677, "learning_rate": 1.2710653909438172e-05, "loss": 0.7506, "step": 1610 }, { "epoch": 1.2919005613472334, "grad_norm": 0.6556183099746704, "learning_rate": 1.2702317793982327e-05, "loss": 0.7592, "step": 1611 }, { "epoch": 1.2927024859663192, "grad_norm": 0.7214966416358948, "learning_rate": 1.2693979652236564e-05, "loss": 0.7769, "step": 1612 }, { "epoch": 1.293504410585405, "grad_norm": 0.6807628273963928, "learning_rate": 1.2685639490453113e-05, "loss": 0.7976, "step": 1613 }, { "epoch": 1.2943063352044908, "grad_norm": 0.6581088900566101, "learning_rate": 1.2677297314885708e-05, "loss": 0.7919, "step": 1614 }, { "epoch": 1.2951082598235766, "grad_norm": 0.6764626502990723, "learning_rate": 1.2668953131789599e-05, "loss": 0.774, "step": 1615 }, { "epoch": 1.2959101844426624, "grad_norm": 0.696811854839325, "learning_rate": 1.2660606947421537e-05, "loss": 0.7757, "step": 1616 }, { "epoch": 1.2967121090617482, "grad_norm": 0.6721716523170471, "learning_rate": 1.2652258768039775e-05, "loss": 0.7861, "step": 1617 }, { "epoch": 1.297514033680834, "grad_norm": 0.7006967663764954, "learning_rate": 1.2643908599904063e-05, "loss": 0.7621, "step": 1618 }, { "epoch": 1.2983159582999197, "grad_norm": 0.630478024482727, "learning_rate": 1.2635556449275641e-05, "loss": 0.7318, "step": 1619 }, { "epoch": 1.2991178829190055, "grad_norm": 0.6607829332351685, "learning_rate": 1.2627202322417235e-05, "loss": 0.8096, "step": 1620 }, { "epoch": 1.2999198075380916, "grad_norm": 0.6839569807052612, "learning_rate": 1.2618846225593057e-05, "loss": 0.7783, "step": 1621 }, { "epoch": 1.3007217321571773, "grad_norm": 0.6771467328071594, "learning_rate": 1.2610488165068793e-05, "loss": 0.7737, "step": 1622 }, { "epoch": 1.3015236567762631, "grad_norm": 0.6908150315284729, "learning_rate": 1.2602128147111597e-05, "loss": 0.7958, "step": 1623 }, { "epoch": 1.302325581395349, "grad_norm": 0.731979489326477, "learning_rate": 1.2593766177990096e-05, "loss": 0.7927, "step": 1624 }, { "epoch": 1.3031275060144347, "grad_norm": 0.6440238952636719, "learning_rate": 1.2585402263974383e-05, "loss": 0.7429, "step": 1625 }, { "epoch": 1.3039294306335205, "grad_norm": 0.6557809114456177, "learning_rate": 1.2577036411336003e-05, "loss": 0.758, "step": 1626 }, { "epoch": 1.3047313552526063, "grad_norm": 0.670251727104187, "learning_rate": 1.256866862634796e-05, "loss": 0.7308, "step": 1627 }, { "epoch": 1.305533279871692, "grad_norm": 0.6871209144592285, "learning_rate": 1.2560298915284699e-05, "loss": 0.7549, "step": 1628 }, { "epoch": 1.306335204490778, "grad_norm": 0.6390516757965088, "learning_rate": 1.2551927284422117e-05, "loss": 0.7439, "step": 1629 }, { "epoch": 1.3071371291098637, "grad_norm": 0.6146913170814514, "learning_rate": 1.2543553740037546e-05, "loss": 0.7618, "step": 1630 }, { "epoch": 1.3079390537289495, "grad_norm": 0.6606637835502625, "learning_rate": 1.2535178288409761e-05, "loss": 0.7575, "step": 1631 }, { "epoch": 1.3087409783480353, "grad_norm": 0.6702629327774048, "learning_rate": 1.2526800935818956e-05, "loss": 0.8049, "step": 1632 }, { "epoch": 1.309542902967121, "grad_norm": 0.6716954112052917, "learning_rate": 1.2518421688546757e-05, "loss": 0.7453, "step": 1633 }, { "epoch": 1.3103448275862069, "grad_norm": 0.6508925557136536, "learning_rate": 1.2510040552876204e-05, "loss": 0.7823, "step": 1634 }, { "epoch": 1.3111467522052926, "grad_norm": 0.8851492404937744, "learning_rate": 1.2501657535091765e-05, "loss": 0.7895, "step": 1635 }, { "epoch": 1.3119486768243784, "grad_norm": 0.648134171962738, "learning_rate": 1.2493272641479311e-05, "loss": 0.7168, "step": 1636 }, { "epoch": 1.3127506014434642, "grad_norm": 0.6762018799781799, "learning_rate": 1.2484885878326114e-05, "loss": 0.7674, "step": 1637 }, { "epoch": 1.31355252606255, "grad_norm": 0.6717413067817688, "learning_rate": 1.247649725192086e-05, "loss": 0.7295, "step": 1638 }, { "epoch": 1.3143544506816358, "grad_norm": 0.7482190728187561, "learning_rate": 1.246810676855363e-05, "loss": 0.7754, "step": 1639 }, { "epoch": 1.3151563753007216, "grad_norm": 0.6837365627288818, "learning_rate": 1.2459714434515888e-05, "loss": 0.7827, "step": 1640 }, { "epoch": 1.3159582999198076, "grad_norm": 0.6977565288543701, "learning_rate": 1.2451320256100497e-05, "loss": 0.785, "step": 1641 }, { "epoch": 1.3167602245388934, "grad_norm": 0.6693125367164612, "learning_rate": 1.2442924239601692e-05, "loss": 0.7275, "step": 1642 }, { "epoch": 1.3175621491579792, "grad_norm": 0.7254882454872131, "learning_rate": 1.2434526391315095e-05, "loss": 0.8095, "step": 1643 }, { "epoch": 1.318364073777065, "grad_norm": 0.6936510801315308, "learning_rate": 1.2426126717537704e-05, "loss": 0.7396, "step": 1644 }, { "epoch": 1.3191659983961508, "grad_norm": 0.6720640063285828, "learning_rate": 1.2417725224567872e-05, "loss": 0.7378, "step": 1645 }, { "epoch": 1.3199679230152366, "grad_norm": 0.655472457408905, "learning_rate": 1.2409321918705329e-05, "loss": 0.7372, "step": 1646 }, { "epoch": 1.3207698476343224, "grad_norm": 0.7163864970207214, "learning_rate": 1.2400916806251157e-05, "loss": 0.7659, "step": 1647 }, { "epoch": 1.3215717722534082, "grad_norm": 0.6562886238098145, "learning_rate": 1.2392509893507799e-05, "loss": 0.7724, "step": 1648 }, { "epoch": 1.322373696872494, "grad_norm": 0.6730007529258728, "learning_rate": 1.2384101186779042e-05, "loss": 0.7505, "step": 1649 }, { "epoch": 1.3231756214915797, "grad_norm": 0.7325595617294312, "learning_rate": 1.2375690692370022e-05, "loss": 0.784, "step": 1650 }, { "epoch": 1.3239775461106655, "grad_norm": 0.6311853528022766, "learning_rate": 1.2367278416587216e-05, "loss": 0.7435, "step": 1651 }, { "epoch": 1.3247794707297513, "grad_norm": 0.67484050989151, "learning_rate": 1.235886436573843e-05, "loss": 0.7545, "step": 1652 }, { "epoch": 1.3255813953488373, "grad_norm": 0.7450650930404663, "learning_rate": 1.235044854613281e-05, "loss": 0.7657, "step": 1653 }, { "epoch": 1.3263833199679231, "grad_norm": 0.6910774111747742, "learning_rate": 1.2342030964080822e-05, "loss": 0.7257, "step": 1654 }, { "epoch": 1.327185244587009, "grad_norm": 0.6374717354774475, "learning_rate": 1.2333611625894254e-05, "loss": 0.7467, "step": 1655 }, { "epoch": 1.3279871692060947, "grad_norm": 0.6719356775283813, "learning_rate": 1.2325190537886221e-05, "loss": 0.7808, "step": 1656 }, { "epoch": 1.3287890938251805, "grad_norm": 0.7069136500358582, "learning_rate": 1.231676770637113e-05, "loss": 0.7416, "step": 1657 }, { "epoch": 1.3295910184442663, "grad_norm": 0.6578108668327332, "learning_rate": 1.2308343137664716e-05, "loss": 0.7745, "step": 1658 }, { "epoch": 1.330392943063352, "grad_norm": 0.684407114982605, "learning_rate": 1.2299916838084001e-05, "loss": 0.7411, "step": 1659 }, { "epoch": 1.3311948676824379, "grad_norm": 0.6682063341140747, "learning_rate": 1.2291488813947315e-05, "loss": 0.7518, "step": 1660 }, { "epoch": 1.3319967923015237, "grad_norm": 0.6518343091011047, "learning_rate": 1.2283059071574278e-05, "loss": 0.7582, "step": 1661 }, { "epoch": 1.3327987169206095, "grad_norm": 0.6990170478820801, "learning_rate": 1.2274627617285797e-05, "loss": 0.7964, "step": 1662 }, { "epoch": 1.3336006415396953, "grad_norm": 0.6665741205215454, "learning_rate": 1.2266194457404061e-05, "loss": 0.7903, "step": 1663 }, { "epoch": 1.334402566158781, "grad_norm": 0.6582921743392944, "learning_rate": 1.2257759598252543e-05, "loss": 0.7422, "step": 1664 }, { "epoch": 1.3352044907778668, "grad_norm": 0.6946974396705627, "learning_rate": 1.224932304615599e-05, "loss": 0.7956, "step": 1665 }, { "epoch": 1.3360064153969526, "grad_norm": 0.6991271376609802, "learning_rate": 1.2240884807440413e-05, "loss": 0.772, "step": 1666 }, { "epoch": 1.3368083400160384, "grad_norm": 0.6818379759788513, "learning_rate": 1.223244488843309e-05, "loss": 0.7572, "step": 1667 }, { "epoch": 1.3376102646351242, "grad_norm": 0.6668140292167664, "learning_rate": 1.2224003295462561e-05, "loss": 0.736, "step": 1668 }, { "epoch": 1.33841218925421, "grad_norm": 0.6249803304672241, "learning_rate": 1.221556003485862e-05, "loss": 0.7643, "step": 1669 }, { "epoch": 1.3392141138732958, "grad_norm": 0.6772146821022034, "learning_rate": 1.2207115112952313e-05, "loss": 0.7568, "step": 1670 }, { "epoch": 1.3400160384923816, "grad_norm": 0.6962553858757019, "learning_rate": 1.2198668536075924e-05, "loss": 0.8018, "step": 1671 }, { "epoch": 1.3408179631114674, "grad_norm": 0.6322622299194336, "learning_rate": 1.2190220310562992e-05, "loss": 0.7296, "step": 1672 }, { "epoch": 1.3416198877305534, "grad_norm": 0.6803750991821289, "learning_rate": 1.218177044274828e-05, "loss": 0.8024, "step": 1673 }, { "epoch": 1.3424218123496392, "grad_norm": 0.6757524013519287, "learning_rate": 1.217331893896779e-05, "loss": 0.7697, "step": 1674 }, { "epoch": 1.343223736968725, "grad_norm": 0.7142401337623596, "learning_rate": 1.2164865805558738e-05, "loss": 0.7728, "step": 1675 }, { "epoch": 1.3440256615878108, "grad_norm": 0.631919801235199, "learning_rate": 1.215641104885958e-05, "loss": 0.7395, "step": 1676 }, { "epoch": 1.3448275862068966, "grad_norm": 0.6723388433456421, "learning_rate": 1.2147954675209982e-05, "loss": 0.7703, "step": 1677 }, { "epoch": 1.3456295108259824, "grad_norm": 0.654248833656311, "learning_rate": 1.2139496690950813e-05, "loss": 0.7829, "step": 1678 }, { "epoch": 1.3464314354450682, "grad_norm": 0.6374508738517761, "learning_rate": 1.2131037102424165e-05, "loss": 0.7534, "step": 1679 }, { "epoch": 1.347233360064154, "grad_norm": 0.6467137932777405, "learning_rate": 1.2122575915973321e-05, "loss": 0.7589, "step": 1680 }, { "epoch": 1.3480352846832397, "grad_norm": 0.686490535736084, "learning_rate": 1.2114113137942767e-05, "loss": 0.7685, "step": 1681 }, { "epoch": 1.3488372093023255, "grad_norm": 0.6674497127532959, "learning_rate": 1.2105648774678188e-05, "loss": 0.7302, "step": 1682 }, { "epoch": 1.3496391339214113, "grad_norm": 0.6758518218994141, "learning_rate": 1.2097182832526443e-05, "loss": 0.7784, "step": 1683 }, { "epoch": 1.3504410585404971, "grad_norm": 0.663431704044342, "learning_rate": 1.2088715317835589e-05, "loss": 0.745, "step": 1684 }, { "epoch": 1.3512429831595831, "grad_norm": 0.6859851479530334, "learning_rate": 1.2080246236954856e-05, "loss": 0.8056, "step": 1685 }, { "epoch": 1.352044907778669, "grad_norm": 0.6677384972572327, "learning_rate": 1.2071775596234647e-05, "loss": 0.7598, "step": 1686 }, { "epoch": 1.3528468323977547, "grad_norm": 0.7199987769126892, "learning_rate": 1.2063303402026545e-05, "loss": 0.8167, "step": 1687 }, { "epoch": 1.3536487570168405, "grad_norm": 0.6905441284179688, "learning_rate": 1.2054829660683281e-05, "loss": 0.7508, "step": 1688 }, { "epoch": 1.3544506816359263, "grad_norm": 0.6765400171279907, "learning_rate": 1.2046354378558753e-05, "loss": 0.7208, "step": 1689 }, { "epoch": 1.355252606255012, "grad_norm": 0.6659426093101501, "learning_rate": 1.2037877562008025e-05, "loss": 0.7675, "step": 1690 }, { "epoch": 1.3560545308740979, "grad_norm": 0.6474359035491943, "learning_rate": 1.2029399217387299e-05, "loss": 0.753, "step": 1691 }, { "epoch": 1.3568564554931837, "grad_norm": 0.7105376124382019, "learning_rate": 1.2020919351053927e-05, "loss": 0.7842, "step": 1692 }, { "epoch": 1.3576583801122695, "grad_norm": 0.6748633980751038, "learning_rate": 1.2012437969366397e-05, "loss": 0.7136, "step": 1693 }, { "epoch": 1.3584603047313553, "grad_norm": 0.6597528457641602, "learning_rate": 1.2003955078684344e-05, "loss": 0.7817, "step": 1694 }, { "epoch": 1.359262229350441, "grad_norm": 0.6678489446640015, "learning_rate": 1.1995470685368527e-05, "loss": 0.7433, "step": 1695 }, { "epoch": 1.3600641539695268, "grad_norm": 0.7042926549911499, "learning_rate": 1.1986984795780829e-05, "loss": 0.7635, "step": 1696 }, { "epoch": 1.3608660785886126, "grad_norm": 0.6309202313423157, "learning_rate": 1.1978497416284265e-05, "loss": 0.703, "step": 1697 }, { "epoch": 1.3616680032076984, "grad_norm": 0.7144943475723267, "learning_rate": 1.1970008553242955e-05, "loss": 0.7649, "step": 1698 }, { "epoch": 1.3624699278267842, "grad_norm": 0.64893639087677, "learning_rate": 1.196151821302214e-05, "loss": 0.7039, "step": 1699 }, { "epoch": 1.36327185244587, "grad_norm": 0.8356136083602905, "learning_rate": 1.1953026401988172e-05, "loss": 0.7479, "step": 1700 }, { "epoch": 1.3640737770649558, "grad_norm": 0.6598641276359558, "learning_rate": 1.1944533126508491e-05, "loss": 0.7319, "step": 1701 }, { "epoch": 1.3648757016840416, "grad_norm": 0.6679075360298157, "learning_rate": 1.193603839295165e-05, "loss": 0.7221, "step": 1702 }, { "epoch": 1.3656776263031274, "grad_norm": 0.6554751396179199, "learning_rate": 1.1927542207687287e-05, "loss": 0.7633, "step": 1703 }, { "epoch": 1.3664795509222132, "grad_norm": 0.6698046922683716, "learning_rate": 1.1919044577086135e-05, "loss": 0.7493, "step": 1704 }, { "epoch": 1.3672814755412992, "grad_norm": 0.7508684396743774, "learning_rate": 1.191054550752e-05, "loss": 0.7613, "step": 1705 }, { "epoch": 1.368083400160385, "grad_norm": 0.6737611293792725, "learning_rate": 1.1902045005361775e-05, "loss": 0.7632, "step": 1706 }, { "epoch": 1.3688853247794708, "grad_norm": 0.6801590919494629, "learning_rate": 1.1893543076985434e-05, "loss": 0.7383, "step": 1707 }, { "epoch": 1.3696872493985566, "grad_norm": 0.6327788233757019, "learning_rate": 1.1885039728766006e-05, "loss": 0.7311, "step": 1708 }, { "epoch": 1.3704891740176424, "grad_norm": 0.6967527270317078, "learning_rate": 1.187653496707959e-05, "loss": 0.7488, "step": 1709 }, { "epoch": 1.3712910986367282, "grad_norm": 0.6776365041732788, "learning_rate": 1.1868028798303346e-05, "loss": 0.7433, "step": 1710 }, { "epoch": 1.372093023255814, "grad_norm": 0.6518088579177856, "learning_rate": 1.1859521228815495e-05, "loss": 0.7492, "step": 1711 }, { "epoch": 1.3728949478748997, "grad_norm": 0.7493876814842224, "learning_rate": 1.1851012264995296e-05, "loss": 0.7494, "step": 1712 }, { "epoch": 1.3736968724939855, "grad_norm": 0.6971762180328369, "learning_rate": 1.1842501913223066e-05, "loss": 0.7581, "step": 1713 }, { "epoch": 1.3744987971130713, "grad_norm": 0.6640205979347229, "learning_rate": 1.1833990179880148e-05, "loss": 0.7684, "step": 1714 }, { "epoch": 1.3753007217321571, "grad_norm": 0.6524538397789001, "learning_rate": 1.1825477071348937e-05, "loss": 0.7265, "step": 1715 }, { "epoch": 1.376102646351243, "grad_norm": 0.7143647074699402, "learning_rate": 1.1816962594012849e-05, "loss": 0.806, "step": 1716 }, { "epoch": 1.376904570970329, "grad_norm": 0.605280876159668, "learning_rate": 1.1808446754256329e-05, "loss": 0.7126, "step": 1717 }, { "epoch": 1.3777064955894147, "grad_norm": 0.6456320881843567, "learning_rate": 1.1799929558464843e-05, "loss": 0.7651, "step": 1718 }, { "epoch": 1.3785084202085005, "grad_norm": 0.7090603709220886, "learning_rate": 1.1791411013024873e-05, "loss": 0.804, "step": 1719 }, { "epoch": 1.3793103448275863, "grad_norm": 0.6659294962882996, "learning_rate": 1.178289112432392e-05, "loss": 0.7683, "step": 1720 }, { "epoch": 1.380112269446672, "grad_norm": 0.6606502532958984, "learning_rate": 1.1774369898750484e-05, "loss": 0.7351, "step": 1721 }, { "epoch": 1.3809141940657579, "grad_norm": 0.6632405519485474, "learning_rate": 1.176584734269407e-05, "loss": 0.7632, "step": 1722 }, { "epoch": 1.3817161186848437, "grad_norm": 0.6848816275596619, "learning_rate": 1.1757323462545177e-05, "loss": 0.7277, "step": 1723 }, { "epoch": 1.3825180433039295, "grad_norm": 0.6847621202468872, "learning_rate": 1.1748798264695305e-05, "loss": 0.7468, "step": 1724 }, { "epoch": 1.3833199679230153, "grad_norm": 0.7737583518028259, "learning_rate": 1.1740271755536939e-05, "loss": 0.7894, "step": 1725 }, { "epoch": 1.384121892542101, "grad_norm": 0.6926838755607605, "learning_rate": 1.173174394146354e-05, "loss": 0.7865, "step": 1726 }, { "epoch": 1.3849238171611868, "grad_norm": 0.7265371680259705, "learning_rate": 1.172321482886956e-05, "loss": 0.7498, "step": 1727 }, { "epoch": 1.3857257417802726, "grad_norm": 0.6517270803451538, "learning_rate": 1.1714684424150413e-05, "loss": 0.7666, "step": 1728 }, { "epoch": 1.3865276663993584, "grad_norm": 0.7151353359222412, "learning_rate": 1.1706152733702489e-05, "loss": 0.7528, "step": 1729 }, { "epoch": 1.3873295910184442, "grad_norm": 0.6981160044670105, "learning_rate": 1.1697619763923143e-05, "loss": 0.7428, "step": 1730 }, { "epoch": 1.38813151563753, "grad_norm": 0.7034778594970703, "learning_rate": 1.168908552121068e-05, "loss": 0.7919, "step": 1731 }, { "epoch": 1.3889334402566158, "grad_norm": 0.6916285753250122, "learning_rate": 1.1680550011964374e-05, "loss": 0.7575, "step": 1732 }, { "epoch": 1.3897353648757016, "grad_norm": 0.660354495048523, "learning_rate": 1.167201324258443e-05, "loss": 0.7571, "step": 1733 }, { "epoch": 1.3905372894947874, "grad_norm": 0.7056530117988586, "learning_rate": 1.166347521947202e-05, "loss": 0.7792, "step": 1734 }, { "epoch": 1.3913392141138732, "grad_norm": 0.6491231918334961, "learning_rate": 1.1654935949029234e-05, "loss": 0.725, "step": 1735 }, { "epoch": 1.392141138732959, "grad_norm": 0.681361198425293, "learning_rate": 1.1646395437659112e-05, "loss": 0.791, "step": 1736 }, { "epoch": 1.392943063352045, "grad_norm": 0.6471366286277771, "learning_rate": 1.1637853691765625e-05, "loss": 0.769, "step": 1737 }, { "epoch": 1.3937449879711308, "grad_norm": 0.7128544449806213, "learning_rate": 1.162931071775366e-05, "loss": 0.7581, "step": 1738 }, { "epoch": 1.3945469125902166, "grad_norm": 0.6705619692802429, "learning_rate": 1.162076652202903e-05, "loss": 0.7889, "step": 1739 }, { "epoch": 1.3953488372093024, "grad_norm": 0.642647922039032, "learning_rate": 1.1612221110998463e-05, "loss": 0.7282, "step": 1740 }, { "epoch": 1.3961507618283882, "grad_norm": 0.6899245381355286, "learning_rate": 1.16036744910696e-05, "loss": 0.7476, "step": 1741 }, { "epoch": 1.396952686447474, "grad_norm": 0.6138514876365662, "learning_rate": 1.1595126668650993e-05, "loss": 0.7197, "step": 1742 }, { "epoch": 1.3977546110665597, "grad_norm": 0.7220246195793152, "learning_rate": 1.1586577650152084e-05, "loss": 0.7255, "step": 1743 }, { "epoch": 1.3985565356856455, "grad_norm": 0.6729558706283569, "learning_rate": 1.1578027441983219e-05, "loss": 0.7612, "step": 1744 }, { "epoch": 1.3993584603047313, "grad_norm": 0.6875990629196167, "learning_rate": 1.1569476050555637e-05, "loss": 0.7713, "step": 1745 }, { "epoch": 1.4001603849238171, "grad_norm": 0.6855756044387817, "learning_rate": 1.156092348228146e-05, "loss": 0.7852, "step": 1746 }, { "epoch": 1.400962309542903, "grad_norm": 0.666982114315033, "learning_rate": 1.1552369743573699e-05, "loss": 0.7576, "step": 1747 }, { "epoch": 1.4017642341619887, "grad_norm": 0.6429185271263123, "learning_rate": 1.1543814840846237e-05, "loss": 0.7467, "step": 1748 }, { "epoch": 1.4025661587810747, "grad_norm": 0.6782676577568054, "learning_rate": 1.153525878051383e-05, "loss": 0.7397, "step": 1749 }, { "epoch": 1.4033680834001605, "grad_norm": 0.6263085007667542, "learning_rate": 1.1526701568992102e-05, "loss": 0.7627, "step": 1750 }, { "epoch": 1.4041700080192463, "grad_norm": 0.7163293361663818, "learning_rate": 1.1518143212697547e-05, "loss": 0.7476, "step": 1751 }, { "epoch": 1.404971932638332, "grad_norm": 0.6758559942245483, "learning_rate": 1.1509583718047508e-05, "loss": 0.7012, "step": 1752 }, { "epoch": 1.4057738572574179, "grad_norm": 0.6683815717697144, "learning_rate": 1.1501023091460187e-05, "loss": 0.7344, "step": 1753 }, { "epoch": 1.4065757818765037, "grad_norm": 0.7088032364845276, "learning_rate": 1.149246133935463e-05, "loss": 0.7192, "step": 1754 }, { "epoch": 1.4073777064955895, "grad_norm": 0.6811531782150269, "learning_rate": 1.1483898468150736e-05, "loss": 0.7312, "step": 1755 }, { "epoch": 1.4081796311146753, "grad_norm": 0.6816032528877258, "learning_rate": 1.1475334484269234e-05, "loss": 0.7249, "step": 1756 }, { "epoch": 1.408981555733761, "grad_norm": 0.6524032354354858, "learning_rate": 1.146676939413169e-05, "loss": 0.7065, "step": 1757 }, { "epoch": 1.4097834803528468, "grad_norm": 0.6719584465026855, "learning_rate": 1.1458203204160503e-05, "loss": 0.7836, "step": 1758 }, { "epoch": 1.4105854049719326, "grad_norm": 0.6611328721046448, "learning_rate": 1.1449635920778894e-05, "loss": 0.7608, "step": 1759 }, { "epoch": 1.4113873295910184, "grad_norm": 0.6723156571388245, "learning_rate": 1.14410675504109e-05, "loss": 0.7607, "step": 1760 }, { "epoch": 1.4121892542101042, "grad_norm": 0.7079909443855286, "learning_rate": 1.143249809948138e-05, "loss": 0.7835, "step": 1761 }, { "epoch": 1.41299117882919, "grad_norm": 0.6389018893241882, "learning_rate": 1.1423927574415998e-05, "loss": 0.7477, "step": 1762 }, { "epoch": 1.4137931034482758, "grad_norm": 0.7065446972846985, "learning_rate": 1.1415355981641229e-05, "loss": 0.7811, "step": 1763 }, { "epoch": 1.4145950280673616, "grad_norm": 0.6564798951148987, "learning_rate": 1.1406783327584345e-05, "loss": 0.7824, "step": 1764 }, { "epoch": 1.4153969526864474, "grad_norm": 0.6832185983657837, "learning_rate": 1.139820961867341e-05, "loss": 0.7373, "step": 1765 }, { "epoch": 1.4161988773055332, "grad_norm": 0.6919088363647461, "learning_rate": 1.1389634861337284e-05, "loss": 0.7716, "step": 1766 }, { "epoch": 1.417000801924619, "grad_norm": 0.6253665685653687, "learning_rate": 1.1381059062005617e-05, "loss": 0.757, "step": 1767 }, { "epoch": 1.4178027265437048, "grad_norm": 0.7172982692718506, "learning_rate": 1.137248222710883e-05, "loss": 0.7826, "step": 1768 }, { "epoch": 1.4186046511627908, "grad_norm": 0.6555379629135132, "learning_rate": 1.1363904363078126e-05, "loss": 0.7525, "step": 1769 }, { "epoch": 1.4194065757818766, "grad_norm": 0.6733851432800293, "learning_rate": 1.135532547634548e-05, "loss": 0.7448, "step": 1770 }, { "epoch": 1.4202085004009624, "grad_norm": 0.6609330177307129, "learning_rate": 1.1346745573343636e-05, "loss": 0.7659, "step": 1771 }, { "epoch": 1.4210104250200482, "grad_norm": 0.6952093839645386, "learning_rate": 1.13381646605061e-05, "loss": 0.7557, "step": 1772 }, { "epoch": 1.421812349639134, "grad_norm": 0.7016155123710632, "learning_rate": 1.1329582744267125e-05, "loss": 0.7528, "step": 1773 }, { "epoch": 1.4226142742582197, "grad_norm": 0.6713129281997681, "learning_rate": 1.1320999831061727e-05, "loss": 0.755, "step": 1774 }, { "epoch": 1.4234161988773055, "grad_norm": 0.6963139176368713, "learning_rate": 1.1312415927325668e-05, "loss": 0.7445, "step": 1775 }, { "epoch": 1.4242181234963913, "grad_norm": 0.6338862180709839, "learning_rate": 1.1303831039495452e-05, "loss": 0.7368, "step": 1776 }, { "epoch": 1.4250200481154771, "grad_norm": 0.6802634596824646, "learning_rate": 1.1295245174008317e-05, "loss": 0.7628, "step": 1777 }, { "epoch": 1.425821972734563, "grad_norm": 0.6717724204063416, "learning_rate": 1.1286658337302243e-05, "loss": 0.7874, "step": 1778 }, { "epoch": 1.4266238973536487, "grad_norm": 0.6719787120819092, "learning_rate": 1.1278070535815927e-05, "loss": 0.7606, "step": 1779 }, { "epoch": 1.4274258219727345, "grad_norm": 0.6636870503425598, "learning_rate": 1.1269481775988793e-05, "loss": 0.7203, "step": 1780 }, { "epoch": 1.4282277465918205, "grad_norm": 0.6615099310874939, "learning_rate": 1.1260892064260995e-05, "loss": 0.7462, "step": 1781 }, { "epoch": 1.4290296712109063, "grad_norm": 0.703061044216156, "learning_rate": 1.1252301407073386e-05, "loss": 0.7762, "step": 1782 }, { "epoch": 1.429831595829992, "grad_norm": 0.743383526802063, "learning_rate": 1.124370981086753e-05, "loss": 0.7658, "step": 1783 }, { "epoch": 1.4306335204490779, "grad_norm": 0.6267064809799194, "learning_rate": 1.1235117282085704e-05, "loss": 0.7852, "step": 1784 }, { "epoch": 1.4314354450681637, "grad_norm": 0.6929275393486023, "learning_rate": 1.1226523827170876e-05, "loss": 0.7605, "step": 1785 }, { "epoch": 1.4322373696872495, "grad_norm": 0.6413291692733765, "learning_rate": 1.121792945256671e-05, "loss": 0.7657, "step": 1786 }, { "epoch": 1.4330392943063353, "grad_norm": 0.6685841083526611, "learning_rate": 1.1209334164717562e-05, "loss": 0.7738, "step": 1787 }, { "epoch": 1.433841218925421, "grad_norm": 0.697259247303009, "learning_rate": 1.1200737970068476e-05, "loss": 0.7591, "step": 1788 }, { "epoch": 1.4346431435445068, "grad_norm": 0.6685236096382141, "learning_rate": 1.1192140875065167e-05, "loss": 0.7636, "step": 1789 }, { "epoch": 1.4354450681635926, "grad_norm": 0.6757694482803345, "learning_rate": 1.1183542886154027e-05, "loss": 0.7549, "step": 1790 }, { "epoch": 1.4362469927826784, "grad_norm": 0.6777142286300659, "learning_rate": 1.1174944009782123e-05, "loss": 0.7774, "step": 1791 }, { "epoch": 1.4370489174017642, "grad_norm": 0.7203055620193481, "learning_rate": 1.1166344252397187e-05, "loss": 0.7841, "step": 1792 }, { "epoch": 1.43785084202085, "grad_norm": 0.6814801096916199, "learning_rate": 1.1157743620447611e-05, "loss": 0.7389, "step": 1793 }, { "epoch": 1.4386527666399358, "grad_norm": 0.6721707582473755, "learning_rate": 1.1149142120382443e-05, "loss": 0.7395, "step": 1794 }, { "epoch": 1.4394546912590216, "grad_norm": 0.6581818461418152, "learning_rate": 1.1140539758651372e-05, "loss": 0.7273, "step": 1795 }, { "epoch": 1.4402566158781074, "grad_norm": 0.6775161027908325, "learning_rate": 1.1131936541704749e-05, "loss": 0.7649, "step": 1796 }, { "epoch": 1.4410585404971932, "grad_norm": 0.6994383931159973, "learning_rate": 1.112333247599356e-05, "loss": 0.766, "step": 1797 }, { "epoch": 1.441860465116279, "grad_norm": 0.6743654012680054, "learning_rate": 1.1114727567969423e-05, "loss": 0.7642, "step": 1798 }, { "epoch": 1.4426623897353648, "grad_norm": 0.6504762172698975, "learning_rate": 1.1106121824084593e-05, "loss": 0.758, "step": 1799 }, { "epoch": 1.4434643143544506, "grad_norm": 0.6630826592445374, "learning_rate": 1.1097515250791945e-05, "loss": 0.7632, "step": 1800 }, { "epoch": 1.4442662389735366, "grad_norm": 0.6329621076583862, "learning_rate": 1.1088907854544985e-05, "loss": 0.7214, "step": 1801 }, { "epoch": 1.4450681635926224, "grad_norm": 0.6646215319633484, "learning_rate": 1.1080299641797837e-05, "loss": 0.732, "step": 1802 }, { "epoch": 1.4458700882117081, "grad_norm": 0.6855160593986511, "learning_rate": 1.1071690619005224e-05, "loss": 0.7722, "step": 1803 }, { "epoch": 1.446672012830794, "grad_norm": 0.7373548746109009, "learning_rate": 1.1063080792622484e-05, "loss": 0.7716, "step": 1804 }, { "epoch": 1.4474739374498797, "grad_norm": 0.6910948753356934, "learning_rate": 1.1054470169105564e-05, "loss": 0.7635, "step": 1805 }, { "epoch": 1.4482758620689655, "grad_norm": 0.6741712093353271, "learning_rate": 1.1045858754911001e-05, "loss": 0.7724, "step": 1806 }, { "epoch": 1.4490777866880513, "grad_norm": 0.6909212470054626, "learning_rate": 1.1037246556495922e-05, "loss": 0.7664, "step": 1807 }, { "epoch": 1.449879711307137, "grad_norm": 0.7179321050643921, "learning_rate": 1.1028633580318056e-05, "loss": 0.7787, "step": 1808 }, { "epoch": 1.450681635926223, "grad_norm": 0.6822714805603027, "learning_rate": 1.1020019832835694e-05, "loss": 0.7634, "step": 1809 }, { "epoch": 1.4514835605453087, "grad_norm": 0.680316686630249, "learning_rate": 1.1011405320507726e-05, "loss": 0.7614, "step": 1810 }, { "epoch": 1.4522854851643945, "grad_norm": 0.6558269262313843, "learning_rate": 1.1002790049793604e-05, "loss": 0.6952, "step": 1811 }, { "epoch": 1.4530874097834803, "grad_norm": 0.6913748979568481, "learning_rate": 1.099417402715335e-05, "loss": 0.7696, "step": 1812 }, { "epoch": 1.4538893344025663, "grad_norm": 0.6790192723274231, "learning_rate": 1.0985557259047557e-05, "loss": 0.7428, "step": 1813 }, { "epoch": 1.454691259021652, "grad_norm": 0.6557827591896057, "learning_rate": 1.0976939751937361e-05, "loss": 0.7443, "step": 1814 }, { "epoch": 1.4554931836407379, "grad_norm": 0.6664519309997559, "learning_rate": 1.0968321512284472e-05, "loss": 0.7227, "step": 1815 }, { "epoch": 1.4562951082598237, "grad_norm": 0.6873872876167297, "learning_rate": 1.0959702546551135e-05, "loss": 0.7558, "step": 1816 }, { "epoch": 1.4570970328789095, "grad_norm": 0.6596401333808899, "learning_rate": 1.0951082861200142e-05, "loss": 0.7435, "step": 1817 }, { "epoch": 1.4578989574979953, "grad_norm": 0.6788073182106018, "learning_rate": 1.0942462462694834e-05, "loss": 0.8009, "step": 1818 }, { "epoch": 1.458700882117081, "grad_norm": 0.7057682871818542, "learning_rate": 1.0933841357499074e-05, "loss": 0.7332, "step": 1819 }, { "epoch": 1.4595028067361668, "grad_norm": 0.6896910071372986, "learning_rate": 1.0925219552077258e-05, "loss": 0.7549, "step": 1820 }, { "epoch": 1.4603047313552526, "grad_norm": 0.6752651333808899, "learning_rate": 1.091659705289431e-05, "loss": 0.7439, "step": 1821 }, { "epoch": 1.4611066559743384, "grad_norm": 0.7121813297271729, "learning_rate": 1.090797386641568e-05, "loss": 0.7304, "step": 1822 }, { "epoch": 1.4619085805934242, "grad_norm": 0.7077644467353821, "learning_rate": 1.0899349999107325e-05, "loss": 0.7529, "step": 1823 }, { "epoch": 1.46271050521251, "grad_norm": 0.6643130779266357, "learning_rate": 1.089072545743571e-05, "loss": 0.7544, "step": 1824 }, { "epoch": 1.4635124298315958, "grad_norm": 0.6911596655845642, "learning_rate": 1.088210024786781e-05, "loss": 0.76, "step": 1825 }, { "epoch": 1.4643143544506816, "grad_norm": 0.6701585054397583, "learning_rate": 1.0873474376871105e-05, "loss": 0.7368, "step": 1826 }, { "epoch": 1.4651162790697674, "grad_norm": 0.7087423801422119, "learning_rate": 1.0864847850913568e-05, "loss": 0.7703, "step": 1827 }, { "epoch": 1.4659182036888532, "grad_norm": 0.7176198363304138, "learning_rate": 1.0856220676463654e-05, "loss": 0.812, "step": 1828 }, { "epoch": 1.466720128307939, "grad_norm": 0.6819745302200317, "learning_rate": 1.084759285999032e-05, "loss": 0.792, "step": 1829 }, { "epoch": 1.4675220529270248, "grad_norm": 0.7085966467857361, "learning_rate": 1.0838964407962993e-05, "loss": 0.7629, "step": 1830 }, { "epoch": 1.4683239775461105, "grad_norm": 0.6867752075195312, "learning_rate": 1.0830335326851577e-05, "loss": 0.7573, "step": 1831 }, { "epoch": 1.4691259021651963, "grad_norm": 0.665777325630188, "learning_rate": 1.0821705623126461e-05, "loss": 0.7561, "step": 1832 }, { "epoch": 1.4699278267842824, "grad_norm": 0.7135064601898193, "learning_rate": 1.0813075303258483e-05, "loss": 0.7441, "step": 1833 }, { "epoch": 1.4707297514033681, "grad_norm": 0.6736664772033691, "learning_rate": 1.0804444373718952e-05, "loss": 0.7223, "step": 1834 }, { "epoch": 1.471531676022454, "grad_norm": 0.6878182888031006, "learning_rate": 1.0795812840979632e-05, "loss": 0.728, "step": 1835 }, { "epoch": 1.4723336006415397, "grad_norm": 0.6910241842269897, "learning_rate": 1.0787180711512744e-05, "loss": 0.7475, "step": 1836 }, { "epoch": 1.4731355252606255, "grad_norm": 0.739133894443512, "learning_rate": 1.0778547991790946e-05, "loss": 0.7601, "step": 1837 }, { "epoch": 1.4739374498797113, "grad_norm": 0.7009455561637878, "learning_rate": 1.076991468828735e-05, "loss": 0.7574, "step": 1838 }, { "epoch": 1.474739374498797, "grad_norm": 0.7256219983100891, "learning_rate": 1.0761280807475504e-05, "loss": 0.7775, "step": 1839 }, { "epoch": 1.475541299117883, "grad_norm": 0.7107866406440735, "learning_rate": 1.0752646355829382e-05, "loss": 0.7355, "step": 1840 }, { "epoch": 1.4763432237369687, "grad_norm": 0.7084487676620483, "learning_rate": 1.0744011339823389e-05, "loss": 0.7747, "step": 1841 }, { "epoch": 1.4771451483560545, "grad_norm": 0.6767612099647522, "learning_rate": 1.0735375765932352e-05, "loss": 0.7539, "step": 1842 }, { "epoch": 1.4779470729751403, "grad_norm": 0.7070626616477966, "learning_rate": 1.0726739640631523e-05, "loss": 0.788, "step": 1843 }, { "epoch": 1.478748997594226, "grad_norm": 0.6804197430610657, "learning_rate": 1.0718102970396564e-05, "loss": 0.7404, "step": 1844 }, { "epoch": 1.479550922213312, "grad_norm": 0.6973698139190674, "learning_rate": 1.0709465761703542e-05, "loss": 0.7441, "step": 1845 }, { "epoch": 1.4803528468323979, "grad_norm": 0.6551907658576965, "learning_rate": 1.0700828021028929e-05, "loss": 0.7265, "step": 1846 }, { "epoch": 1.4811547714514837, "grad_norm": 0.6486260890960693, "learning_rate": 1.0692189754849595e-05, "loss": 0.736, "step": 1847 }, { "epoch": 1.4819566960705695, "grad_norm": 0.601466715335846, "learning_rate": 1.0683550969642813e-05, "loss": 0.6997, "step": 1848 }, { "epoch": 1.4827586206896552, "grad_norm": 0.6700170636177063, "learning_rate": 1.0674911671886236e-05, "loss": 0.7438, "step": 1849 }, { "epoch": 1.483560545308741, "grad_norm": 0.6908175349235535, "learning_rate": 1.06662718680579e-05, "loss": 0.7422, "step": 1850 }, { "epoch": 1.4843624699278268, "grad_norm": 0.7098249197006226, "learning_rate": 1.0657631564636226e-05, "loss": 0.8039, "step": 1851 }, { "epoch": 1.4851643945469126, "grad_norm": 0.7431460022926331, "learning_rate": 1.0648990768100009e-05, "loss": 0.7891, "step": 1852 }, { "epoch": 1.4859663191659984, "grad_norm": 0.6714998483657837, "learning_rate": 1.0640349484928413e-05, "loss": 0.742, "step": 1853 }, { "epoch": 1.4867682437850842, "grad_norm": 0.6916069984436035, "learning_rate": 1.0631707721600965e-05, "loss": 0.7708, "step": 1854 }, { "epoch": 1.48757016840417, "grad_norm": 0.7198412418365479, "learning_rate": 1.0623065484597555e-05, "loss": 0.7498, "step": 1855 }, { "epoch": 1.4883720930232558, "grad_norm": 0.6497362852096558, "learning_rate": 1.0614422780398422e-05, "loss": 0.7526, "step": 1856 }, { "epoch": 1.4891740176423416, "grad_norm": 0.6391859650611877, "learning_rate": 1.0605779615484167e-05, "loss": 0.737, "step": 1857 }, { "epoch": 1.4899759422614274, "grad_norm": 0.6709389090538025, "learning_rate": 1.0597135996335723e-05, "loss": 0.7439, "step": 1858 }, { "epoch": 1.4907778668805132, "grad_norm": 0.7073272466659546, "learning_rate": 1.0588491929434375e-05, "loss": 0.7798, "step": 1859 }, { "epoch": 1.491579791499599, "grad_norm": 0.6603201031684875, "learning_rate": 1.0579847421261733e-05, "loss": 0.7422, "step": 1860 }, { "epoch": 1.4923817161186848, "grad_norm": 0.6485406160354614, "learning_rate": 1.057120247829975e-05, "loss": 0.7667, "step": 1861 }, { "epoch": 1.4931836407377705, "grad_norm": 0.648036777973175, "learning_rate": 1.0562557107030695e-05, "loss": 0.7603, "step": 1862 }, { "epoch": 1.4939855653568563, "grad_norm": 0.7099272012710571, "learning_rate": 1.0553911313937162e-05, "loss": 0.7725, "step": 1863 }, { "epoch": 1.4947874899759421, "grad_norm": 0.7114027142524719, "learning_rate": 1.0545265105502065e-05, "loss": 0.7704, "step": 1864 }, { "epoch": 1.4955894145950281, "grad_norm": 0.6591110825538635, "learning_rate": 1.053661848820862e-05, "loss": 0.733, "step": 1865 }, { "epoch": 1.496391339214114, "grad_norm": 0.7078248262405396, "learning_rate": 1.0527971468540356e-05, "loss": 0.7702, "step": 1866 }, { "epoch": 1.4971932638331997, "grad_norm": 0.6885595917701721, "learning_rate": 1.0519324052981103e-05, "loss": 0.7377, "step": 1867 }, { "epoch": 1.4979951884522855, "grad_norm": 0.681225061416626, "learning_rate": 1.0510676248014991e-05, "loss": 0.7427, "step": 1868 }, { "epoch": 1.4987971130713713, "grad_norm": 0.6760066747665405, "learning_rate": 1.050202806012644e-05, "loss": 0.7611, "step": 1869 }, { "epoch": 1.499599037690457, "grad_norm": 0.6731633543968201, "learning_rate": 1.0493379495800149e-05, "loss": 0.7486, "step": 1870 }, { "epoch": 1.500400962309543, "grad_norm": 0.6760236620903015, "learning_rate": 1.0484730561521107e-05, "loss": 0.7713, "step": 1871 }, { "epoch": 1.5012028869286287, "grad_norm": 0.6664961576461792, "learning_rate": 1.0476081263774585e-05, "loss": 0.7235, "step": 1872 }, { "epoch": 1.5020048115477145, "grad_norm": 0.6721197366714478, "learning_rate": 1.0467431609046116e-05, "loss": 0.7431, "step": 1873 }, { "epoch": 1.5028067361668003, "grad_norm": 0.6789388060569763, "learning_rate": 1.0458781603821508e-05, "loss": 0.7797, "step": 1874 }, { "epoch": 1.5036086607858863, "grad_norm": 0.7579322457313538, "learning_rate": 1.045013125458683e-05, "loss": 0.7856, "step": 1875 }, { "epoch": 1.504410585404972, "grad_norm": 0.6685303449630737, "learning_rate": 1.0441480567828408e-05, "loss": 0.7576, "step": 1876 }, { "epoch": 1.5052125100240579, "grad_norm": 0.6478756666183472, "learning_rate": 1.0432829550032818e-05, "loss": 0.7651, "step": 1877 }, { "epoch": 1.5060144346431437, "grad_norm": 0.6975502967834473, "learning_rate": 1.0424178207686894e-05, "loss": 0.7516, "step": 1878 }, { "epoch": 1.5068163592622295, "grad_norm": 0.6927207708358765, "learning_rate": 1.0415526547277706e-05, "loss": 0.767, "step": 1879 }, { "epoch": 1.5076182838813152, "grad_norm": 0.7247032523155212, "learning_rate": 1.0406874575292558e-05, "loss": 0.7695, "step": 1880 }, { "epoch": 1.508420208500401, "grad_norm": 0.6950458288192749, "learning_rate": 1.0398222298218996e-05, "loss": 0.7968, "step": 1881 }, { "epoch": 1.5092221331194868, "grad_norm": 0.660862386226654, "learning_rate": 1.0389569722544794e-05, "loss": 0.7443, "step": 1882 }, { "epoch": 1.5100240577385726, "grad_norm": 0.6698028445243835, "learning_rate": 1.0380916854757948e-05, "loss": 0.7537, "step": 1883 }, { "epoch": 1.5108259823576584, "grad_norm": 0.6830818057060242, "learning_rate": 1.0372263701346671e-05, "loss": 0.7432, "step": 1884 }, { "epoch": 1.5116279069767442, "grad_norm": 0.7310096025466919, "learning_rate": 1.0363610268799393e-05, "loss": 0.747, "step": 1885 }, { "epoch": 1.51242983159583, "grad_norm": 0.7210774421691895, "learning_rate": 1.035495656360475e-05, "loss": 0.798, "step": 1886 }, { "epoch": 1.5132317562149158, "grad_norm": 0.6990832686424255, "learning_rate": 1.0346302592251591e-05, "loss": 0.7494, "step": 1887 }, { "epoch": 1.5140336808340016, "grad_norm": 0.6791195273399353, "learning_rate": 1.033764836122895e-05, "loss": 0.7489, "step": 1888 }, { "epoch": 1.5148356054530874, "grad_norm": 0.6856716275215149, "learning_rate": 1.0328993877026075e-05, "loss": 0.7514, "step": 1889 }, { "epoch": 1.5156375300721732, "grad_norm": 0.6292150020599365, "learning_rate": 1.032033914613238e-05, "loss": 0.7234, "step": 1890 }, { "epoch": 1.516439454691259, "grad_norm": 0.6631137132644653, "learning_rate": 1.0311684175037488e-05, "loss": 0.7079, "step": 1891 }, { "epoch": 1.5172413793103448, "grad_norm": 0.6873642206192017, "learning_rate": 1.0303028970231185e-05, "loss": 0.7566, "step": 1892 }, { "epoch": 1.5180433039294305, "grad_norm": 0.6789519190788269, "learning_rate": 1.0294373538203439e-05, "loss": 0.7369, "step": 1893 }, { "epoch": 1.5188452285485163, "grad_norm": 0.716335117816925, "learning_rate": 1.028571788544439e-05, "loss": 0.7133, "step": 1894 }, { "epoch": 1.5196471531676021, "grad_norm": 0.7093126177787781, "learning_rate": 1.0277062018444342e-05, "loss": 0.7786, "step": 1895 }, { "epoch": 1.520449077786688, "grad_norm": 0.6854731440544128, "learning_rate": 1.0268405943693757e-05, "loss": 0.78, "step": 1896 }, { "epoch": 1.5212510024057737, "grad_norm": 0.6630930304527283, "learning_rate": 1.0259749667683252e-05, "loss": 0.7239, "step": 1897 }, { "epoch": 1.5220529270248595, "grad_norm": 0.6397947669029236, "learning_rate": 1.0251093196903601e-05, "loss": 0.7385, "step": 1898 }, { "epoch": 1.5228548516439455, "grad_norm": 0.6733710169792175, "learning_rate": 1.0242436537845719e-05, "loss": 0.7366, "step": 1899 }, { "epoch": 1.5236567762630313, "grad_norm": 0.7009027600288391, "learning_rate": 1.0233779697000667e-05, "loss": 0.7702, "step": 1900 }, { "epoch": 1.524458700882117, "grad_norm": 0.7578801512718201, "learning_rate": 1.0225122680859633e-05, "loss": 0.7649, "step": 1901 }, { "epoch": 1.525260625501203, "grad_norm": 0.7465493083000183, "learning_rate": 1.0216465495913947e-05, "loss": 0.7646, "step": 1902 }, { "epoch": 1.5260625501202887, "grad_norm": 0.6907299160957336, "learning_rate": 1.020780814865506e-05, "loss": 0.7389, "step": 1903 }, { "epoch": 1.5268644747393745, "grad_norm": 0.682547926902771, "learning_rate": 1.0199150645574548e-05, "loss": 0.7454, "step": 1904 }, { "epoch": 1.5276663993584603, "grad_norm": 0.6859135031700134, "learning_rate": 1.0190492993164101e-05, "loss": 0.7432, "step": 1905 }, { "epoch": 1.528468323977546, "grad_norm": 0.6617407202720642, "learning_rate": 1.0181835197915515e-05, "loss": 0.7214, "step": 1906 }, { "epoch": 1.529270248596632, "grad_norm": 0.6514879465103149, "learning_rate": 1.0173177266320706e-05, "loss": 0.7437, "step": 1907 }, { "epoch": 1.5300721732157179, "grad_norm": 0.6830449104309082, "learning_rate": 1.016451920487169e-05, "loss": 0.7648, "step": 1908 }, { "epoch": 1.5308740978348037, "grad_norm": 0.6907112002372742, "learning_rate": 1.0155861020060566e-05, "loss": 0.7236, "step": 1909 }, { "epoch": 1.5316760224538895, "grad_norm": 0.6831691861152649, "learning_rate": 1.0147202718379544e-05, "loss": 0.7153, "step": 1910 }, { "epoch": 1.5324779470729752, "grad_norm": 0.6687254905700684, "learning_rate": 1.013854430632091e-05, "loss": 0.7333, "step": 1911 }, { "epoch": 1.533279871692061, "grad_norm": 0.6817905902862549, "learning_rate": 1.0129885790377034e-05, "loss": 0.7489, "step": 1912 }, { "epoch": 1.5340817963111468, "grad_norm": 0.6689939498901367, "learning_rate": 1.0121227177040373e-05, "loss": 0.7337, "step": 1913 }, { "epoch": 1.5348837209302326, "grad_norm": 0.6985632181167603, "learning_rate": 1.0112568472803443e-05, "loss": 0.7522, "step": 1914 }, { "epoch": 1.5356856455493184, "grad_norm": 0.7158617973327637, "learning_rate": 1.0103909684158841e-05, "loss": 0.776, "step": 1915 }, { "epoch": 1.5364875701684042, "grad_norm": 0.671501874923706, "learning_rate": 1.0095250817599218e-05, "loss": 0.7396, "step": 1916 }, { "epoch": 1.53728949478749, "grad_norm": 0.6825124025344849, "learning_rate": 1.008659187961729e-05, "loss": 0.6984, "step": 1917 }, { "epoch": 1.5380914194065758, "grad_norm": 0.6682149171829224, "learning_rate": 1.0077932876705819e-05, "loss": 0.7488, "step": 1918 }, { "epoch": 1.5388933440256616, "grad_norm": 0.6929824948310852, "learning_rate": 1.0069273815357621e-05, "loss": 0.7576, "step": 1919 }, { "epoch": 1.5396952686447474, "grad_norm": 0.6691644191741943, "learning_rate": 1.006061470206556e-05, "loss": 0.7617, "step": 1920 }, { "epoch": 1.5404971932638332, "grad_norm": 0.6622249484062195, "learning_rate": 1.0051955543322533e-05, "loss": 0.7602, "step": 1921 }, { "epoch": 1.541299117882919, "grad_norm": 0.6343883275985718, "learning_rate": 1.0043296345621467e-05, "loss": 0.7423, "step": 1922 }, { "epoch": 1.5421010425020047, "grad_norm": 0.668969452381134, "learning_rate": 1.0034637115455327e-05, "loss": 0.7551, "step": 1923 }, { "epoch": 1.5429029671210905, "grad_norm": 0.7144033908843994, "learning_rate": 1.0025977859317097e-05, "loss": 0.716, "step": 1924 }, { "epoch": 1.5437048917401763, "grad_norm": 0.6621528267860413, "learning_rate": 1.0017318583699786e-05, "loss": 0.7375, "step": 1925 }, { "epoch": 1.5445068163592621, "grad_norm": 0.7118502259254456, "learning_rate": 1.0008659295096412e-05, "loss": 0.8017, "step": 1926 }, { "epoch": 1.545308740978348, "grad_norm": 0.6664971113204956, "learning_rate": 1e-05, "loss": 0.7292, "step": 1927 }, { "epoch": 1.5461106655974337, "grad_norm": 0.7016831636428833, "learning_rate": 9.991340704903593e-06, "loss": 0.7586, "step": 1928 }, { "epoch": 1.5469125902165195, "grad_norm": 0.7355296015739441, "learning_rate": 9.982681416300217e-06, "loss": 0.7695, "step": 1929 }, { "epoch": 1.5477145148356053, "grad_norm": 0.7109652757644653, "learning_rate": 9.974022140682906e-06, "loss": 0.7447, "step": 1930 }, { "epoch": 1.5485164394546913, "grad_norm": 0.8703607320785522, "learning_rate": 9.965362884544674e-06, "loss": 0.7483, "step": 1931 }, { "epoch": 1.549318364073777, "grad_norm": 0.6655393242835999, "learning_rate": 9.956703654378536e-06, "loss": 0.7431, "step": 1932 }, { "epoch": 1.550120288692863, "grad_norm": 0.6632983088493347, "learning_rate": 9.948044456677472e-06, "loss": 0.6951, "step": 1933 }, { "epoch": 1.5509222133119487, "grad_norm": 0.7212697267532349, "learning_rate": 9.939385297934441e-06, "loss": 0.7628, "step": 1934 }, { "epoch": 1.5517241379310345, "grad_norm": 0.6794565916061401, "learning_rate": 9.930726184642382e-06, "loss": 0.7403, "step": 1935 }, { "epoch": 1.5525260625501203, "grad_norm": 0.7048789262771606, "learning_rate": 9.922067123294183e-06, "loss": 0.7689, "step": 1936 }, { "epoch": 1.553327987169206, "grad_norm": 0.7070860862731934, "learning_rate": 9.913408120382714e-06, "loss": 0.7855, "step": 1937 }, { "epoch": 1.5541299117882919, "grad_norm": 0.681030809879303, "learning_rate": 9.904749182400786e-06, "loss": 0.7341, "step": 1938 }, { "epoch": 1.5549318364073779, "grad_norm": 0.6738923788070679, "learning_rate": 9.896090315841162e-06, "loss": 0.7725, "step": 1939 }, { "epoch": 1.5557337610264637, "grad_norm": 0.6607416868209839, "learning_rate": 9.88743152719656e-06, "loss": 0.7083, "step": 1940 }, { "epoch": 1.5565356856455494, "grad_norm": 0.6659730672836304, "learning_rate": 9.878772822959628e-06, "loss": 0.7544, "step": 1941 }, { "epoch": 1.5573376102646352, "grad_norm": 0.669576108455658, "learning_rate": 9.870114209622969e-06, "loss": 0.741, "step": 1942 }, { "epoch": 1.558139534883721, "grad_norm": 0.6290959715843201, "learning_rate": 9.861455693679096e-06, "loss": 0.7228, "step": 1943 }, { "epoch": 1.5589414595028068, "grad_norm": 0.6587105393409729, "learning_rate": 9.852797281620459e-06, "loss": 0.716, "step": 1944 }, { "epoch": 1.5597433841218926, "grad_norm": 0.7020542621612549, "learning_rate": 9.844138979939437e-06, "loss": 0.7684, "step": 1945 }, { "epoch": 1.5605453087409784, "grad_norm": 0.6784189343452454, "learning_rate": 9.835480795128314e-06, "loss": 0.7699, "step": 1946 }, { "epoch": 1.5613472333600642, "grad_norm": 0.6841933727264404, "learning_rate": 9.826822733679296e-06, "loss": 0.7578, "step": 1947 }, { "epoch": 1.56214915797915, "grad_norm": 0.7108730673789978, "learning_rate": 9.81816480208449e-06, "loss": 0.7228, "step": 1948 }, { "epoch": 1.5629510825982358, "grad_norm": 0.6719072461128235, "learning_rate": 9.809507006835904e-06, "loss": 0.7476, "step": 1949 }, { "epoch": 1.5637530072173216, "grad_norm": 0.6494175791740417, "learning_rate": 9.800849354425455e-06, "loss": 0.7254, "step": 1950 }, { "epoch": 1.5645549318364074, "grad_norm": 0.6985930800437927, "learning_rate": 9.79219185134494e-06, "loss": 0.7566, "step": 1951 }, { "epoch": 1.5653568564554932, "grad_norm": 0.6902778744697571, "learning_rate": 9.783534504086055e-06, "loss": 0.7368, "step": 1952 }, { "epoch": 1.566158781074579, "grad_norm": 0.6868149042129517, "learning_rate": 9.774877319140372e-06, "loss": 0.7191, "step": 1953 }, { "epoch": 1.5669607056936647, "grad_norm": 0.6847664713859558, "learning_rate": 9.766220302999336e-06, "loss": 0.7588, "step": 1954 }, { "epoch": 1.5677626303127505, "grad_norm": 0.6886143088340759, "learning_rate": 9.757563462154283e-06, "loss": 0.7608, "step": 1955 }, { "epoch": 1.5685645549318363, "grad_norm": 0.7180765271186829, "learning_rate": 9.7489068030964e-06, "loss": 0.7273, "step": 1956 }, { "epoch": 1.5693664795509221, "grad_norm": 0.6763858795166016, "learning_rate": 9.74025033231675e-06, "loss": 0.7549, "step": 1957 }, { "epoch": 1.570168404170008, "grad_norm": 0.6915479898452759, "learning_rate": 9.731594056306248e-06, "loss": 0.7512, "step": 1958 }, { "epoch": 1.5709703287890937, "grad_norm": 0.6777629852294922, "learning_rate": 9.72293798155566e-06, "loss": 0.7536, "step": 1959 }, { "epoch": 1.5717722534081795, "grad_norm": 0.6455657482147217, "learning_rate": 9.714282114555613e-06, "loss": 0.7347, "step": 1960 }, { "epoch": 1.5725741780272653, "grad_norm": 0.700589656829834, "learning_rate": 9.70562646179656e-06, "loss": 0.7709, "step": 1961 }, { "epoch": 1.573376102646351, "grad_norm": 0.6736430525779724, "learning_rate": 9.696971029768817e-06, "loss": 0.7816, "step": 1962 }, { "epoch": 1.574178027265437, "grad_norm": 0.6946107149124146, "learning_rate": 9.688315824962516e-06, "loss": 0.7248, "step": 1963 }, { "epoch": 1.5749799518845229, "grad_norm": 0.6668774485588074, "learning_rate": 9.679660853867621e-06, "loss": 0.7486, "step": 1964 }, { "epoch": 1.5757818765036087, "grad_norm": 0.7241086363792419, "learning_rate": 9.67100612297393e-06, "loss": 0.7377, "step": 1965 }, { "epoch": 1.5765838011226945, "grad_norm": 0.6383355259895325, "learning_rate": 9.662351638771049e-06, "loss": 0.7387, "step": 1966 }, { "epoch": 1.5773857257417803, "grad_norm": 0.6955791115760803, "learning_rate": 9.653697407748412e-06, "loss": 0.7487, "step": 1967 }, { "epoch": 1.578187650360866, "grad_norm": 0.6960842609405518, "learning_rate": 9.645043436395253e-06, "loss": 0.7984, "step": 1968 }, { "epoch": 1.5789895749799518, "grad_norm": 0.7226347923278809, "learning_rate": 9.63638973120061e-06, "loss": 0.7261, "step": 1969 }, { "epoch": 1.5797914995990376, "grad_norm": 0.6601559519767761, "learning_rate": 9.627736298653332e-06, "loss": 0.732, "step": 1970 }, { "epoch": 1.5805934242181237, "grad_norm": 0.6827449798583984, "learning_rate": 9.619083145242053e-06, "loss": 0.7392, "step": 1971 }, { "epoch": 1.5813953488372094, "grad_norm": 0.6553324460983276, "learning_rate": 9.610430277455209e-06, "loss": 0.7435, "step": 1972 }, { "epoch": 1.5821972734562952, "grad_norm": 0.6525527238845825, "learning_rate": 9.601777701781009e-06, "loss": 0.7591, "step": 1973 }, { "epoch": 1.582999198075381, "grad_norm": 0.6529831290245056, "learning_rate": 9.593125424707446e-06, "loss": 0.7414, "step": 1974 }, { "epoch": 1.5838011226944668, "grad_norm": 0.6787192821502686, "learning_rate": 9.584473452722299e-06, "loss": 0.7597, "step": 1975 }, { "epoch": 1.5846030473135526, "grad_norm": 0.6774669289588928, "learning_rate": 9.575821792313108e-06, "loss": 0.7418, "step": 1976 }, { "epoch": 1.5854049719326384, "grad_norm": 0.6893562078475952, "learning_rate": 9.567170449967183e-06, "loss": 0.6952, "step": 1977 }, { "epoch": 1.5862068965517242, "grad_norm": 0.6523553133010864, "learning_rate": 9.558519432171597e-06, "loss": 0.7763, "step": 1978 }, { "epoch": 1.58700882117081, "grad_norm": 0.668114960193634, "learning_rate": 9.549868745413172e-06, "loss": 0.7025, "step": 1979 }, { "epoch": 1.5878107457898958, "grad_norm": 0.6470924019813538, "learning_rate": 9.541218396178494e-06, "loss": 0.7311, "step": 1980 }, { "epoch": 1.5886126704089816, "grad_norm": 0.7354634404182434, "learning_rate": 9.532568390953886e-06, "loss": 0.8128, "step": 1981 }, { "epoch": 1.5894145950280674, "grad_norm": 0.6740539073944092, "learning_rate": 9.52391873622542e-06, "loss": 0.7422, "step": 1982 }, { "epoch": 1.5902165196471532, "grad_norm": 0.6699314117431641, "learning_rate": 9.515269438478898e-06, "loss": 0.7493, "step": 1983 }, { "epoch": 1.591018444266239, "grad_norm": 0.6977644562721252, "learning_rate": 9.506620504199854e-06, "loss": 0.749, "step": 1984 }, { "epoch": 1.5918203688853247, "grad_norm": 0.6674862504005432, "learning_rate": 9.497971939873567e-06, "loss": 0.7679, "step": 1985 }, { "epoch": 1.5926222935044105, "grad_norm": 0.6880958676338196, "learning_rate": 9.489323751985009e-06, "loss": 0.7485, "step": 1986 }, { "epoch": 1.5934242181234963, "grad_norm": 0.6663671731948853, "learning_rate": 9.480675947018899e-06, "loss": 0.7573, "step": 1987 }, { "epoch": 1.5942261427425821, "grad_norm": 0.706123948097229, "learning_rate": 9.472028531459649e-06, "loss": 0.7605, "step": 1988 }, { "epoch": 1.595028067361668, "grad_norm": 0.7029390931129456, "learning_rate": 9.463381511791386e-06, "loss": 0.7809, "step": 1989 }, { "epoch": 1.5958299919807537, "grad_norm": 0.633929431438446, "learning_rate": 9.454734894497942e-06, "loss": 0.7103, "step": 1990 }, { "epoch": 1.5966319165998395, "grad_norm": 0.6639130115509033, "learning_rate": 9.446088686062838e-06, "loss": 0.7599, "step": 1991 }, { "epoch": 1.5974338412189253, "grad_norm": 0.6766201853752136, "learning_rate": 9.437442892969308e-06, "loss": 0.7605, "step": 1992 }, { "epoch": 1.598235765838011, "grad_norm": 0.6676896810531616, "learning_rate": 9.428797521700254e-06, "loss": 0.7316, "step": 1993 }, { "epoch": 1.5990376904570969, "grad_norm": 0.7439046502113342, "learning_rate": 9.420152578738269e-06, "loss": 0.7832, "step": 1994 }, { "epoch": 1.5998396150761829, "grad_norm": 0.8324495553970337, "learning_rate": 9.41150807056563e-06, "loss": 0.7817, "step": 1995 }, { "epoch": 1.6006415396952687, "grad_norm": 0.6500052213668823, "learning_rate": 9.402864003664279e-06, "loss": 0.7429, "step": 1996 }, { "epoch": 1.6014434643143545, "grad_norm": 0.6784413456916809, "learning_rate": 9.394220384515836e-06, "loss": 0.7663, "step": 1997 }, { "epoch": 1.6022453889334403, "grad_norm": 0.6711524724960327, "learning_rate": 9.38557721960158e-06, "loss": 0.7391, "step": 1998 }, { "epoch": 1.603047313552526, "grad_norm": 0.6689935326576233, "learning_rate": 9.37693451540245e-06, "loss": 0.7391, "step": 1999 }, { "epoch": 1.6038492381716118, "grad_norm": 0.7078515887260437, "learning_rate": 9.368292278399038e-06, "loss": 0.755, "step": 2000 }, { "epoch": 1.6046511627906976, "grad_norm": 0.6978031396865845, "learning_rate": 9.35965051507159e-06, "loss": 0.7511, "step": 2001 }, { "epoch": 1.6054530874097834, "grad_norm": 0.6822302937507629, "learning_rate": 9.351009231899995e-06, "loss": 0.7673, "step": 2002 }, { "epoch": 1.6062550120288694, "grad_norm": 0.7071417570114136, "learning_rate": 9.342368435363774e-06, "loss": 0.7611, "step": 2003 }, { "epoch": 1.6070569366479552, "grad_norm": 0.7437505722045898, "learning_rate": 9.333728131942104e-06, "loss": 0.7593, "step": 2004 }, { "epoch": 1.607858861267041, "grad_norm": 0.6476147770881653, "learning_rate": 9.325088328113769e-06, "loss": 0.743, "step": 2005 }, { "epoch": 1.6086607858861268, "grad_norm": 0.6686375737190247, "learning_rate": 9.316449030357188e-06, "loss": 0.7613, "step": 2006 }, { "epoch": 1.6094627105052126, "grad_norm": 0.6458247303962708, "learning_rate": 9.307810245150408e-06, "loss": 0.7171, "step": 2007 }, { "epoch": 1.6102646351242984, "grad_norm": 1.242529273033142, "learning_rate": 9.299171978971073e-06, "loss": 0.7636, "step": 2008 }, { "epoch": 1.6110665597433842, "grad_norm": 0.7031643390655518, "learning_rate": 9.290534238296462e-06, "loss": 0.7621, "step": 2009 }, { "epoch": 1.61186848436247, "grad_norm": 0.6745364665985107, "learning_rate": 9.281897029603439e-06, "loss": 0.6897, "step": 2010 }, { "epoch": 1.6126704089815558, "grad_norm": 0.8014926314353943, "learning_rate": 9.273260359368478e-06, "loss": 0.7431, "step": 2011 }, { "epoch": 1.6134723336006416, "grad_norm": 0.6873915791511536, "learning_rate": 9.264624234067651e-06, "loss": 0.7679, "step": 2012 }, { "epoch": 1.6142742582197274, "grad_norm": 0.6493097543716431, "learning_rate": 9.255988660176613e-06, "loss": 0.7457, "step": 2013 }, { "epoch": 1.6150761828388132, "grad_norm": 0.7045702934265137, "learning_rate": 9.247353644170622e-06, "loss": 0.7683, "step": 2014 }, { "epoch": 1.615878107457899, "grad_norm": 0.6594098210334778, "learning_rate": 9.238719192524501e-06, "loss": 0.7129, "step": 2015 }, { "epoch": 1.6166800320769847, "grad_norm": 0.7022315859794617, "learning_rate": 9.23008531171265e-06, "loss": 0.7376, "step": 2016 }, { "epoch": 1.6174819566960705, "grad_norm": 0.6779872179031372, "learning_rate": 9.221452008209057e-06, "loss": 0.7507, "step": 2017 }, { "epoch": 1.6182838813151563, "grad_norm": 0.7060291767120361, "learning_rate": 9.21281928848726e-06, "loss": 0.7652, "step": 2018 }, { "epoch": 1.6190858059342421, "grad_norm": 0.6302214860916138, "learning_rate": 9.204187159020372e-06, "loss": 0.7142, "step": 2019 }, { "epoch": 1.619887730553328, "grad_norm": 0.7084274291992188, "learning_rate": 9.195555626281053e-06, "loss": 0.7594, "step": 2020 }, { "epoch": 1.6206896551724137, "grad_norm": 0.678536593914032, "learning_rate": 9.186924696741519e-06, "loss": 0.7467, "step": 2021 }, { "epoch": 1.6214915797914995, "grad_norm": 0.670260488986969, "learning_rate": 9.17829437687354e-06, "loss": 0.7463, "step": 2022 }, { "epoch": 1.6222935044105853, "grad_norm": 0.702285885810852, "learning_rate": 9.169664673148421e-06, "loss": 0.7582, "step": 2023 }, { "epoch": 1.623095429029671, "grad_norm": 0.7096335291862488, "learning_rate": 9.16103559203701e-06, "loss": 0.706, "step": 2024 }, { "epoch": 1.6238973536487569, "grad_norm": 0.6567702293395996, "learning_rate": 9.152407140009684e-06, "loss": 0.721, "step": 2025 }, { "epoch": 1.6246992782678427, "grad_norm": 0.6644206047058105, "learning_rate": 9.143779323536346e-06, "loss": 0.7575, "step": 2026 }, { "epoch": 1.6255012028869287, "grad_norm": 0.6624019145965576, "learning_rate": 9.135152149086436e-06, "loss": 0.7174, "step": 2027 }, { "epoch": 1.6263031275060145, "grad_norm": 0.723945677280426, "learning_rate": 9.126525623128896e-06, "loss": 0.7682, "step": 2028 }, { "epoch": 1.6271050521251003, "grad_norm": 0.684164822101593, "learning_rate": 9.117899752132193e-06, "loss": 0.7677, "step": 2029 }, { "epoch": 1.627906976744186, "grad_norm": 0.6822468042373657, "learning_rate": 9.109274542564295e-06, "loss": 0.7646, "step": 2030 }, { "epoch": 1.6287089013632718, "grad_norm": 0.707531750202179, "learning_rate": 9.100650000892679e-06, "loss": 0.7443, "step": 2031 }, { "epoch": 1.6295108259823576, "grad_norm": 0.6925025582313538, "learning_rate": 9.092026133584322e-06, "loss": 0.7717, "step": 2032 }, { "epoch": 1.6303127506014434, "grad_norm": 0.6611237525939941, "learning_rate": 9.083402947105688e-06, "loss": 0.7586, "step": 2033 }, { "epoch": 1.6311146752205292, "grad_norm": 0.6879470348358154, "learning_rate": 9.074780447922746e-06, "loss": 0.7076, "step": 2034 }, { "epoch": 1.6319165998396152, "grad_norm": 0.6737411618232727, "learning_rate": 9.066158642500933e-06, "loss": 0.7587, "step": 2035 }, { "epoch": 1.632718524458701, "grad_norm": 0.6747552752494812, "learning_rate": 9.05753753730517e-06, "loss": 0.7363, "step": 2036 }, { "epoch": 1.6335204490777868, "grad_norm": 0.7112221717834473, "learning_rate": 9.04891713879986e-06, "loss": 0.7896, "step": 2037 }, { "epoch": 1.6343223736968726, "grad_norm": 0.6935960650444031, "learning_rate": 9.040297453448867e-06, "loss": 0.7195, "step": 2038 }, { "epoch": 1.6351242983159584, "grad_norm": 0.6707137227058411, "learning_rate": 9.03167848771553e-06, "loss": 0.7473, "step": 2039 }, { "epoch": 1.6359262229350442, "grad_norm": 0.6829401254653931, "learning_rate": 9.023060248062642e-06, "loss": 0.7654, "step": 2040 }, { "epoch": 1.63672814755413, "grad_norm": 0.6423894166946411, "learning_rate": 9.014442740952446e-06, "loss": 0.7386, "step": 2041 }, { "epoch": 1.6375300721732158, "grad_norm": 0.678142786026001, "learning_rate": 9.005825972846652e-06, "loss": 0.7215, "step": 2042 }, { "epoch": 1.6383319967923016, "grad_norm": 0.6874783039093018, "learning_rate": 8.997209950206396e-06, "loss": 0.7578, "step": 2043 }, { "epoch": 1.6391339214113874, "grad_norm": 0.6972293257713318, "learning_rate": 8.988594679492276e-06, "loss": 0.7656, "step": 2044 }, { "epoch": 1.6399358460304732, "grad_norm": 0.6379215121269226, "learning_rate": 8.979980167164311e-06, "loss": 0.7223, "step": 2045 }, { "epoch": 1.640737770649559, "grad_norm": 0.6454169750213623, "learning_rate": 8.971366419681948e-06, "loss": 0.7007, "step": 2046 }, { "epoch": 1.6415396952686447, "grad_norm": 0.6857996582984924, "learning_rate": 8.96275344350408e-06, "loss": 0.7664, "step": 2047 }, { "epoch": 1.6423416198877305, "grad_norm": 0.7171909809112549, "learning_rate": 8.954141245089002e-06, "loss": 0.7698, "step": 2048 }, { "epoch": 1.6431435445068163, "grad_norm": 0.6804106831550598, "learning_rate": 8.945529830894439e-06, "loss": 0.7481, "step": 2049 }, { "epoch": 1.6439454691259021, "grad_norm": 0.6667369604110718, "learning_rate": 8.93691920737752e-06, "loss": 0.7417, "step": 2050 }, { "epoch": 1.644747393744988, "grad_norm": 0.6525418758392334, "learning_rate": 8.92830938099478e-06, "loss": 0.6958, "step": 2051 }, { "epoch": 1.6455493183640737, "grad_norm": 0.6953479647636414, "learning_rate": 8.919700358202167e-06, "loss": 0.7469, "step": 2052 }, { "epoch": 1.6463512429831595, "grad_norm": 0.715549886226654, "learning_rate": 8.911092145455015e-06, "loss": 0.7828, "step": 2053 }, { "epoch": 1.6471531676022453, "grad_norm": 0.9194969534873962, "learning_rate": 8.902484749208058e-06, "loss": 0.737, "step": 2054 }, { "epoch": 1.647955092221331, "grad_norm": 0.6881150603294373, "learning_rate": 8.893878175915414e-06, "loss": 0.718, "step": 2055 }, { "epoch": 1.6487570168404169, "grad_norm": 0.716201663017273, "learning_rate": 8.885272432030579e-06, "loss": 0.7638, "step": 2056 }, { "epoch": 1.6495589414595027, "grad_norm": 0.6798361539840698, "learning_rate": 8.876667524006442e-06, "loss": 0.7288, "step": 2057 }, { "epoch": 1.6503608660785885, "grad_norm": 0.6651695370674133, "learning_rate": 8.868063458295251e-06, "loss": 0.7251, "step": 2058 }, { "epoch": 1.6511627906976745, "grad_norm": 0.6712408065795898, "learning_rate": 8.85946024134863e-06, "loss": 0.7271, "step": 2059 }, { "epoch": 1.6519647153167603, "grad_norm": 0.6494314670562744, "learning_rate": 8.850857879617562e-06, "loss": 0.7224, "step": 2060 }, { "epoch": 1.652766639935846, "grad_norm": 0.6849057674407959, "learning_rate": 8.84225637955239e-06, "loss": 0.7429, "step": 2061 }, { "epoch": 1.6535685645549318, "grad_norm": 0.6645267009735107, "learning_rate": 8.833655747602816e-06, "loss": 0.6723, "step": 2062 }, { "epoch": 1.6543704891740176, "grad_norm": 0.6581180691719055, "learning_rate": 8.825055990217877e-06, "loss": 0.7193, "step": 2063 }, { "epoch": 1.6551724137931034, "grad_norm": 0.6753679513931274, "learning_rate": 8.816457113845977e-06, "loss": 0.7498, "step": 2064 }, { "epoch": 1.6559743384121892, "grad_norm": 0.6934730410575867, "learning_rate": 8.80785912493484e-06, "loss": 0.755, "step": 2065 }, { "epoch": 1.656776263031275, "grad_norm": 0.6492776870727539, "learning_rate": 8.799262029931527e-06, "loss": 0.7371, "step": 2066 }, { "epoch": 1.657578187650361, "grad_norm": 0.6823244690895081, "learning_rate": 8.79066583528244e-06, "loss": 0.7319, "step": 2067 }, { "epoch": 1.6583801122694468, "grad_norm": 0.6459192037582397, "learning_rate": 8.78207054743329e-06, "loss": 0.6997, "step": 2068 }, { "epoch": 1.6591820368885326, "grad_norm": 0.6940407156944275, "learning_rate": 8.773476172829127e-06, "loss": 0.7578, "step": 2069 }, { "epoch": 1.6599839615076184, "grad_norm": 0.6635679602622986, "learning_rate": 8.7648827179143e-06, "loss": 0.7466, "step": 2070 }, { "epoch": 1.6607858861267042, "grad_norm": 0.7041206359863281, "learning_rate": 8.756290189132473e-06, "loss": 0.7564, "step": 2071 }, { "epoch": 1.66158781074579, "grad_norm": 0.6900807619094849, "learning_rate": 8.74769859292662e-06, "loss": 0.723, "step": 2072 }, { "epoch": 1.6623897353648758, "grad_norm": 0.7090346217155457, "learning_rate": 8.739107935739004e-06, "loss": 0.7512, "step": 2073 }, { "epoch": 1.6631916599839616, "grad_norm": 0.6753419041633606, "learning_rate": 8.730518224011209e-06, "loss": 0.7404, "step": 2074 }, { "epoch": 1.6639935846030474, "grad_norm": 0.6859150528907776, "learning_rate": 8.721929464184079e-06, "loss": 0.7529, "step": 2075 }, { "epoch": 1.6647955092221332, "grad_norm": 0.713150143623352, "learning_rate": 8.71334166269776e-06, "loss": 0.7569, "step": 2076 }, { "epoch": 1.665597433841219, "grad_norm": 0.6997129321098328, "learning_rate": 8.704754825991684e-06, "loss": 0.7733, "step": 2077 }, { "epoch": 1.6663993584603047, "grad_norm": 0.6805722117424011, "learning_rate": 8.69616896050455e-06, "loss": 0.7407, "step": 2078 }, { "epoch": 1.6672012830793905, "grad_norm": 0.6935459971427917, "learning_rate": 8.687584072674335e-06, "loss": 0.7519, "step": 2079 }, { "epoch": 1.6680032076984763, "grad_norm": 0.6706266403198242, "learning_rate": 8.679000168938278e-06, "loss": 0.7196, "step": 2080 }, { "epoch": 1.6688051323175621, "grad_norm": 0.6920490264892578, "learning_rate": 8.670417255732876e-06, "loss": 0.7471, "step": 2081 }, { "epoch": 1.669607056936648, "grad_norm": 0.7178738117218018, "learning_rate": 8.661835339493903e-06, "loss": 0.734, "step": 2082 }, { "epoch": 1.6704089815557337, "grad_norm": 0.6797714233398438, "learning_rate": 8.653254426656364e-06, "loss": 0.7109, "step": 2083 }, { "epoch": 1.6712109061748195, "grad_norm": 0.7086063027381897, "learning_rate": 8.644674523654522e-06, "loss": 0.7198, "step": 2084 }, { "epoch": 1.6720128307939053, "grad_norm": 0.660064160823822, "learning_rate": 8.636095636921878e-06, "loss": 0.7373, "step": 2085 }, { "epoch": 1.672814755412991, "grad_norm": 0.6909743547439575, "learning_rate": 8.627517772891172e-06, "loss": 0.7312, "step": 2086 }, { "epoch": 1.6736166800320769, "grad_norm": 0.6800159811973572, "learning_rate": 8.618940937994387e-06, "loss": 0.7629, "step": 2087 }, { "epoch": 1.6744186046511627, "grad_norm": 0.7067490816116333, "learning_rate": 8.610365138662716e-06, "loss": 0.7388, "step": 2088 }, { "epoch": 1.6752205292702484, "grad_norm": 0.7322930097579956, "learning_rate": 8.601790381326593e-06, "loss": 0.7714, "step": 2089 }, { "epoch": 1.6760224538893342, "grad_norm": 0.6572024822235107, "learning_rate": 8.59321667241566e-06, "loss": 0.7291, "step": 2090 }, { "epoch": 1.6768243785084203, "grad_norm": 0.6840153932571411, "learning_rate": 8.584644018358773e-06, "loss": 0.7107, "step": 2091 }, { "epoch": 1.677626303127506, "grad_norm": 0.653813898563385, "learning_rate": 8.576072425584004e-06, "loss": 0.7084, "step": 2092 }, { "epoch": 1.6784282277465918, "grad_norm": 0.6750298142433167, "learning_rate": 8.56750190051862e-06, "loss": 0.728, "step": 2093 }, { "epoch": 1.6792301523656776, "grad_norm": 0.6693862080574036, "learning_rate": 8.558932449589103e-06, "loss": 0.7279, "step": 2094 }, { "epoch": 1.6800320769847634, "grad_norm": 0.6705959439277649, "learning_rate": 8.550364079221111e-06, "loss": 0.7475, "step": 2095 }, { "epoch": 1.6808340016038492, "grad_norm": 0.7326763868331909, "learning_rate": 8.541796795839498e-06, "loss": 0.7598, "step": 2096 }, { "epoch": 1.681635926222935, "grad_norm": 0.6632883548736572, "learning_rate": 8.533230605868314e-06, "loss": 0.714, "step": 2097 }, { "epoch": 1.6824378508420208, "grad_norm": 0.676483154296875, "learning_rate": 8.524665515730766e-06, "loss": 0.7276, "step": 2098 }, { "epoch": 1.6832397754611068, "grad_norm": 0.6715077757835388, "learning_rate": 8.516101531849266e-06, "loss": 0.7608, "step": 2099 }, { "epoch": 1.6840417000801926, "grad_norm": 0.6537802219390869, "learning_rate": 8.507538660645372e-06, "loss": 0.725, "step": 2100 }, { "epoch": 1.6848436246992784, "grad_norm": 0.6930273771286011, "learning_rate": 8.498976908539817e-06, "loss": 0.7545, "step": 2101 }, { "epoch": 1.6856455493183642, "grad_norm": 0.6789145469665527, "learning_rate": 8.490416281952495e-06, "loss": 0.7618, "step": 2102 }, { "epoch": 1.68644747393745, "grad_norm": 0.6359390020370483, "learning_rate": 8.481856787302454e-06, "loss": 0.7155, "step": 2103 }, { "epoch": 1.6872493985565358, "grad_norm": 0.658112108707428, "learning_rate": 8.473298431007901e-06, "loss": 0.7385, "step": 2104 }, { "epoch": 1.6880513231756216, "grad_norm": 0.6744823455810547, "learning_rate": 8.464741219486175e-06, "loss": 0.7365, "step": 2105 }, { "epoch": 1.6888532477947074, "grad_norm": 0.6585288643836975, "learning_rate": 8.456185159153765e-06, "loss": 0.7542, "step": 2106 }, { "epoch": 1.6896551724137931, "grad_norm": 0.6513398885726929, "learning_rate": 8.447630256426303e-06, "loss": 0.7255, "step": 2107 }, { "epoch": 1.690457097032879, "grad_norm": 0.6796519756317139, "learning_rate": 8.439076517718541e-06, "loss": 0.7541, "step": 2108 }, { "epoch": 1.6912590216519647, "grad_norm": 0.6824721097946167, "learning_rate": 8.430523949444367e-06, "loss": 0.7137, "step": 2109 }, { "epoch": 1.6920609462710505, "grad_norm": 0.6917021870613098, "learning_rate": 8.421972558016786e-06, "loss": 0.7435, "step": 2110 }, { "epoch": 1.6928628708901363, "grad_norm": 0.6658660173416138, "learning_rate": 8.413422349847918e-06, "loss": 0.7403, "step": 2111 }, { "epoch": 1.693664795509222, "grad_norm": 0.7041023373603821, "learning_rate": 8.404873331349009e-06, "loss": 0.7765, "step": 2112 }, { "epoch": 1.694466720128308, "grad_norm": 0.7012107372283936, "learning_rate": 8.396325508930398e-06, "loss": 0.766, "step": 2113 }, { "epoch": 1.6952686447473937, "grad_norm": 0.662287712097168, "learning_rate": 8.387778889001539e-06, "loss": 0.7359, "step": 2114 }, { "epoch": 1.6960705693664795, "grad_norm": 0.692939043045044, "learning_rate": 8.379233477970975e-06, "loss": 0.7821, "step": 2115 }, { "epoch": 1.6968724939855653, "grad_norm": 0.7044574022293091, "learning_rate": 8.370689282246341e-06, "loss": 0.7677, "step": 2116 }, { "epoch": 1.697674418604651, "grad_norm": 0.6683332920074463, "learning_rate": 8.36214630823438e-06, "loss": 0.7545, "step": 2117 }, { "epoch": 1.6984763432237369, "grad_norm": 0.7165277600288391, "learning_rate": 8.353604562340886e-06, "loss": 0.777, "step": 2118 }, { "epoch": 1.6992782678428227, "grad_norm": 0.7054672241210938, "learning_rate": 8.345064050970767e-06, "loss": 0.7495, "step": 2119 }, { "epoch": 1.7000801924619084, "grad_norm": 0.664445161819458, "learning_rate": 8.336524780527986e-06, "loss": 0.7648, "step": 2120 }, { "epoch": 1.7008821170809942, "grad_norm": 0.6737078428268433, "learning_rate": 8.327986757415571e-06, "loss": 0.7248, "step": 2121 }, { "epoch": 1.70168404170008, "grad_norm": 0.6758593320846558, "learning_rate": 8.319449988035631e-06, "loss": 0.7499, "step": 2122 }, { "epoch": 1.702485966319166, "grad_norm": 0.668846845626831, "learning_rate": 8.310914478789321e-06, "loss": 0.7671, "step": 2123 }, { "epoch": 1.7032878909382518, "grad_norm": 0.6598069071769714, "learning_rate": 8.30238023607686e-06, "loss": 0.7507, "step": 2124 }, { "epoch": 1.7040898155573376, "grad_norm": 0.6659058928489685, "learning_rate": 8.293847266297513e-06, "loss": 0.7198, "step": 2125 }, { "epoch": 1.7048917401764234, "grad_norm": 0.6601713299751282, "learning_rate": 8.285315575849589e-06, "loss": 0.7103, "step": 2126 }, { "epoch": 1.7056936647955092, "grad_norm": 0.6853638887405396, "learning_rate": 8.276785171130445e-06, "loss": 0.7441, "step": 2127 }, { "epoch": 1.706495589414595, "grad_norm": 0.71426922082901, "learning_rate": 8.26825605853646e-06, "loss": 0.744, "step": 2128 }, { "epoch": 1.7072975140336808, "grad_norm": 0.6715183854103088, "learning_rate": 8.259728244463065e-06, "loss": 0.694, "step": 2129 }, { "epoch": 1.7080994386527666, "grad_norm": 0.6622087359428406, "learning_rate": 8.251201735304698e-06, "loss": 0.7388, "step": 2130 }, { "epoch": 1.7089013632718526, "grad_norm": 0.7121644616127014, "learning_rate": 8.242676537454825e-06, "loss": 0.775, "step": 2131 }, { "epoch": 1.7097032878909384, "grad_norm": 0.6632829904556274, "learning_rate": 8.234152657305936e-06, "loss": 0.7363, "step": 2132 }, { "epoch": 1.7105052125100242, "grad_norm": 0.6959726214408875, "learning_rate": 8.22563010124952e-06, "loss": 0.7378, "step": 2133 }, { "epoch": 1.71130713712911, "grad_norm": 0.7143699526786804, "learning_rate": 8.217108875676083e-06, "loss": 0.7204, "step": 2134 }, { "epoch": 1.7121090617481958, "grad_norm": 0.6711505651473999, "learning_rate": 8.20858898697513e-06, "loss": 0.7385, "step": 2135 }, { "epoch": 1.7129109863672816, "grad_norm": 0.6337816119194031, "learning_rate": 8.200070441535159e-06, "loss": 0.7139, "step": 2136 }, { "epoch": 1.7137129109863674, "grad_norm": 0.6956325173377991, "learning_rate": 8.191553245743675e-06, "loss": 0.7812, "step": 2137 }, { "epoch": 1.7145148356054531, "grad_norm": 0.6819128394126892, "learning_rate": 8.183037405987155e-06, "loss": 0.7351, "step": 2138 }, { "epoch": 1.715316760224539, "grad_norm": 0.6942816972732544, "learning_rate": 8.174522928651068e-06, "loss": 0.7443, "step": 2139 }, { "epoch": 1.7161186848436247, "grad_norm": 0.653355598449707, "learning_rate": 8.166009820119857e-06, "loss": 0.7521, "step": 2140 }, { "epoch": 1.7169206094627105, "grad_norm": 0.6483197808265686, "learning_rate": 8.157498086776937e-06, "loss": 0.7139, "step": 2141 }, { "epoch": 1.7177225340817963, "grad_norm": 0.6634138822555542, "learning_rate": 8.148987735004706e-06, "loss": 0.7773, "step": 2142 }, { "epoch": 1.718524458700882, "grad_norm": 0.6787661910057068, "learning_rate": 8.140478771184507e-06, "loss": 0.7243, "step": 2143 }, { "epoch": 1.719326383319968, "grad_norm": 0.6924588680267334, "learning_rate": 8.131971201696656e-06, "loss": 0.7558, "step": 2144 }, { "epoch": 1.7201283079390537, "grad_norm": 0.6862332820892334, "learning_rate": 8.123465032920415e-06, "loss": 0.7175, "step": 2145 }, { "epoch": 1.7209302325581395, "grad_norm": 0.6828835010528564, "learning_rate": 8.114960271233999e-06, "loss": 0.7774, "step": 2146 }, { "epoch": 1.7217321571772253, "grad_norm": 0.6388763189315796, "learning_rate": 8.106456923014571e-06, "loss": 0.7002, "step": 2147 }, { "epoch": 1.722534081796311, "grad_norm": 0.6799570918083191, "learning_rate": 8.097954994638225e-06, "loss": 0.7579, "step": 2148 }, { "epoch": 1.7233360064153969, "grad_norm": 0.6811538338661194, "learning_rate": 8.089454492480004e-06, "loss": 0.7354, "step": 2149 }, { "epoch": 1.7241379310344827, "grad_norm": 0.7103093266487122, "learning_rate": 8.080955422913872e-06, "loss": 0.7416, "step": 2150 }, { "epoch": 1.7249398556535684, "grad_norm": 0.6595764756202698, "learning_rate": 8.072457792312715e-06, "loss": 0.7321, "step": 2151 }, { "epoch": 1.7257417802726542, "grad_norm": 0.6965066194534302, "learning_rate": 8.063961607048353e-06, "loss": 0.7535, "step": 2152 }, { "epoch": 1.72654370489174, "grad_norm": 0.6480819582939148, "learning_rate": 8.05546687349151e-06, "loss": 0.7559, "step": 2153 }, { "epoch": 1.7273456295108258, "grad_norm": 0.6977070569992065, "learning_rate": 8.046973598011831e-06, "loss": 0.7416, "step": 2154 }, { "epoch": 1.7281475541299118, "grad_norm": 0.689796507358551, "learning_rate": 8.038481786977858e-06, "loss": 0.7546, "step": 2155 }, { "epoch": 1.7289494787489976, "grad_norm": 0.6607633233070374, "learning_rate": 8.029991446757047e-06, "loss": 0.733, "step": 2156 }, { "epoch": 1.7297514033680834, "grad_norm": 0.7048103213310242, "learning_rate": 8.02150258371574e-06, "loss": 0.7734, "step": 2157 }, { "epoch": 1.7305533279871692, "grad_norm": 0.6884298324584961, "learning_rate": 8.013015204219171e-06, "loss": 0.7469, "step": 2158 }, { "epoch": 1.731355252606255, "grad_norm": 0.7125540375709534, "learning_rate": 8.004529314631476e-06, "loss": 0.7367, "step": 2159 }, { "epoch": 1.7321571772253408, "grad_norm": 0.6849104762077332, "learning_rate": 7.996044921315656e-06, "loss": 0.7246, "step": 2160 }, { "epoch": 1.7329591018444266, "grad_norm": 0.6800344586372375, "learning_rate": 7.987562030633604e-06, "loss": 0.726, "step": 2161 }, { "epoch": 1.7337610264635124, "grad_norm": 0.7025103569030762, "learning_rate": 7.979080648946078e-06, "loss": 0.7169, "step": 2162 }, { "epoch": 1.7345629510825984, "grad_norm": 0.6762646436691284, "learning_rate": 7.970600782612703e-06, "loss": 0.7497, "step": 2163 }, { "epoch": 1.7353648757016842, "grad_norm": 0.7249945998191833, "learning_rate": 7.962122437991978e-06, "loss": 0.7864, "step": 2164 }, { "epoch": 1.73616680032077, "grad_norm": 0.6832449436187744, "learning_rate": 7.953645621441245e-06, "loss": 0.7619, "step": 2165 }, { "epoch": 1.7369687249398558, "grad_norm": 0.6707166433334351, "learning_rate": 7.945170339316724e-06, "loss": 0.721, "step": 2166 }, { "epoch": 1.7377706495589416, "grad_norm": 0.6761574149131775, "learning_rate": 7.93669659797346e-06, "loss": 0.7313, "step": 2167 }, { "epoch": 1.7385725741780274, "grad_norm": 0.6892061829566956, "learning_rate": 7.928224403765353e-06, "loss": 0.736, "step": 2168 }, { "epoch": 1.7393744987971131, "grad_norm": 0.6842492818832397, "learning_rate": 7.919753763045148e-06, "loss": 0.7297, "step": 2169 }, { "epoch": 1.740176423416199, "grad_norm": 0.6731321811676025, "learning_rate": 7.911284682164413e-06, "loss": 0.7464, "step": 2170 }, { "epoch": 1.7409783480352847, "grad_norm": 0.6783955693244934, "learning_rate": 7.90281716747356e-06, "loss": 0.737, "step": 2171 }, { "epoch": 1.7417802726543705, "grad_norm": 0.6662715673446655, "learning_rate": 7.894351225321817e-06, "loss": 0.7445, "step": 2172 }, { "epoch": 1.7425821972734563, "grad_norm": 0.6635359525680542, "learning_rate": 7.885886862057233e-06, "loss": 0.7155, "step": 2173 }, { "epoch": 1.743384121892542, "grad_norm": 0.691408634185791, "learning_rate": 7.877424084026682e-06, "loss": 0.729, "step": 2174 }, { "epoch": 1.744186046511628, "grad_norm": 0.6881137490272522, "learning_rate": 7.868962897575837e-06, "loss": 0.7426, "step": 2175 }, { "epoch": 1.7449879711307137, "grad_norm": 0.6919487714767456, "learning_rate": 7.86050330904919e-06, "loss": 0.7381, "step": 2176 }, { "epoch": 1.7457898957497995, "grad_norm": 0.6565024852752686, "learning_rate": 7.852045324790023e-06, "loss": 0.7485, "step": 2177 }, { "epoch": 1.7465918203688853, "grad_norm": 0.6828457117080688, "learning_rate": 7.843588951140421e-06, "loss": 0.7429, "step": 2178 }, { "epoch": 1.747393744987971, "grad_norm": 0.6890912652015686, "learning_rate": 7.835134194441265e-06, "loss": 0.7139, "step": 2179 }, { "epoch": 1.7481956696070569, "grad_norm": 0.6851256489753723, "learning_rate": 7.826681061032216e-06, "loss": 0.7731, "step": 2180 }, { "epoch": 1.7489975942261426, "grad_norm": 0.6674900054931641, "learning_rate": 7.818229557251722e-06, "loss": 0.7865, "step": 2181 }, { "epoch": 1.7497995188452284, "grad_norm": 0.693809986114502, "learning_rate": 7.809779689437011e-06, "loss": 0.7245, "step": 2182 }, { "epoch": 1.7506014434643142, "grad_norm": 0.7122488617897034, "learning_rate": 7.801331463924076e-06, "loss": 0.7556, "step": 2183 }, { "epoch": 1.7514033680834, "grad_norm": 0.6818574666976929, "learning_rate": 7.79288488704769e-06, "loss": 0.7313, "step": 2184 }, { "epoch": 1.7522052927024858, "grad_norm": 0.6880291104316711, "learning_rate": 7.784439965141381e-06, "loss": 0.7223, "step": 2185 }, { "epoch": 1.7530072173215716, "grad_norm": 0.6625512838363647, "learning_rate": 7.775996704537442e-06, "loss": 0.7173, "step": 2186 }, { "epoch": 1.7538091419406576, "grad_norm": 0.669406533241272, "learning_rate": 7.767555111566914e-06, "loss": 0.7263, "step": 2187 }, { "epoch": 1.7546110665597434, "grad_norm": 0.6676865816116333, "learning_rate": 7.759115192559589e-06, "loss": 0.7081, "step": 2188 }, { "epoch": 1.7554129911788292, "grad_norm": 0.6633387804031372, "learning_rate": 7.750676953844011e-06, "loss": 0.741, "step": 2189 }, { "epoch": 1.756214915797915, "grad_norm": 0.7206395864486694, "learning_rate": 7.742240401747457e-06, "loss": 0.7791, "step": 2190 }, { "epoch": 1.7570168404170008, "grad_norm": 0.7158024311065674, "learning_rate": 7.73380554259594e-06, "loss": 0.7411, "step": 2191 }, { "epoch": 1.7578187650360866, "grad_norm": 0.684083104133606, "learning_rate": 7.725372382714208e-06, "loss": 0.7193, "step": 2192 }, { "epoch": 1.7586206896551724, "grad_norm": 0.6862793564796448, "learning_rate": 7.716940928425724e-06, "loss": 0.7775, "step": 2193 }, { "epoch": 1.7594226142742582, "grad_norm": 0.718550980091095, "learning_rate": 7.708511186052689e-06, "loss": 0.7628, "step": 2194 }, { "epoch": 1.7602245388933442, "grad_norm": 0.7173058986663818, "learning_rate": 7.700083161916e-06, "loss": 0.7639, "step": 2195 }, { "epoch": 1.76102646351243, "grad_norm": 0.6749827861785889, "learning_rate": 7.691656862335288e-06, "loss": 0.705, "step": 2196 }, { "epoch": 1.7618283881315158, "grad_norm": 0.6680623292922974, "learning_rate": 7.683232293628873e-06, "loss": 0.7156, "step": 2197 }, { "epoch": 1.7626303127506016, "grad_norm": 0.6810491681098938, "learning_rate": 7.674809462113782e-06, "loss": 0.7494, "step": 2198 }, { "epoch": 1.7634322373696873, "grad_norm": 0.6893939971923828, "learning_rate": 7.666388374105747e-06, "loss": 0.7559, "step": 2199 }, { "epoch": 1.7642341619887731, "grad_norm": 0.6892242431640625, "learning_rate": 7.65796903591918e-06, "loss": 0.7398, "step": 2200 }, { "epoch": 1.765036086607859, "grad_norm": 0.6665722131729126, "learning_rate": 7.649551453867192e-06, "loss": 0.7275, "step": 2201 }, { "epoch": 1.7658380112269447, "grad_norm": 0.7008151412010193, "learning_rate": 7.641135634261572e-06, "loss": 0.7379, "step": 2202 }, { "epoch": 1.7666399358460305, "grad_norm": 0.6838683485984802, "learning_rate": 7.632721583412787e-06, "loss": 0.77, "step": 2203 }, { "epoch": 1.7674418604651163, "grad_norm": 0.691834032535553, "learning_rate": 7.62430930762998e-06, "loss": 0.7355, "step": 2204 }, { "epoch": 1.768243785084202, "grad_norm": 0.6844693422317505, "learning_rate": 7.615898813220958e-06, "loss": 0.7353, "step": 2205 }, { "epoch": 1.769045709703288, "grad_norm": 0.7072806358337402, "learning_rate": 7.607490106492205e-06, "loss": 0.747, "step": 2206 }, { "epoch": 1.7698476343223737, "grad_norm": 0.6707396507263184, "learning_rate": 7.5990831937488476e-06, "loss": 0.697, "step": 2207 }, { "epoch": 1.7706495589414595, "grad_norm": 0.7029158473014832, "learning_rate": 7.590678081294673e-06, "loss": 0.7167, "step": 2208 }, { "epoch": 1.7714514835605453, "grad_norm": 0.7110798954963684, "learning_rate": 7.5822747754321315e-06, "loss": 0.7507, "step": 2209 }, { "epoch": 1.772253408179631, "grad_norm": 0.6975316405296326, "learning_rate": 7.573873282462299e-06, "loss": 0.7402, "step": 2210 }, { "epoch": 1.7730553327987169, "grad_norm": 0.6738576889038086, "learning_rate": 7.5654736086849056e-06, "loss": 0.714, "step": 2211 }, { "epoch": 1.7738572574178026, "grad_norm": 0.6818029284477234, "learning_rate": 7.5570757603983115e-06, "loss": 0.7079, "step": 2212 }, { "epoch": 1.7746591820368884, "grad_norm": 0.6604394316673279, "learning_rate": 7.548679743899505e-06, "loss": 0.7548, "step": 2213 }, { "epoch": 1.7754611066559742, "grad_norm": 0.6959803104400635, "learning_rate": 7.540285565484114e-06, "loss": 0.7403, "step": 2214 }, { "epoch": 1.77626303127506, "grad_norm": 0.7064805626869202, "learning_rate": 7.531893231446372e-06, "loss": 0.7615, "step": 2215 }, { "epoch": 1.7770649558941458, "grad_norm": 0.6517053842544556, "learning_rate": 7.523502748079141e-06, "loss": 0.7474, "step": 2216 }, { "epoch": 1.7778668805132316, "grad_norm": 0.674662172794342, "learning_rate": 7.51511412167389e-06, "loss": 0.702, "step": 2217 }, { "epoch": 1.7786688051323174, "grad_norm": 0.7000203132629395, "learning_rate": 7.506727358520693e-06, "loss": 0.7111, "step": 2218 }, { "epoch": 1.7794707297514034, "grad_norm": 0.6659766435623169, "learning_rate": 7.498342464908237e-06, "loss": 0.7337, "step": 2219 }, { "epoch": 1.7802726543704892, "grad_norm": 0.6771808862686157, "learning_rate": 7.489959447123797e-06, "loss": 0.7378, "step": 2220 }, { "epoch": 1.781074578989575, "grad_norm": 0.7038045525550842, "learning_rate": 7.4815783114532485e-06, "loss": 0.7649, "step": 2221 }, { "epoch": 1.7818765036086608, "grad_norm": 0.6613171100616455, "learning_rate": 7.473199064181048e-06, "loss": 0.7119, "step": 2222 }, { "epoch": 1.7826784282277466, "grad_norm": 0.6629149913787842, "learning_rate": 7.464821711590242e-06, "loss": 0.7293, "step": 2223 }, { "epoch": 1.7834803528468324, "grad_norm": 0.6879216432571411, "learning_rate": 7.456446259962455e-06, "loss": 0.7311, "step": 2224 }, { "epoch": 1.7842822774659182, "grad_norm": 0.6925482153892517, "learning_rate": 7.448072715577885e-06, "loss": 0.7511, "step": 2225 }, { "epoch": 1.785084202085004, "grad_norm": 0.6826873421669006, "learning_rate": 7.439701084715305e-06, "loss": 0.7436, "step": 2226 }, { "epoch": 1.78588612670409, "grad_norm": 0.7013863325119019, "learning_rate": 7.431331373652046e-06, "loss": 0.7159, "step": 2227 }, { "epoch": 1.7866880513231758, "grad_norm": 0.6822634935379028, "learning_rate": 7.422963588663998e-06, "loss": 0.7404, "step": 2228 }, { "epoch": 1.7874899759422616, "grad_norm": 0.6994298100471497, "learning_rate": 7.414597736025621e-06, "loss": 0.755, "step": 2229 }, { "epoch": 1.7882919005613473, "grad_norm": 0.7196714282035828, "learning_rate": 7.406233822009904e-06, "loss": 0.7806, "step": 2230 }, { "epoch": 1.7890938251804331, "grad_norm": 0.6684456467628479, "learning_rate": 7.397871852888405e-06, "loss": 0.7119, "step": 2231 }, { "epoch": 1.789895749799519, "grad_norm": 0.6782661080360413, "learning_rate": 7.389511834931211e-06, "loss": 0.7417, "step": 2232 }, { "epoch": 1.7906976744186047, "grad_norm": 0.7280923128128052, "learning_rate": 7.381153774406944e-06, "loss": 0.7621, "step": 2233 }, { "epoch": 1.7914995990376905, "grad_norm": 0.6602609157562256, "learning_rate": 7.372797677582767e-06, "loss": 0.7315, "step": 2234 }, { "epoch": 1.7923015236567763, "grad_norm": 0.6975564956665039, "learning_rate": 7.36444355072436e-06, "loss": 0.7265, "step": 2235 }, { "epoch": 1.793103448275862, "grad_norm": 0.6379725933074951, "learning_rate": 7.356091400095942e-06, "loss": 0.7065, "step": 2236 }, { "epoch": 1.793905372894948, "grad_norm": 0.7008256316184998, "learning_rate": 7.3477412319602306e-06, "loss": 0.7275, "step": 2237 }, { "epoch": 1.7947072975140337, "grad_norm": 0.6815301775932312, "learning_rate": 7.339393052578465e-06, "loss": 0.732, "step": 2238 }, { "epoch": 1.7955092221331195, "grad_norm": 0.6972305178642273, "learning_rate": 7.3310468682104055e-06, "loss": 0.7292, "step": 2239 }, { "epoch": 1.7963111467522053, "grad_norm": 0.6848737597465515, "learning_rate": 7.322702685114295e-06, "loss": 0.6968, "step": 2240 }, { "epoch": 1.797113071371291, "grad_norm": 0.7986035943031311, "learning_rate": 7.3143605095468915e-06, "loss": 0.7678, "step": 2241 }, { "epoch": 1.7979149959903769, "grad_norm": 0.6967061758041382, "learning_rate": 7.30602034776344e-06, "loss": 0.6936, "step": 2242 }, { "epoch": 1.7987169206094626, "grad_norm": 0.6833521723747253, "learning_rate": 7.297682206017676e-06, "loss": 0.7218, "step": 2243 }, { "epoch": 1.7995188452285484, "grad_norm": 0.6830206513404846, "learning_rate": 7.289346090561828e-06, "loss": 0.7502, "step": 2244 }, { "epoch": 1.8003207698476342, "grad_norm": 0.68632972240448, "learning_rate": 7.281012007646595e-06, "loss": 0.7189, "step": 2245 }, { "epoch": 1.80112269446672, "grad_norm": 0.7078197002410889, "learning_rate": 7.272679963521158e-06, "loss": 0.7313, "step": 2246 }, { "epoch": 1.8019246190858058, "grad_norm": 0.6793120503425598, "learning_rate": 7.264349964433168e-06, "loss": 0.7321, "step": 2247 }, { "epoch": 1.8027265437048916, "grad_norm": 0.7071113586425781, "learning_rate": 7.2560220166287355e-06, "loss": 0.7568, "step": 2248 }, { "epoch": 1.8035284683239774, "grad_norm": 0.6845733523368835, "learning_rate": 7.24769612635245e-06, "loss": 0.7318, "step": 2249 }, { "epoch": 1.8043303929430632, "grad_norm": 0.6932980418205261, "learning_rate": 7.239372299847338e-06, "loss": 0.7303, "step": 2250 }, { "epoch": 1.8051323175621492, "grad_norm": 0.6790763139724731, "learning_rate": 7.231050543354894e-06, "loss": 0.7239, "step": 2251 }, { "epoch": 1.805934242181235, "grad_norm": 0.7017188668251038, "learning_rate": 7.2227308631150535e-06, "loss": 0.7255, "step": 2252 }, { "epoch": 1.8067361668003208, "grad_norm": 0.6675518155097961, "learning_rate": 7.214413265366194e-06, "loss": 0.7225, "step": 2253 }, { "epoch": 1.8075380914194066, "grad_norm": 0.6851517558097839, "learning_rate": 7.206097756345135e-06, "loss": 0.7182, "step": 2254 }, { "epoch": 1.8083400160384924, "grad_norm": 0.6712617874145508, "learning_rate": 7.197784342287125e-06, "loss": 0.7717, "step": 2255 }, { "epoch": 1.8091419406575782, "grad_norm": 0.6820451617240906, "learning_rate": 7.189473029425852e-06, "loss": 0.7246, "step": 2256 }, { "epoch": 1.809943865276664, "grad_norm": 0.6897710561752319, "learning_rate": 7.181163823993418e-06, "loss": 0.7586, "step": 2257 }, { "epoch": 1.8107457898957497, "grad_norm": 0.6737632751464844, "learning_rate": 7.172856732220344e-06, "loss": 0.71, "step": 2258 }, { "epoch": 1.8115477145148358, "grad_norm": 0.6887868046760559, "learning_rate": 7.164551760335579e-06, "loss": 0.7375, "step": 2259 }, { "epoch": 1.8123496391339216, "grad_norm": 0.6990635395050049, "learning_rate": 7.156248914566461e-06, "loss": 0.7322, "step": 2260 }, { "epoch": 1.8131515637530073, "grad_norm": 0.6845853924751282, "learning_rate": 7.147948201138761e-06, "loss": 0.7243, "step": 2261 }, { "epoch": 1.8139534883720931, "grad_norm": 0.7157221436500549, "learning_rate": 7.139649626276629e-06, "loss": 0.7328, "step": 2262 }, { "epoch": 1.814755412991179, "grad_norm": 0.668306827545166, "learning_rate": 7.131353196202617e-06, "loss": 0.737, "step": 2263 }, { "epoch": 1.8155573376102647, "grad_norm": 0.6798452138900757, "learning_rate": 7.123058917137677e-06, "loss": 0.7033, "step": 2264 }, { "epoch": 1.8163592622293505, "grad_norm": 0.7059512138366699, "learning_rate": 7.114766795301138e-06, "loss": 0.6999, "step": 2265 }, { "epoch": 1.8171611868484363, "grad_norm": 0.6953184604644775, "learning_rate": 7.106476836910716e-06, "loss": 0.7199, "step": 2266 }, { "epoch": 1.817963111467522, "grad_norm": 0.7047235369682312, "learning_rate": 7.098189048182504e-06, "loss": 0.7685, "step": 2267 }, { "epoch": 1.818765036086608, "grad_norm": 0.7124036550521851, "learning_rate": 7.089903435330966e-06, "loss": 0.7466, "step": 2268 }, { "epoch": 1.8195669607056937, "grad_norm": 0.6875273585319519, "learning_rate": 7.081620004568943e-06, "loss": 0.7218, "step": 2269 }, { "epoch": 1.8203688853247795, "grad_norm": 0.6810701489448547, "learning_rate": 7.073338762107627e-06, "loss": 0.7362, "step": 2270 }, { "epoch": 1.8211708099438653, "grad_norm": 0.6458592414855957, "learning_rate": 7.065059714156579e-06, "loss": 0.7142, "step": 2271 }, { "epoch": 1.821972734562951, "grad_norm": 0.6925168037414551, "learning_rate": 7.0567828669237125e-06, "loss": 0.7441, "step": 2272 }, { "epoch": 1.8227746591820368, "grad_norm": 0.7175741195678711, "learning_rate": 7.048508226615282e-06, "loss": 0.72, "step": 2273 }, { "epoch": 1.8235765838011226, "grad_norm": 0.6916970610618591, "learning_rate": 7.040235799435904e-06, "loss": 0.727, "step": 2274 }, { "epoch": 1.8243785084202084, "grad_norm": 0.6771306395530701, "learning_rate": 7.0319655915885185e-06, "loss": 0.7692, "step": 2275 }, { "epoch": 1.8251804330392942, "grad_norm": 0.7066898941993713, "learning_rate": 7.023697609274418e-06, "loss": 0.7244, "step": 2276 }, { "epoch": 1.82598235765838, "grad_norm": 0.6827045679092407, "learning_rate": 7.015431858693209e-06, "loss": 0.7411, "step": 2277 }, { "epoch": 1.8267842822774658, "grad_norm": 0.7030799388885498, "learning_rate": 7.007168346042832e-06, "loss": 0.7481, "step": 2278 }, { "epoch": 1.8275862068965516, "grad_norm": 0.6946167945861816, "learning_rate": 6.998907077519561e-06, "loss": 0.7296, "step": 2279 }, { "epoch": 1.8283881315156374, "grad_norm": 0.6715916991233826, "learning_rate": 6.990648059317961e-06, "loss": 0.7295, "step": 2280 }, { "epoch": 1.8291900561347232, "grad_norm": 0.6531383395195007, "learning_rate": 6.982391297630939e-06, "loss": 0.7294, "step": 2281 }, { "epoch": 1.829991980753809, "grad_norm": 0.6955968141555786, "learning_rate": 6.97413679864969e-06, "loss": 0.7247, "step": 2282 }, { "epoch": 1.830793905372895, "grad_norm": 0.7067756652832031, "learning_rate": 6.965884568563717e-06, "loss": 0.6878, "step": 2283 }, { "epoch": 1.8315958299919808, "grad_norm": 0.6867192983627319, "learning_rate": 6.957634613560827e-06, "loss": 0.7231, "step": 2284 }, { "epoch": 1.8323977546110666, "grad_norm": 0.6870403289794922, "learning_rate": 6.94938693982711e-06, "loss": 0.7236, "step": 2285 }, { "epoch": 1.8331996792301524, "grad_norm": 0.687545120716095, "learning_rate": 6.941141553546963e-06, "loss": 0.7548, "step": 2286 }, { "epoch": 1.8340016038492382, "grad_norm": 0.6433346271514893, "learning_rate": 6.932898460903052e-06, "loss": 0.7243, "step": 2287 }, { "epoch": 1.834803528468324, "grad_norm": 0.7730824947357178, "learning_rate": 6.924657668076326e-06, "loss": 0.7515, "step": 2288 }, { "epoch": 1.8356054530874097, "grad_norm": 0.6878124475479126, "learning_rate": 6.9164191812460194e-06, "loss": 0.714, "step": 2289 }, { "epoch": 1.8364073777064955, "grad_norm": 0.6526556015014648, "learning_rate": 6.90818300658962e-06, "loss": 0.701, "step": 2290 }, { "epoch": 1.8372093023255816, "grad_norm": 0.6894628405570984, "learning_rate": 6.899949150282903e-06, "loss": 0.714, "step": 2291 }, { "epoch": 1.8380112269446673, "grad_norm": 0.7038993239402771, "learning_rate": 6.8917176184998915e-06, "loss": 0.7532, "step": 2292 }, { "epoch": 1.8388131515637531, "grad_norm": 0.6736302375793457, "learning_rate": 6.883488417412858e-06, "loss": 0.7281, "step": 2293 }, { "epoch": 1.839615076182839, "grad_norm": 0.7072314023971558, "learning_rate": 6.875261553192352e-06, "loss": 0.7431, "step": 2294 }, { "epoch": 1.8404170008019247, "grad_norm": 0.6677948832511902, "learning_rate": 6.8670370320071466e-06, "loss": 0.6828, "step": 2295 }, { "epoch": 1.8412189254210105, "grad_norm": 0.6706652641296387, "learning_rate": 6.858814860024275e-06, "loss": 0.7471, "step": 2296 }, { "epoch": 1.8420208500400963, "grad_norm": 0.6636921763420105, "learning_rate": 6.850595043408997e-06, "loss": 0.691, "step": 2297 }, { "epoch": 1.842822774659182, "grad_norm": 0.6821714639663696, "learning_rate": 6.842377588324809e-06, "loss": 0.7275, "step": 2298 }, { "epoch": 1.8436246992782679, "grad_norm": 0.6688547730445862, "learning_rate": 6.834162500933445e-06, "loss": 0.7008, "step": 2299 }, { "epoch": 1.8444266238973537, "grad_norm": 0.675174355506897, "learning_rate": 6.825949787394853e-06, "loss": 0.7175, "step": 2300 }, { "epoch": 1.8452285485164395, "grad_norm": 0.6541465520858765, "learning_rate": 6.817739453867209e-06, "loss": 0.7273, "step": 2301 }, { "epoch": 1.8460304731355253, "grad_norm": 0.6905247569084167, "learning_rate": 6.809531506506898e-06, "loss": 0.7551, "step": 2302 }, { "epoch": 1.846832397754611, "grad_norm": 0.6996776461601257, "learning_rate": 6.801325951468514e-06, "loss": 0.7546, "step": 2303 }, { "epoch": 1.8476343223736968, "grad_norm": 0.6827793717384338, "learning_rate": 6.7931227949048714e-06, "loss": 0.7418, "step": 2304 }, { "epoch": 1.8484362469927826, "grad_norm": 0.6554014682769775, "learning_rate": 6.784922042966968e-06, "loss": 0.7051, "step": 2305 }, { "epoch": 1.8492381716118684, "grad_norm": 0.6803217530250549, "learning_rate": 6.776723701804013e-06, "loss": 0.7335, "step": 2306 }, { "epoch": 1.8500400962309542, "grad_norm": 0.6773452758789062, "learning_rate": 6.768527777563396e-06, "loss": 0.7053, "step": 2307 }, { "epoch": 1.85084202085004, "grad_norm": 0.6819374561309814, "learning_rate": 6.760334276390707e-06, "loss": 0.7471, "step": 2308 }, { "epoch": 1.8516439454691258, "grad_norm": 0.6662135720252991, "learning_rate": 6.752143204429709e-06, "loss": 0.7263, "step": 2309 }, { "epoch": 1.8524458700882116, "grad_norm": 0.7113257646560669, "learning_rate": 6.7439545678223404e-06, "loss": 0.7027, "step": 2310 }, { "epoch": 1.8532477947072974, "grad_norm": 0.697996973991394, "learning_rate": 6.735768372708731e-06, "loss": 0.7514, "step": 2311 }, { "epoch": 1.8540497193263832, "grad_norm": 0.6723498702049255, "learning_rate": 6.727584625227159e-06, "loss": 0.7454, "step": 2312 }, { "epoch": 1.854851643945469, "grad_norm": 0.6865621209144592, "learning_rate": 6.719403331514085e-06, "loss": 0.7167, "step": 2313 }, { "epoch": 1.8556535685645548, "grad_norm": 0.6741671562194824, "learning_rate": 6.711224497704116e-06, "loss": 0.7182, "step": 2314 }, { "epoch": 1.8564554931836408, "grad_norm": 0.6949282884597778, "learning_rate": 6.703048129930019e-06, "loss": 0.7246, "step": 2315 }, { "epoch": 1.8572574178027266, "grad_norm": 0.6716841459274292, "learning_rate": 6.694874234322719e-06, "loss": 0.7259, "step": 2316 }, { "epoch": 1.8580593424218124, "grad_norm": 0.7084620594978333, "learning_rate": 6.686702817011277e-06, "loss": 0.7328, "step": 2317 }, { "epoch": 1.8588612670408982, "grad_norm": 0.6928534507751465, "learning_rate": 6.678533884122904e-06, "loss": 0.73, "step": 2318 }, { "epoch": 1.859663191659984, "grad_norm": 0.6859990358352661, "learning_rate": 6.670367441782941e-06, "loss": 0.6775, "step": 2319 }, { "epoch": 1.8604651162790697, "grad_norm": 0.7146435379981995, "learning_rate": 6.66220349611486e-06, "loss": 0.7571, "step": 2320 }, { "epoch": 1.8612670408981555, "grad_norm": 0.702168345451355, "learning_rate": 6.654042053240275e-06, "loss": 0.7387, "step": 2321 }, { "epoch": 1.8620689655172413, "grad_norm": 0.688408613204956, "learning_rate": 6.645883119278906e-06, "loss": 0.7394, "step": 2322 }, { "epoch": 1.8628708901363273, "grad_norm": 0.6995466351509094, "learning_rate": 6.637726700348606e-06, "loss": 0.7156, "step": 2323 }, { "epoch": 1.8636728147554131, "grad_norm": 0.6815131306648254, "learning_rate": 6.629572802565332e-06, "loss": 0.715, "step": 2324 }, { "epoch": 1.864474739374499, "grad_norm": 0.6656938195228577, "learning_rate": 6.6214214320431534e-06, "loss": 0.7109, "step": 2325 }, { "epoch": 1.8652766639935847, "grad_norm": 0.6970621943473816, "learning_rate": 6.613272594894248e-06, "loss": 0.7439, "step": 2326 }, { "epoch": 1.8660785886126705, "grad_norm": 0.6916574835777283, "learning_rate": 6.605126297228886e-06, "loss": 0.7338, "step": 2327 }, { "epoch": 1.8668805132317563, "grad_norm": 0.66231769323349, "learning_rate": 6.596982545155447e-06, "loss": 0.7179, "step": 2328 }, { "epoch": 1.867682437850842, "grad_norm": 0.7257800102233887, "learning_rate": 6.5888413447803905e-06, "loss": 0.7485, "step": 2329 }, { "epoch": 1.8684843624699279, "grad_norm": 0.6953336000442505, "learning_rate": 6.580702702208261e-06, "loss": 0.7652, "step": 2330 }, { "epoch": 1.8692862870890137, "grad_norm": 0.6823389530181885, "learning_rate": 6.572566623541697e-06, "loss": 0.7024, "step": 2331 }, { "epoch": 1.8700882117080995, "grad_norm": 0.6590924859046936, "learning_rate": 6.5644331148814e-06, "loss": 0.7128, "step": 2332 }, { "epoch": 1.8708901363271853, "grad_norm": 0.6826373934745789, "learning_rate": 6.55630218232616e-06, "loss": 0.7213, "step": 2333 }, { "epoch": 1.871692060946271, "grad_norm": 0.6541956663131714, "learning_rate": 6.548173831972824e-06, "loss": 0.6984, "step": 2334 }, { "epoch": 1.8724939855653568, "grad_norm": 0.6662783622741699, "learning_rate": 6.540048069916301e-06, "loss": 0.7364, "step": 2335 }, { "epoch": 1.8732959101844426, "grad_norm": 0.6726429462432861, "learning_rate": 6.5319249022495715e-06, "loss": 0.7371, "step": 2336 }, { "epoch": 1.8740978348035284, "grad_norm": 0.6762657165527344, "learning_rate": 6.523804335063655e-06, "loss": 0.7046, "step": 2337 }, { "epoch": 1.8748997594226142, "grad_norm": 0.7136995196342468, "learning_rate": 6.515686374447641e-06, "loss": 0.7201, "step": 2338 }, { "epoch": 1.8757016840417, "grad_norm": 0.7213540077209473, "learning_rate": 6.507571026488644e-06, "loss": 0.7674, "step": 2339 }, { "epoch": 1.8765036086607858, "grad_norm": 0.7228121161460876, "learning_rate": 6.499458297271826e-06, "loss": 0.7653, "step": 2340 }, { "epoch": 1.8773055332798716, "grad_norm": 0.6905105113983154, "learning_rate": 6.491348192880395e-06, "loss": 0.7133, "step": 2341 }, { "epoch": 1.8781074578989574, "grad_norm": 0.6937234997749329, "learning_rate": 6.48324071939558e-06, "loss": 0.7293, "step": 2342 }, { "epoch": 1.8789093825180432, "grad_norm": 0.7513543963432312, "learning_rate": 6.4751358828966415e-06, "loss": 0.7285, "step": 2343 }, { "epoch": 1.879711307137129, "grad_norm": 0.6793731451034546, "learning_rate": 6.467033689460863e-06, "loss": 0.7007, "step": 2344 }, { "epoch": 1.8805132317562148, "grad_norm": 0.6910126805305481, "learning_rate": 6.458934145163539e-06, "loss": 0.7152, "step": 2345 }, { "epoch": 1.8813151563753006, "grad_norm": 0.7317004203796387, "learning_rate": 6.450837256077993e-06, "loss": 0.7716, "step": 2346 }, { "epoch": 1.8821170809943866, "grad_norm": 0.6594632863998413, "learning_rate": 6.44274302827554e-06, "loss": 0.7189, "step": 2347 }, { "epoch": 1.8829190056134724, "grad_norm": 0.654815673828125, "learning_rate": 6.434651467825515e-06, "loss": 0.714, "step": 2348 }, { "epoch": 1.8837209302325582, "grad_norm": 0.7162003517150879, "learning_rate": 6.426562580795242e-06, "loss": 0.7311, "step": 2349 }, { "epoch": 1.884522854851644, "grad_norm": 0.6873356103897095, "learning_rate": 6.4184763732500376e-06, "loss": 0.7173, "step": 2350 }, { "epoch": 1.8853247794707297, "grad_norm": 0.6748940944671631, "learning_rate": 6.410392851253229e-06, "loss": 0.7156, "step": 2351 }, { "epoch": 1.8861267040898155, "grad_norm": 0.6667020916938782, "learning_rate": 6.402312020866102e-06, "loss": 0.7354, "step": 2352 }, { "epoch": 1.8869286287089013, "grad_norm": 0.666428804397583, "learning_rate": 6.39423388814795e-06, "loss": 0.7357, "step": 2353 }, { "epoch": 1.8877305533279871, "grad_norm": 0.6650567054748535, "learning_rate": 6.386158459156029e-06, "loss": 0.718, "step": 2354 }, { "epoch": 1.8885324779470731, "grad_norm": 0.7268814444541931, "learning_rate": 6.378085739945566e-06, "loss": 0.7532, "step": 2355 }, { "epoch": 1.889334402566159, "grad_norm": 0.696033239364624, "learning_rate": 6.3700157365697655e-06, "loss": 0.7387, "step": 2356 }, { "epoch": 1.8901363271852447, "grad_norm": 0.7350199818611145, "learning_rate": 6.361948455079785e-06, "loss": 0.7662, "step": 2357 }, { "epoch": 1.8909382518043305, "grad_norm": 0.6738780736923218, "learning_rate": 6.353883901524756e-06, "loss": 0.7182, "step": 2358 }, { "epoch": 1.8917401764234163, "grad_norm": 0.7525630593299866, "learning_rate": 6.34582208195175e-06, "loss": 0.7417, "step": 2359 }, { "epoch": 1.892542101042502, "grad_norm": 0.6829856038093567, "learning_rate": 6.337763002405792e-06, "loss": 0.7616, "step": 2360 }, { "epoch": 1.8933440256615879, "grad_norm": 0.6920203566551208, "learning_rate": 6.329706668929861e-06, "loss": 0.7149, "step": 2361 }, { "epoch": 1.8941459502806737, "grad_norm": 0.7000799775123596, "learning_rate": 6.321653087564861e-06, "loss": 0.754, "step": 2362 }, { "epoch": 1.8949478748997595, "grad_norm": 0.6926515102386475, "learning_rate": 6.31360226434965e-06, "loss": 0.7099, "step": 2363 }, { "epoch": 1.8957497995188453, "grad_norm": 0.6759518384933472, "learning_rate": 6.305554205321005e-06, "loss": 0.7287, "step": 2364 }, { "epoch": 1.896551724137931, "grad_norm": 0.6605546474456787, "learning_rate": 6.297508916513636e-06, "loss": 0.7242, "step": 2365 }, { "epoch": 1.8973536487570168, "grad_norm": 0.6701246500015259, "learning_rate": 6.289466403960175e-06, "loss": 0.7318, "step": 2366 }, { "epoch": 1.8981555733761026, "grad_norm": 0.6828826069831848, "learning_rate": 6.281426673691169e-06, "loss": 0.7054, "step": 2367 }, { "epoch": 1.8989574979951884, "grad_norm": 0.6410530209541321, "learning_rate": 6.273389731735087e-06, "loss": 0.7241, "step": 2368 }, { "epoch": 1.8997594226142742, "grad_norm": 0.7067154049873352, "learning_rate": 6.265355584118297e-06, "loss": 0.7071, "step": 2369 }, { "epoch": 1.90056134723336, "grad_norm": 0.698462724685669, "learning_rate": 6.257324236865074e-06, "loss": 0.723, "step": 2370 }, { "epoch": 1.9013632718524458, "grad_norm": 0.747157096862793, "learning_rate": 6.249295695997604e-06, "loss": 0.755, "step": 2371 }, { "epoch": 1.9021651964715316, "grad_norm": 0.7117529511451721, "learning_rate": 6.241269967535955e-06, "loss": 0.71, "step": 2372 }, { "epoch": 1.9029671210906174, "grad_norm": 0.7347584962844849, "learning_rate": 6.233247057498093e-06, "loss": 0.7238, "step": 2373 }, { "epoch": 1.9037690457097032, "grad_norm": 0.6999946236610413, "learning_rate": 6.225226971899869e-06, "loss": 0.7107, "step": 2374 }, { "epoch": 1.904570970328789, "grad_norm": 0.7275912761688232, "learning_rate": 6.217209716755013e-06, "loss": 0.7432, "step": 2375 }, { "epoch": 1.9053728949478748, "grad_norm": 0.6637576222419739, "learning_rate": 6.2091952980751414e-06, "loss": 0.6802, "step": 2376 }, { "epoch": 1.9061748195669606, "grad_norm": 0.7044709920883179, "learning_rate": 6.201183721869735e-06, "loss": 0.7396, "step": 2377 }, { "epoch": 1.9069767441860463, "grad_norm": 0.6789054870605469, "learning_rate": 6.193174994146148e-06, "loss": 0.7369, "step": 2378 }, { "epoch": 1.9077786688051324, "grad_norm": 0.6822087168693542, "learning_rate": 6.185169120909598e-06, "loss": 0.7293, "step": 2379 }, { "epoch": 1.9085805934242182, "grad_norm": 0.7218993306159973, "learning_rate": 6.177166108163155e-06, "loss": 0.7156, "step": 2380 }, { "epoch": 1.909382518043304, "grad_norm": 0.6758652329444885, "learning_rate": 6.169165961907762e-06, "loss": 0.7279, "step": 2381 }, { "epoch": 1.9101844426623897, "grad_norm": 0.7355567216873169, "learning_rate": 6.1611686881421875e-06, "loss": 0.7328, "step": 2382 }, { "epoch": 1.9109863672814755, "grad_norm": 0.7142036557197571, "learning_rate": 6.153174292863071e-06, "loss": 0.7094, "step": 2383 }, { "epoch": 1.9117882919005613, "grad_norm": 0.7164692282676697, "learning_rate": 6.145182782064879e-06, "loss": 0.7163, "step": 2384 }, { "epoch": 1.9125902165196471, "grad_norm": 0.6600916981697083, "learning_rate": 6.137194161739915e-06, "loss": 0.6753, "step": 2385 }, { "epoch": 1.913392141138733, "grad_norm": 0.7095491290092468, "learning_rate": 6.129208437878324e-06, "loss": 0.7447, "step": 2386 }, { "epoch": 1.914194065757819, "grad_norm": 0.6852803230285645, "learning_rate": 6.121225616468065e-06, "loss": 0.6973, "step": 2387 }, { "epoch": 1.9149959903769047, "grad_norm": 0.6977118253707886, "learning_rate": 6.113245703494941e-06, "loss": 0.7793, "step": 2388 }, { "epoch": 1.9157979149959905, "grad_norm": 0.6602482199668884, "learning_rate": 6.105268704942555e-06, "loss": 0.7237, "step": 2389 }, { "epoch": 1.9165998396150763, "grad_norm": 0.7034119963645935, "learning_rate": 6.097294626792334e-06, "loss": 0.7226, "step": 2390 }, { "epoch": 1.917401764234162, "grad_norm": 0.6831420063972473, "learning_rate": 6.0893234750235145e-06, "loss": 0.73, "step": 2391 }, { "epoch": 1.9182036888532479, "grad_norm": 0.7341967225074768, "learning_rate": 6.0813552556131315e-06, "loss": 0.7314, "step": 2392 }, { "epoch": 1.9190056134723337, "grad_norm": 0.7385361194610596, "learning_rate": 6.073389974536037e-06, "loss": 0.7362, "step": 2393 }, { "epoch": 1.9198075380914195, "grad_norm": 0.6924091577529907, "learning_rate": 6.065427637764865e-06, "loss": 0.742, "step": 2394 }, { "epoch": 1.9206094627105053, "grad_norm": 0.6444892883300781, "learning_rate": 6.0574682512700444e-06, "loss": 0.6972, "step": 2395 }, { "epoch": 1.921411387329591, "grad_norm": 0.7088480591773987, "learning_rate": 6.0495118210197975e-06, "loss": 0.7426, "step": 2396 }, { "epoch": 1.9222133119486768, "grad_norm": 0.7047684788703918, "learning_rate": 6.041558352980126e-06, "loss": 0.7335, "step": 2397 }, { "epoch": 1.9230152365677626, "grad_norm": 0.7246830463409424, "learning_rate": 6.033607853114813e-06, "loss": 0.802, "step": 2398 }, { "epoch": 1.9238171611868484, "grad_norm": 0.6952186822891235, "learning_rate": 6.025660327385412e-06, "loss": 0.7143, "step": 2399 }, { "epoch": 1.9246190858059342, "grad_norm": 0.6386004090309143, "learning_rate": 6.017715781751243e-06, "loss": 0.7112, "step": 2400 }, { "epoch": 1.92542101042502, "grad_norm": 0.6913342475891113, "learning_rate": 6.009774222169409e-06, "loss": 0.7377, "step": 2401 }, { "epoch": 1.9262229350441058, "grad_norm": 0.6988136768341064, "learning_rate": 6.001835654594751e-06, "loss": 0.7444, "step": 2402 }, { "epoch": 1.9270248596631916, "grad_norm": 0.7216395735740662, "learning_rate": 5.993900084979884e-06, "loss": 0.7516, "step": 2403 }, { "epoch": 1.9278267842822774, "grad_norm": 0.6848301887512207, "learning_rate": 5.985967519275167e-06, "loss": 0.6978, "step": 2404 }, { "epoch": 1.9286287089013632, "grad_norm": 0.6854767203330994, "learning_rate": 5.978037963428702e-06, "loss": 0.7278, "step": 2405 }, { "epoch": 1.929430633520449, "grad_norm": 0.6917245984077454, "learning_rate": 5.970111423386349e-06, "loss": 0.7319, "step": 2406 }, { "epoch": 1.9302325581395348, "grad_norm": 0.7103894948959351, "learning_rate": 5.962187905091692e-06, "loss": 0.7181, "step": 2407 }, { "epoch": 1.9310344827586206, "grad_norm": 0.6659692525863647, "learning_rate": 5.954267414486057e-06, "loss": 0.7213, "step": 2408 }, { "epoch": 1.9318364073777063, "grad_norm": 0.7113282084465027, "learning_rate": 5.946349957508499e-06, "loss": 0.7317, "step": 2409 }, { "epoch": 1.9326383319967921, "grad_norm": 0.6863912343978882, "learning_rate": 5.93843554009579e-06, "loss": 0.6955, "step": 2410 }, { "epoch": 1.9334402566158782, "grad_norm": 0.6813226938247681, "learning_rate": 5.930524168182441e-06, "loss": 0.7199, "step": 2411 }, { "epoch": 1.934242181234964, "grad_norm": 0.6929488778114319, "learning_rate": 5.922615847700655e-06, "loss": 0.7189, "step": 2412 }, { "epoch": 1.9350441058540497, "grad_norm": 0.6780677437782288, "learning_rate": 5.91471058458037e-06, "loss": 0.7502, "step": 2413 }, { "epoch": 1.9358460304731355, "grad_norm": 0.6948460340499878, "learning_rate": 5.90680838474922e-06, "loss": 0.6846, "step": 2414 }, { "epoch": 1.9366479550922213, "grad_norm": 0.6975181698799133, "learning_rate": 5.898909254132539e-06, "loss": 0.6991, "step": 2415 }, { "epoch": 1.937449879711307, "grad_norm": 0.7138963937759399, "learning_rate": 5.891013198653368e-06, "loss": 0.7178, "step": 2416 }, { "epoch": 1.938251804330393, "grad_norm": 0.7381751537322998, "learning_rate": 5.8831202242324345e-06, "loss": 0.7485, "step": 2417 }, { "epoch": 1.9390537289494787, "grad_norm": 0.6692777872085571, "learning_rate": 5.875230336788167e-06, "loss": 0.7372, "step": 2418 }, { "epoch": 1.9398556535685647, "grad_norm": 0.699887216091156, "learning_rate": 5.8673435422366656e-06, "loss": 0.7446, "step": 2419 }, { "epoch": 1.9406575781876505, "grad_norm": 0.6614972352981567, "learning_rate": 5.859459846491718e-06, "loss": 0.7047, "step": 2420 }, { "epoch": 1.9414595028067363, "grad_norm": 0.6647498607635498, "learning_rate": 5.85157925546479e-06, "loss": 0.7272, "step": 2421 }, { "epoch": 1.942261427425822, "grad_norm": 0.6981691122055054, "learning_rate": 5.843701775065011e-06, "loss": 0.7061, "step": 2422 }, { "epoch": 1.9430633520449079, "grad_norm": 0.7187725305557251, "learning_rate": 5.835827411199194e-06, "loss": 0.7676, "step": 2423 }, { "epoch": 1.9438652766639937, "grad_norm": 0.6973015069961548, "learning_rate": 5.8279561697718025e-06, "loss": 0.7354, "step": 2424 }, { "epoch": 1.9446672012830795, "grad_norm": 0.69856858253479, "learning_rate": 5.8200880566849535e-06, "loss": 0.7591, "step": 2425 }, { "epoch": 1.9454691259021653, "grad_norm": 0.674909770488739, "learning_rate": 5.812223077838433e-06, "loss": 0.7067, "step": 2426 }, { "epoch": 1.946271050521251, "grad_norm": 0.6738868951797485, "learning_rate": 5.804361239129668e-06, "loss": 0.726, "step": 2427 }, { "epoch": 1.9470729751403368, "grad_norm": 0.6930453777313232, "learning_rate": 5.7965025464537336e-06, "loss": 0.7368, "step": 2428 }, { "epoch": 1.9478748997594226, "grad_norm": 0.6769317388534546, "learning_rate": 5.788647005703349e-06, "loss": 0.7118, "step": 2429 }, { "epoch": 1.9486768243785084, "grad_norm": 0.6883741021156311, "learning_rate": 5.780794622768859e-06, "loss": 0.7179, "step": 2430 }, { "epoch": 1.9494787489975942, "grad_norm": 0.6916329860687256, "learning_rate": 5.77294540353825e-06, "loss": 0.7072, "step": 2431 }, { "epoch": 1.95028067361668, "grad_norm": 0.6634646654129028, "learning_rate": 5.765099353897136e-06, "loss": 0.7044, "step": 2432 }, { "epoch": 1.9510825982357658, "grad_norm": 0.6738753318786621, "learning_rate": 5.7572564797287525e-06, "loss": 0.7317, "step": 2433 }, { "epoch": 1.9518845228548516, "grad_norm": 0.6909292340278625, "learning_rate": 5.749416786913954e-06, "loss": 0.7059, "step": 2434 }, { "epoch": 1.9526864474739374, "grad_norm": 0.6980206966400146, "learning_rate": 5.741580281331204e-06, "loss": 0.7367, "step": 2435 }, { "epoch": 1.9534883720930232, "grad_norm": 0.7224229574203491, "learning_rate": 5.733746968856585e-06, "loss": 0.6925, "step": 2436 }, { "epoch": 1.954290296712109, "grad_norm": 0.680873692035675, "learning_rate": 5.7259168553637815e-06, "loss": 0.7288, "step": 2437 }, { "epoch": 1.9550922213311948, "grad_norm": 0.6676924824714661, "learning_rate": 5.718089946724078e-06, "loss": 0.6918, "step": 2438 }, { "epoch": 1.9558941459502805, "grad_norm": 0.7096678614616394, "learning_rate": 5.710266248806363e-06, "loss": 0.7167, "step": 2439 }, { "epoch": 1.9566960705693663, "grad_norm": 0.6810303926467896, "learning_rate": 5.702445767477103e-06, "loss": 0.6996, "step": 2440 }, { "epoch": 1.9574979951884521, "grad_norm": 0.6997901201248169, "learning_rate": 5.6946285086003636e-06, "loss": 0.7609, "step": 2441 }, { "epoch": 1.958299919807538, "grad_norm": 0.7114957571029663, "learning_rate": 5.686814478037795e-06, "loss": 0.7472, "step": 2442 }, { "epoch": 1.959101844426624, "grad_norm": 0.7095164656639099, "learning_rate": 5.679003681648625e-06, "loss": 0.6977, "step": 2443 }, { "epoch": 1.9599037690457097, "grad_norm": 0.7158694267272949, "learning_rate": 5.671196125289647e-06, "loss": 0.7513, "step": 2444 }, { "epoch": 1.9607056936647955, "grad_norm": 0.6895703673362732, "learning_rate": 5.663391814815238e-06, "loss": 0.7194, "step": 2445 }, { "epoch": 1.9615076182838813, "grad_norm": 0.6775211095809937, "learning_rate": 5.655590756077334e-06, "loss": 0.7336, "step": 2446 }, { "epoch": 1.962309542902967, "grad_norm": 0.7095913290977478, "learning_rate": 5.647792954925435e-06, "loss": 0.7276, "step": 2447 }, { "epoch": 1.963111467522053, "grad_norm": 0.6791907548904419, "learning_rate": 5.639998417206602e-06, "loss": 0.7091, "step": 2448 }, { "epoch": 1.9639133921411387, "grad_norm": 0.7308151125907898, "learning_rate": 5.632207148765438e-06, "loss": 0.7246, "step": 2449 }, { "epoch": 1.9647153167602245, "grad_norm": 0.7150318026542664, "learning_rate": 5.6244191554441045e-06, "loss": 0.7325, "step": 2450 }, { "epoch": 1.9655172413793105, "grad_norm": 0.6908929347991943, "learning_rate": 5.616634443082303e-06, "loss": 0.7073, "step": 2451 }, { "epoch": 1.9663191659983963, "grad_norm": 0.7129378318786621, "learning_rate": 5.608853017517277e-06, "loss": 0.6903, "step": 2452 }, { "epoch": 1.967121090617482, "grad_norm": 0.6859320998191833, "learning_rate": 5.601074884583809e-06, "loss": 0.7296, "step": 2453 }, { "epoch": 1.9679230152365679, "grad_norm": 0.6935213208198547, "learning_rate": 5.593300050114199e-06, "loss": 0.7437, "step": 2454 }, { "epoch": 1.9687249398556537, "grad_norm": 0.6933846473693848, "learning_rate": 5.585528519938288e-06, "loss": 0.7118, "step": 2455 }, { "epoch": 1.9695268644747395, "grad_norm": 0.6689132452011108, "learning_rate": 5.5777602998834345e-06, "loss": 0.7091, "step": 2456 }, { "epoch": 1.9703287890938253, "grad_norm": 0.6558547616004944, "learning_rate": 5.569995395774508e-06, "loss": 0.6749, "step": 2457 }, { "epoch": 1.971130713712911, "grad_norm": 0.680107057094574, "learning_rate": 5.562233813433909e-06, "loss": 0.7316, "step": 2458 }, { "epoch": 1.9719326383319968, "grad_norm": 0.6814321875572205, "learning_rate": 5.5544755586815265e-06, "loss": 0.7284, "step": 2459 }, { "epoch": 1.9727345629510826, "grad_norm": 0.6948514580726624, "learning_rate": 5.546720637334769e-06, "loss": 0.7091, "step": 2460 }, { "epoch": 1.9735364875701684, "grad_norm": 0.6681773066520691, "learning_rate": 5.538969055208543e-06, "loss": 0.7373, "step": 2461 }, { "epoch": 1.9743384121892542, "grad_norm": 0.7033309936523438, "learning_rate": 5.5312208181152376e-06, "loss": 0.7387, "step": 2462 }, { "epoch": 1.97514033680834, "grad_norm": 0.6964126229286194, "learning_rate": 5.523475931864759e-06, "loss": 0.7446, "step": 2463 }, { "epoch": 1.9759422614274258, "grad_norm": 0.6732887029647827, "learning_rate": 5.515734402264478e-06, "loss": 0.6558, "step": 2464 }, { "epoch": 1.9767441860465116, "grad_norm": 0.6745656728744507, "learning_rate": 5.5079962351192585e-06, "loss": 0.7085, "step": 2465 }, { "epoch": 1.9775461106655974, "grad_norm": 0.7190232276916504, "learning_rate": 5.500261436231447e-06, "loss": 0.7126, "step": 2466 }, { "epoch": 1.9783480352846832, "grad_norm": 0.6871313452720642, "learning_rate": 5.4925300114008465e-06, "loss": 0.7176, "step": 2467 }, { "epoch": 1.979149959903769, "grad_norm": 0.6837944388389587, "learning_rate": 5.4848019664247575e-06, "loss": 0.7013, "step": 2468 }, { "epoch": 1.9799518845228548, "grad_norm": 0.706548810005188, "learning_rate": 5.4770773070979225e-06, "loss": 0.733, "step": 2469 }, { "epoch": 1.9807538091419405, "grad_norm": 0.682320773601532, "learning_rate": 5.469356039212557e-06, "loss": 0.7227, "step": 2470 }, { "epoch": 1.9815557337610263, "grad_norm": 0.7176364660263062, "learning_rate": 5.461638168558332e-06, "loss": 0.7324, "step": 2471 }, { "epoch": 1.9823576583801121, "grad_norm": 0.6608320474624634, "learning_rate": 5.453923700922366e-06, "loss": 0.7153, "step": 2472 }, { "epoch": 1.983159582999198, "grad_norm": 0.6956177353858948, "learning_rate": 5.446212642089228e-06, "loss": 0.7022, "step": 2473 }, { "epoch": 1.9839615076182837, "grad_norm": 0.6896581053733826, "learning_rate": 5.4385049978409385e-06, "loss": 0.7185, "step": 2474 }, { "epoch": 1.9847634322373697, "grad_norm": 0.7071901559829712, "learning_rate": 5.430800773956948e-06, "loss": 0.7393, "step": 2475 }, { "epoch": 1.9855653568564555, "grad_norm": 0.7110061645507812, "learning_rate": 5.42309997621415e-06, "loss": 0.7563, "step": 2476 }, { "epoch": 1.9863672814755413, "grad_norm": 0.7318345904350281, "learning_rate": 5.415402610386859e-06, "loss": 0.7583, "step": 2477 }, { "epoch": 1.987169206094627, "grad_norm": 0.6681869029998779, "learning_rate": 5.407708682246825e-06, "loss": 0.6964, "step": 2478 }, { "epoch": 1.987971130713713, "grad_norm": 0.6897268891334534, "learning_rate": 5.400018197563217e-06, "loss": 0.7301, "step": 2479 }, { "epoch": 1.9887730553327987, "grad_norm": 0.6932487487792969, "learning_rate": 5.392331162102622e-06, "loss": 0.6904, "step": 2480 }, { "epoch": 1.9895749799518845, "grad_norm": 0.7004687786102295, "learning_rate": 5.384647581629045e-06, "loss": 0.7364, "step": 2481 }, { "epoch": 1.9903769045709703, "grad_norm": 0.6893764734268188, "learning_rate": 5.37696746190389e-06, "loss": 0.7311, "step": 2482 }, { "epoch": 1.9911788291900563, "grad_norm": 0.7490194439888, "learning_rate": 5.369290808685975e-06, "loss": 0.7163, "step": 2483 }, { "epoch": 1.991980753809142, "grad_norm": 0.7016685009002686, "learning_rate": 5.3616176277315164e-06, "loss": 0.7446, "step": 2484 }, { "epoch": 1.9927826784282279, "grad_norm": 0.7102388739585876, "learning_rate": 5.353947924794129e-06, "loss": 0.7777, "step": 2485 }, { "epoch": 1.9935846030473137, "grad_norm": 0.707472026348114, "learning_rate": 5.346281705624812e-06, "loss": 0.7304, "step": 2486 }, { "epoch": 1.9943865276663995, "grad_norm": 0.6962066888809204, "learning_rate": 5.33861897597196e-06, "loss": 0.7545, "step": 2487 }, { "epoch": 1.9951884522854852, "grad_norm": 0.684525191783905, "learning_rate": 5.330959741581347e-06, "loss": 0.712, "step": 2488 }, { "epoch": 1.995990376904571, "grad_norm": 0.6817164421081543, "learning_rate": 5.323304008196133e-06, "loss": 0.7334, "step": 2489 }, { "epoch": 1.9967923015236568, "grad_norm": 0.6918975114822388, "learning_rate": 5.3156517815568455e-06, "loss": 0.7334, "step": 2490 }, { "epoch": 1.9975942261427426, "grad_norm": 0.6976943612098694, "learning_rate": 5.30800306740138e-06, "loss": 0.7255, "step": 2491 }, { "epoch": 1.9983961507618284, "grad_norm": 0.7042475938796997, "learning_rate": 5.300357871465007e-06, "loss": 0.7338, "step": 2492 }, { "epoch": 1.9991980753809142, "grad_norm": 0.6709238290786743, "learning_rate": 5.292716199480354e-06, "loss": 0.7195, "step": 2493 }, { "epoch": 2.0, "grad_norm": 0.648729145526886, "learning_rate": 5.285078057177406e-06, "loss": 0.6162, "step": 2494 }, { "epoch": 2.000801924619086, "grad_norm": 0.6973950862884521, "learning_rate": 5.277443450283508e-06, "loss": 0.5518, "step": 2495 }, { "epoch": 2.0016038492381716, "grad_norm": 0.6707605123519897, "learning_rate": 5.269812384523341e-06, "loss": 0.5418, "step": 2496 }, { "epoch": 2.0024057738572574, "grad_norm": 0.6641839146614075, "learning_rate": 5.262184865618938e-06, "loss": 0.5272, "step": 2497 }, { "epoch": 2.003207698476343, "grad_norm": 0.703292191028595, "learning_rate": 5.254560899289679e-06, "loss": 0.5537, "step": 2498 }, { "epoch": 2.004009623095429, "grad_norm": 0.7050741314888, "learning_rate": 5.246940491252263e-06, "loss": 0.5197, "step": 2499 }, { "epoch": 2.0048115477145148, "grad_norm": 0.7010351419448853, "learning_rate": 5.239323647220744e-06, "loss": 0.5039, "step": 2500 }, { "epoch": 2.0056134723336005, "grad_norm": 0.7894969582557678, "learning_rate": 5.231710372906482e-06, "loss": 0.5257, "step": 2501 }, { "epoch": 2.0064153969526863, "grad_norm": 0.8365249633789062, "learning_rate": 5.224100674018173e-06, "loss": 0.5038, "step": 2502 }, { "epoch": 2.007217321571772, "grad_norm": 0.9526363611221313, "learning_rate": 5.216494556261831e-06, "loss": 0.5347, "step": 2503 }, { "epoch": 2.008019246190858, "grad_norm": 0.9146489500999451, "learning_rate": 5.208892025340772e-06, "loss": 0.506, "step": 2504 }, { "epoch": 2.0088211708099437, "grad_norm": 0.8940325975418091, "learning_rate": 5.201293086955646e-06, "loss": 0.5, "step": 2505 }, { "epoch": 2.0096230954290295, "grad_norm": 0.9119753837585449, "learning_rate": 5.193697746804386e-06, "loss": 0.5178, "step": 2506 }, { "epoch": 2.0104250200481153, "grad_norm": 0.8597251176834106, "learning_rate": 5.186106010582239e-06, "loss": 0.5075, "step": 2507 }, { "epoch": 2.011226944667201, "grad_norm": 0.7838432192802429, "learning_rate": 5.178517883981753e-06, "loss": 0.4928, "step": 2508 }, { "epoch": 2.012028869286287, "grad_norm": 0.7873410582542419, "learning_rate": 5.170933372692752e-06, "loss": 0.5191, "step": 2509 }, { "epoch": 2.0128307939053727, "grad_norm": 0.7258116006851196, "learning_rate": 5.163352482402375e-06, "loss": 0.5039, "step": 2510 }, { "epoch": 2.013632718524459, "grad_norm": 0.8325080871582031, "learning_rate": 5.15577521879502e-06, "loss": 0.5224, "step": 2511 }, { "epoch": 2.0144346431435447, "grad_norm": 0.7581323385238647, "learning_rate": 5.148201587552384e-06, "loss": 0.4951, "step": 2512 }, { "epoch": 2.0152365677626305, "grad_norm": 0.7743967175483704, "learning_rate": 5.140631594353434e-06, "loss": 0.5489, "step": 2513 }, { "epoch": 2.0160384923817163, "grad_norm": 0.7546889185905457, "learning_rate": 5.133065244874404e-06, "loss": 0.4962, "step": 2514 }, { "epoch": 2.016840417000802, "grad_norm": 0.725445568561554, "learning_rate": 5.1255025447888005e-06, "loss": 0.4818, "step": 2515 }, { "epoch": 2.017642341619888, "grad_norm": 0.7801692485809326, "learning_rate": 5.117943499767402e-06, "loss": 0.4848, "step": 2516 }, { "epoch": 2.0184442662389737, "grad_norm": 0.7961569428443909, "learning_rate": 5.110388115478222e-06, "loss": 0.4813, "step": 2517 }, { "epoch": 2.0192461908580595, "grad_norm": 0.840562641620636, "learning_rate": 5.102836397586564e-06, "loss": 0.4887, "step": 2518 }, { "epoch": 2.0200481154771452, "grad_norm": 0.8071300983428955, "learning_rate": 5.09528835175495e-06, "loss": 0.4874, "step": 2519 }, { "epoch": 2.020850040096231, "grad_norm": 0.8382665514945984, "learning_rate": 5.087743983643165e-06, "loss": 0.4947, "step": 2520 }, { "epoch": 2.021651964715317, "grad_norm": 0.7823915481567383, "learning_rate": 5.080203298908239e-06, "loss": 0.4644, "step": 2521 }, { "epoch": 2.0224538893344026, "grad_norm": 0.8397455215454102, "learning_rate": 5.072666303204421e-06, "loss": 0.4924, "step": 2522 }, { "epoch": 2.0232558139534884, "grad_norm": 0.8278082609176636, "learning_rate": 5.065133002183223e-06, "loss": 0.5191, "step": 2523 }, { "epoch": 2.024057738572574, "grad_norm": 0.8194684982299805, "learning_rate": 5.057603401493358e-06, "loss": 0.4957, "step": 2524 }, { "epoch": 2.02485966319166, "grad_norm": 0.7626014947891235, "learning_rate": 5.050077506780783e-06, "loss": 0.4892, "step": 2525 }, { "epoch": 2.025661587810746, "grad_norm": 0.7766503691673279, "learning_rate": 5.042555323688673e-06, "loss": 0.5108, "step": 2526 }, { "epoch": 2.0264635124298316, "grad_norm": 0.7892016768455505, "learning_rate": 5.035036857857405e-06, "loss": 0.4472, "step": 2527 }, { "epoch": 2.0272654370489174, "grad_norm": 0.7951651811599731, "learning_rate": 5.027522114924597e-06, "loss": 0.4936, "step": 2528 }, { "epoch": 2.028067361668003, "grad_norm": 0.7646651864051819, "learning_rate": 5.020011100525047e-06, "loss": 0.5195, "step": 2529 }, { "epoch": 2.028869286287089, "grad_norm": 0.7651566863059998, "learning_rate": 5.0125038202907735e-06, "loss": 0.4591, "step": 2530 }, { "epoch": 2.0296712109061747, "grad_norm": 0.814940333366394, "learning_rate": 5.0050002798509956e-06, "loss": 0.5049, "step": 2531 }, { "epoch": 2.0304731355252605, "grad_norm": 0.8493900895118713, "learning_rate": 4.997500484832114e-06, "loss": 0.5016, "step": 2532 }, { "epoch": 2.0312750601443463, "grad_norm": 0.8357752561569214, "learning_rate": 4.990004440857735e-06, "loss": 0.4844, "step": 2533 }, { "epoch": 2.032076984763432, "grad_norm": 0.8459290862083435, "learning_rate": 4.9825121535486475e-06, "loss": 0.5135, "step": 2534 }, { "epoch": 2.032878909382518, "grad_norm": 0.7996906042098999, "learning_rate": 4.975023628522825e-06, "loss": 0.482, "step": 2535 }, { "epoch": 2.0336808340016037, "grad_norm": 0.8636319041252136, "learning_rate": 4.967538871395421e-06, "loss": 0.4959, "step": 2536 }, { "epoch": 2.0344827586206895, "grad_norm": 0.813450276851654, "learning_rate": 4.960057887778754e-06, "loss": 0.4843, "step": 2537 }, { "epoch": 2.0352846832397753, "grad_norm": 0.807138979434967, "learning_rate": 4.952580683282324e-06, "loss": 0.497, "step": 2538 }, { "epoch": 2.036086607858861, "grad_norm": 0.8144460916519165, "learning_rate": 4.945107263512794e-06, "loss": 0.4893, "step": 2539 }, { "epoch": 2.036888532477947, "grad_norm": 0.8009450435638428, "learning_rate": 4.937637634073988e-06, "loss": 0.5016, "step": 2540 }, { "epoch": 2.0376904570970327, "grad_norm": 0.7973288893699646, "learning_rate": 4.930171800566893e-06, "loss": 0.4966, "step": 2541 }, { "epoch": 2.038492381716119, "grad_norm": 0.773374617099762, "learning_rate": 4.922709768589638e-06, "loss": 0.5013, "step": 2542 }, { "epoch": 2.0392943063352047, "grad_norm": 0.8175215125083923, "learning_rate": 4.915251543737512e-06, "loss": 0.4926, "step": 2543 }, { "epoch": 2.0400962309542905, "grad_norm": 0.8121562600135803, "learning_rate": 4.907797131602945e-06, "loss": 0.5069, "step": 2544 }, { "epoch": 2.0408981555733763, "grad_norm": 0.7891466021537781, "learning_rate": 4.900346537775513e-06, "loss": 0.4978, "step": 2545 }, { "epoch": 2.041700080192462, "grad_norm": 0.7856062650680542, "learning_rate": 4.89289976784192e-06, "loss": 0.4882, "step": 2546 }, { "epoch": 2.042502004811548, "grad_norm": 0.7932535409927368, "learning_rate": 4.885456827386008e-06, "loss": 0.4836, "step": 2547 }, { "epoch": 2.0433039294306337, "grad_norm": 0.7187968492507935, "learning_rate": 4.87801772198875e-06, "loss": 0.4609, "step": 2548 }, { "epoch": 2.0441058540497195, "grad_norm": 0.8403437733650208, "learning_rate": 4.870582457228239e-06, "loss": 0.5197, "step": 2549 }, { "epoch": 2.0449077786688052, "grad_norm": 0.8300922513008118, "learning_rate": 4.863151038679694e-06, "loss": 0.5118, "step": 2550 }, { "epoch": 2.045709703287891, "grad_norm": 0.8011190891265869, "learning_rate": 4.855723471915438e-06, "loss": 0.4769, "step": 2551 }, { "epoch": 2.046511627906977, "grad_norm": 0.8211809992790222, "learning_rate": 4.848299762504918e-06, "loss": 0.5017, "step": 2552 }, { "epoch": 2.0473135525260626, "grad_norm": 0.7941953539848328, "learning_rate": 4.840879916014683e-06, "loss": 0.4776, "step": 2553 }, { "epoch": 2.0481154771451484, "grad_norm": 0.8611568212509155, "learning_rate": 4.833463938008387e-06, "loss": 0.4887, "step": 2554 }, { "epoch": 2.048917401764234, "grad_norm": 0.8546658754348755, "learning_rate": 4.826051834046787e-06, "loss": 0.508, "step": 2555 }, { "epoch": 2.04971932638332, "grad_norm": 0.8082013726234436, "learning_rate": 4.818643609687724e-06, "loss": 0.5016, "step": 2556 }, { "epoch": 2.050521251002406, "grad_norm": 0.868209183216095, "learning_rate": 4.811239270486139e-06, "loss": 0.4957, "step": 2557 }, { "epoch": 2.0513231756214916, "grad_norm": 0.8040471076965332, "learning_rate": 4.803838821994062e-06, "loss": 0.4874, "step": 2558 }, { "epoch": 2.0521251002405774, "grad_norm": 0.7826542854309082, "learning_rate": 4.796442269760592e-06, "loss": 0.4845, "step": 2559 }, { "epoch": 2.052927024859663, "grad_norm": 0.8145564794540405, "learning_rate": 4.789049619331928e-06, "loss": 0.5049, "step": 2560 }, { "epoch": 2.053728949478749, "grad_norm": 0.8400808572769165, "learning_rate": 4.781660876251322e-06, "loss": 0.4969, "step": 2561 }, { "epoch": 2.0545308740978347, "grad_norm": 0.8168050050735474, "learning_rate": 4.774276046059107e-06, "loss": 0.458, "step": 2562 }, { "epoch": 2.0553327987169205, "grad_norm": 0.8102244734764099, "learning_rate": 4.766895134292685e-06, "loss": 0.5026, "step": 2563 }, { "epoch": 2.0561347233360063, "grad_norm": 0.8787121772766113, "learning_rate": 4.759518146486504e-06, "loss": 0.5098, "step": 2564 }, { "epoch": 2.056936647955092, "grad_norm": 1.1912168264389038, "learning_rate": 4.752145088172094e-06, "loss": 0.5131, "step": 2565 }, { "epoch": 2.057738572574178, "grad_norm": 0.7951311469078064, "learning_rate": 4.744775964878017e-06, "loss": 0.4842, "step": 2566 }, { "epoch": 2.0585404971932637, "grad_norm": 0.8363946080207825, "learning_rate": 4.737410782129894e-06, "loss": 0.4933, "step": 2567 }, { "epoch": 2.0593424218123495, "grad_norm": 0.8067214488983154, "learning_rate": 4.730049545450394e-06, "loss": 0.4983, "step": 2568 }, { "epoch": 2.0601443464314353, "grad_norm": 0.8054936528205872, "learning_rate": 4.722692260359211e-06, "loss": 0.4895, "step": 2569 }, { "epoch": 2.060946271050521, "grad_norm": 0.8232284188270569, "learning_rate": 4.715338932373107e-06, "loss": 0.4964, "step": 2570 }, { "epoch": 2.061748195669607, "grad_norm": 0.8356310725212097, "learning_rate": 4.707989567005845e-06, "loss": 0.482, "step": 2571 }, { "epoch": 2.0625501202886927, "grad_norm": 0.8142298460006714, "learning_rate": 4.700644169768223e-06, "loss": 0.4894, "step": 2572 }, { "epoch": 2.0633520449077785, "grad_norm": 0.8215280771255493, "learning_rate": 4.693302746168088e-06, "loss": 0.4808, "step": 2573 }, { "epoch": 2.0641539695268643, "grad_norm": 0.8594109416007996, "learning_rate": 4.685965301710276e-06, "loss": 0.4986, "step": 2574 }, { "epoch": 2.0649558941459505, "grad_norm": 0.7795203328132629, "learning_rate": 4.678631841896657e-06, "loss": 0.4765, "step": 2575 }, { "epoch": 2.0657578187650363, "grad_norm": 0.8334662318229675, "learning_rate": 4.6713023722261106e-06, "loss": 0.4933, "step": 2576 }, { "epoch": 2.066559743384122, "grad_norm": 0.8101879954338074, "learning_rate": 4.663976898194516e-06, "loss": 0.5096, "step": 2577 }, { "epoch": 2.067361668003208, "grad_norm": 0.8048333525657654, "learning_rate": 4.656655425294774e-06, "loss": 0.4825, "step": 2578 }, { "epoch": 2.0681635926222937, "grad_norm": 0.8437180519104004, "learning_rate": 4.649337959016764e-06, "loss": 0.4801, "step": 2579 }, { "epoch": 2.0689655172413794, "grad_norm": 0.8027264475822449, "learning_rate": 4.6420245048473766e-06, "loss": 0.4864, "step": 2580 }, { "epoch": 2.0697674418604652, "grad_norm": 0.8142487406730652, "learning_rate": 4.634715068270491e-06, "loss": 0.494, "step": 2581 }, { "epoch": 2.070569366479551, "grad_norm": 0.8042910695075989, "learning_rate": 4.6274096547669625e-06, "loss": 0.4805, "step": 2582 }, { "epoch": 2.071371291098637, "grad_norm": 0.8223109245300293, "learning_rate": 4.62010826981465e-06, "loss": 0.5094, "step": 2583 }, { "epoch": 2.0721732157177226, "grad_norm": 0.8160894513130188, "learning_rate": 4.612810918888374e-06, "loss": 0.4881, "step": 2584 }, { "epoch": 2.0729751403368084, "grad_norm": 0.8002573847770691, "learning_rate": 4.605517607459938e-06, "loss": 0.488, "step": 2585 }, { "epoch": 2.073777064955894, "grad_norm": 0.7782284021377563, "learning_rate": 4.598228340998118e-06, "loss": 0.4976, "step": 2586 }, { "epoch": 2.07457898957498, "grad_norm": 0.7785077691078186, "learning_rate": 4.590943124968651e-06, "loss": 0.4935, "step": 2587 }, { "epoch": 2.075380914194066, "grad_norm": 0.8213626742362976, "learning_rate": 4.583661964834238e-06, "loss": 0.4781, "step": 2588 }, { "epoch": 2.0761828388131516, "grad_norm": 0.8277866244316101, "learning_rate": 4.576384866054546e-06, "loss": 0.4833, "step": 2589 }, { "epoch": 2.0769847634322374, "grad_norm": 0.8673251867294312, "learning_rate": 4.5691118340861885e-06, "loss": 0.4927, "step": 2590 }, { "epoch": 2.077786688051323, "grad_norm": 0.8178399205207825, "learning_rate": 4.561842874382737e-06, "loss": 0.504, "step": 2591 }, { "epoch": 2.078588612670409, "grad_norm": 0.8196151852607727, "learning_rate": 4.554577992394697e-06, "loss": 0.4728, "step": 2592 }, { "epoch": 2.0793905372894947, "grad_norm": 0.8163505792617798, "learning_rate": 4.54731719356953e-06, "loss": 0.5019, "step": 2593 }, { "epoch": 2.0801924619085805, "grad_norm": 0.8096843957901001, "learning_rate": 4.540060483351628e-06, "loss": 0.4947, "step": 2594 }, { "epoch": 2.0809943865276663, "grad_norm": 0.846545398235321, "learning_rate": 4.532807867182322e-06, "loss": 0.4999, "step": 2595 }, { "epoch": 2.081796311146752, "grad_norm": 0.8637265563011169, "learning_rate": 4.525559350499872e-06, "loss": 0.503, "step": 2596 }, { "epoch": 2.082598235765838, "grad_norm": 0.795164942741394, "learning_rate": 4.5183149387394566e-06, "loss": 0.488, "step": 2597 }, { "epoch": 2.0834001603849237, "grad_norm": 0.8308284282684326, "learning_rate": 4.511074637333185e-06, "loss": 0.4855, "step": 2598 }, { "epoch": 2.0842020850040095, "grad_norm": 0.8101129531860352, "learning_rate": 4.503838451710082e-06, "loss": 0.483, "step": 2599 }, { "epoch": 2.0850040096230953, "grad_norm": 0.8016064167022705, "learning_rate": 4.49660638729609e-06, "loss": 0.4921, "step": 2600 }, { "epoch": 2.085805934242181, "grad_norm": 0.8189466595649719, "learning_rate": 4.489378449514051e-06, "loss": 0.4983, "step": 2601 }, { "epoch": 2.086607858861267, "grad_norm": 0.8600638508796692, "learning_rate": 4.482154643783722e-06, "loss": 0.506, "step": 2602 }, { "epoch": 2.0874097834803527, "grad_norm": 0.7984684705734253, "learning_rate": 4.4749349755217575e-06, "loss": 0.4746, "step": 2603 }, { "epoch": 2.0882117080994385, "grad_norm": 0.8474909067153931, "learning_rate": 4.467719450141711e-06, "loss": 0.4957, "step": 2604 }, { "epoch": 2.0890136327185242, "grad_norm": 0.8484524488449097, "learning_rate": 4.460508073054033e-06, "loss": 0.4828, "step": 2605 }, { "epoch": 2.0898155573376105, "grad_norm": 0.8325912952423096, "learning_rate": 4.453300849666053e-06, "loss": 0.4942, "step": 2606 }, { "epoch": 2.0906174819566963, "grad_norm": 0.8752564787864685, "learning_rate": 4.446097785381995e-06, "loss": 0.4944, "step": 2607 }, { "epoch": 2.091419406575782, "grad_norm": 0.8515805006027222, "learning_rate": 4.438898885602962e-06, "loss": 0.5203, "step": 2608 }, { "epoch": 2.092221331194868, "grad_norm": 0.8291308283805847, "learning_rate": 4.431704155726936e-06, "loss": 0.5042, "step": 2609 }, { "epoch": 2.0930232558139537, "grad_norm": 0.7905226349830627, "learning_rate": 4.424513601148772e-06, "loss": 0.4855, "step": 2610 }, { "epoch": 2.0938251804330394, "grad_norm": 0.7867658734321594, "learning_rate": 4.417327227260183e-06, "loss": 0.5087, "step": 2611 }, { "epoch": 2.0946271050521252, "grad_norm": 0.7623449563980103, "learning_rate": 4.410145039449771e-06, "loss": 0.493, "step": 2612 }, { "epoch": 2.095429029671211, "grad_norm": 0.816936194896698, "learning_rate": 4.402967043102974e-06, "loss": 0.4888, "step": 2613 }, { "epoch": 2.096230954290297, "grad_norm": 0.7807714343070984, "learning_rate": 4.395793243602102e-06, "loss": 0.4777, "step": 2614 }, { "epoch": 2.0970328789093826, "grad_norm": 0.7865536212921143, "learning_rate": 4.388623646326318e-06, "loss": 0.4997, "step": 2615 }, { "epoch": 2.0978348035284684, "grad_norm": 0.8231841325759888, "learning_rate": 4.381458256651622e-06, "loss": 0.504, "step": 2616 }, { "epoch": 2.098636728147554, "grad_norm": 0.8537681698799133, "learning_rate": 4.374297079950872e-06, "loss": 0.4859, "step": 2617 }, { "epoch": 2.09943865276664, "grad_norm": 0.89604651927948, "learning_rate": 4.367140121593764e-06, "loss": 0.5137, "step": 2618 }, { "epoch": 2.100240577385726, "grad_norm": 0.838858962059021, "learning_rate": 4.359987386946822e-06, "loss": 0.4979, "step": 2619 }, { "epoch": 2.1010425020048116, "grad_norm": 0.8409374952316284, "learning_rate": 4.352838881373421e-06, "loss": 0.4767, "step": 2620 }, { "epoch": 2.1018444266238974, "grad_norm": 0.7959094643592834, "learning_rate": 4.345694610233744e-06, "loss": 0.4772, "step": 2621 }, { "epoch": 2.102646351242983, "grad_norm": 0.8662393689155579, "learning_rate": 4.338554578884813e-06, "loss": 0.4998, "step": 2622 }, { "epoch": 2.103448275862069, "grad_norm": 0.8256474733352661, "learning_rate": 4.331418792680468e-06, "loss": 0.4968, "step": 2623 }, { "epoch": 2.1042502004811547, "grad_norm": 0.8457236289978027, "learning_rate": 4.324287256971358e-06, "loss": 0.5243, "step": 2624 }, { "epoch": 2.1050521251002405, "grad_norm": 0.7527933716773987, "learning_rate": 4.3171599771049625e-06, "loss": 0.4426, "step": 2625 }, { "epoch": 2.1058540497193263, "grad_norm": 0.8174936175346375, "learning_rate": 4.3100369584255475e-06, "loss": 0.5017, "step": 2626 }, { "epoch": 2.106655974338412, "grad_norm": 0.8383927941322327, "learning_rate": 4.302918206274202e-06, "loss": 0.4952, "step": 2627 }, { "epoch": 2.107457898957498, "grad_norm": 0.8061890602111816, "learning_rate": 4.295803725988807e-06, "loss": 0.4923, "step": 2628 }, { "epoch": 2.1082598235765837, "grad_norm": 0.8143693804740906, "learning_rate": 4.2886935229040375e-06, "loss": 0.5019, "step": 2629 }, { "epoch": 2.1090617481956695, "grad_norm": 0.8190400004386902, "learning_rate": 4.281587602351376e-06, "loss": 0.4858, "step": 2630 }, { "epoch": 2.1098636728147553, "grad_norm": 0.7694993615150452, "learning_rate": 4.274485969659074e-06, "loss": 0.4635, "step": 2631 }, { "epoch": 2.110665597433841, "grad_norm": 0.840126633644104, "learning_rate": 4.267388630152182e-06, "loss": 0.506, "step": 2632 }, { "epoch": 2.111467522052927, "grad_norm": 0.8422167301177979, "learning_rate": 4.26029558915253e-06, "loss": 0.4815, "step": 2633 }, { "epoch": 2.1122694466720127, "grad_norm": 0.83141028881073, "learning_rate": 4.2532068519787124e-06, "loss": 0.4826, "step": 2634 }, { "epoch": 2.1130713712910985, "grad_norm": 0.8261462450027466, "learning_rate": 4.246122423946114e-06, "loss": 0.4945, "step": 2635 }, { "epoch": 2.1138732959101842, "grad_norm": 0.8225822448730469, "learning_rate": 4.239042310366875e-06, "loss": 0.4753, "step": 2636 }, { "epoch": 2.11467522052927, "grad_norm": 0.8107204437255859, "learning_rate": 4.23196651654991e-06, "loss": 0.4763, "step": 2637 }, { "epoch": 2.115477145148356, "grad_norm": 0.8356348276138306, "learning_rate": 4.224895047800892e-06, "loss": 0.4931, "step": 2638 }, { "epoch": 2.116279069767442, "grad_norm": 0.803632915019989, "learning_rate": 4.217827909422241e-06, "loss": 0.4685, "step": 2639 }, { "epoch": 2.117080994386528, "grad_norm": 0.8820094466209412, "learning_rate": 4.210765106713143e-06, "loss": 0.4863, "step": 2640 }, { "epoch": 2.1178829190056137, "grad_norm": 0.8546995520591736, "learning_rate": 4.2037066449695275e-06, "loss": 0.496, "step": 2641 }, { "epoch": 2.1186848436246994, "grad_norm": 0.8304917216300964, "learning_rate": 4.196652529484068e-06, "loss": 0.5053, "step": 2642 }, { "epoch": 2.1194867682437852, "grad_norm": 0.8051602244377136, "learning_rate": 4.189602765546188e-06, "loss": 0.5033, "step": 2643 }, { "epoch": 2.120288692862871, "grad_norm": 0.8486653566360474, "learning_rate": 4.18255735844203e-06, "loss": 0.5049, "step": 2644 }, { "epoch": 2.121090617481957, "grad_norm": 0.8334927558898926, "learning_rate": 4.175516313454485e-06, "loss": 0.5047, "step": 2645 }, { "epoch": 2.1218925421010426, "grad_norm": 0.8217371106147766, "learning_rate": 4.168479635863167e-06, "loss": 0.5031, "step": 2646 }, { "epoch": 2.1226944667201284, "grad_norm": 0.8687995076179504, "learning_rate": 4.161447330944422e-06, "loss": 0.5255, "step": 2647 }, { "epoch": 2.123496391339214, "grad_norm": 0.838424026966095, "learning_rate": 4.154419403971305e-06, "loss": 0.4944, "step": 2648 }, { "epoch": 2.1242983159583, "grad_norm": 0.7598464488983154, "learning_rate": 4.1473958602135956e-06, "loss": 0.4791, "step": 2649 }, { "epoch": 2.125100240577386, "grad_norm": 0.9013649821281433, "learning_rate": 4.140376704937789e-06, "loss": 0.5074, "step": 2650 }, { "epoch": 2.1259021651964716, "grad_norm": 0.8267843723297119, "learning_rate": 4.133361943407085e-06, "loss": 0.4883, "step": 2651 }, { "epoch": 2.1267040898155574, "grad_norm": 0.8689286112785339, "learning_rate": 4.126351580881395e-06, "loss": 0.4759, "step": 2652 }, { "epoch": 2.127506014434643, "grad_norm": 0.912152886390686, "learning_rate": 4.11934562261732e-06, "loss": 0.5206, "step": 2653 }, { "epoch": 2.128307939053729, "grad_norm": 0.832675576210022, "learning_rate": 4.112344073868171e-06, "loss": 0.4897, "step": 2654 }, { "epoch": 2.1291098636728147, "grad_norm": 0.8386573791503906, "learning_rate": 4.105346939883946e-06, "loss": 0.5046, "step": 2655 }, { "epoch": 2.1299117882919005, "grad_norm": 0.8459692597389221, "learning_rate": 4.098354225911336e-06, "loss": 0.5104, "step": 2656 }, { "epoch": 2.1307137129109863, "grad_norm": 0.8297333121299744, "learning_rate": 4.091365937193719e-06, "loss": 0.4764, "step": 2657 }, { "epoch": 2.131515637530072, "grad_norm": 0.8380979895591736, "learning_rate": 4.084382078971143e-06, "loss": 0.4874, "step": 2658 }, { "epoch": 2.132317562149158, "grad_norm": 0.8121061325073242, "learning_rate": 4.0774026564803494e-06, "loss": 0.5001, "step": 2659 }, { "epoch": 2.1331194867682437, "grad_norm": 0.8189265727996826, "learning_rate": 4.070427674954748e-06, "loss": 0.4694, "step": 2660 }, { "epoch": 2.1339214113873295, "grad_norm": 0.8043553829193115, "learning_rate": 4.063457139624407e-06, "loss": 0.4957, "step": 2661 }, { "epoch": 2.1347233360064153, "grad_norm": 0.7954015731811523, "learning_rate": 4.056491055716088e-06, "loss": 0.4764, "step": 2662 }, { "epoch": 2.135525260625501, "grad_norm": 0.7770733833312988, "learning_rate": 4.049529428453184e-06, "loss": 0.4811, "step": 2663 }, { "epoch": 2.136327185244587, "grad_norm": 0.8199390172958374, "learning_rate": 4.042572263055765e-06, "loss": 0.4998, "step": 2664 }, { "epoch": 2.1371291098636727, "grad_norm": 0.8421967029571533, "learning_rate": 4.035619564740555e-06, "loss": 0.4694, "step": 2665 }, { "epoch": 2.1379310344827585, "grad_norm": 0.8318009376525879, "learning_rate": 4.028671338720912e-06, "loss": 0.4854, "step": 2666 }, { "epoch": 2.1387329591018442, "grad_norm": 0.8843598365783691, "learning_rate": 4.021727590206868e-06, "loss": 0.4877, "step": 2667 }, { "epoch": 2.13953488372093, "grad_norm": 0.8518946170806885, "learning_rate": 4.01478832440507e-06, "loss": 0.5328, "step": 2668 }, { "epoch": 2.140336808340016, "grad_norm": 0.8255460858345032, "learning_rate": 4.00785354651882e-06, "loss": 0.5143, "step": 2669 }, { "epoch": 2.141138732959102, "grad_norm": 0.8585113286972046, "learning_rate": 4.000923261748055e-06, "loss": 0.5018, "step": 2670 }, { "epoch": 2.141940657578188, "grad_norm": 0.8432846665382385, "learning_rate": 3.9939974752893275e-06, "loss": 0.4755, "step": 2671 }, { "epoch": 2.1427425821972736, "grad_norm": 0.9172224402427673, "learning_rate": 3.9870761923358405e-06, "loss": 0.4902, "step": 2672 }, { "epoch": 2.1435445068163594, "grad_norm": 0.8651891946792603, "learning_rate": 3.980159418077403e-06, "loss": 0.5009, "step": 2673 }, { "epoch": 2.1443464314354452, "grad_norm": 0.8147042393684387, "learning_rate": 3.97324715770044e-06, "loss": 0.5091, "step": 2674 }, { "epoch": 2.145148356054531, "grad_norm": 0.8397491574287415, "learning_rate": 3.966339416388013e-06, "loss": 0.4879, "step": 2675 }, { "epoch": 2.145950280673617, "grad_norm": 0.8129686713218689, "learning_rate": 3.959436199319771e-06, "loss": 0.5036, "step": 2676 }, { "epoch": 2.1467522052927026, "grad_norm": 0.8532066345214844, "learning_rate": 3.952537511671988e-06, "loss": 0.4864, "step": 2677 }, { "epoch": 2.1475541299117884, "grad_norm": 0.817719578742981, "learning_rate": 3.9456433586175335e-06, "loss": 0.4962, "step": 2678 }, { "epoch": 2.148356054530874, "grad_norm": 0.8393282890319824, "learning_rate": 3.938753745325872e-06, "loss": 0.4987, "step": 2679 }, { "epoch": 2.14915797914996, "grad_norm": 0.9043245315551758, "learning_rate": 3.931868676963082e-06, "loss": 0.5131, "step": 2680 }, { "epoch": 2.1499599037690458, "grad_norm": 0.876274049282074, "learning_rate": 3.924988158691812e-06, "loss": 0.5018, "step": 2681 }, { "epoch": 2.1507618283881316, "grad_norm": 0.8482949137687683, "learning_rate": 3.918112195671313e-06, "loss": 0.5111, "step": 2682 }, { "epoch": 2.1515637530072174, "grad_norm": 0.8316020965576172, "learning_rate": 3.9112407930574195e-06, "loss": 0.5079, "step": 2683 }, { "epoch": 2.152365677626303, "grad_norm": 0.8304176330566406, "learning_rate": 3.904373956002532e-06, "loss": 0.4938, "step": 2684 }, { "epoch": 2.153167602245389, "grad_norm": 0.8400371670722961, "learning_rate": 3.897511689655653e-06, "loss": 0.4879, "step": 2685 }, { "epoch": 2.1539695268644747, "grad_norm": 0.8171892762184143, "learning_rate": 3.890653999162333e-06, "loss": 0.4841, "step": 2686 }, { "epoch": 2.1547714514835605, "grad_norm": 0.8671571612358093, "learning_rate": 3.8838008896647075e-06, "loss": 0.5202, "step": 2687 }, { "epoch": 2.1555733761026463, "grad_norm": 0.8403294682502747, "learning_rate": 3.876952366301472e-06, "loss": 0.5099, "step": 2688 }, { "epoch": 2.156375300721732, "grad_norm": 0.8804596066474915, "learning_rate": 3.870108434207877e-06, "loss": 0.508, "step": 2689 }, { "epoch": 2.157177225340818, "grad_norm": 0.8431464433670044, "learning_rate": 3.863269098515738e-06, "loss": 0.5076, "step": 2690 }, { "epoch": 2.1579791499599037, "grad_norm": 0.8322014212608337, "learning_rate": 3.856434364353424e-06, "loss": 0.4894, "step": 2691 }, { "epoch": 2.1587810745789895, "grad_norm": 0.845043420791626, "learning_rate": 3.84960423684585e-06, "loss": 0.489, "step": 2692 }, { "epoch": 2.1595829991980753, "grad_norm": 0.8513469696044922, "learning_rate": 3.842778721114482e-06, "loss": 0.4815, "step": 2693 }, { "epoch": 2.160384923817161, "grad_norm": 0.8116580247879028, "learning_rate": 3.835957822277317e-06, "loss": 0.4791, "step": 2694 }, { "epoch": 2.161186848436247, "grad_norm": 0.8411436676979065, "learning_rate": 3.829141545448901e-06, "loss": 0.5097, "step": 2695 }, { "epoch": 2.1619887730553327, "grad_norm": 0.8362702131271362, "learning_rate": 3.82232989574031e-06, "loss": 0.4959, "step": 2696 }, { "epoch": 2.1627906976744184, "grad_norm": 0.8088338375091553, "learning_rate": 3.815522878259153e-06, "loss": 0.5093, "step": 2697 }, { "epoch": 2.1635926222935042, "grad_norm": 0.8484572768211365, "learning_rate": 3.8087204981095625e-06, "loss": 0.4834, "step": 2698 }, { "epoch": 2.16439454691259, "grad_norm": 0.8214154839515686, "learning_rate": 3.8019227603921927e-06, "loss": 0.4883, "step": 2699 }, { "epoch": 2.165196471531676, "grad_norm": 0.8145144581794739, "learning_rate": 3.7951296702042194e-06, "loss": 0.4891, "step": 2700 }, { "epoch": 2.165998396150762, "grad_norm": 0.7915971875190735, "learning_rate": 3.7883412326393352e-06, "loss": 0.4821, "step": 2701 }, { "epoch": 2.1668003207698474, "grad_norm": 0.809570848941803, "learning_rate": 3.7815574527877395e-06, "loss": 0.4941, "step": 2702 }, { "epoch": 2.1676022453889336, "grad_norm": 0.8416109085083008, "learning_rate": 3.7747783357361455e-06, "loss": 0.4698, "step": 2703 }, { "epoch": 2.1684041700080194, "grad_norm": 0.8841302394866943, "learning_rate": 3.7680038865677603e-06, "loss": 0.4938, "step": 2704 }, { "epoch": 2.1692060946271052, "grad_norm": 0.8262732028961182, "learning_rate": 3.7612341103622984e-06, "loss": 0.4911, "step": 2705 }, { "epoch": 2.170008019246191, "grad_norm": 0.815339207649231, "learning_rate": 3.7544690121959704e-06, "loss": 0.467, "step": 2706 }, { "epoch": 2.170809943865277, "grad_norm": 0.8234750628471375, "learning_rate": 3.7477085971414785e-06, "loss": 0.4913, "step": 2707 }, { "epoch": 2.1716118684843626, "grad_norm": 0.8718725442886353, "learning_rate": 3.7409528702680078e-06, "loss": 0.505, "step": 2708 }, { "epoch": 2.1724137931034484, "grad_norm": 0.8053902983665466, "learning_rate": 3.7342018366412336e-06, "loss": 0.4958, "step": 2709 }, { "epoch": 2.173215717722534, "grad_norm": 0.8399242758750916, "learning_rate": 3.7274555013233118e-06, "loss": 0.4882, "step": 2710 }, { "epoch": 2.17401764234162, "grad_norm": 0.8590044975280762, "learning_rate": 3.720713869372875e-06, "loss": 0.4969, "step": 2711 }, { "epoch": 2.1748195669607058, "grad_norm": 0.8541808724403381, "learning_rate": 3.71397694584503e-06, "loss": 0.4689, "step": 2712 }, { "epoch": 2.1756214915797916, "grad_norm": 0.8489252328872681, "learning_rate": 3.7072447357913477e-06, "loss": 0.4941, "step": 2713 }, { "epoch": 2.1764234161988774, "grad_norm": 0.8415629267692566, "learning_rate": 3.700517244259868e-06, "loss": 0.4818, "step": 2714 }, { "epoch": 2.177225340817963, "grad_norm": 0.8425331711769104, "learning_rate": 3.693794476295096e-06, "loss": 0.5023, "step": 2715 }, { "epoch": 2.178027265437049, "grad_norm": 0.8632931113243103, "learning_rate": 3.687076436937992e-06, "loss": 0.4943, "step": 2716 }, { "epoch": 2.1788291900561347, "grad_norm": 0.8151559829711914, "learning_rate": 3.6803631312259724e-06, "loss": 0.4764, "step": 2717 }, { "epoch": 2.1796311146752205, "grad_norm": 0.8196760416030884, "learning_rate": 3.6736545641928965e-06, "loss": 0.4779, "step": 2718 }, { "epoch": 2.1804330392943063, "grad_norm": 0.7820659279823303, "learning_rate": 3.6669507408690806e-06, "loss": 0.4855, "step": 2719 }, { "epoch": 2.181234963913392, "grad_norm": 0.8023489713668823, "learning_rate": 3.6602516662812824e-06, "loss": 0.4851, "step": 2720 }, { "epoch": 2.182036888532478, "grad_norm": 0.9628251194953918, "learning_rate": 3.653557345452685e-06, "loss": 0.4893, "step": 2721 }, { "epoch": 2.1828388131515637, "grad_norm": 0.8556442260742188, "learning_rate": 3.6468677834029343e-06, "loss": 0.4775, "step": 2722 }, { "epoch": 2.1836407377706495, "grad_norm": 0.9213688373565674, "learning_rate": 3.6401829851480786e-06, "loss": 0.4782, "step": 2723 }, { "epoch": 2.1844426623897353, "grad_norm": 0.8511019349098206, "learning_rate": 3.6335029557006117e-06, "loss": 0.487, "step": 2724 }, { "epoch": 2.185244587008821, "grad_norm": 0.8275080323219299, "learning_rate": 3.626827700069452e-06, "loss": 0.4673, "step": 2725 }, { "epoch": 2.186046511627907, "grad_norm": 0.8424668908119202, "learning_rate": 3.6201572232599227e-06, "loss": 0.5106, "step": 2726 }, { "epoch": 2.1868484362469927, "grad_norm": 0.9268709421157837, "learning_rate": 3.6134915302737862e-06, "loss": 0.5259, "step": 2727 }, { "epoch": 2.1876503608660784, "grad_norm": 0.8410111665725708, "learning_rate": 3.606830626109198e-06, "loss": 0.5069, "step": 2728 }, { "epoch": 2.1884522854851642, "grad_norm": 0.8103172779083252, "learning_rate": 3.600174515760733e-06, "loss": 0.4787, "step": 2729 }, { "epoch": 2.18925421010425, "grad_norm": 0.8217037916183472, "learning_rate": 3.5935232042193734e-06, "loss": 0.5043, "step": 2730 }, { "epoch": 2.190056134723336, "grad_norm": 0.8149628639221191, "learning_rate": 3.58687669647249e-06, "loss": 0.5117, "step": 2731 }, { "epoch": 2.1908580593424216, "grad_norm": 0.8104637265205383, "learning_rate": 3.5802349975038718e-06, "loss": 0.4684, "step": 2732 }, { "epoch": 2.1916599839615074, "grad_norm": 0.8468414545059204, "learning_rate": 3.573598112293687e-06, "loss": 0.4875, "step": 2733 }, { "epoch": 2.1924619085805936, "grad_norm": 0.8977518677711487, "learning_rate": 3.5669660458184886e-06, "loss": 0.5076, "step": 2734 }, { "epoch": 2.1932638331996794, "grad_norm": 0.8071417808532715, "learning_rate": 3.560338803051241e-06, "loss": 0.4962, "step": 2735 }, { "epoch": 2.1940657578187652, "grad_norm": 0.8739727139472961, "learning_rate": 3.5537163889612656e-06, "loss": 0.5087, "step": 2736 }, { "epoch": 2.194867682437851, "grad_norm": 0.8069101572036743, "learning_rate": 3.547098808514279e-06, "loss": 0.4997, "step": 2737 }, { "epoch": 2.195669607056937, "grad_norm": 0.8516671061515808, "learning_rate": 3.5404860666723695e-06, "loss": 0.4875, "step": 2738 }, { "epoch": 2.1964715316760226, "grad_norm": 0.8388312458992004, "learning_rate": 3.5338781683939882e-06, "loss": 0.4838, "step": 2739 }, { "epoch": 2.1972734562951084, "grad_norm": 0.8394985198974609, "learning_rate": 3.527275118633974e-06, "loss": 0.5076, "step": 2740 }, { "epoch": 2.198075380914194, "grad_norm": 0.8449310064315796, "learning_rate": 3.52067692234351e-06, "loss": 0.4994, "step": 2741 }, { "epoch": 2.19887730553328, "grad_norm": 0.806086003780365, "learning_rate": 3.514083584470149e-06, "loss": 0.4839, "step": 2742 }, { "epoch": 2.1996792301523658, "grad_norm": 0.8380177617073059, "learning_rate": 3.507495109957808e-06, "loss": 0.4765, "step": 2743 }, { "epoch": 2.2004811547714516, "grad_norm": 0.8069396615028381, "learning_rate": 3.5009115037467355e-06, "loss": 0.4788, "step": 2744 }, { "epoch": 2.2012830793905374, "grad_norm": 0.8960397243499756, "learning_rate": 3.4943327707735586e-06, "loss": 0.505, "step": 2745 }, { "epoch": 2.202085004009623, "grad_norm": 0.8247140645980835, "learning_rate": 3.4877589159712266e-06, "loss": 0.4837, "step": 2746 }, { "epoch": 2.202886928628709, "grad_norm": 0.8580472469329834, "learning_rate": 3.481189944269041e-06, "loss": 0.4965, "step": 2747 }, { "epoch": 2.2036888532477947, "grad_norm": 0.8549031615257263, "learning_rate": 3.4746258605926443e-06, "loss": 0.4888, "step": 2748 }, { "epoch": 2.2044907778668805, "grad_norm": 0.8439877033233643, "learning_rate": 3.468066669864004e-06, "loss": 0.4763, "step": 2749 }, { "epoch": 2.2052927024859663, "grad_norm": 0.843323290348053, "learning_rate": 3.461512377001427e-06, "loss": 0.5199, "step": 2750 }, { "epoch": 2.206094627105052, "grad_norm": 0.8714971542358398, "learning_rate": 3.4549629869195467e-06, "loss": 0.489, "step": 2751 }, { "epoch": 2.206896551724138, "grad_norm": 0.8079264163970947, "learning_rate": 3.448418504529318e-06, "loss": 0.4635, "step": 2752 }, { "epoch": 2.2076984763432237, "grad_norm": 0.8408187031745911, "learning_rate": 3.44187893473802e-06, "loss": 0.4858, "step": 2753 }, { "epoch": 2.2085004009623095, "grad_norm": 0.8397002220153809, "learning_rate": 3.435344282449239e-06, "loss": 0.4884, "step": 2754 }, { "epoch": 2.2093023255813953, "grad_norm": 0.9133360981941223, "learning_rate": 3.4288145525628813e-06, "loss": 0.4985, "step": 2755 }, { "epoch": 2.210104250200481, "grad_norm": 0.886596143245697, "learning_rate": 3.422289749975163e-06, "loss": 0.5094, "step": 2756 }, { "epoch": 2.210906174819567, "grad_norm": 0.8573530912399292, "learning_rate": 3.415769879578601e-06, "loss": 0.5027, "step": 2757 }, { "epoch": 2.2117080994386527, "grad_norm": 0.8812514543533325, "learning_rate": 3.4092549462620215e-06, "loss": 0.4878, "step": 2758 }, { "epoch": 2.2125100240577384, "grad_norm": 0.8391367197036743, "learning_rate": 3.4027449549105353e-06, "loss": 0.4836, "step": 2759 }, { "epoch": 2.2133119486768242, "grad_norm": 0.7842381596565247, "learning_rate": 3.3962399104055597e-06, "loss": 0.4747, "step": 2760 }, { "epoch": 2.21411387329591, "grad_norm": 0.8407445549964905, "learning_rate": 3.3897398176247984e-06, "loss": 0.4775, "step": 2761 }, { "epoch": 2.214915797914996, "grad_norm": 0.8051870465278625, "learning_rate": 3.383244681442246e-06, "loss": 0.5205, "step": 2762 }, { "epoch": 2.2157177225340816, "grad_norm": 0.788661539554596, "learning_rate": 3.376754506728167e-06, "loss": 0.4794, "step": 2763 }, { "epoch": 2.2165196471531674, "grad_norm": 0.8352063298225403, "learning_rate": 3.370269298349128e-06, "loss": 0.4687, "step": 2764 }, { "epoch": 2.2173215717722536, "grad_norm": 0.8610337376594543, "learning_rate": 3.363789061167949e-06, "loss": 0.4769, "step": 2765 }, { "epoch": 2.218123496391339, "grad_norm": 0.8776218891143799, "learning_rate": 3.3573138000437367e-06, "loss": 0.469, "step": 2766 }, { "epoch": 2.2189254210104252, "grad_norm": 0.8633357882499695, "learning_rate": 3.3508435198318645e-06, "loss": 0.4961, "step": 2767 }, { "epoch": 2.219727345629511, "grad_norm": 0.8187628984451294, "learning_rate": 3.34437822538396e-06, "loss": 0.4973, "step": 2768 }, { "epoch": 2.220529270248597, "grad_norm": 0.805854082107544, "learning_rate": 3.337917921547934e-06, "loss": 0.5006, "step": 2769 }, { "epoch": 2.2213311948676826, "grad_norm": 0.8261088132858276, "learning_rate": 3.3314626131679328e-06, "loss": 0.4847, "step": 2770 }, { "epoch": 2.2221331194867684, "grad_norm": 0.8578211069107056, "learning_rate": 3.3250123050843696e-06, "loss": 0.4876, "step": 2771 }, { "epoch": 2.222935044105854, "grad_norm": 0.8757601976394653, "learning_rate": 3.318567002133909e-06, "loss": 0.4817, "step": 2772 }, { "epoch": 2.22373696872494, "grad_norm": 0.8412430286407471, "learning_rate": 3.312126709149447e-06, "loss": 0.4905, "step": 2773 }, { "epoch": 2.2245388933440258, "grad_norm": 0.833476722240448, "learning_rate": 3.3056914309601483e-06, "loss": 0.5099, "step": 2774 }, { "epoch": 2.2253408179631116, "grad_norm": 0.8237016797065735, "learning_rate": 3.299261172391399e-06, "loss": 0.4878, "step": 2775 }, { "epoch": 2.2261427425821974, "grad_norm": 0.8863388299942017, "learning_rate": 3.2928359382648166e-06, "loss": 0.5018, "step": 2776 }, { "epoch": 2.226944667201283, "grad_norm": 0.9207762479782104, "learning_rate": 3.286415733398276e-06, "loss": 0.5004, "step": 2777 }, { "epoch": 2.227746591820369, "grad_norm": 0.8882100582122803, "learning_rate": 3.280000562605854e-06, "loss": 0.4769, "step": 2778 }, { "epoch": 2.2285485164394547, "grad_norm": 0.8794893622398376, "learning_rate": 3.2735904306978684e-06, "loss": 0.4896, "step": 2779 }, { "epoch": 2.2293504410585405, "grad_norm": 0.8332772850990295, "learning_rate": 3.2671853424808574e-06, "loss": 0.466, "step": 2780 }, { "epoch": 2.2301523656776263, "grad_norm": 0.827366292476654, "learning_rate": 3.2607853027575643e-06, "loss": 0.4785, "step": 2781 }, { "epoch": 2.230954290296712, "grad_norm": 0.8560011386871338, "learning_rate": 3.2543903163269697e-06, "loss": 0.5128, "step": 2782 }, { "epoch": 2.231756214915798, "grad_norm": 0.8379847407341003, "learning_rate": 3.2480003879842424e-06, "loss": 0.4706, "step": 2783 }, { "epoch": 2.2325581395348837, "grad_norm": 0.8355280756950378, "learning_rate": 3.2416155225207726e-06, "loss": 0.4936, "step": 2784 }, { "epoch": 2.2333600641539695, "grad_norm": 0.8986692428588867, "learning_rate": 3.2352357247241517e-06, "loss": 0.4908, "step": 2785 }, { "epoch": 2.2341619887730553, "grad_norm": 0.8830366730690002, "learning_rate": 3.2288609993781606e-06, "loss": 0.4813, "step": 2786 }, { "epoch": 2.234963913392141, "grad_norm": 0.8496928811073303, "learning_rate": 3.2224913512627976e-06, "loss": 0.4854, "step": 2787 }, { "epoch": 2.235765838011227, "grad_norm": 0.8446789979934692, "learning_rate": 3.2161267851542333e-06, "loss": 0.5021, "step": 2788 }, { "epoch": 2.2365677626303127, "grad_norm": 0.8232909440994263, "learning_rate": 3.2097673058248378e-06, "loss": 0.4641, "step": 2789 }, { "epoch": 2.2373696872493984, "grad_norm": 0.8378138542175293, "learning_rate": 3.2034129180431705e-06, "loss": 0.4796, "step": 2790 }, { "epoch": 2.2381716118684842, "grad_norm": 0.8232404589653015, "learning_rate": 3.1970636265739595e-06, "loss": 0.4931, "step": 2791 }, { "epoch": 2.23897353648757, "grad_norm": 0.8911257982254028, "learning_rate": 3.1907194361781234e-06, "loss": 0.4851, "step": 2792 }, { "epoch": 2.239775461106656, "grad_norm": 0.8609054684638977, "learning_rate": 3.1843803516127537e-06, "loss": 0.4906, "step": 2793 }, { "epoch": 2.2405773857257416, "grad_norm": 0.869034469127655, "learning_rate": 3.178046377631109e-06, "loss": 0.4871, "step": 2794 }, { "epoch": 2.2413793103448274, "grad_norm": 0.8282964825630188, "learning_rate": 3.1717175189826246e-06, "loss": 0.4746, "step": 2795 }, { "epoch": 2.242181234963913, "grad_norm": 0.7694876194000244, "learning_rate": 3.1653937804128863e-06, "loss": 0.4599, "step": 2796 }, { "epoch": 2.242983159582999, "grad_norm": 0.8961299657821655, "learning_rate": 3.159075166663653e-06, "loss": 0.465, "step": 2797 }, { "epoch": 2.2437850842020852, "grad_norm": 0.8370431661605835, "learning_rate": 3.1527616824728356e-06, "loss": 0.4863, "step": 2798 }, { "epoch": 2.244587008821171, "grad_norm": 0.8903129696846008, "learning_rate": 3.1464533325744997e-06, "loss": 0.4786, "step": 2799 }, { "epoch": 2.245388933440257, "grad_norm": 0.8716090321540833, "learning_rate": 3.140150121698864e-06, "loss": 0.5046, "step": 2800 }, { "epoch": 2.2461908580593426, "grad_norm": 0.8388259410858154, "learning_rate": 3.1338520545722852e-06, "loss": 0.4695, "step": 2801 }, { "epoch": 2.2469927826784284, "grad_norm": 0.8695220351219177, "learning_rate": 3.1275591359172698e-06, "loss": 0.497, "step": 2802 }, { "epoch": 2.247794707297514, "grad_norm": 0.8859379291534424, "learning_rate": 3.1212713704524644e-06, "loss": 0.5044, "step": 2803 }, { "epoch": 2.2485966319166, "grad_norm": 0.8411895632743835, "learning_rate": 3.114988762892649e-06, "loss": 0.4905, "step": 2804 }, { "epoch": 2.2493985565356858, "grad_norm": 0.874116837978363, "learning_rate": 3.1087113179487394e-06, "loss": 0.5039, "step": 2805 }, { "epoch": 2.2502004811547716, "grad_norm": 0.8561678528785706, "learning_rate": 3.102439040327773e-06, "loss": 0.4955, "step": 2806 }, { "epoch": 2.2510024057738574, "grad_norm": 0.8340683579444885, "learning_rate": 3.096171934732918e-06, "loss": 0.4832, "step": 2807 }, { "epoch": 2.251804330392943, "grad_norm": 0.8148999810218811, "learning_rate": 3.0899100058634646e-06, "loss": 0.4948, "step": 2808 }, { "epoch": 2.252606255012029, "grad_norm": 0.827923595905304, "learning_rate": 3.0836532584148237e-06, "loss": 0.4748, "step": 2809 }, { "epoch": 2.2534081796311147, "grad_norm": 0.852433443069458, "learning_rate": 3.0774016970785116e-06, "loss": 0.5062, "step": 2810 }, { "epoch": 2.2542101042502005, "grad_norm": 0.8013387322425842, "learning_rate": 3.0711553265421645e-06, "loss": 0.4702, "step": 2811 }, { "epoch": 2.2550120288692863, "grad_norm": 0.8474105596542358, "learning_rate": 3.0649141514895243e-06, "loss": 0.4855, "step": 2812 }, { "epoch": 2.255813953488372, "grad_norm": 0.844879150390625, "learning_rate": 3.058678176600436e-06, "loss": 0.503, "step": 2813 }, { "epoch": 2.256615878107458, "grad_norm": 0.8638342022895813, "learning_rate": 3.0524474065508492e-06, "loss": 0.4956, "step": 2814 }, { "epoch": 2.2574178027265437, "grad_norm": 0.8933137655258179, "learning_rate": 3.0462218460128e-06, "loss": 0.5089, "step": 2815 }, { "epoch": 2.2582197273456295, "grad_norm": 0.8648194074630737, "learning_rate": 3.0400014996544314e-06, "loss": 0.4774, "step": 2816 }, { "epoch": 2.2590216519647153, "grad_norm": 0.8064270615577698, "learning_rate": 3.0337863721399694e-06, "loss": 0.4732, "step": 2817 }, { "epoch": 2.259823576583801, "grad_norm": 0.8423921465873718, "learning_rate": 3.0275764681297292e-06, "loss": 0.4763, "step": 2818 }, { "epoch": 2.260625501202887, "grad_norm": 0.8474961519241333, "learning_rate": 3.02137179228011e-06, "loss": 0.4917, "step": 2819 }, { "epoch": 2.2614274258219726, "grad_norm": 0.8607854843139648, "learning_rate": 3.0151723492435837e-06, "loss": 0.4909, "step": 2820 }, { "epoch": 2.2622293504410584, "grad_norm": 0.8570009469985962, "learning_rate": 3.008978143668707e-06, "loss": 0.48, "step": 2821 }, { "epoch": 2.2630312750601442, "grad_norm": 0.8390832543373108, "learning_rate": 3.00278918020011e-06, "loss": 0.4902, "step": 2822 }, { "epoch": 2.26383319967923, "grad_norm": 0.8215196132659912, "learning_rate": 2.9966054634784756e-06, "loss": 0.4789, "step": 2823 }, { "epoch": 2.264635124298316, "grad_norm": 0.836609423160553, "learning_rate": 2.990426998140582e-06, "loss": 0.4532, "step": 2824 }, { "epoch": 2.2654370489174016, "grad_norm": 0.8278366923332214, "learning_rate": 2.9842537888192414e-06, "loss": 0.4777, "step": 2825 }, { "epoch": 2.2662389735364874, "grad_norm": 0.8481134176254272, "learning_rate": 2.97808584014334e-06, "loss": 0.476, "step": 2826 }, { "epoch": 2.267040898155573, "grad_norm": 0.8720846176147461, "learning_rate": 2.9719231567378182e-06, "loss": 0.4861, "step": 2827 }, { "epoch": 2.267842822774659, "grad_norm": 0.8726981282234192, "learning_rate": 2.9657657432236573e-06, "loss": 0.4858, "step": 2828 }, { "epoch": 2.268644747393745, "grad_norm": 0.9136954545974731, "learning_rate": 2.959613604217908e-06, "loss": 0.5009, "step": 2829 }, { "epoch": 2.2694466720128306, "grad_norm": 0.8403001427650452, "learning_rate": 2.953466744333644e-06, "loss": 0.4844, "step": 2830 }, { "epoch": 2.270248596631917, "grad_norm": 0.8584656715393066, "learning_rate": 2.947325168179994e-06, "loss": 0.4881, "step": 2831 }, { "epoch": 2.2710505212510026, "grad_norm": 0.8448304533958435, "learning_rate": 2.9411888803621237e-06, "loss": 0.4938, "step": 2832 }, { "epoch": 2.2718524458700884, "grad_norm": 0.8816596269607544, "learning_rate": 2.9350578854812194e-06, "loss": 0.5019, "step": 2833 }, { "epoch": 2.272654370489174, "grad_norm": 0.8737924098968506, "learning_rate": 2.9289321881345257e-06, "loss": 0.4926, "step": 2834 }, { "epoch": 2.27345629510826, "grad_norm": 0.8343014121055603, "learning_rate": 2.922811792915291e-06, "loss": 0.4696, "step": 2835 }, { "epoch": 2.2742582197273458, "grad_norm": 0.8349881768226624, "learning_rate": 2.916696704412789e-06, "loss": 0.4689, "step": 2836 }, { "epoch": 2.2750601443464316, "grad_norm": 0.8880947232246399, "learning_rate": 2.9105869272123366e-06, "loss": 0.5054, "step": 2837 }, { "epoch": 2.2758620689655173, "grad_norm": 0.8601458072662354, "learning_rate": 2.9044824658952407e-06, "loss": 0.4892, "step": 2838 }, { "epoch": 2.276663993584603, "grad_norm": 0.8508468270301819, "learning_rate": 2.898383325038838e-06, "loss": 0.48, "step": 2839 }, { "epoch": 2.277465918203689, "grad_norm": 0.8209986090660095, "learning_rate": 2.8922895092164773e-06, "loss": 0.4873, "step": 2840 }, { "epoch": 2.2782678428227747, "grad_norm": 0.820483922958374, "learning_rate": 2.886201022997497e-06, "loss": 0.4926, "step": 2841 }, { "epoch": 2.2790697674418605, "grad_norm": 0.8504437208175659, "learning_rate": 2.8801178709472645e-06, "loss": 0.4753, "step": 2842 }, { "epoch": 2.2798716920609463, "grad_norm": 0.8321656584739685, "learning_rate": 2.8740400576271265e-06, "loss": 0.465, "step": 2843 }, { "epoch": 2.280673616680032, "grad_norm": 0.8143665194511414, "learning_rate": 2.8679675875944356e-06, "loss": 0.4641, "step": 2844 }, { "epoch": 2.281475541299118, "grad_norm": 0.8252444267272949, "learning_rate": 2.8619004654025418e-06, "loss": 0.4784, "step": 2845 }, { "epoch": 2.2822774659182037, "grad_norm": 0.8725248575210571, "learning_rate": 2.85583869560077e-06, "loss": 0.4836, "step": 2846 }, { "epoch": 2.2830793905372895, "grad_norm": 0.8364203572273254, "learning_rate": 2.8497822827344522e-06, "loss": 0.4934, "step": 2847 }, { "epoch": 2.2838813151563753, "grad_norm": 0.9842785000801086, "learning_rate": 2.8437312313448863e-06, "loss": 0.5294, "step": 2848 }, { "epoch": 2.284683239775461, "grad_norm": 0.8217899799346924, "learning_rate": 2.837685545969359e-06, "loss": 0.4845, "step": 2849 }, { "epoch": 2.285485164394547, "grad_norm": 0.847512423992157, "learning_rate": 2.8316452311411326e-06, "loss": 0.4888, "step": 2850 }, { "epoch": 2.2862870890136326, "grad_norm": 0.8483214974403381, "learning_rate": 2.8256102913894355e-06, "loss": 0.4856, "step": 2851 }, { "epoch": 2.2870890136327184, "grad_norm": 0.8673418760299683, "learning_rate": 2.8195807312394763e-06, "loss": 0.4837, "step": 2852 }, { "epoch": 2.2878909382518042, "grad_norm": 0.8354519009590149, "learning_rate": 2.8135565552124224e-06, "loss": 0.4729, "step": 2853 }, { "epoch": 2.28869286287089, "grad_norm": 0.8773465752601624, "learning_rate": 2.8075377678254058e-06, "loss": 0.492, "step": 2854 }, { "epoch": 2.289494787489976, "grad_norm": 0.8436979055404663, "learning_rate": 2.801524373591522e-06, "loss": 0.4971, "step": 2855 }, { "epoch": 2.2902967121090616, "grad_norm": 0.8587144613265991, "learning_rate": 2.7955163770198136e-06, "loss": 0.4994, "step": 2856 }, { "epoch": 2.2910986367281474, "grad_norm": 0.8141018152236938, "learning_rate": 2.789513782615283e-06, "loss": 0.4767, "step": 2857 }, { "epoch": 2.291900561347233, "grad_norm": 0.8428380489349365, "learning_rate": 2.78351659487888e-06, "loss": 0.5074, "step": 2858 }, { "epoch": 2.292702485966319, "grad_norm": 0.8327274918556213, "learning_rate": 2.777524818307501e-06, "loss": 0.4801, "step": 2859 }, { "epoch": 2.293504410585405, "grad_norm": 0.8546361327171326, "learning_rate": 2.7715384573939865e-06, "loss": 0.4726, "step": 2860 }, { "epoch": 2.2943063352044906, "grad_norm": 0.8553916811943054, "learning_rate": 2.7655575166271067e-06, "loss": 0.4861, "step": 2861 }, { "epoch": 2.295108259823577, "grad_norm": 0.8089895844459534, "learning_rate": 2.7595820004915795e-06, "loss": 0.4627, "step": 2862 }, { "epoch": 2.295910184442662, "grad_norm": 0.8807501196861267, "learning_rate": 2.7536119134680493e-06, "loss": 0.4779, "step": 2863 }, { "epoch": 2.2967121090617484, "grad_norm": 0.8504637479782104, "learning_rate": 2.747647260033095e-06, "loss": 0.5165, "step": 2864 }, { "epoch": 2.297514033680834, "grad_norm": 0.8142641186714172, "learning_rate": 2.7416880446592087e-06, "loss": 0.4904, "step": 2865 }, { "epoch": 2.29831595829992, "grad_norm": 0.8312305808067322, "learning_rate": 2.7357342718148184e-06, "loss": 0.5014, "step": 2866 }, { "epoch": 2.2991178829190058, "grad_norm": 0.8162922859191895, "learning_rate": 2.729785945964264e-06, "loss": 0.4728, "step": 2867 }, { "epoch": 2.2999198075380916, "grad_norm": 0.8514026999473572, "learning_rate": 2.723843071567803e-06, "loss": 0.4964, "step": 2868 }, { "epoch": 2.3007217321571773, "grad_norm": 0.8272191882133484, "learning_rate": 2.717905653081608e-06, "loss": 0.4783, "step": 2869 }, { "epoch": 2.301523656776263, "grad_norm": 0.8552436232566833, "learning_rate": 2.7119736949577534e-06, "loss": 0.4773, "step": 2870 }, { "epoch": 2.302325581395349, "grad_norm": 0.8255532383918762, "learning_rate": 2.706047201644224e-06, "loss": 0.4754, "step": 2871 }, { "epoch": 2.3031275060144347, "grad_norm": 0.8571800589561462, "learning_rate": 2.7001261775849086e-06, "loss": 0.5073, "step": 2872 }, { "epoch": 2.3039294306335205, "grad_norm": 0.8622461557388306, "learning_rate": 2.69421062721959e-06, "loss": 0.5018, "step": 2873 }, { "epoch": 2.3047313552526063, "grad_norm": 0.8514299392700195, "learning_rate": 2.688300554983955e-06, "loss": 0.4727, "step": 2874 }, { "epoch": 2.305533279871692, "grad_norm": 0.8603047728538513, "learning_rate": 2.682395965309569e-06, "loss": 0.4859, "step": 2875 }, { "epoch": 2.306335204490778, "grad_norm": 0.8229160308837891, "learning_rate": 2.6764968626238986e-06, "loss": 0.4869, "step": 2876 }, { "epoch": 2.3071371291098637, "grad_norm": 0.8354660868644714, "learning_rate": 2.6706032513502913e-06, "loss": 0.4736, "step": 2877 }, { "epoch": 2.3079390537289495, "grad_norm": 0.8746702075004578, "learning_rate": 2.664715135907977e-06, "loss": 0.482, "step": 2878 }, { "epoch": 2.3087409783480353, "grad_norm": 0.8408623933792114, "learning_rate": 2.65883252071207e-06, "loss": 0.4917, "step": 2879 }, { "epoch": 2.309542902967121, "grad_norm": 0.8481791615486145, "learning_rate": 2.652955410173548e-06, "loss": 0.4974, "step": 2880 }, { "epoch": 2.310344827586207, "grad_norm": 0.8575053215026855, "learning_rate": 2.6470838086992724e-06, "loss": 0.4732, "step": 2881 }, { "epoch": 2.3111467522052926, "grad_norm": 0.857205867767334, "learning_rate": 2.641217720691972e-06, "loss": 0.4728, "step": 2882 }, { "epoch": 2.3119486768243784, "grad_norm": 0.8567532300949097, "learning_rate": 2.6353571505502317e-06, "loss": 0.4909, "step": 2883 }, { "epoch": 2.3127506014434642, "grad_norm": 0.8322728276252747, "learning_rate": 2.6295021026685176e-06, "loss": 0.4863, "step": 2884 }, { "epoch": 2.31355252606255, "grad_norm": 0.8363150954246521, "learning_rate": 2.623652581437135e-06, "loss": 0.4861, "step": 2885 }, { "epoch": 2.314354450681636, "grad_norm": 0.8756260871887207, "learning_rate": 2.617808591242258e-06, "loss": 0.5115, "step": 2886 }, { "epoch": 2.3151563753007216, "grad_norm": 0.8478529453277588, "learning_rate": 2.6119701364659124e-06, "loss": 0.4944, "step": 2887 }, { "epoch": 2.3159582999198074, "grad_norm": 0.8674932718276978, "learning_rate": 2.6061372214859595e-06, "loss": 0.4886, "step": 2888 }, { "epoch": 2.316760224538893, "grad_norm": 0.8758816719055176, "learning_rate": 2.6003098506761316e-06, "loss": 0.5083, "step": 2889 }, { "epoch": 2.317562149157979, "grad_norm": 0.8525510430335999, "learning_rate": 2.5944880284059804e-06, "loss": 0.4809, "step": 2890 }, { "epoch": 2.3183640737770648, "grad_norm": 0.840334415435791, "learning_rate": 2.588671759040909e-06, "loss": 0.4929, "step": 2891 }, { "epoch": 2.3191659983961506, "grad_norm": 0.8207436203956604, "learning_rate": 2.582861046942158e-06, "loss": 0.4818, "step": 2892 }, { "epoch": 2.319967923015237, "grad_norm": 0.8692548871040344, "learning_rate": 2.577055896466788e-06, "loss": 0.4912, "step": 2893 }, { "epoch": 2.320769847634322, "grad_norm": 0.8304778337478638, "learning_rate": 2.571256311967709e-06, "loss": 0.4636, "step": 2894 }, { "epoch": 2.3215717722534084, "grad_norm": 0.8468001484870911, "learning_rate": 2.565462297793644e-06, "loss": 0.4784, "step": 2895 }, { "epoch": 2.322373696872494, "grad_norm": 0.8657370209693909, "learning_rate": 2.5596738582891335e-06, "loss": 0.4685, "step": 2896 }, { "epoch": 2.32317562149158, "grad_norm": 0.8483834862709045, "learning_rate": 2.5538909977945593e-06, "loss": 0.4642, "step": 2897 }, { "epoch": 2.3239775461106658, "grad_norm": 0.8330668210983276, "learning_rate": 2.5481137206460994e-06, "loss": 0.4746, "step": 2898 }, { "epoch": 2.3247794707297516, "grad_norm": 0.7988243103027344, "learning_rate": 2.542342031175754e-06, "loss": 0.4604, "step": 2899 }, { "epoch": 2.3255813953488373, "grad_norm": 0.8841968774795532, "learning_rate": 2.536575933711336e-06, "loss": 0.4834, "step": 2900 }, { "epoch": 2.326383319967923, "grad_norm": 0.9015833735466003, "learning_rate": 2.5308154325764543e-06, "loss": 0.4787, "step": 2901 }, { "epoch": 2.327185244587009, "grad_norm": 0.8655214905738831, "learning_rate": 2.5250605320905387e-06, "loss": 0.5111, "step": 2902 }, { "epoch": 2.3279871692060947, "grad_norm": 0.8751394152641296, "learning_rate": 2.519311236568801e-06, "loss": 0.4828, "step": 2903 }, { "epoch": 2.3287890938251805, "grad_norm": 0.8501570820808411, "learning_rate": 2.5135675503222623e-06, "loss": 0.4704, "step": 2904 }, { "epoch": 2.3295910184442663, "grad_norm": 0.8615387678146362, "learning_rate": 2.5078294776577372e-06, "loss": 0.4816, "step": 2905 }, { "epoch": 2.330392943063352, "grad_norm": 0.8755018711090088, "learning_rate": 2.5020970228778198e-06, "loss": 0.4831, "step": 2906 }, { "epoch": 2.331194867682438, "grad_norm": 0.877673327922821, "learning_rate": 2.49637019028091e-06, "loss": 0.4664, "step": 2907 }, { "epoch": 2.3319967923015237, "grad_norm": 0.8591618537902832, "learning_rate": 2.4906489841611736e-06, "loss": 0.4914, "step": 2908 }, { "epoch": 2.3327987169206095, "grad_norm": 0.820887565612793, "learning_rate": 2.48493340880857e-06, "loss": 0.4756, "step": 2909 }, { "epoch": 2.3336006415396953, "grad_norm": 0.8460831642150879, "learning_rate": 2.4792234685088312e-06, "loss": 0.4756, "step": 2910 }, { "epoch": 2.334402566158781, "grad_norm": 0.8340601921081543, "learning_rate": 2.473519167543467e-06, "loss": 0.4647, "step": 2911 }, { "epoch": 2.335204490777867, "grad_norm": 0.8154265284538269, "learning_rate": 2.4678205101897523e-06, "loss": 0.4587, "step": 2912 }, { "epoch": 2.3360064153969526, "grad_norm": 0.8957749605178833, "learning_rate": 2.462127500720737e-06, "loss": 0.4981, "step": 2913 }, { "epoch": 2.3368083400160384, "grad_norm": 0.8637145757675171, "learning_rate": 2.456440143405232e-06, "loss": 0.4774, "step": 2914 }, { "epoch": 2.3376102646351242, "grad_norm": 0.8834245800971985, "learning_rate": 2.4507584425078133e-06, "loss": 0.4996, "step": 2915 }, { "epoch": 2.33841218925421, "grad_norm": 0.8881711363792419, "learning_rate": 2.4450824022888166e-06, "loss": 0.509, "step": 2916 }, { "epoch": 2.339214113873296, "grad_norm": 0.9144017100334167, "learning_rate": 2.4394120270043233e-06, "loss": 0.4873, "step": 2917 }, { "epoch": 2.3400160384923816, "grad_norm": 0.8431651592254639, "learning_rate": 2.433747320906177e-06, "loss": 0.464, "step": 2918 }, { "epoch": 2.3408179631114674, "grad_norm": 0.8841955661773682, "learning_rate": 2.4280882882419676e-06, "loss": 0.4791, "step": 2919 }, { "epoch": 2.341619887730553, "grad_norm": 0.9004083871841431, "learning_rate": 2.4224349332550313e-06, "loss": 0.5035, "step": 2920 }, { "epoch": 2.342421812349639, "grad_norm": 0.8716943264007568, "learning_rate": 2.4167872601844476e-06, "loss": 0.4744, "step": 2921 }, { "epoch": 2.3432237369687248, "grad_norm": 0.8678621053695679, "learning_rate": 2.411145273265029e-06, "loss": 0.4983, "step": 2922 }, { "epoch": 2.3440256615878106, "grad_norm": 0.8493649959564209, "learning_rate": 2.405508976727332e-06, "loss": 0.4798, "step": 2923 }, { "epoch": 2.344827586206897, "grad_norm": 0.8921751379966736, "learning_rate": 2.3998783747976473e-06, "loss": 0.5009, "step": 2924 }, { "epoch": 2.345629510825982, "grad_norm": 0.8245465755462646, "learning_rate": 2.3942534716979827e-06, "loss": 0.473, "step": 2925 }, { "epoch": 2.3464314354450684, "grad_norm": 0.7934049367904663, "learning_rate": 2.3886342716460932e-06, "loss": 0.452, "step": 2926 }, { "epoch": 2.3472333600641537, "grad_norm": 0.8302115201950073, "learning_rate": 2.3830207788554394e-06, "loss": 0.4759, "step": 2927 }, { "epoch": 2.34803528468324, "grad_norm": 0.8664802312850952, "learning_rate": 2.3774129975352112e-06, "loss": 0.4752, "step": 2928 }, { "epoch": 2.3488372093023258, "grad_norm": 0.8159329295158386, "learning_rate": 2.371810931890316e-06, "loss": 0.4827, "step": 2929 }, { "epoch": 2.3496391339214115, "grad_norm": 0.8080800771713257, "learning_rate": 2.366214586121366e-06, "loss": 0.4715, "step": 2930 }, { "epoch": 2.3504410585404973, "grad_norm": 0.8522915840148926, "learning_rate": 2.360623964424703e-06, "loss": 0.494, "step": 2931 }, { "epoch": 2.351242983159583, "grad_norm": 0.8710372447967529, "learning_rate": 2.3550390709923575e-06, "loss": 0.4748, "step": 2932 }, { "epoch": 2.352044907778669, "grad_norm": 0.8452877998352051, "learning_rate": 2.349459910012075e-06, "loss": 0.4708, "step": 2933 }, { "epoch": 2.3528468323977547, "grad_norm": 0.8797398805618286, "learning_rate": 2.343886485667303e-06, "loss": 0.4782, "step": 2934 }, { "epoch": 2.3536487570168405, "grad_norm": 0.8327314853668213, "learning_rate": 2.3383188021371773e-06, "loss": 0.478, "step": 2935 }, { "epoch": 2.3544506816359263, "grad_norm": 0.8496788740158081, "learning_rate": 2.332756863596547e-06, "loss": 0.4457, "step": 2936 }, { "epoch": 2.355252606255012, "grad_norm": 0.842319130897522, "learning_rate": 2.327200674215937e-06, "loss": 0.4896, "step": 2937 }, { "epoch": 2.356054530874098, "grad_norm": 0.8751649260520935, "learning_rate": 2.3216502381615633e-06, "loss": 0.5128, "step": 2938 }, { "epoch": 2.3568564554931837, "grad_norm": 0.8716158866882324, "learning_rate": 2.316105559595342e-06, "loss": 0.4961, "step": 2939 }, { "epoch": 2.3576583801122695, "grad_norm": 0.9204161763191223, "learning_rate": 2.310566642674854e-06, "loss": 0.4853, "step": 2940 }, { "epoch": 2.3584603047313553, "grad_norm": 0.9012963175773621, "learning_rate": 2.3050334915533713e-06, "loss": 0.4946, "step": 2941 }, { "epoch": 2.359262229350441, "grad_norm": 0.8790763020515442, "learning_rate": 2.2995061103798397e-06, "loss": 0.4866, "step": 2942 }, { "epoch": 2.360064153969527, "grad_norm": 0.8495937585830688, "learning_rate": 2.2939845032988707e-06, "loss": 0.4877, "step": 2943 }, { "epoch": 2.3608660785886126, "grad_norm": 0.8435322046279907, "learning_rate": 2.288468674450766e-06, "loss": 0.4856, "step": 2944 }, { "epoch": 2.3616680032076984, "grad_norm": 0.8671093583106995, "learning_rate": 2.28295862797147e-06, "loss": 0.4737, "step": 2945 }, { "epoch": 2.362469927826784, "grad_norm": 0.8573446273803711, "learning_rate": 2.27745436799261e-06, "loss": 0.5076, "step": 2946 }, { "epoch": 2.36327185244587, "grad_norm": 0.8513672947883606, "learning_rate": 2.271955898641467e-06, "loss": 0.4653, "step": 2947 }, { "epoch": 2.364073777064956, "grad_norm": 0.8536444306373596, "learning_rate": 2.2664632240409746e-06, "loss": 0.4721, "step": 2948 }, { "epoch": 2.3648757016840416, "grad_norm": 0.8997591733932495, "learning_rate": 2.260976348309737e-06, "loss": 0.5051, "step": 2949 }, { "epoch": 2.3656776263031274, "grad_norm": 0.854433536529541, "learning_rate": 2.255495275561993e-06, "loss": 0.485, "step": 2950 }, { "epoch": 2.366479550922213, "grad_norm": 0.9025599956512451, "learning_rate": 2.2500200099076395e-06, "loss": 0.4982, "step": 2951 }, { "epoch": 2.367281475541299, "grad_norm": 0.7927220463752747, "learning_rate": 2.2445505554522207e-06, "loss": 0.4662, "step": 2952 }, { "epoch": 2.3680834001603848, "grad_norm": 0.8282037973403931, "learning_rate": 2.239086916296914e-06, "loss": 0.4682, "step": 2953 }, { "epoch": 2.3688853247794706, "grad_norm": 0.8752132654190063, "learning_rate": 2.2336290965385454e-06, "loss": 0.4766, "step": 2954 }, { "epoch": 2.3696872493985564, "grad_norm": 0.8500027656555176, "learning_rate": 2.228177100269573e-06, "loss": 0.4887, "step": 2955 }, { "epoch": 2.370489174017642, "grad_norm": 0.8315219879150391, "learning_rate": 2.22273093157809e-06, "loss": 0.495, "step": 2956 }, { "epoch": 2.3712910986367284, "grad_norm": 0.8728612065315247, "learning_rate": 2.217290594547822e-06, "loss": 0.4768, "step": 2957 }, { "epoch": 2.3720930232558137, "grad_norm": 0.8619360327720642, "learning_rate": 2.2118560932581123e-06, "loss": 0.4768, "step": 2958 }, { "epoch": 2.3728949478749, "grad_norm": 0.8790128231048584, "learning_rate": 2.2064274317839394e-06, "loss": 0.4638, "step": 2959 }, { "epoch": 2.3736968724939858, "grad_norm": 0.8765968680381775, "learning_rate": 2.2010046141958973e-06, "loss": 0.4762, "step": 2960 }, { "epoch": 2.3744987971130715, "grad_norm": 0.790704607963562, "learning_rate": 2.1955876445602008e-06, "loss": 0.4694, "step": 2961 }, { "epoch": 2.3753007217321573, "grad_norm": 0.8622111082077026, "learning_rate": 2.190176526938679e-06, "loss": 0.4642, "step": 2962 }, { "epoch": 2.376102646351243, "grad_norm": 0.8704236149787903, "learning_rate": 2.1847712653887687e-06, "loss": 0.4809, "step": 2963 }, { "epoch": 2.376904570970329, "grad_norm": 0.8916087746620178, "learning_rate": 2.17937186396352e-06, "loss": 0.5033, "step": 2964 }, { "epoch": 2.3777064955894147, "grad_norm": 0.8955850601196289, "learning_rate": 2.1739783267115888e-06, "loss": 0.5026, "step": 2965 }, { "epoch": 2.3785084202085005, "grad_norm": 0.860222339630127, "learning_rate": 2.1685906576772365e-06, "loss": 0.4896, "step": 2966 }, { "epoch": 2.3793103448275863, "grad_norm": 0.8153157234191895, "learning_rate": 2.1632088609003133e-06, "loss": 0.4615, "step": 2967 }, { "epoch": 2.380112269446672, "grad_norm": 0.8858957290649414, "learning_rate": 2.157832940416279e-06, "loss": 0.4947, "step": 2968 }, { "epoch": 2.380914194065758, "grad_norm": 0.8330368995666504, "learning_rate": 2.1524629002561803e-06, "loss": 0.4623, "step": 2969 }, { "epoch": 2.3817161186848437, "grad_norm": 0.9089009165763855, "learning_rate": 2.1470987444466564e-06, "loss": 0.4907, "step": 2970 }, { "epoch": 2.3825180433039295, "grad_norm": 0.805916965007782, "learning_rate": 2.141740477009937e-06, "loss": 0.4827, "step": 2971 }, { "epoch": 2.3833199679230153, "grad_norm": 0.85927814245224, "learning_rate": 2.1363881019638277e-06, "loss": 0.4669, "step": 2972 }, { "epoch": 2.384121892542101, "grad_norm": 0.9052659273147583, "learning_rate": 2.1310416233217246e-06, "loss": 0.5256, "step": 2973 }, { "epoch": 2.384923817161187, "grad_norm": 0.8463844656944275, "learning_rate": 2.1257010450926e-06, "loss": 0.4947, "step": 2974 }, { "epoch": 2.3857257417802726, "grad_norm": 0.8669213652610779, "learning_rate": 2.1203663712809995e-06, "loss": 0.4768, "step": 2975 }, { "epoch": 2.3865276663993584, "grad_norm": 0.8719237446784973, "learning_rate": 2.115037605887048e-06, "loss": 0.5004, "step": 2976 }, { "epoch": 2.387329591018444, "grad_norm": 0.8576990365982056, "learning_rate": 2.1097147529064286e-06, "loss": 0.4877, "step": 2977 }, { "epoch": 2.38813151563753, "grad_norm": 0.8337488770484924, "learning_rate": 2.104397816330401e-06, "loss": 0.4715, "step": 2978 }, { "epoch": 2.388933440256616, "grad_norm": 0.8252597451210022, "learning_rate": 2.0990868001457853e-06, "loss": 0.4819, "step": 2979 }, { "epoch": 2.3897353648757016, "grad_norm": 0.8549776077270508, "learning_rate": 2.093781708334962e-06, "loss": 0.4702, "step": 2980 }, { "epoch": 2.3905372894947874, "grad_norm": 0.8355644345283508, "learning_rate": 2.088482544875873e-06, "loss": 0.4758, "step": 2981 }, { "epoch": 2.391339214113873, "grad_norm": 0.857735276222229, "learning_rate": 2.0831893137420046e-06, "loss": 0.4998, "step": 2982 }, { "epoch": 2.392141138732959, "grad_norm": 0.9026484489440918, "learning_rate": 2.077902018902407e-06, "loss": 0.475, "step": 2983 }, { "epoch": 2.3929430633520448, "grad_norm": 0.8243964314460754, "learning_rate": 2.072620664321674e-06, "loss": 0.4798, "step": 2984 }, { "epoch": 2.3937449879711306, "grad_norm": 0.8589446544647217, "learning_rate": 2.067345253959938e-06, "loss": 0.4483, "step": 2985 }, { "epoch": 2.3945469125902163, "grad_norm": 0.8188499212265015, "learning_rate": 2.0620757917728927e-06, "loss": 0.4368, "step": 2986 }, { "epoch": 2.395348837209302, "grad_norm": 0.8684476613998413, "learning_rate": 2.0568122817117507e-06, "loss": 0.4714, "step": 2987 }, { "epoch": 2.3961507618283884, "grad_norm": 0.8384298086166382, "learning_rate": 2.051554727723276e-06, "loss": 0.4778, "step": 2988 }, { "epoch": 2.3969526864474737, "grad_norm": 0.872604250907898, "learning_rate": 2.046303133749764e-06, "loss": 0.488, "step": 2989 }, { "epoch": 2.39775461106656, "grad_norm": 0.9264422655105591, "learning_rate": 2.041057503729028e-06, "loss": 0.4955, "step": 2990 }, { "epoch": 2.3985565356856453, "grad_norm": 0.8388825058937073, "learning_rate": 2.035817841594434e-06, "loss": 0.4809, "step": 2991 }, { "epoch": 2.3993584603047315, "grad_norm": 0.9164281487464905, "learning_rate": 2.0305841512748494e-06, "loss": 0.4903, "step": 2992 }, { "epoch": 2.4001603849238173, "grad_norm": 0.9148788452148438, "learning_rate": 2.0253564366946764e-06, "loss": 0.5062, "step": 2993 }, { "epoch": 2.400962309542903, "grad_norm": 0.8578090071678162, "learning_rate": 2.020134701773836e-06, "loss": 0.4907, "step": 2994 }, { "epoch": 2.401764234161989, "grad_norm": 0.8483836650848389, "learning_rate": 2.0149189504277553e-06, "loss": 0.4526, "step": 2995 }, { "epoch": 2.4025661587810747, "grad_norm": 0.8625277280807495, "learning_rate": 2.0097091865673923e-06, "loss": 0.468, "step": 2996 }, { "epoch": 2.4033680834001605, "grad_norm": 0.8608683347702026, "learning_rate": 2.0045054140992002e-06, "loss": 0.4907, "step": 2997 }, { "epoch": 2.4041700080192463, "grad_norm": 0.83229660987854, "learning_rate": 1.9993076369251406e-06, "loss": 0.4727, "step": 2998 }, { "epoch": 2.404971932638332, "grad_norm": 0.8340911865234375, "learning_rate": 1.9941158589426924e-06, "loss": 0.472, "step": 2999 }, { "epoch": 2.405773857257418, "grad_norm": 0.9387159943580627, "learning_rate": 1.9889300840448224e-06, "loss": 0.4952, "step": 3000 }, { "epoch": 2.4065757818765037, "grad_norm": 0.8406565189361572, "learning_rate": 1.98375031612e-06, "loss": 0.4659, "step": 3001 }, { "epoch": 2.4073777064955895, "grad_norm": 0.9237212538719177, "learning_rate": 1.9785765590521978e-06, "loss": 0.4815, "step": 3002 }, { "epoch": 2.4081796311146753, "grad_norm": 0.8607485294342041, "learning_rate": 1.9734088167208664e-06, "loss": 0.4842, "step": 3003 }, { "epoch": 2.408981555733761, "grad_norm": 0.8381433486938477, "learning_rate": 1.968247093000963e-06, "loss": 0.4651, "step": 3004 }, { "epoch": 2.409783480352847, "grad_norm": 0.8409081697463989, "learning_rate": 1.96309139176292e-06, "loss": 0.484, "step": 3005 }, { "epoch": 2.4105854049719326, "grad_norm": 0.8303772807121277, "learning_rate": 1.9579417168726566e-06, "loss": 0.4851, "step": 3006 }, { "epoch": 2.4113873295910184, "grad_norm": 0.8264473676681519, "learning_rate": 1.9527980721915798e-06, "loss": 0.4631, "step": 3007 }, { "epoch": 2.412189254210104, "grad_norm": 0.8889646530151367, "learning_rate": 1.9476604615765605e-06, "loss": 0.4722, "step": 3008 }, { "epoch": 2.41299117882919, "grad_norm": 0.8740735650062561, "learning_rate": 1.942528888879964e-06, "loss": 0.4926, "step": 3009 }, { "epoch": 2.413793103448276, "grad_norm": 0.835271418094635, "learning_rate": 1.937403357949611e-06, "loss": 0.4663, "step": 3010 }, { "epoch": 2.4145950280673616, "grad_norm": 0.8700141310691833, "learning_rate": 1.932283872628803e-06, "loss": 0.4773, "step": 3011 }, { "epoch": 2.4153969526864474, "grad_norm": 0.8676986694335938, "learning_rate": 1.927170436756305e-06, "loss": 0.4952, "step": 3012 }, { "epoch": 2.416198877305533, "grad_norm": 0.902349591255188, "learning_rate": 1.922063054166341e-06, "loss": 0.4806, "step": 3013 }, { "epoch": 2.417000801924619, "grad_norm": 0.8744420409202576, "learning_rate": 1.916961728688603e-06, "loss": 0.4784, "step": 3014 }, { "epoch": 2.4178027265437048, "grad_norm": 0.8729916214942932, "learning_rate": 1.9118664641482386e-06, "loss": 0.4974, "step": 3015 }, { "epoch": 2.4186046511627906, "grad_norm": 0.9082834720611572, "learning_rate": 1.9067772643658511e-06, "loss": 0.487, "step": 3016 }, { "epoch": 2.4194065757818763, "grad_norm": 0.8420137166976929, "learning_rate": 1.901694133157499e-06, "loss": 0.4468, "step": 3017 }, { "epoch": 2.420208500400962, "grad_norm": 0.8739819526672363, "learning_rate": 1.896617074334679e-06, "loss": 0.4624, "step": 3018 }, { "epoch": 2.421010425020048, "grad_norm": 0.8350537419319153, "learning_rate": 1.8915460917043494e-06, "loss": 0.4767, "step": 3019 }, { "epoch": 2.4218123496391337, "grad_norm": 0.8347825407981873, "learning_rate": 1.8864811890689016e-06, "loss": 0.458, "step": 3020 }, { "epoch": 2.42261427425822, "grad_norm": 0.8250964879989624, "learning_rate": 1.8814223702261757e-06, "loss": 0.4694, "step": 3021 }, { "epoch": 2.4234161988773053, "grad_norm": 0.8303848505020142, "learning_rate": 1.8763696389694463e-06, "loss": 0.4831, "step": 3022 }, { "epoch": 2.4242181234963915, "grad_norm": 0.8795514106750488, "learning_rate": 1.8713229990874194e-06, "loss": 0.4736, "step": 3023 }, { "epoch": 2.4250200481154773, "grad_norm": 0.8881685733795166, "learning_rate": 1.86628245436424e-06, "loss": 0.4751, "step": 3024 }, { "epoch": 2.425821972734563, "grad_norm": 0.9160009622573853, "learning_rate": 1.8612480085794804e-06, "loss": 0.5201, "step": 3025 }, { "epoch": 2.426623897353649, "grad_norm": 0.8275811076164246, "learning_rate": 1.8562196655081422e-06, "loss": 0.4675, "step": 3026 }, { "epoch": 2.4274258219727347, "grad_norm": 0.895634651184082, "learning_rate": 1.8511974289206413e-06, "loss": 0.4896, "step": 3027 }, { "epoch": 2.4282277465918205, "grad_norm": 0.8538333177566528, "learning_rate": 1.8461813025828268e-06, "loss": 0.4749, "step": 3028 }, { "epoch": 2.4290296712109063, "grad_norm": 0.8479071855545044, "learning_rate": 1.8411712902559597e-06, "loss": 0.4798, "step": 3029 }, { "epoch": 2.429831595829992, "grad_norm": 0.875977635383606, "learning_rate": 1.8361673956967175e-06, "loss": 0.4951, "step": 3030 }, { "epoch": 2.430633520449078, "grad_norm": 0.9012177586555481, "learning_rate": 1.831169622657194e-06, "loss": 0.5221, "step": 3031 }, { "epoch": 2.4314354450681637, "grad_norm": 0.8884789943695068, "learning_rate": 1.826177974884885e-06, "loss": 0.4712, "step": 3032 }, { "epoch": 2.4322373696872495, "grad_norm": 0.8643089532852173, "learning_rate": 1.8211924561227001e-06, "loss": 0.4645, "step": 3033 }, { "epoch": 2.4330392943063353, "grad_norm": 0.8726524710655212, "learning_rate": 1.816213070108951e-06, "loss": 0.5044, "step": 3034 }, { "epoch": 2.433841218925421, "grad_norm": 0.8304829597473145, "learning_rate": 1.8112398205773507e-06, "loss": 0.4747, "step": 3035 }, { "epoch": 2.434643143544507, "grad_norm": 0.8465933799743652, "learning_rate": 1.8062727112570133e-06, "loss": 0.4924, "step": 3036 }, { "epoch": 2.4354450681635926, "grad_norm": 0.8618703484535217, "learning_rate": 1.8013117458724416e-06, "loss": 0.4974, "step": 3037 }, { "epoch": 2.4362469927826784, "grad_norm": 0.872769832611084, "learning_rate": 1.79635692814354e-06, "loss": 0.4641, "step": 3038 }, { "epoch": 2.437048917401764, "grad_norm": 0.8966688513755798, "learning_rate": 1.7914082617856022e-06, "loss": 0.4678, "step": 3039 }, { "epoch": 2.43785084202085, "grad_norm": 0.8599264025688171, "learning_rate": 1.7864657505092964e-06, "loss": 0.4602, "step": 3040 }, { "epoch": 2.438652766639936, "grad_norm": 0.9014189839363098, "learning_rate": 1.7815293980206993e-06, "loss": 0.4846, "step": 3041 }, { "epoch": 2.4394546912590216, "grad_norm": 0.8558383584022522, "learning_rate": 1.776599208021247e-06, "loss": 0.4674, "step": 3042 }, { "epoch": 2.4402566158781074, "grad_norm": 0.8910719156265259, "learning_rate": 1.7716751842077663e-06, "loss": 0.4783, "step": 3043 }, { "epoch": 2.441058540497193, "grad_norm": 0.865467369556427, "learning_rate": 1.7667573302724606e-06, "loss": 0.4687, "step": 3044 }, { "epoch": 2.441860465116279, "grad_norm": 0.9217147827148438, "learning_rate": 1.7618456499028968e-06, "loss": 0.487, "step": 3045 }, { "epoch": 2.4426623897353648, "grad_norm": 0.8558123707771301, "learning_rate": 1.7569401467820302e-06, "loss": 0.4507, "step": 3046 }, { "epoch": 2.4434643143544506, "grad_norm": 0.8037233352661133, "learning_rate": 1.752040824588167e-06, "loss": 0.4415, "step": 3047 }, { "epoch": 2.4442662389735363, "grad_norm": 0.9270918369293213, "learning_rate": 1.7471476869949877e-06, "loss": 0.5039, "step": 3048 }, { "epoch": 2.445068163592622, "grad_norm": 0.8629381060600281, "learning_rate": 1.7422607376715362e-06, "loss": 0.4791, "step": 3049 }, { "epoch": 2.445870088211708, "grad_norm": 0.8409698605537415, "learning_rate": 1.7373799802822067e-06, "loss": 0.4942, "step": 3050 }, { "epoch": 2.4466720128307937, "grad_norm": 0.8830838203430176, "learning_rate": 1.7325054184867652e-06, "loss": 0.4756, "step": 3051 }, { "epoch": 2.44747393744988, "grad_norm": 0.9066561460494995, "learning_rate": 1.7276370559403188e-06, "loss": 0.478, "step": 3052 }, { "epoch": 2.4482758620689653, "grad_norm": 0.9000374674797058, "learning_rate": 1.7227748962933343e-06, "loss": 0.4911, "step": 3053 }, { "epoch": 2.4490777866880515, "grad_norm": 0.9526277184486389, "learning_rate": 1.7179189431916254e-06, "loss": 0.5002, "step": 3054 }, { "epoch": 2.449879711307137, "grad_norm": 0.85796719789505, "learning_rate": 1.713069200276346e-06, "loss": 0.4973, "step": 3055 }, { "epoch": 2.450681635926223, "grad_norm": 0.8333845734596252, "learning_rate": 1.708225671184003e-06, "loss": 0.4688, "step": 3056 }, { "epoch": 2.451483560545309, "grad_norm": 0.8949407935142517, "learning_rate": 1.7033883595464407e-06, "loss": 0.4913, "step": 3057 }, { "epoch": 2.4522854851643947, "grad_norm": 0.8218083381652832, "learning_rate": 1.6985572689908326e-06, "loss": 0.4581, "step": 3058 }, { "epoch": 2.4530874097834805, "grad_norm": 0.8660341501235962, "learning_rate": 1.693732403139705e-06, "loss": 0.4905, "step": 3059 }, { "epoch": 2.4538893344025663, "grad_norm": 0.8607407808303833, "learning_rate": 1.688913765610899e-06, "loss": 0.4618, "step": 3060 }, { "epoch": 2.454691259021652, "grad_norm": 0.8669100403785706, "learning_rate": 1.684101360017596e-06, "loss": 0.4626, "step": 3061 }, { "epoch": 2.455493183640738, "grad_norm": 0.8576903939247131, "learning_rate": 1.6792951899683018e-06, "loss": 0.4873, "step": 3062 }, { "epoch": 2.4562951082598237, "grad_norm": 0.8291400074958801, "learning_rate": 1.6744952590668452e-06, "loss": 0.4976, "step": 3063 }, { "epoch": 2.4570970328789095, "grad_norm": 0.8194727897644043, "learning_rate": 1.669701570912381e-06, "loss": 0.4716, "step": 3064 }, { "epoch": 2.4578989574979953, "grad_norm": 0.8513212203979492, "learning_rate": 1.6649141290993765e-06, "loss": 0.4765, "step": 3065 }, { "epoch": 2.458700882117081, "grad_norm": 0.8940701484680176, "learning_rate": 1.6601329372176177e-06, "loss": 0.4738, "step": 3066 }, { "epoch": 2.459502806736167, "grad_norm": 0.8830768465995789, "learning_rate": 1.6553579988522083e-06, "loss": 0.4837, "step": 3067 }, { "epoch": 2.4603047313552526, "grad_norm": 0.8472455143928528, "learning_rate": 1.6505893175835585e-06, "loss": 0.4635, "step": 3068 }, { "epoch": 2.4611066559743384, "grad_norm": 0.8620253801345825, "learning_rate": 1.6458268969873892e-06, "loss": 0.4813, "step": 3069 }, { "epoch": 2.461908580593424, "grad_norm": 0.8335537910461426, "learning_rate": 1.6410707406347227e-06, "loss": 0.4595, "step": 3070 }, { "epoch": 2.46271050521251, "grad_norm": 0.8217028975486755, "learning_rate": 1.6363208520918882e-06, "loss": 0.4551, "step": 3071 }, { "epoch": 2.463512429831596, "grad_norm": 0.8587481379508972, "learning_rate": 1.6315772349205139e-06, "loss": 0.487, "step": 3072 }, { "epoch": 2.4643143544506816, "grad_norm": 0.8531529903411865, "learning_rate": 1.6268398926775286e-06, "loss": 0.477, "step": 3073 }, { "epoch": 2.4651162790697674, "grad_norm": 0.8320519328117371, "learning_rate": 1.6221088289151477e-06, "loss": 0.4625, "step": 3074 }, { "epoch": 2.465918203688853, "grad_norm": 0.8659622669219971, "learning_rate": 1.6173840471808856e-06, "loss": 0.4866, "step": 3075 }, { "epoch": 2.466720128307939, "grad_norm": 0.8781241178512573, "learning_rate": 1.612665551017546e-06, "loss": 0.4842, "step": 3076 }, { "epoch": 2.4675220529270248, "grad_norm": 0.8649770021438599, "learning_rate": 1.6079533439632166e-06, "loss": 0.4856, "step": 3077 }, { "epoch": 2.4683239775461105, "grad_norm": 0.8794416785240173, "learning_rate": 1.6032474295512733e-06, "loss": 0.4727, "step": 3078 }, { "epoch": 2.4691259021651963, "grad_norm": 0.8567477464675903, "learning_rate": 1.598547811310368e-06, "loss": 0.4757, "step": 3079 }, { "epoch": 2.469927826784282, "grad_norm": 0.8346998691558838, "learning_rate": 1.5938544927644351e-06, "loss": 0.4751, "step": 3080 }, { "epoch": 2.470729751403368, "grad_norm": 0.8880207538604736, "learning_rate": 1.5891674774326848e-06, "loss": 0.4901, "step": 3081 }, { "epoch": 2.4715316760224537, "grad_norm": 0.8977006077766418, "learning_rate": 1.5844867688296017e-06, "loss": 0.494, "step": 3082 }, { "epoch": 2.4723336006415395, "grad_norm": 0.8824754357337952, "learning_rate": 1.5798123704649416e-06, "loss": 0.4929, "step": 3083 }, { "epoch": 2.4731355252606253, "grad_norm": 0.9107778072357178, "learning_rate": 1.5751442858437238e-06, "loss": 0.4771, "step": 3084 }, { "epoch": 2.4739374498797115, "grad_norm": 0.8373488783836365, "learning_rate": 1.5704825184662397e-06, "loss": 0.4824, "step": 3085 }, { "epoch": 2.474739374498797, "grad_norm": 0.8496052026748657, "learning_rate": 1.5658270718280433e-06, "loss": 0.4644, "step": 3086 }, { "epoch": 2.475541299117883, "grad_norm": 0.884526252746582, "learning_rate": 1.5611779494199398e-06, "loss": 0.5145, "step": 3087 }, { "epoch": 2.476343223736969, "grad_norm": 0.8632438778877258, "learning_rate": 1.5565351547280084e-06, "loss": 0.4807, "step": 3088 }, { "epoch": 2.4771451483560547, "grad_norm": 0.8612059354782104, "learning_rate": 1.5518986912335686e-06, "loss": 0.4579, "step": 3089 }, { "epoch": 2.4779470729751405, "grad_norm": 0.8448330163955688, "learning_rate": 1.5472685624132012e-06, "loss": 0.4687, "step": 3090 }, { "epoch": 2.4787489975942263, "grad_norm": 0.8578251600265503, "learning_rate": 1.5426447717387349e-06, "loss": 0.4987, "step": 3091 }, { "epoch": 2.479550922213312, "grad_norm": 0.8487175703048706, "learning_rate": 1.5380273226772403e-06, "loss": 0.4817, "step": 3092 }, { "epoch": 2.480352846832398, "grad_norm": 0.8690900802612305, "learning_rate": 1.5334162186910474e-06, "loss": 0.4834, "step": 3093 }, { "epoch": 2.4811547714514837, "grad_norm": 0.8409072160720825, "learning_rate": 1.5288114632377105e-06, "loss": 0.4603, "step": 3094 }, { "epoch": 2.4819566960705695, "grad_norm": 0.8334317207336426, "learning_rate": 1.5242130597700355e-06, "loss": 0.48, "step": 3095 }, { "epoch": 2.4827586206896552, "grad_norm": 0.8757611513137817, "learning_rate": 1.5196210117360643e-06, "loss": 0.4855, "step": 3096 }, { "epoch": 2.483560545308741, "grad_norm": 0.8928870558738708, "learning_rate": 1.5150353225790626e-06, "loss": 0.4787, "step": 3097 }, { "epoch": 2.484362469927827, "grad_norm": 0.8667248487472534, "learning_rate": 1.5104559957375475e-06, "loss": 0.4617, "step": 3098 }, { "epoch": 2.4851643945469126, "grad_norm": 0.8710841536521912, "learning_rate": 1.505883034645248e-06, "loss": 0.4625, "step": 3099 }, { "epoch": 2.4859663191659984, "grad_norm": 0.8655977249145508, "learning_rate": 1.5013164427311223e-06, "loss": 0.4653, "step": 3100 }, { "epoch": 2.486768243785084, "grad_norm": 0.8573647737503052, "learning_rate": 1.4967562234193655e-06, "loss": 0.4835, "step": 3101 }, { "epoch": 2.48757016840417, "grad_norm": 0.8854028582572937, "learning_rate": 1.4922023801293795e-06, "loss": 0.4899, "step": 3102 }, { "epoch": 2.488372093023256, "grad_norm": 0.843406617641449, "learning_rate": 1.4876549162757915e-06, "loss": 0.468, "step": 3103 }, { "epoch": 2.4891740176423416, "grad_norm": 0.8388236165046692, "learning_rate": 1.4831138352684482e-06, "loss": 0.467, "step": 3104 }, { "epoch": 2.4899759422614274, "grad_norm": 0.864824116230011, "learning_rate": 1.4785791405123995e-06, "loss": 0.4789, "step": 3105 }, { "epoch": 2.490777866880513, "grad_norm": 0.8695855140686035, "learning_rate": 1.474050835407923e-06, "loss": 0.4778, "step": 3106 }, { "epoch": 2.491579791499599, "grad_norm": 0.8766945004463196, "learning_rate": 1.4695289233504894e-06, "loss": 0.4668, "step": 3107 }, { "epoch": 2.4923817161186848, "grad_norm": 0.8195257782936096, "learning_rate": 1.4650134077307853e-06, "loss": 0.4725, "step": 3108 }, { "epoch": 2.4931836407377705, "grad_norm": 0.8423482775688171, "learning_rate": 1.4605042919347e-06, "loss": 0.462, "step": 3109 }, { "epoch": 2.4939855653568563, "grad_norm": 0.8940702080726624, "learning_rate": 1.4560015793433145e-06, "loss": 0.5092, "step": 3110 }, { "epoch": 2.494787489975942, "grad_norm": 0.8922168612480164, "learning_rate": 1.451505273332926e-06, "loss": 0.4787, "step": 3111 }, { "epoch": 2.495589414595028, "grad_norm": 0.8811621069908142, "learning_rate": 1.4470153772750118e-06, "loss": 0.487, "step": 3112 }, { "epoch": 2.4963913392141137, "grad_norm": 0.8688673973083496, "learning_rate": 1.4425318945362488e-06, "loss": 0.465, "step": 3113 }, { "epoch": 2.4971932638331995, "grad_norm": 0.8423486351966858, "learning_rate": 1.438054828478509e-06, "loss": 0.4719, "step": 3114 }, { "epoch": 2.4979951884522853, "grad_norm": 0.8947927355766296, "learning_rate": 1.4335841824588436e-06, "loss": 0.4796, "step": 3115 }, { "epoch": 2.4987971130713715, "grad_norm": 0.8390734791755676, "learning_rate": 1.429119959829499e-06, "loss": 0.468, "step": 3116 }, { "epoch": 2.499599037690457, "grad_norm": 0.8673422336578369, "learning_rate": 1.4246621639378998e-06, "loss": 0.4661, "step": 3117 }, { "epoch": 2.500400962309543, "grad_norm": 0.833712637424469, "learning_rate": 1.4202107981266532e-06, "loss": 0.4809, "step": 3118 }, { "epoch": 2.5012028869286285, "grad_norm": 0.8182956576347351, "learning_rate": 1.4157658657335494e-06, "loss": 0.4687, "step": 3119 }, { "epoch": 2.5020048115477147, "grad_norm": 0.8784580826759338, "learning_rate": 1.411327370091542e-06, "loss": 0.48, "step": 3120 }, { "epoch": 2.5028067361668, "grad_norm": 0.8753635883331299, "learning_rate": 1.406895314528771e-06, "loss": 0.5129, "step": 3121 }, { "epoch": 2.5036086607858863, "grad_norm": 0.8735189437866211, "learning_rate": 1.4024697023685429e-06, "loss": 0.4794, "step": 3122 }, { "epoch": 2.504410585404972, "grad_norm": 0.9127287864685059, "learning_rate": 1.3980505369293306e-06, "loss": 0.4402, "step": 3123 }, { "epoch": 2.505212510024058, "grad_norm": 0.86842280626297, "learning_rate": 1.3936378215247771e-06, "loss": 0.4943, "step": 3124 }, { "epoch": 2.5060144346431437, "grad_norm": 0.8117753863334656, "learning_rate": 1.389231559463684e-06, "loss": 0.4747, "step": 3125 }, { "epoch": 2.5068163592622295, "grad_norm": 0.8560154438018799, "learning_rate": 1.3848317540500178e-06, "loss": 0.5145, "step": 3126 }, { "epoch": 2.5076182838813152, "grad_norm": 0.885636568069458, "learning_rate": 1.3804384085829026e-06, "loss": 0.4802, "step": 3127 }, { "epoch": 2.508420208500401, "grad_norm": 0.8547667264938354, "learning_rate": 1.376051526356621e-06, "loss": 0.4833, "step": 3128 }, { "epoch": 2.509222133119487, "grad_norm": 0.8286969065666199, "learning_rate": 1.3716711106606007e-06, "loss": 0.4416, "step": 3129 }, { "epoch": 2.5100240577385726, "grad_norm": 0.9037246704101562, "learning_rate": 1.367297164779431e-06, "loss": 0.4571, "step": 3130 }, { "epoch": 2.5108259823576584, "grad_norm": 0.8304953575134277, "learning_rate": 1.3629296919928447e-06, "loss": 0.4926, "step": 3131 }, { "epoch": 2.511627906976744, "grad_norm": 0.876008927822113, "learning_rate": 1.3585686955757205e-06, "loss": 0.4773, "step": 3132 }, { "epoch": 2.51242983159583, "grad_norm": 0.9258241653442383, "learning_rate": 1.3542141787980855e-06, "loss": 0.4937, "step": 3133 }, { "epoch": 2.513231756214916, "grad_norm": 0.846127986907959, "learning_rate": 1.3498661449251006e-06, "loss": 0.469, "step": 3134 }, { "epoch": 2.5140336808340016, "grad_norm": 0.8556084036827087, "learning_rate": 1.3455245972170694e-06, "loss": 0.4554, "step": 3135 }, { "epoch": 2.5148356054530874, "grad_norm": 0.8977357745170593, "learning_rate": 1.341189538929436e-06, "loss": 0.4738, "step": 3136 }, { "epoch": 2.515637530072173, "grad_norm": 0.8365195989608765, "learning_rate": 1.3368609733127714e-06, "loss": 0.4724, "step": 3137 }, { "epoch": 2.516439454691259, "grad_norm": 0.8728964328765869, "learning_rate": 1.3325389036127855e-06, "loss": 0.4714, "step": 3138 }, { "epoch": 2.5172413793103448, "grad_norm": 0.8206337094306946, "learning_rate": 1.3282233330703087e-06, "loss": 0.4576, "step": 3139 }, { "epoch": 2.5180433039294305, "grad_norm": 0.8753436207771301, "learning_rate": 1.3239142649213044e-06, "loss": 0.4825, "step": 3140 }, { "epoch": 2.5188452285485163, "grad_norm": 0.8451640009880066, "learning_rate": 1.3196117023968613e-06, "loss": 0.4762, "step": 3141 }, { "epoch": 2.519647153167602, "grad_norm": 0.8681123852729797, "learning_rate": 1.315315648723181e-06, "loss": 0.4776, "step": 3142 }, { "epoch": 2.520449077786688, "grad_norm": 0.9339450001716614, "learning_rate": 1.311026107121599e-06, "loss": 0.4872, "step": 3143 }, { "epoch": 2.5212510024057737, "grad_norm": 0.8395841121673584, "learning_rate": 1.3067430808085534e-06, "loss": 0.4718, "step": 3144 }, { "epoch": 2.5220529270248595, "grad_norm": 0.8910388946533203, "learning_rate": 1.3024665729956054e-06, "loss": 0.4844, "step": 3145 }, { "epoch": 2.5228548516439453, "grad_norm": 0.8879891037940979, "learning_rate": 1.2981965868894287e-06, "loss": 0.4937, "step": 3146 }, { "epoch": 2.5236567762630315, "grad_norm": 0.8522821068763733, "learning_rate": 1.2939331256917974e-06, "loss": 0.4719, "step": 3147 }, { "epoch": 2.524458700882117, "grad_norm": 0.8639122843742371, "learning_rate": 1.2896761925996082e-06, "loss": 0.4651, "step": 3148 }, { "epoch": 2.525260625501203, "grad_norm": 0.832323431968689, "learning_rate": 1.2854257908048483e-06, "loss": 0.4804, "step": 3149 }, { "epoch": 2.5260625501202885, "grad_norm": 0.8917650580406189, "learning_rate": 1.2811819234946165e-06, "loss": 0.4942, "step": 3150 }, { "epoch": 2.5268644747393747, "grad_norm": 0.8569352030754089, "learning_rate": 1.2769445938511104e-06, "loss": 0.4817, "step": 3151 }, { "epoch": 2.52766639935846, "grad_norm": 0.8589757084846497, "learning_rate": 1.2727138050516175e-06, "loss": 0.4906, "step": 3152 }, { "epoch": 2.5284683239775463, "grad_norm": 0.868578314781189, "learning_rate": 1.2684895602685377e-06, "loss": 0.5072, "step": 3153 }, { "epoch": 2.529270248596632, "grad_norm": 0.8775882124900818, "learning_rate": 1.264271862669344e-06, "loss": 0.4789, "step": 3154 }, { "epoch": 2.530072173215718, "grad_norm": 0.9492253661155701, "learning_rate": 1.2600607154166146e-06, "loss": 0.4941, "step": 3155 }, { "epoch": 2.5308740978348037, "grad_norm": 0.8520070910453796, "learning_rate": 1.255856121668012e-06, "loss": 0.4633, "step": 3156 }, { "epoch": 2.5316760224538895, "grad_norm": 0.8257150053977966, "learning_rate": 1.2516580845762804e-06, "loss": 0.4652, "step": 3157 }, { "epoch": 2.5324779470729752, "grad_norm": 0.885618269443512, "learning_rate": 1.2474666072892527e-06, "loss": 0.4865, "step": 3158 }, { "epoch": 2.533279871692061, "grad_norm": 0.8766368627548218, "learning_rate": 1.2432816929498425e-06, "loss": 0.4958, "step": 3159 }, { "epoch": 2.534081796311147, "grad_norm": 0.8656640648841858, "learning_rate": 1.2391033446960355e-06, "loss": 0.4913, "step": 3160 }, { "epoch": 2.5348837209302326, "grad_norm": 0.8739482760429382, "learning_rate": 1.2349315656609085e-06, "loss": 0.4721, "step": 3161 }, { "epoch": 2.5356856455493184, "grad_norm": 0.9599235653877258, "learning_rate": 1.230766358972596e-06, "loss": 0.4921, "step": 3162 }, { "epoch": 2.536487570168404, "grad_norm": 0.8633760213851929, "learning_rate": 1.2266077277543155e-06, "loss": 0.4668, "step": 3163 }, { "epoch": 2.53728949478749, "grad_norm": 0.8226913809776306, "learning_rate": 1.22245567512435e-06, "loss": 0.4562, "step": 3164 }, { "epoch": 2.538091419406576, "grad_norm": 0.8109039664268494, "learning_rate": 1.218310204196046e-06, "loss": 0.4515, "step": 3165 }, { "epoch": 2.5388933440256616, "grad_norm": 0.8582691550254822, "learning_rate": 1.214171318077827e-06, "loss": 0.4797, "step": 3166 }, { "epoch": 2.5396952686447474, "grad_norm": 0.8951324224472046, "learning_rate": 1.2100390198731627e-06, "loss": 0.4859, "step": 3167 }, { "epoch": 2.540497193263833, "grad_norm": 0.8818310499191284, "learning_rate": 1.2059133126805956e-06, "loss": 0.4909, "step": 3168 }, { "epoch": 2.541299117882919, "grad_norm": 0.8491972088813782, "learning_rate": 1.201794199593721e-06, "loss": 0.4824, "step": 3169 }, { "epoch": 2.5421010425020047, "grad_norm": 0.9278321266174316, "learning_rate": 1.197681683701185e-06, "loss": 0.4734, "step": 3170 }, { "epoch": 2.5429029671210905, "grad_norm": 0.8294722437858582, "learning_rate": 1.193575768086701e-06, "loss": 0.4577, "step": 3171 }, { "epoch": 2.5437048917401763, "grad_norm": 0.7998701333999634, "learning_rate": 1.1894764558290172e-06, "loss": 0.4741, "step": 3172 }, { "epoch": 2.544506816359262, "grad_norm": 0.8978516459465027, "learning_rate": 1.1853837500019406e-06, "loss": 0.4739, "step": 3173 }, { "epoch": 2.545308740978348, "grad_norm": 0.8539999723434448, "learning_rate": 1.1812976536743226e-06, "loss": 0.4827, "step": 3174 }, { "epoch": 2.5461106655974337, "grad_norm": 0.8564225435256958, "learning_rate": 1.1772181699100538e-06, "loss": 0.4658, "step": 3175 }, { "epoch": 2.5469125902165195, "grad_norm": 0.8567453026771545, "learning_rate": 1.1731453017680716e-06, "loss": 0.4624, "step": 3176 }, { "epoch": 2.5477145148356053, "grad_norm": 0.8754240870475769, "learning_rate": 1.169079052302352e-06, "loss": 0.4923, "step": 3177 }, { "epoch": 2.5485164394546915, "grad_norm": 0.8477223515510559, "learning_rate": 1.1650194245619062e-06, "loss": 0.5005, "step": 3178 }, { "epoch": 2.549318364073777, "grad_norm": 0.8424686789512634, "learning_rate": 1.1609664215907846e-06, "loss": 0.4712, "step": 3179 }, { "epoch": 2.550120288692863, "grad_norm": 0.8633278012275696, "learning_rate": 1.1569200464280616e-06, "loss": 0.502, "step": 3180 }, { "epoch": 2.5509222133119485, "grad_norm": 0.8947163224220276, "learning_rate": 1.1528803021078505e-06, "loss": 0.4818, "step": 3181 }, { "epoch": 2.5517241379310347, "grad_norm": 0.8522531986236572, "learning_rate": 1.148847191659288e-06, "loss": 0.4868, "step": 3182 }, { "epoch": 2.55252606255012, "grad_norm": 0.8658236265182495, "learning_rate": 1.1448207181065385e-06, "loss": 0.4717, "step": 3183 }, { "epoch": 2.5533279871692063, "grad_norm": 0.919199526309967, "learning_rate": 1.1408008844687901e-06, "loss": 0.5037, "step": 3184 }, { "epoch": 2.5541299117882916, "grad_norm": 0.8818656802177429, "learning_rate": 1.1367876937602474e-06, "loss": 0.4904, "step": 3185 }, { "epoch": 2.554931836407378, "grad_norm": 0.8673224449157715, "learning_rate": 1.1327811489901398e-06, "loss": 0.4395, "step": 3186 }, { "epoch": 2.5557337610264637, "grad_norm": 0.8441712260246277, "learning_rate": 1.1287812531627108e-06, "loss": 0.4864, "step": 3187 }, { "epoch": 2.5565356856455494, "grad_norm": 0.8633151650428772, "learning_rate": 1.1247880092772202e-06, "loss": 0.4841, "step": 3188 }, { "epoch": 2.5573376102646352, "grad_norm": 0.8488594889640808, "learning_rate": 1.120801420327935e-06, "loss": 0.4965, "step": 3189 }, { "epoch": 2.558139534883721, "grad_norm": 0.8840992450714111, "learning_rate": 1.1168214893041363e-06, "loss": 0.4798, "step": 3190 }, { "epoch": 2.558941459502807, "grad_norm": 0.909084677696228, "learning_rate": 1.1128482191901124e-06, "loss": 0.4761, "step": 3191 }, { "epoch": 2.5597433841218926, "grad_norm": 0.8316271901130676, "learning_rate": 1.1088816129651569e-06, "loss": 0.4709, "step": 3192 }, { "epoch": 2.5605453087409784, "grad_norm": 0.8329962491989136, "learning_rate": 1.1049216736035673e-06, "loss": 0.4643, "step": 3193 }, { "epoch": 2.561347233360064, "grad_norm": 0.8709492683410645, "learning_rate": 1.1009684040746394e-06, "loss": 0.4716, "step": 3194 }, { "epoch": 2.56214915797915, "grad_norm": 0.8427078723907471, "learning_rate": 1.0970218073426674e-06, "loss": 0.4831, "step": 3195 }, { "epoch": 2.562951082598236, "grad_norm": 0.8805751204490662, "learning_rate": 1.093081886366948e-06, "loss": 0.4631, "step": 3196 }, { "epoch": 2.5637530072173216, "grad_norm": 0.8472068905830383, "learning_rate": 1.0891486441017652e-06, "loss": 0.4822, "step": 3197 }, { "epoch": 2.5645549318364074, "grad_norm": 0.8687289357185364, "learning_rate": 1.085222083496401e-06, "loss": 0.4685, "step": 3198 }, { "epoch": 2.565356856455493, "grad_norm": 0.857964813709259, "learning_rate": 1.0813022074951208e-06, "loss": 0.4577, "step": 3199 }, { "epoch": 2.566158781074579, "grad_norm": 0.8346413373947144, "learning_rate": 1.0773890190371828e-06, "loss": 0.4715, "step": 3200 }, { "epoch": 2.5669607056936647, "grad_norm": 0.8784050345420837, "learning_rate": 1.07348252105683e-06, "loss": 0.5006, "step": 3201 }, { "epoch": 2.5677626303127505, "grad_norm": 0.8425981402397156, "learning_rate": 1.0695827164832828e-06, "loss": 0.486, "step": 3202 }, { "epoch": 2.5685645549318363, "grad_norm": 0.886447012424469, "learning_rate": 1.0656896082407554e-06, "loss": 0.4815, "step": 3203 }, { "epoch": 2.569366479550922, "grad_norm": 0.8257114291191101, "learning_rate": 1.0618031992484267e-06, "loss": 0.4633, "step": 3204 }, { "epoch": 2.570168404170008, "grad_norm": 0.8750101923942566, "learning_rate": 1.0579234924204608e-06, "loss": 0.4793, "step": 3205 }, { "epoch": 2.5709703287890937, "grad_norm": 0.9001625180244446, "learning_rate": 1.0540504906659955e-06, "loss": 0.4668, "step": 3206 }, { "epoch": 2.5717722534081795, "grad_norm": 0.8314618468284607, "learning_rate": 1.0501841968891324e-06, "loss": 0.4718, "step": 3207 }, { "epoch": 2.5725741780272653, "grad_norm": 0.8423780202865601, "learning_rate": 1.0463246139889604e-06, "loss": 0.4886, "step": 3208 }, { "epoch": 2.573376102646351, "grad_norm": 0.8424227237701416, "learning_rate": 1.04247174485952e-06, "loss": 0.4784, "step": 3209 }, { "epoch": 2.574178027265437, "grad_norm": 0.8639745116233826, "learning_rate": 1.0386255923898236e-06, "loss": 0.4853, "step": 3210 }, { "epoch": 2.574979951884523, "grad_norm": 0.906941294670105, "learning_rate": 1.0347861594638519e-06, "loss": 0.4797, "step": 3211 }, { "epoch": 2.5757818765036085, "grad_norm": 0.8813538551330566, "learning_rate": 1.0309534489605344e-06, "loss": 0.4974, "step": 3212 }, { "epoch": 2.5765838011226947, "grad_norm": 0.8710605502128601, "learning_rate": 1.0271274637537764e-06, "loss": 0.476, "step": 3213 }, { "epoch": 2.57738572574178, "grad_norm": 0.9004622101783752, "learning_rate": 1.0233082067124266e-06, "loss": 0.4821, "step": 3214 }, { "epoch": 2.5781876503608663, "grad_norm": 0.9231355786323547, "learning_rate": 1.0194956807002965e-06, "loss": 0.4919, "step": 3215 }, { "epoch": 2.5789895749799516, "grad_norm": 0.8449090719223022, "learning_rate": 1.015689888576149e-06, "loss": 0.4442, "step": 3216 }, { "epoch": 2.579791499599038, "grad_norm": 0.9300955533981323, "learning_rate": 1.0118908331936915e-06, "loss": 0.4931, "step": 3217 }, { "epoch": 2.5805934242181237, "grad_norm": 0.8253883719444275, "learning_rate": 1.0080985174015901e-06, "loss": 0.4689, "step": 3218 }, { "epoch": 2.5813953488372094, "grad_norm": 0.9097961783409119, "learning_rate": 1.0043129440434496e-06, "loss": 0.4745, "step": 3219 }, { "epoch": 2.5821972734562952, "grad_norm": 0.8564082384109497, "learning_rate": 1.000534115957823e-06, "loss": 0.4759, "step": 3220 }, { "epoch": 2.582999198075381, "grad_norm": 0.8668890595436096, "learning_rate": 9.96762035978206e-07, "loss": 0.4745, "step": 3221 }, { "epoch": 2.583801122694467, "grad_norm": 0.8841666579246521, "learning_rate": 9.929967069330282e-07, "loss": 0.4765, "step": 3222 }, { "epoch": 2.5846030473135526, "grad_norm": 0.8739838600158691, "learning_rate": 9.892381316456656e-07, "loss": 0.4634, "step": 3223 }, { "epoch": 2.5854049719326384, "grad_norm": 0.8660984039306641, "learning_rate": 9.854863129344229e-07, "loss": 0.4719, "step": 3224 }, { "epoch": 2.586206896551724, "grad_norm": 0.8962447643280029, "learning_rate": 9.817412536125449e-07, "loss": 0.479, "step": 3225 }, { "epoch": 2.58700882117081, "grad_norm": 0.8654753565788269, "learning_rate": 9.780029564882032e-07, "loss": 0.4506, "step": 3226 }, { "epoch": 2.587810745789896, "grad_norm": 0.8893976211547852, "learning_rate": 9.74271424364498e-07, "loss": 0.459, "step": 3227 }, { "epoch": 2.5886126704089816, "grad_norm": 0.9372872710227966, "learning_rate": 9.70546660039462e-07, "loss": 0.4895, "step": 3228 }, { "epoch": 2.5894145950280674, "grad_norm": 0.8435534238815308, "learning_rate": 9.66828666306049e-07, "loss": 0.4948, "step": 3229 }, { "epoch": 2.590216519647153, "grad_norm": 0.8981894850730896, "learning_rate": 9.631174459521398e-07, "loss": 0.4559, "step": 3230 }, { "epoch": 2.591018444266239, "grad_norm": 0.8509783744812012, "learning_rate": 9.594130017605296e-07, "loss": 0.4704, "step": 3231 }, { "epoch": 2.5918203688853247, "grad_norm": 0.8438981175422668, "learning_rate": 9.5571533650894e-07, "loss": 0.4753, "step": 3232 }, { "epoch": 2.5926222935044105, "grad_norm": 0.8406031131744385, "learning_rate": 9.520244529700041e-07, "loss": 0.4596, "step": 3233 }, { "epoch": 2.5934242181234963, "grad_norm": 0.8662395477294922, "learning_rate": 9.483403539112735e-07, "loss": 0.4481, "step": 3234 }, { "epoch": 2.594226142742582, "grad_norm": 0.843547523021698, "learning_rate": 9.44663042095213e-07, "loss": 0.4611, "step": 3235 }, { "epoch": 2.595028067361668, "grad_norm": 0.8858677744865417, "learning_rate": 9.409925202791925e-07, "loss": 0.4842, "step": 3236 }, { "epoch": 2.5958299919807537, "grad_norm": 0.8714892864227295, "learning_rate": 9.37328791215496e-07, "loss": 0.462, "step": 3237 }, { "epoch": 2.5966319165998395, "grad_norm": 0.8578234910964966, "learning_rate": 9.336718576513127e-07, "loss": 0.4721, "step": 3238 }, { "epoch": 2.5974338412189253, "grad_norm": 0.8844730257987976, "learning_rate": 9.300217223287345e-07, "loss": 0.4791, "step": 3239 }, { "epoch": 2.598235765838011, "grad_norm": 0.8658874034881592, "learning_rate": 9.263783879847599e-07, "loss": 0.4751, "step": 3240 }, { "epoch": 2.599037690457097, "grad_norm": 0.8326420187950134, "learning_rate": 9.227418573512825e-07, "loss": 0.4736, "step": 3241 }, { "epoch": 2.599839615076183, "grad_norm": 0.8331664800643921, "learning_rate": 9.191121331550967e-07, "loss": 0.4664, "step": 3242 }, { "epoch": 2.6006415396952685, "grad_norm": 0.915407657623291, "learning_rate": 9.154892181178954e-07, "loss": 0.4962, "step": 3243 }, { "epoch": 2.6014434643143547, "grad_norm": 0.8679154515266418, "learning_rate": 9.11873114956261e-07, "loss": 0.4601, "step": 3244 }, { "epoch": 2.60224538893344, "grad_norm": 0.8816805481910706, "learning_rate": 9.082638263816756e-07, "loss": 0.4632, "step": 3245 }, { "epoch": 2.6030473135525263, "grad_norm": 0.8480551838874817, "learning_rate": 9.046613551005012e-07, "loss": 0.4638, "step": 3246 }, { "epoch": 2.6038492381716116, "grad_norm": 0.8449149131774902, "learning_rate": 9.010657038139947e-07, "loss": 0.4933, "step": 3247 }, { "epoch": 2.604651162790698, "grad_norm": 0.8377960920333862, "learning_rate": 8.974768752183016e-07, "loss": 0.4751, "step": 3248 }, { "epoch": 2.605453087409783, "grad_norm": 0.8451635837554932, "learning_rate": 8.938948720044416e-07, "loss": 0.4731, "step": 3249 }, { "epoch": 2.6062550120288694, "grad_norm": 0.8990350365638733, "learning_rate": 8.903196968583295e-07, "loss": 0.4893, "step": 3250 }, { "epoch": 2.6070569366479552, "grad_norm": 0.8640419244766235, "learning_rate": 8.867513524607485e-07, "loss": 0.4816, "step": 3251 }, { "epoch": 2.607858861267041, "grad_norm": 0.8659276366233826, "learning_rate": 8.831898414873663e-07, "loss": 0.4691, "step": 3252 }, { "epoch": 2.608660785886127, "grad_norm": 0.8834055066108704, "learning_rate": 8.796351666087266e-07, "loss": 0.4584, "step": 3253 }, { "epoch": 2.6094627105052126, "grad_norm": 0.8305429220199585, "learning_rate": 8.760873304902406e-07, "loss": 0.448, "step": 3254 }, { "epoch": 2.6102646351242984, "grad_norm": 0.8624115586280823, "learning_rate": 8.725463357922037e-07, "loss": 0.4716, "step": 3255 }, { "epoch": 2.611066559743384, "grad_norm": 0.838100016117096, "learning_rate": 8.690121851697697e-07, "loss": 0.4564, "step": 3256 }, { "epoch": 2.61186848436247, "grad_norm": 0.8911872506141663, "learning_rate": 8.654848812729655e-07, "loss": 0.4695, "step": 3257 }, { "epoch": 2.612670408981556, "grad_norm": 0.9087029099464417, "learning_rate": 8.619644267466876e-07, "loss": 0.487, "step": 3258 }, { "epoch": 2.6134723336006416, "grad_norm": 0.8725029826164246, "learning_rate": 8.584508242306844e-07, "loss": 0.4856, "step": 3259 }, { "epoch": 2.6142742582197274, "grad_norm": 0.9196523427963257, "learning_rate": 8.549440763595851e-07, "loss": 0.4754, "step": 3260 }, { "epoch": 2.615076182838813, "grad_norm": 0.7980285286903381, "learning_rate": 8.514441857628619e-07, "loss": 0.4519, "step": 3261 }, { "epoch": 2.615878107457899, "grad_norm": 0.8673176169395447, "learning_rate": 8.479511550648512e-07, "loss": 0.4633, "step": 3262 }, { "epoch": 2.6166800320769847, "grad_norm": 0.8644046783447266, "learning_rate": 8.44464986884751e-07, "loss": 0.4848, "step": 3263 }, { "epoch": 2.6174819566960705, "grad_norm": 0.8942599296569824, "learning_rate": 8.40985683836606e-07, "loss": 0.4959, "step": 3264 }, { "epoch": 2.6182838813151563, "grad_norm": 0.8743693232536316, "learning_rate": 8.375132485293158e-07, "loss": 0.4744, "step": 3265 }, { "epoch": 2.619085805934242, "grad_norm": 0.852287232875824, "learning_rate": 8.340476835666345e-07, "loss": 0.4812, "step": 3266 }, { "epoch": 2.619887730553328, "grad_norm": 0.8935552835464478, "learning_rate": 8.305889915471532e-07, "loss": 0.4776, "step": 3267 }, { "epoch": 2.6206896551724137, "grad_norm": 0.8734551072120667, "learning_rate": 8.271371750643265e-07, "loss": 0.4794, "step": 3268 }, { "epoch": 2.6214915797914995, "grad_norm": 0.8658239245414734, "learning_rate": 8.236922367064359e-07, "loss": 0.4691, "step": 3269 }, { "epoch": 2.6222935044105853, "grad_norm": 0.8544544577598572, "learning_rate": 8.202541790566176e-07, "loss": 0.4987, "step": 3270 }, { "epoch": 2.623095429029671, "grad_norm": 0.8634313344955444, "learning_rate": 8.16823004692845e-07, "loss": 0.4704, "step": 3271 }, { "epoch": 2.623897353648757, "grad_norm": 0.9538077116012573, "learning_rate": 8.133987161879231e-07, "loss": 0.4827, "step": 3272 }, { "epoch": 2.6246992782678427, "grad_norm": 0.8885698914527893, "learning_rate": 8.099813161095094e-07, "loss": 0.4685, "step": 3273 }, { "epoch": 2.6255012028869285, "grad_norm": 0.8560696840286255, "learning_rate": 8.065708070200806e-07, "loss": 0.4699, "step": 3274 }, { "epoch": 2.6263031275060147, "grad_norm": 0.8704147934913635, "learning_rate": 8.031671914769545e-07, "loss": 0.4734, "step": 3275 }, { "epoch": 2.6271050521251, "grad_norm": 0.8830083608627319, "learning_rate": 7.997704720322785e-07, "loss": 0.4737, "step": 3276 }, { "epoch": 2.6279069767441863, "grad_norm": 0.8642050623893738, "learning_rate": 7.963806512330275e-07, "loss": 0.4699, "step": 3277 }, { "epoch": 2.6287089013632716, "grad_norm": 0.8747822642326355, "learning_rate": 7.929977316210036e-07, "loss": 0.4592, "step": 3278 }, { "epoch": 2.629510825982358, "grad_norm": 0.8815390467643738, "learning_rate": 7.896217157328357e-07, "loss": 0.4706, "step": 3279 }, { "epoch": 2.630312750601443, "grad_norm": 0.8473573327064514, "learning_rate": 7.862526060999775e-07, "loss": 0.4753, "step": 3280 }, { "epoch": 2.6311146752205294, "grad_norm": 0.8307991623878479, "learning_rate": 7.828904052487019e-07, "loss": 0.467, "step": 3281 }, { "epoch": 2.6319165998396152, "grad_norm": 0.8285624384880066, "learning_rate": 7.795351157000986e-07, "loss": 0.4571, "step": 3282 }, { "epoch": 2.632718524458701, "grad_norm": 0.8307252526283264, "learning_rate": 7.761867399700796e-07, "loss": 0.485, "step": 3283 }, { "epoch": 2.633520449077787, "grad_norm": 0.8082962036132812, "learning_rate": 7.72845280569372e-07, "loss": 0.4594, "step": 3284 }, { "epoch": 2.6343223736968726, "grad_norm": 0.8508808016777039, "learning_rate": 7.69510740003514e-07, "loss": 0.4566, "step": 3285 }, { "epoch": 2.6351242983159584, "grad_norm": 0.8629611134529114, "learning_rate": 7.66183120772862e-07, "loss": 0.4669, "step": 3286 }, { "epoch": 2.635926222935044, "grad_norm": 0.8811891078948975, "learning_rate": 7.628624253725725e-07, "loss": 0.4717, "step": 3287 }, { "epoch": 2.63672814755413, "grad_norm": 0.8686394691467285, "learning_rate": 7.59548656292618e-07, "loss": 0.4819, "step": 3288 }, { "epoch": 2.637530072173216, "grad_norm": 0.9036654829978943, "learning_rate": 7.562418160177765e-07, "loss": 0.4884, "step": 3289 }, { "epoch": 2.6383319967923016, "grad_norm": 0.8560638427734375, "learning_rate": 7.529419070276312e-07, "loss": 0.4787, "step": 3290 }, { "epoch": 2.6391339214113874, "grad_norm": 0.8455655574798584, "learning_rate": 7.496489317965616e-07, "loss": 0.4466, "step": 3291 }, { "epoch": 2.639935846030473, "grad_norm": 0.9102020859718323, "learning_rate": 7.463628927937549e-07, "loss": 0.5097, "step": 3292 }, { "epoch": 2.640737770649559, "grad_norm": 0.8347408175468445, "learning_rate": 7.430837924831958e-07, "loss": 0.4628, "step": 3293 }, { "epoch": 2.6415396952686447, "grad_norm": 0.8465882539749146, "learning_rate": 7.398116333236638e-07, "loss": 0.4683, "step": 3294 }, { "epoch": 2.6423416198877305, "grad_norm": 0.8932040929794312, "learning_rate": 7.365464177687387e-07, "loss": 0.4939, "step": 3295 }, { "epoch": 2.6431435445068163, "grad_norm": 0.8394157290458679, "learning_rate": 7.332881482667853e-07, "loss": 0.4709, "step": 3296 }, { "epoch": 2.643945469125902, "grad_norm": 0.8661454319953918, "learning_rate": 7.300368272609692e-07, "loss": 0.4766, "step": 3297 }, { "epoch": 2.644747393744988, "grad_norm": 0.85788893699646, "learning_rate": 7.267924571892382e-07, "loss": 0.4845, "step": 3298 }, { "epoch": 2.6455493183640737, "grad_norm": 0.8850892186164856, "learning_rate": 7.23555040484335e-07, "loss": 0.4652, "step": 3299 }, { "epoch": 2.6463512429831595, "grad_norm": 0.9400017857551575, "learning_rate": 7.203245795737834e-07, "loss": 0.4986, "step": 3300 }, { "epoch": 2.6471531676022453, "grad_norm": 0.8622959852218628, "learning_rate": 7.171010768798925e-07, "loss": 0.4876, "step": 3301 }, { "epoch": 2.647955092221331, "grad_norm": 0.8049087524414062, "learning_rate": 7.138845348197532e-07, "loss": 0.4665, "step": 3302 }, { "epoch": 2.648757016840417, "grad_norm": 0.8678800463676453, "learning_rate": 7.106749558052428e-07, "loss": 0.4727, "step": 3303 }, { "epoch": 2.6495589414595027, "grad_norm": 0.8367797136306763, "learning_rate": 7.074723422430052e-07, "loss": 0.4743, "step": 3304 }, { "epoch": 2.6503608660785885, "grad_norm": 0.8551909327507019, "learning_rate": 7.042766965344782e-07, "loss": 0.4498, "step": 3305 }, { "epoch": 2.6511627906976747, "grad_norm": 0.8910350203514099, "learning_rate": 7.010880210758597e-07, "loss": 0.4905, "step": 3306 }, { "epoch": 2.65196471531676, "grad_norm": 1.0603433847427368, "learning_rate": 6.979063182581291e-07, "loss": 0.4695, "step": 3307 }, { "epoch": 2.6527666399358463, "grad_norm": 0.8967297673225403, "learning_rate": 6.94731590467036e-07, "loss": 0.4916, "step": 3308 }, { "epoch": 2.6535685645549316, "grad_norm": 0.9202722311019897, "learning_rate": 6.915638400830959e-07, "loss": 0.4965, "step": 3309 }, { "epoch": 2.654370489174018, "grad_norm": 0.8630130290985107, "learning_rate": 6.884030694816024e-07, "loss": 0.4634, "step": 3310 }, { "epoch": 2.655172413793103, "grad_norm": 0.8898342847824097, "learning_rate": 6.852492810326028e-07, "loss": 0.463, "step": 3311 }, { "epoch": 2.6559743384121894, "grad_norm": 0.8422430753707886, "learning_rate": 6.821024771009188e-07, "loss": 0.4686, "step": 3312 }, { "epoch": 2.656776263031275, "grad_norm": 0.8274664282798767, "learning_rate": 6.789626600461307e-07, "loss": 0.4703, "step": 3313 }, { "epoch": 2.657578187650361, "grad_norm": 0.8955613970756531, "learning_rate": 6.758298322225765e-07, "loss": 0.4771, "step": 3314 }, { "epoch": 2.658380112269447, "grad_norm": 0.8348634243011475, "learning_rate": 6.727039959793635e-07, "loss": 0.4513, "step": 3315 }, { "epoch": 2.6591820368885326, "grad_norm": 0.8191150426864624, "learning_rate": 6.69585153660347e-07, "loss": 0.454, "step": 3316 }, { "epoch": 2.6599839615076184, "grad_norm": 0.8912159204483032, "learning_rate": 6.664733076041374e-07, "loss": 0.4835, "step": 3317 }, { "epoch": 2.660785886126704, "grad_norm": 0.8224286437034607, "learning_rate": 6.633684601441092e-07, "loss": 0.4868, "step": 3318 }, { "epoch": 2.66158781074579, "grad_norm": 0.8863853812217712, "learning_rate": 6.602706136083792e-07, "loss": 0.4837, "step": 3319 }, { "epoch": 2.6623897353648758, "grad_norm": 0.9140603542327881, "learning_rate": 6.57179770319819e-07, "loss": 0.4754, "step": 3320 }, { "epoch": 2.6631916599839616, "grad_norm": 0.8462338447570801, "learning_rate": 6.540959325960494e-07, "loss": 0.4739, "step": 3321 }, { "epoch": 2.6639935846030474, "grad_norm": 0.90171217918396, "learning_rate": 6.510191027494339e-07, "loss": 0.4652, "step": 3322 }, { "epoch": 2.664795509222133, "grad_norm": 0.878610372543335, "learning_rate": 6.479492830870881e-07, "loss": 0.4673, "step": 3323 }, { "epoch": 2.665597433841219, "grad_norm": 0.8632857799530029, "learning_rate": 6.448864759108642e-07, "loss": 0.4864, "step": 3324 }, { "epoch": 2.6663993584603047, "grad_norm": 0.8719748258590698, "learning_rate": 6.418306835173605e-07, "loss": 0.5048, "step": 3325 }, { "epoch": 2.6672012830793905, "grad_norm": 0.8836221098899841, "learning_rate": 6.387819081979163e-07, "loss": 0.4702, "step": 3326 }, { "epoch": 2.6680032076984763, "grad_norm": 0.8506346940994263, "learning_rate": 6.35740152238602e-07, "loss": 0.4492, "step": 3327 }, { "epoch": 2.668805132317562, "grad_norm": 0.9193611145019531, "learning_rate": 6.327054179202352e-07, "loss": 0.4562, "step": 3328 }, { "epoch": 2.669607056936648, "grad_norm": 0.8532218933105469, "learning_rate": 6.296777075183602e-07, "loss": 0.4693, "step": 3329 }, { "epoch": 2.6704089815557337, "grad_norm": 0.8159738779067993, "learning_rate": 6.266570233032576e-07, "loss": 0.4745, "step": 3330 }, { "epoch": 2.6712109061748195, "grad_norm": 0.8848310112953186, "learning_rate": 6.236433675399412e-07, "loss": 0.4801, "step": 3331 }, { "epoch": 2.6720128307939053, "grad_norm": 0.80049067735672, "learning_rate": 6.206367424881487e-07, "loss": 0.4469, "step": 3332 }, { "epoch": 2.672814755412991, "grad_norm": 0.8821406960487366, "learning_rate": 6.176371504023537e-07, "loss": 0.4731, "step": 3333 }, { "epoch": 2.673616680032077, "grad_norm": 0.8810633420944214, "learning_rate": 6.146445935317502e-07, "loss": 0.467, "step": 3334 }, { "epoch": 2.6744186046511627, "grad_norm": 0.8649298548698425, "learning_rate": 6.116590741202611e-07, "loss": 0.4911, "step": 3335 }, { "epoch": 2.6752205292702484, "grad_norm": 0.8359307646751404, "learning_rate": 6.08680594406531e-07, "loss": 0.4564, "step": 3336 }, { "epoch": 2.6760224538893342, "grad_norm": 0.876586377620697, "learning_rate": 6.057091566239226e-07, "loss": 0.4856, "step": 3337 }, { "epoch": 2.67682437850842, "grad_norm": 0.9136984944343567, "learning_rate": 6.027447630005234e-07, "loss": 0.5089, "step": 3338 }, { "epoch": 2.6776263031275063, "grad_norm": 0.899994432926178, "learning_rate": 5.997874157591344e-07, "loss": 0.4879, "step": 3339 }, { "epoch": 2.6784282277465916, "grad_norm": 0.8839572072029114, "learning_rate": 5.968371171172782e-07, "loss": 0.4718, "step": 3340 }, { "epoch": 2.679230152365678, "grad_norm": 0.8831132650375366, "learning_rate": 5.938938692871887e-07, "loss": 0.4668, "step": 3341 }, { "epoch": 2.680032076984763, "grad_norm": 0.9043929576873779, "learning_rate": 5.909576744758117e-07, "loss": 0.4733, "step": 3342 }, { "epoch": 2.6808340016038494, "grad_norm": 0.8958361744880676, "learning_rate": 5.880285348848069e-07, "loss": 0.478, "step": 3343 }, { "epoch": 2.681635926222935, "grad_norm": 0.8278128504753113, "learning_rate": 5.851064527105421e-07, "loss": 0.4736, "step": 3344 }, { "epoch": 2.682437850842021, "grad_norm": 0.8567398190498352, "learning_rate": 5.821914301440956e-07, "loss": 0.4548, "step": 3345 }, { "epoch": 2.683239775461107, "grad_norm": 0.9090203046798706, "learning_rate": 5.792834693712502e-07, "loss": 0.4774, "step": 3346 }, { "epoch": 2.6840417000801926, "grad_norm": 0.8930779099464417, "learning_rate": 5.763825725724925e-07, "loss": 0.4796, "step": 3347 }, { "epoch": 2.6848436246992784, "grad_norm": 0.8618937730789185, "learning_rate": 5.734887419230151e-07, "loss": 0.4736, "step": 3348 }, { "epoch": 2.685645549318364, "grad_norm": 0.8632767796516418, "learning_rate": 5.70601979592711e-07, "loss": 0.4666, "step": 3349 }, { "epoch": 2.68644747393745, "grad_norm": 0.8496332764625549, "learning_rate": 5.67722287746173e-07, "loss": 0.4702, "step": 3350 }, { "epoch": 2.6872493985565358, "grad_norm": 0.9124326705932617, "learning_rate": 5.648496685426908e-07, "loss": 0.5128, "step": 3351 }, { "epoch": 2.6880513231756216, "grad_norm": 0.8609637022018433, "learning_rate": 5.619841241362522e-07, "loss": 0.4635, "step": 3352 }, { "epoch": 2.6888532477947074, "grad_norm": 0.8587467074394226, "learning_rate": 5.591256566755399e-07, "loss": 0.4702, "step": 3353 }, { "epoch": 2.689655172413793, "grad_norm": 0.8724468350410461, "learning_rate": 5.562742683039313e-07, "loss": 0.5009, "step": 3354 }, { "epoch": 2.690457097032879, "grad_norm": 0.8646350502967834, "learning_rate": 5.534299611594962e-07, "loss": 0.462, "step": 3355 }, { "epoch": 2.6912590216519647, "grad_norm": 0.8256325721740723, "learning_rate": 5.505927373749887e-07, "loss": 0.4507, "step": 3356 }, { "epoch": 2.6920609462710505, "grad_norm": 0.8692865371704102, "learning_rate": 5.477625990778579e-07, "loss": 0.4719, "step": 3357 }, { "epoch": 2.6928628708901363, "grad_norm": 0.8937420845031738, "learning_rate": 5.449395483902376e-07, "loss": 0.4955, "step": 3358 }, { "epoch": 2.693664795509222, "grad_norm": 0.8685393333435059, "learning_rate": 5.421235874289488e-07, "loss": 0.4784, "step": 3359 }, { "epoch": 2.694466720128308, "grad_norm": 0.8677978515625, "learning_rate": 5.393147183054936e-07, "loss": 0.4807, "step": 3360 }, { "epoch": 2.6952686447473937, "grad_norm": 0.8556413650512695, "learning_rate": 5.365129431260574e-07, "loss": 0.4603, "step": 3361 }, { "epoch": 2.6960705693664795, "grad_norm": 0.9038963913917542, "learning_rate": 5.337182639915073e-07, "loss": 0.4935, "step": 3362 }, { "epoch": 2.6968724939855653, "grad_norm": 0.8712881803512573, "learning_rate": 5.309306829973892e-07, "loss": 0.4893, "step": 3363 }, { "epoch": 2.697674418604651, "grad_norm": 0.8390231728553772, "learning_rate": 5.281502022339236e-07, "loss": 0.4682, "step": 3364 }, { "epoch": 2.698476343223737, "grad_norm": 0.8879370093345642, "learning_rate": 5.253768237860146e-07, "loss": 0.4695, "step": 3365 }, { "epoch": 2.6992782678428227, "grad_norm": 0.9008282423019409, "learning_rate": 5.226105497332323e-07, "loss": 0.4811, "step": 3366 }, { "epoch": 2.7000801924619084, "grad_norm": 0.9389668107032776, "learning_rate": 5.19851382149823e-07, "loss": 0.4956, "step": 3367 }, { "epoch": 2.7008821170809942, "grad_norm": 0.8313438296318054, "learning_rate": 5.170993231047072e-07, "loss": 0.4591, "step": 3368 }, { "epoch": 2.70168404170008, "grad_norm": 0.8891331553459167, "learning_rate": 5.143543746614688e-07, "loss": 0.4968, "step": 3369 }, { "epoch": 2.7024859663191663, "grad_norm": 0.861348569393158, "learning_rate": 5.116165388783678e-07, "loss": 0.4716, "step": 3370 }, { "epoch": 2.7032878909382516, "grad_norm": 0.8426232933998108, "learning_rate": 5.088858178083223e-07, "loss": 0.487, "step": 3371 }, { "epoch": 2.704089815557338, "grad_norm": 0.8665948510169983, "learning_rate": 5.06162213498923e-07, "loss": 0.4549, "step": 3372 }, { "epoch": 2.704891740176423, "grad_norm": 0.8754244446754456, "learning_rate": 5.034457279924221e-07, "loss": 0.4989, "step": 3373 }, { "epoch": 2.7056936647955094, "grad_norm": 0.8739911317825317, "learning_rate": 5.007363633257278e-07, "loss": 0.4558, "step": 3374 }, { "epoch": 2.706495589414595, "grad_norm": 0.8367862701416016, "learning_rate": 4.980341215304196e-07, "loss": 0.4608, "step": 3375 }, { "epoch": 2.707297514033681, "grad_norm": 0.8726009130477905, "learning_rate": 4.953390046327278e-07, "loss": 0.4706, "step": 3376 }, { "epoch": 2.7080994386527664, "grad_norm": 0.8462924361228943, "learning_rate": 4.926510146535434e-07, "loss": 0.4618, "step": 3377 }, { "epoch": 2.7089013632718526, "grad_norm": 0.9273150563240051, "learning_rate": 4.899701536084134e-07, "loss": 0.4875, "step": 3378 }, { "epoch": 2.7097032878909384, "grad_norm": 0.8953900933265686, "learning_rate": 4.872964235075361e-07, "loss": 0.4932, "step": 3379 }, { "epoch": 2.710505212510024, "grad_norm": 0.8775522708892822, "learning_rate": 4.846298263557681e-07, "loss": 0.4835, "step": 3380 }, { "epoch": 2.71130713712911, "grad_norm": 0.8793307542800903, "learning_rate": 4.819703641526141e-07, "loss": 0.4748, "step": 3381 }, { "epoch": 2.7121090617481958, "grad_norm": 0.8890559673309326, "learning_rate": 4.793180388922292e-07, "loss": 0.4906, "step": 3382 }, { "epoch": 2.7129109863672816, "grad_norm": 0.8522745370864868, "learning_rate": 4.766728525634179e-07, "loss": 0.466, "step": 3383 }, { "epoch": 2.7137129109863674, "grad_norm": 0.8627252578735352, "learning_rate": 4.7403480714963037e-07, "loss": 0.4565, "step": 3384 }, { "epoch": 2.714514835605453, "grad_norm": 0.8617047071456909, "learning_rate": 4.71403904628962e-07, "loss": 0.4707, "step": 3385 }, { "epoch": 2.715316760224539, "grad_norm": 0.8587998747825623, "learning_rate": 4.6878014697415374e-07, "loss": 0.4742, "step": 3386 }, { "epoch": 2.7161186848436247, "grad_norm": 0.8326482176780701, "learning_rate": 4.661635361525885e-07, "loss": 0.4557, "step": 3387 }, { "epoch": 2.7169206094627105, "grad_norm": 0.8801354765892029, "learning_rate": 4.635540741262923e-07, "loss": 0.4722, "step": 3388 }, { "epoch": 2.7177225340817963, "grad_norm": 0.8579322695732117, "learning_rate": 4.6095176285192556e-07, "loss": 0.4689, "step": 3389 }, { "epoch": 2.718524458700882, "grad_norm": 0.9005702137947083, "learning_rate": 4.583566042807908e-07, "loss": 0.4769, "step": 3390 }, { "epoch": 2.719326383319968, "grad_norm": 0.8632087707519531, "learning_rate": 4.557686003588269e-07, "loss": 0.4771, "step": 3391 }, { "epoch": 2.7201283079390537, "grad_norm": 0.8352670669555664, "learning_rate": 4.531877530266071e-07, "loss": 0.4509, "step": 3392 }, { "epoch": 2.7209302325581395, "grad_norm": 0.8583428263664246, "learning_rate": 4.506140642193391e-07, "loss": 0.4455, "step": 3393 }, { "epoch": 2.7217321571772253, "grad_norm": 0.9356517195701599, "learning_rate": 4.4804753586686013e-07, "loss": 0.4665, "step": 3394 }, { "epoch": 2.722534081796311, "grad_norm": 0.8645097017288208, "learning_rate": 4.454881698936431e-07, "loss": 0.4485, "step": 3395 }, { "epoch": 2.723336006415397, "grad_norm": 0.902538537979126, "learning_rate": 4.4293596821878613e-07, "loss": 0.4833, "step": 3396 }, { "epoch": 2.7241379310344827, "grad_norm": 0.8527434468269348, "learning_rate": 4.403909327560207e-07, "loss": 0.4663, "step": 3397 }, { "epoch": 2.7249398556535684, "grad_norm": 0.8391342759132385, "learning_rate": 4.378530654136948e-07, "loss": 0.4596, "step": 3398 }, { "epoch": 2.7257417802726542, "grad_norm": 0.8753255009651184, "learning_rate": 4.3532236809479265e-07, "loss": 0.461, "step": 3399 }, { "epoch": 2.72654370489174, "grad_norm": 0.8954243659973145, "learning_rate": 4.327988426969154e-07, "loss": 0.4777, "step": 3400 }, { "epoch": 2.727345629510826, "grad_norm": 0.8503953218460083, "learning_rate": 4.3028249111228824e-07, "loss": 0.5002, "step": 3401 }, { "epoch": 2.7281475541299116, "grad_norm": 0.8919215798377991, "learning_rate": 4.277733152277597e-07, "loss": 0.4702, "step": 3402 }, { "epoch": 2.728949478748998, "grad_norm": 0.8524766564369202, "learning_rate": 4.2527131692479127e-07, "loss": 0.457, "step": 3403 }, { "epoch": 2.729751403368083, "grad_norm": 0.8316718935966492, "learning_rate": 4.227764980794691e-07, "loss": 0.4827, "step": 3404 }, { "epoch": 2.7305533279871694, "grad_norm": 0.9017997980117798, "learning_rate": 4.202888605624944e-07, "loss": 0.4885, "step": 3405 }, { "epoch": 2.731355252606255, "grad_norm": 0.8500910401344299, "learning_rate": 4.178084062391774e-07, "loss": 0.4698, "step": 3406 }, { "epoch": 2.732157177225341, "grad_norm": 0.8581136465072632, "learning_rate": 4.153351369694536e-07, "loss": 0.4843, "step": 3407 }, { "epoch": 2.7329591018444264, "grad_norm": 0.8434959053993225, "learning_rate": 4.128690546078606e-07, "loss": 0.4624, "step": 3408 }, { "epoch": 2.7337610264635126, "grad_norm": 0.9332457184791565, "learning_rate": 4.104101610035527e-07, "loss": 0.4558, "step": 3409 }, { "epoch": 2.7345629510825984, "grad_norm": 0.8714927434921265, "learning_rate": 4.0795845800029156e-07, "loss": 0.4803, "step": 3410 }, { "epoch": 2.735364875701684, "grad_norm": 0.8897708654403687, "learning_rate": 4.055139474364456e-07, "loss": 0.4536, "step": 3411 }, { "epoch": 2.73616680032077, "grad_norm": 0.8492273688316345, "learning_rate": 4.030766311449952e-07, "loss": 0.4663, "step": 3412 }, { "epoch": 2.7369687249398558, "grad_norm": 0.8852546811103821, "learning_rate": 4.006465109535218e-07, "loss": 0.4609, "step": 3413 }, { "epoch": 2.7377706495589416, "grad_norm": 0.8187031149864197, "learning_rate": 3.9822358868421116e-07, "loss": 0.4631, "step": 3414 }, { "epoch": 2.7385725741780274, "grad_norm": 0.8688458204269409, "learning_rate": 3.958078661538567e-07, "loss": 0.4694, "step": 3415 }, { "epoch": 2.739374498797113, "grad_norm": 0.97969651222229, "learning_rate": 3.933993451738427e-07, "loss": 0.4959, "step": 3416 }, { "epoch": 2.740176423416199, "grad_norm": 0.8926424384117126, "learning_rate": 3.909980275501679e-07, "loss": 0.4829, "step": 3417 }, { "epoch": 2.7409783480352847, "grad_norm": 0.8417994379997253, "learning_rate": 3.8860391508341754e-07, "loss": 0.4633, "step": 3418 }, { "epoch": 2.7417802726543705, "grad_norm": 0.9168954491615295, "learning_rate": 3.8621700956877784e-07, "loss": 0.4935, "step": 3419 }, { "epoch": 2.7425821972734563, "grad_norm": 0.8628125786781311, "learning_rate": 3.8383731279603597e-07, "loss": 0.4714, "step": 3420 }, { "epoch": 2.743384121892542, "grad_norm": 0.8950872421264648, "learning_rate": 3.8146482654956574e-07, "loss": 0.4796, "step": 3421 }, { "epoch": 2.744186046511628, "grad_norm": 0.8650218844413757, "learning_rate": 3.7909955260833966e-07, "loss": 0.4808, "step": 3422 }, { "epoch": 2.7449879711307137, "grad_norm": 0.8490626215934753, "learning_rate": 3.767414927459223e-07, "loss": 0.4782, "step": 3423 }, { "epoch": 2.7457898957497995, "grad_norm": 0.8351301550865173, "learning_rate": 3.743906487304627e-07, "loss": 0.4578, "step": 3424 }, { "epoch": 2.7465918203688853, "grad_norm": 0.8559272289276123, "learning_rate": 3.720470223247097e-07, "loss": 0.4564, "step": 3425 }, { "epoch": 2.747393744987971, "grad_norm": 0.8744351267814636, "learning_rate": 3.697106152859886e-07, "loss": 0.4932, "step": 3426 }, { "epoch": 2.748195669607057, "grad_norm": 0.8866355419158936, "learning_rate": 3.6738142936622035e-07, "loss": 0.4939, "step": 3427 }, { "epoch": 2.7489975942261426, "grad_norm": 0.8977624177932739, "learning_rate": 3.650594663119089e-07, "loss": 0.4924, "step": 3428 }, { "epoch": 2.7497995188452284, "grad_norm": 0.8765467405319214, "learning_rate": 3.6274472786413605e-07, "loss": 0.4753, "step": 3429 }, { "epoch": 2.7506014434643142, "grad_norm": 0.8574792146682739, "learning_rate": 3.604372157585767e-07, "loss": 0.4566, "step": 3430 }, { "epoch": 2.7514033680834, "grad_norm": 0.9319592714309692, "learning_rate": 3.5813693172548016e-07, "loss": 0.4866, "step": 3431 }, { "epoch": 2.752205292702486, "grad_norm": 0.8465300798416138, "learning_rate": 3.5584387748967665e-07, "loss": 0.4619, "step": 3432 }, { "epoch": 2.7530072173215716, "grad_norm": 0.8767644166946411, "learning_rate": 3.535580547705797e-07, "loss": 0.4739, "step": 3433 }, { "epoch": 2.753809141940658, "grad_norm": 0.8457480072975159, "learning_rate": 3.512794652821716e-07, "loss": 0.478, "step": 3434 }, { "epoch": 2.754611066559743, "grad_norm": 0.871969997882843, "learning_rate": 3.490081107330223e-07, "loss": 0.4537, "step": 3435 }, { "epoch": 2.7554129911788294, "grad_norm": 0.8666412234306335, "learning_rate": 3.4674399282626616e-07, "loss": 0.4704, "step": 3436 }, { "epoch": 2.7562149157979148, "grad_norm": 0.8694742918014526, "learning_rate": 3.4448711325961834e-07, "loss": 0.4902, "step": 3437 }, { "epoch": 2.757016840417001, "grad_norm": 0.8497282266616821, "learning_rate": 3.422374737253642e-07, "loss": 0.4604, "step": 3438 }, { "epoch": 2.7578187650360864, "grad_norm": 0.9280922412872314, "learning_rate": 3.399950759103576e-07, "loss": 0.4865, "step": 3439 }, { "epoch": 2.7586206896551726, "grad_norm": 0.8891953229904175, "learning_rate": 3.37759921496027e-07, "loss": 0.477, "step": 3440 }, { "epoch": 2.759422614274258, "grad_norm": 0.9152265787124634, "learning_rate": 3.355320121583672e-07, "loss": 0.4823, "step": 3441 }, { "epoch": 2.760224538893344, "grad_norm": 0.8602423071861267, "learning_rate": 3.3331134956793965e-07, "loss": 0.474, "step": 3442 }, { "epoch": 2.76102646351243, "grad_norm": 0.8670658469200134, "learning_rate": 3.3109793538987356e-07, "loss": 0.4873, "step": 3443 }, { "epoch": 2.7618283881315158, "grad_norm": 0.9038890600204468, "learning_rate": 3.288917712838613e-07, "loss": 0.4697, "step": 3444 }, { "epoch": 2.7626303127506016, "grad_norm": 0.9339286684989929, "learning_rate": 3.266928589041607e-07, "loss": 0.4837, "step": 3445 }, { "epoch": 2.7634322373696873, "grad_norm": 0.8574177622795105, "learning_rate": 3.2450119989959064e-07, "loss": 0.4526, "step": 3446 }, { "epoch": 2.764234161988773, "grad_norm": 0.8637518882751465, "learning_rate": 3.2231679591353203e-07, "loss": 0.4726, "step": 3447 }, { "epoch": 2.765036086607859, "grad_norm": 0.8453518152236938, "learning_rate": 3.201396485839259e-07, "loss": 0.4474, "step": 3448 }, { "epoch": 2.7658380112269447, "grad_norm": 0.8873100280761719, "learning_rate": 3.179697595432707e-07, "loss": 0.4729, "step": 3449 }, { "epoch": 2.7666399358460305, "grad_norm": 0.8434620499610901, "learning_rate": 3.158071304186228e-07, "loss": 0.4532, "step": 3450 }, { "epoch": 2.7674418604651163, "grad_norm": 0.8255607485771179, "learning_rate": 3.136517628315949e-07, "loss": 0.4721, "step": 3451 }, { "epoch": 2.768243785084202, "grad_norm": 0.8967651724815369, "learning_rate": 3.1150365839835773e-07, "loss": 0.4832, "step": 3452 }, { "epoch": 2.769045709703288, "grad_norm": 0.9030566811561584, "learning_rate": 3.093628187296294e-07, "loss": 0.5076, "step": 3453 }, { "epoch": 2.7698476343223737, "grad_norm": 0.8555736541748047, "learning_rate": 3.0722924543068687e-07, "loss": 0.4568, "step": 3454 }, { "epoch": 2.7706495589414595, "grad_norm": 0.8584993481636047, "learning_rate": 3.0510294010135387e-07, "loss": 0.4755, "step": 3455 }, { "epoch": 2.7714514835605453, "grad_norm": 0.9319979548454285, "learning_rate": 3.0298390433600945e-07, "loss": 0.494, "step": 3456 }, { "epoch": 2.772253408179631, "grad_norm": 0.8971447348594666, "learning_rate": 3.008721397235781e-07, "loss": 0.4819, "step": 3457 }, { "epoch": 2.773055332798717, "grad_norm": 0.8856346011161804, "learning_rate": 2.9876764784753096e-07, "loss": 0.4959, "step": 3458 }, { "epoch": 2.7738572574178026, "grad_norm": 0.8331868052482605, "learning_rate": 2.966704302858892e-07, "loss": 0.4604, "step": 3459 }, { "epoch": 2.7746591820368884, "grad_norm": 0.8967451453208923, "learning_rate": 2.945804886112169e-07, "loss": 0.4757, "step": 3460 }, { "epoch": 2.7754611066559742, "grad_norm": 0.8893353343009949, "learning_rate": 2.924978243906251e-07, "loss": 0.4762, "step": 3461 }, { "epoch": 2.77626303127506, "grad_norm": 0.9258598685264587, "learning_rate": 2.9042243918576574e-07, "loss": 0.4907, "step": 3462 }, { "epoch": 2.777064955894146, "grad_norm": 0.8462851047515869, "learning_rate": 2.883543345528328e-07, "loss": 0.4659, "step": 3463 }, { "epoch": 2.7778668805132316, "grad_norm": 0.8520975112915039, "learning_rate": 2.862935120425614e-07, "loss": 0.4619, "step": 3464 }, { "epoch": 2.7786688051323174, "grad_norm": 0.9390130639076233, "learning_rate": 2.8423997320022765e-07, "loss": 0.4773, "step": 3465 }, { "epoch": 2.779470729751403, "grad_norm": 0.9307414293289185, "learning_rate": 2.821937195656421e-07, "loss": 0.4857, "step": 3466 }, { "epoch": 2.7802726543704894, "grad_norm": 0.9021451473236084, "learning_rate": 2.801547526731596e-07, "loss": 0.4645, "step": 3467 }, { "epoch": 2.7810745789895748, "grad_norm": 0.8632877469062805, "learning_rate": 2.781230740516649e-07, "loss": 0.4581, "step": 3468 }, { "epoch": 2.781876503608661, "grad_norm": 0.8747298717498779, "learning_rate": 2.760986852245784e-07, "loss": 0.4629, "step": 3469 }, { "epoch": 2.7826784282277464, "grad_norm": 0.8483293652534485, "learning_rate": 2.7408158770985905e-07, "loss": 0.4507, "step": 3470 }, { "epoch": 2.7834803528468326, "grad_norm": 0.853502094745636, "learning_rate": 2.720717830199904e-07, "loss": 0.4687, "step": 3471 }, { "epoch": 2.784282277465918, "grad_norm": 0.9311491250991821, "learning_rate": 2.70069272661998e-07, "loss": 0.5025, "step": 3472 }, { "epoch": 2.785084202085004, "grad_norm": 0.930825412273407, "learning_rate": 2.680740581374286e-07, "loss": 0.4954, "step": 3473 }, { "epoch": 2.78588612670409, "grad_norm": 0.866186797618866, "learning_rate": 2.6608614094236317e-07, "loss": 0.4692, "step": 3474 }, { "epoch": 2.7866880513231758, "grad_norm": 0.8751170635223389, "learning_rate": 2.641055225674105e-07, "loss": 0.4558, "step": 3475 }, { "epoch": 2.7874899759422616, "grad_norm": 0.8723040819168091, "learning_rate": 2.6213220449770373e-07, "loss": 0.4459, "step": 3476 }, { "epoch": 2.7882919005613473, "grad_norm": 0.8558136820793152, "learning_rate": 2.6016618821290583e-07, "loss": 0.4557, "step": 3477 }, { "epoch": 2.789093825180433, "grad_norm": 0.8481677770614624, "learning_rate": 2.5820747518720326e-07, "loss": 0.4699, "step": 3478 }, { "epoch": 2.789895749799519, "grad_norm": 0.8892841339111328, "learning_rate": 2.5625606688930107e-07, "loss": 0.4521, "step": 3479 }, { "epoch": 2.7906976744186047, "grad_norm": 0.9031122326850891, "learning_rate": 2.5431196478243767e-07, "loss": 0.4977, "step": 3480 }, { "epoch": 2.7914995990376905, "grad_norm": 0.8958612680435181, "learning_rate": 2.5237517032436374e-07, "loss": 0.4658, "step": 3481 }, { "epoch": 2.7923015236567763, "grad_norm": 0.8278775811195374, "learning_rate": 2.5044568496735534e-07, "loss": 0.4532, "step": 3482 }, { "epoch": 2.793103448275862, "grad_norm": 0.8773555159568787, "learning_rate": 2.485235101582051e-07, "loss": 0.4817, "step": 3483 }, { "epoch": 2.793905372894948, "grad_norm": 0.8488254547119141, "learning_rate": 2.466086473382234e-07, "loss": 0.4544, "step": 3484 }, { "epoch": 2.7947072975140337, "grad_norm": 0.8559311628341675, "learning_rate": 2.4470109794324405e-07, "loss": 0.4533, "step": 3485 }, { "epoch": 2.7955092221331195, "grad_norm": 0.9088083505630493, "learning_rate": 2.4280086340360944e-07, "loss": 0.4757, "step": 3486 }, { "epoch": 2.7963111467522053, "grad_norm": 0.8725008964538574, "learning_rate": 2.409079451441809e-07, "loss": 0.4424, "step": 3487 }, { "epoch": 2.797113071371291, "grad_norm": 0.8594610095024109, "learning_rate": 2.3902234458433315e-07, "loss": 0.4581, "step": 3488 }, { "epoch": 2.797914995990377, "grad_norm": 0.8405497670173645, "learning_rate": 2.371440631379529e-07, "loss": 0.4769, "step": 3489 }, { "epoch": 2.7987169206094626, "grad_norm": 0.9169846177101135, "learning_rate": 2.3527310221344136e-07, "loss": 0.4886, "step": 3490 }, { "epoch": 2.7995188452285484, "grad_norm": 0.9240108132362366, "learning_rate": 2.334094632137063e-07, "loss": 0.4912, "step": 3491 }, { "epoch": 2.8003207698476342, "grad_norm": 0.853226900100708, "learning_rate": 2.3155314753616874e-07, "loss": 0.4897, "step": 3492 }, { "epoch": 2.80112269446672, "grad_norm": 0.863446831703186, "learning_rate": 2.297041565727598e-07, "loss": 0.4654, "step": 3493 }, { "epoch": 2.801924619085806, "grad_norm": 0.8915846347808838, "learning_rate": 2.2786249170991148e-07, "loss": 0.4854, "step": 3494 }, { "epoch": 2.8027265437048916, "grad_norm": 0.8765985369682312, "learning_rate": 2.260281543285703e-07, "loss": 0.4911, "step": 3495 }, { "epoch": 2.8035284683239774, "grad_norm": 0.8515004515647888, "learning_rate": 2.2420114580418262e-07, "loss": 0.4665, "step": 3496 }, { "epoch": 2.804330392943063, "grad_norm": 0.8506413698196411, "learning_rate": 2.2238146750670264e-07, "loss": 0.4608, "step": 3497 }, { "epoch": 2.8051323175621494, "grad_norm": 0.8681690096855164, "learning_rate": 2.205691208005889e-07, "loss": 0.4648, "step": 3498 }, { "epoch": 2.8059342421812348, "grad_norm": 0.855263888835907, "learning_rate": 2.1876410704479767e-07, "loss": 0.4622, "step": 3499 }, { "epoch": 2.806736166800321, "grad_norm": 0.8927332758903503, "learning_rate": 2.1696642759279074e-07, "loss": 0.4671, "step": 3500 }, { "epoch": 2.8075380914194064, "grad_norm": 0.8977309465408325, "learning_rate": 2.1517608379252985e-07, "loss": 0.4874, "step": 3501 }, { "epoch": 2.8083400160384926, "grad_norm": 0.8782375454902649, "learning_rate": 2.133930769864756e-07, "loss": 0.4645, "step": 3502 }, { "epoch": 2.809141940657578, "grad_norm": 0.8151038885116577, "learning_rate": 2.1161740851158742e-07, "loss": 0.4574, "step": 3503 }, { "epoch": 2.809943865276664, "grad_norm": 0.884947657585144, "learning_rate": 2.0984907969932134e-07, "loss": 0.4472, "step": 3504 }, { "epoch": 2.8107457898957495, "grad_norm": 0.8524783253669739, "learning_rate": 2.0808809187563118e-07, "loss": 0.4669, "step": 3505 }, { "epoch": 2.8115477145148358, "grad_norm": 0.8649755120277405, "learning_rate": 2.063344463609651e-07, "loss": 0.4632, "step": 3506 }, { "epoch": 2.8123496391339216, "grad_norm": 0.849073052406311, "learning_rate": 2.0458814447026687e-07, "loss": 0.4809, "step": 3507 }, { "epoch": 2.8131515637530073, "grad_norm": 0.872164785861969, "learning_rate": 2.0284918751297235e-07, "loss": 0.4942, "step": 3508 }, { "epoch": 2.813953488372093, "grad_norm": 0.8174782395362854, "learning_rate": 2.011175767930118e-07, "loss": 0.44, "step": 3509 }, { "epoch": 2.814755412991179, "grad_norm": 0.8372223973274231, "learning_rate": 1.9939331360880442e-07, "loss": 0.4673, "step": 3510 }, { "epoch": 2.8155573376102647, "grad_norm": 0.8555891513824463, "learning_rate": 1.9767639925326155e-07, "loss": 0.4708, "step": 3511 }, { "epoch": 2.8163592622293505, "grad_norm": 0.8843154311180115, "learning_rate": 1.9596683501378666e-07, "loss": 0.4717, "step": 3512 }, { "epoch": 2.8171611868484363, "grad_norm": 0.8556921482086182, "learning_rate": 1.942646221722655e-07, "loss": 0.4618, "step": 3513 }, { "epoch": 2.817963111467522, "grad_norm": 0.8737980127334595, "learning_rate": 1.9256976200507814e-07, "loss": 0.4838, "step": 3514 }, { "epoch": 2.818765036086608, "grad_norm": 0.8971306085586548, "learning_rate": 1.9088225578308582e-07, "loss": 0.4755, "step": 3515 }, { "epoch": 2.8195669607056937, "grad_norm": 0.8777005672454834, "learning_rate": 1.892021047716408e-07, "loss": 0.4624, "step": 3516 }, { "epoch": 2.8203688853247795, "grad_norm": 0.844862163066864, "learning_rate": 1.8752931023057753e-07, "loss": 0.45, "step": 3517 }, { "epoch": 2.8211708099438653, "grad_norm": 0.8649774789810181, "learning_rate": 1.858638734142104e-07, "loss": 0.4738, "step": 3518 }, { "epoch": 2.821972734562951, "grad_norm": 0.8356218934059143, "learning_rate": 1.842057955713461e-07, "loss": 0.4632, "step": 3519 }, { "epoch": 2.822774659182037, "grad_norm": 0.8642269968986511, "learning_rate": 1.8255507794526338e-07, "loss": 0.4708, "step": 3520 }, { "epoch": 2.8235765838011226, "grad_norm": 0.8712006211280823, "learning_rate": 1.8091172177372994e-07, "loss": 0.4649, "step": 3521 }, { "epoch": 2.8243785084202084, "grad_norm": 0.8947505354881287, "learning_rate": 1.7927572828898788e-07, "loss": 0.4599, "step": 3522 }, { "epoch": 2.8251804330392942, "grad_norm": 0.8280917406082153, "learning_rate": 1.776470987177614e-07, "loss": 0.4514, "step": 3523 }, { "epoch": 2.82598235765838, "grad_norm": 0.8675678968429565, "learning_rate": 1.7602583428125263e-07, "loss": 0.4769, "step": 3524 }, { "epoch": 2.826784282277466, "grad_norm": 0.8008211255073547, "learning_rate": 1.744119361951413e-07, "loss": 0.4635, "step": 3525 }, { "epoch": 2.8275862068965516, "grad_norm": 0.8509759306907654, "learning_rate": 1.728054056695816e-07, "loss": 0.4531, "step": 3526 }, { "epoch": 2.8283881315156374, "grad_norm": 0.8491147756576538, "learning_rate": 1.712062439092077e-07, "loss": 0.4622, "step": 3527 }, { "epoch": 2.829190056134723, "grad_norm": 0.8817112445831299, "learning_rate": 1.6961445211312265e-07, "loss": 0.4772, "step": 3528 }, { "epoch": 2.829991980753809, "grad_norm": 0.8488009572029114, "learning_rate": 1.6803003147490727e-07, "loss": 0.4619, "step": 3529 }, { "epoch": 2.8307939053728948, "grad_norm": 0.8665116429328918, "learning_rate": 1.6645298318261449e-07, "loss": 0.4584, "step": 3530 }, { "epoch": 2.831595829991981, "grad_norm": 0.8897413015365601, "learning_rate": 1.648833084187673e-07, "loss": 0.4742, "step": 3531 }, { "epoch": 2.8323977546110664, "grad_norm": 0.8791276216506958, "learning_rate": 1.6332100836036425e-07, "loss": 0.4718, "step": 3532 }, { "epoch": 2.8331996792301526, "grad_norm": 0.8625409603118896, "learning_rate": 1.617660841788682e-07, "loss": 0.4715, "step": 3533 }, { "epoch": 2.834001603849238, "grad_norm": 0.887173593044281, "learning_rate": 1.602185370402154e-07, "loss": 0.5021, "step": 3534 }, { "epoch": 2.834803528468324, "grad_norm": 0.9261402487754822, "learning_rate": 1.5867836810481095e-07, "loss": 0.5014, "step": 3535 }, { "epoch": 2.8356054530874095, "grad_norm": 0.8724820017814636, "learning_rate": 1.5714557852752222e-07, "loss": 0.4643, "step": 3536 }, { "epoch": 2.8364073777064958, "grad_norm": 0.8310959339141846, "learning_rate": 1.5562016945769088e-07, "loss": 0.4687, "step": 3537 }, { "epoch": 2.8372093023255816, "grad_norm": 0.8877079486846924, "learning_rate": 1.5410214203911754e-07, "loss": 0.4681, "step": 3538 }, { "epoch": 2.8380112269446673, "grad_norm": 0.8687597513198853, "learning_rate": 1.5259149741007284e-07, "loss": 0.4639, "step": 3539 }, { "epoch": 2.838813151563753, "grad_norm": 0.8550201058387756, "learning_rate": 1.5108823670328954e-07, "loss": 0.4792, "step": 3540 }, { "epoch": 2.839615076182839, "grad_norm": 0.9106943011283875, "learning_rate": 1.4959236104596265e-07, "loss": 0.4758, "step": 3541 }, { "epoch": 2.8404170008019247, "grad_norm": 0.8858274221420288, "learning_rate": 1.4810387155975158e-07, "loss": 0.4608, "step": 3542 }, { "epoch": 2.8412189254210105, "grad_norm": 0.8574510216712952, "learning_rate": 1.466227693607747e-07, "loss": 0.4688, "step": 3543 }, { "epoch": 2.8420208500400963, "grad_norm": 0.8784916400909424, "learning_rate": 1.4514905555961578e-07, "loss": 0.4763, "step": 3544 }, { "epoch": 2.842822774659182, "grad_norm": 0.863102912902832, "learning_rate": 1.4368273126131428e-07, "loss": 0.4472, "step": 3545 }, { "epoch": 2.843624699278268, "grad_norm": 0.8521451354026794, "learning_rate": 1.4222379756536841e-07, "loss": 0.4649, "step": 3546 }, { "epoch": 2.8444266238973537, "grad_norm": 0.8406957983970642, "learning_rate": 1.4077225556573872e-07, "loss": 0.4707, "step": 3547 }, { "epoch": 2.8452285485164395, "grad_norm": 0.8615586757659912, "learning_rate": 1.3932810635083893e-07, "loss": 0.4524, "step": 3548 }, { "epoch": 2.8460304731355253, "grad_norm": 0.8704116344451904, "learning_rate": 1.378913510035429e-07, "loss": 0.4781, "step": 3549 }, { "epoch": 2.846832397754611, "grad_norm": 0.8566069602966309, "learning_rate": 1.3646199060117881e-07, "loss": 0.4628, "step": 3550 }, { "epoch": 2.847634322373697, "grad_norm": 0.8848322629928589, "learning_rate": 1.3504002621552937e-07, "loss": 0.4675, "step": 3551 }, { "epoch": 2.8484362469927826, "grad_norm": 0.8653082251548767, "learning_rate": 1.3362545891283052e-07, "loss": 0.4589, "step": 3552 }, { "epoch": 2.8492381716118684, "grad_norm": 0.8225123286247253, "learning_rate": 1.3221828975377382e-07, "loss": 0.4574, "step": 3553 }, { "epoch": 2.8500400962309542, "grad_norm": 0.838966965675354, "learning_rate": 1.3081851979350412e-07, "loss": 0.4877, "step": 3554 }, { "epoch": 2.85084202085004, "grad_norm": 0.8537912368774414, "learning_rate": 1.294261500816152e-07, "loss": 0.4686, "step": 3555 }, { "epoch": 2.851643945469126, "grad_norm": 0.9012706279754639, "learning_rate": 1.2804118166215297e-07, "loss": 0.4968, "step": 3556 }, { "epoch": 2.8524458700882116, "grad_norm": 0.8772068619728088, "learning_rate": 1.266636155736145e-07, "loss": 0.474, "step": 3557 }, { "epoch": 2.8532477947072974, "grad_norm": 0.8455215692520142, "learning_rate": 1.252934528489458e-07, "loss": 0.4907, "step": 3558 }, { "epoch": 2.854049719326383, "grad_norm": 0.9360870718955994, "learning_rate": 1.2393069451554163e-07, "loss": 0.5056, "step": 3559 }, { "epoch": 2.854851643945469, "grad_norm": 0.8860448598861694, "learning_rate": 1.2257534159524353e-07, "loss": 0.4777, "step": 3560 }, { "epoch": 2.8556535685645548, "grad_norm": 0.8781315088272095, "learning_rate": 1.21227395104343e-07, "loss": 0.4508, "step": 3561 }, { "epoch": 2.856455493183641, "grad_norm": 0.8404563069343567, "learning_rate": 1.1988685605357486e-07, "loss": 0.4514, "step": 3562 }, { "epoch": 2.8572574178027264, "grad_norm": 0.8559548258781433, "learning_rate": 1.1855372544812172e-07, "loss": 0.4616, "step": 3563 }, { "epoch": 2.8580593424218126, "grad_norm": 0.9076322913169861, "learning_rate": 1.172280042876106e-07, "loss": 0.4811, "step": 3564 }, { "epoch": 2.858861267040898, "grad_norm": 0.9246238470077515, "learning_rate": 1.1590969356611081e-07, "loss": 0.4823, "step": 3565 }, { "epoch": 2.859663191659984, "grad_norm": 0.9094520211219788, "learning_rate": 1.1459879427213827e-07, "loss": 0.483, "step": 3566 }, { "epoch": 2.8604651162790695, "grad_norm": 0.8826652765274048, "learning_rate": 1.1329530738865003e-07, "loss": 0.5114, "step": 3567 }, { "epoch": 2.8612670408981558, "grad_norm": 0.8294800519943237, "learning_rate": 1.1199923389304201e-07, "loss": 0.4441, "step": 3568 }, { "epoch": 2.862068965517241, "grad_norm": 0.8477868437767029, "learning_rate": 1.1071057475715797e-07, "loss": 0.4362, "step": 3569 }, { "epoch": 2.8628708901363273, "grad_norm": 0.8606266379356384, "learning_rate": 1.0942933094727715e-07, "loss": 0.4741, "step": 3570 }, { "epoch": 2.863672814755413, "grad_norm": 0.877900242805481, "learning_rate": 1.0815550342411885e-07, "loss": 0.4655, "step": 3571 }, { "epoch": 2.864474739374499, "grad_norm": 0.8586121201515198, "learning_rate": 1.0688909314284346e-07, "loss": 0.4838, "step": 3572 }, { "epoch": 2.8652766639935847, "grad_norm": 0.8372088074684143, "learning_rate": 1.0563010105304694e-07, "loss": 0.4737, "step": 3573 }, { "epoch": 2.8660785886126705, "grad_norm": 0.8549312353134155, "learning_rate": 1.0437852809876636e-07, "loss": 0.4736, "step": 3574 }, { "epoch": 2.8668805132317563, "grad_norm": 0.8639695644378662, "learning_rate": 1.0313437521847325e-07, "loss": 0.4589, "step": 3575 }, { "epoch": 2.867682437850842, "grad_norm": 0.8854097723960876, "learning_rate": 1.0189764334507579e-07, "loss": 0.4866, "step": 3576 }, { "epoch": 2.868484362469928, "grad_norm": 0.8394895792007446, "learning_rate": 1.0066833340591664e-07, "loss": 0.4556, "step": 3577 }, { "epoch": 2.8692862870890137, "grad_norm": 0.8737897276878357, "learning_rate": 9.944644632277512e-08, "loss": 0.4882, "step": 3578 }, { "epoch": 2.8700882117080995, "grad_norm": 0.9116494655609131, "learning_rate": 9.823198301186387e-08, "loss": 0.463, "step": 3579 }, { "epoch": 2.8708901363271853, "grad_norm": 0.8393471837043762, "learning_rate": 9.702494438383003e-08, "loss": 0.4651, "step": 3580 }, { "epoch": 2.871692060946271, "grad_norm": 0.8796486258506775, "learning_rate": 9.582533134374849e-08, "loss": 0.4699, "step": 3581 }, { "epoch": 2.872493985565357, "grad_norm": 0.8335583209991455, "learning_rate": 9.463314479113416e-08, "loss": 0.4686, "step": 3582 }, { "epoch": 2.8732959101844426, "grad_norm": 0.8907720446586609, "learning_rate": 9.344838561992642e-08, "loss": 0.4928, "step": 3583 }, { "epoch": 2.8740978348035284, "grad_norm": 0.8920674920082092, "learning_rate": 9.227105471849795e-08, "loss": 0.4846, "step": 3584 }, { "epoch": 2.874899759422614, "grad_norm": 0.8768170475959778, "learning_rate": 9.110115296965482e-08, "loss": 0.4715, "step": 3585 }, { "epoch": 2.8757016840417, "grad_norm": 0.8467321395874023, "learning_rate": 8.993868125062533e-08, "loss": 0.4579, "step": 3586 }, { "epoch": 2.876503608660786, "grad_norm": 0.9148172736167908, "learning_rate": 8.87836404330722e-08, "loss": 0.4825, "step": 3587 }, { "epoch": 2.8773055332798716, "grad_norm": 0.8339017629623413, "learning_rate": 8.763603138308485e-08, "loss": 0.4647, "step": 3588 }, { "epoch": 2.8781074578989574, "grad_norm": 0.8492835760116577, "learning_rate": 8.64958549611783e-08, "loss": 0.4491, "step": 3589 }, { "epoch": 2.878909382518043, "grad_norm": 0.8415125608444214, "learning_rate": 8.536311202229641e-08, "loss": 0.484, "step": 3590 }, { "epoch": 2.879711307137129, "grad_norm": 0.8640526533126831, "learning_rate": 8.423780341580756e-08, "loss": 0.4564, "step": 3591 }, { "epoch": 2.8805132317562148, "grad_norm": 0.8569619655609131, "learning_rate": 8.311992998550789e-08, "loss": 0.4632, "step": 3592 }, { "epoch": 2.8813151563753006, "grad_norm": 0.8628082871437073, "learning_rate": 8.200949256961687e-08, "loss": 0.4823, "step": 3593 }, { "epoch": 2.8821170809943863, "grad_norm": 0.8405731916427612, "learning_rate": 8.090649200077627e-08, "loss": 0.4679, "step": 3594 }, { "epoch": 2.8829190056134726, "grad_norm": 0.831079363822937, "learning_rate": 7.98109291060567e-08, "loss": 0.4371, "step": 3595 }, { "epoch": 2.883720930232558, "grad_norm": 0.8602555990219116, "learning_rate": 7.872280470694549e-08, "loss": 0.4744, "step": 3596 }, { "epoch": 2.884522854851644, "grad_norm": 0.8295644521713257, "learning_rate": 7.764211961935664e-08, "loss": 0.4517, "step": 3597 }, { "epoch": 2.8853247794707295, "grad_norm": 0.8858298659324646, "learning_rate": 7.656887465362528e-08, "loss": 0.4617, "step": 3598 }, { "epoch": 2.8861267040898158, "grad_norm": 0.8486381769180298, "learning_rate": 7.550307061450546e-08, "loss": 0.474, "step": 3599 }, { "epoch": 2.886928628708901, "grad_norm": 0.8799077272415161, "learning_rate": 7.444470830117456e-08, "loss": 0.4681, "step": 3600 }, { "epoch": 2.8877305533279873, "grad_norm": 0.8846973776817322, "learning_rate": 7.339378850722889e-08, "loss": 0.4839, "step": 3601 }, { "epoch": 2.888532477947073, "grad_norm": 0.8882151246070862, "learning_rate": 7.235031202068255e-08, "loss": 0.4527, "step": 3602 }, { "epoch": 2.889334402566159, "grad_norm": 0.8758078813552856, "learning_rate": 7.131427962397076e-08, "loss": 0.5004, "step": 3603 }, { "epoch": 2.8901363271852447, "grad_norm": 0.9011175632476807, "learning_rate": 7.028569209394653e-08, "loss": 0.4439, "step": 3604 }, { "epoch": 2.8909382518043305, "grad_norm": 0.8999505043029785, "learning_rate": 6.92645502018785e-08, "loss": 0.5111, "step": 3605 }, { "epoch": 2.8917401764234163, "grad_norm": 0.9070544242858887, "learning_rate": 6.825085471345416e-08, "loss": 0.4621, "step": 3606 }, { "epoch": 2.892542101042502, "grad_norm": 0.8917433619499207, "learning_rate": 6.724460638877661e-08, "loss": 0.4679, "step": 3607 }, { "epoch": 2.893344025661588, "grad_norm": 0.8720741868019104, "learning_rate": 6.624580598236563e-08, "loss": 0.4559, "step": 3608 }, { "epoch": 2.8941459502806737, "grad_norm": 0.8378724455833435, "learning_rate": 6.525445424315546e-08, "loss": 0.4542, "step": 3609 }, { "epoch": 2.8949478748997595, "grad_norm": 0.9167259335517883, "learning_rate": 6.427055191449483e-08, "loss": 0.4639, "step": 3610 }, { "epoch": 2.8957497995188453, "grad_norm": 0.9500483274459839, "learning_rate": 6.329409973414913e-08, "loss": 0.4829, "step": 3611 }, { "epoch": 2.896551724137931, "grad_norm": 0.8519693613052368, "learning_rate": 6.23250984342938e-08, "loss": 0.4823, "step": 3612 }, { "epoch": 2.897353648757017, "grad_norm": 0.8825336694717407, "learning_rate": 6.136354874151874e-08, "loss": 0.4694, "step": 3613 }, { "epoch": 2.8981555733761026, "grad_norm": 0.8310946226119995, "learning_rate": 6.04094513768283e-08, "loss": 0.468, "step": 3614 }, { "epoch": 2.8989574979951884, "grad_norm": 0.7865362763404846, "learning_rate": 5.9462807055635787e-08, "loss": 0.4555, "step": 3615 }, { "epoch": 2.899759422614274, "grad_norm": 0.8779739141464233, "learning_rate": 5.852361648776672e-08, "loss": 0.4533, "step": 3616 }, { "epoch": 2.90056134723336, "grad_norm": 0.9257890582084656, "learning_rate": 5.7591880377459995e-08, "loss": 0.4784, "step": 3617 }, { "epoch": 2.901363271852446, "grad_norm": 0.8655847907066345, "learning_rate": 5.666759942336231e-08, "loss": 0.4619, "step": 3618 }, { "epoch": 2.9021651964715316, "grad_norm": 0.9047146439552307, "learning_rate": 5.5750774318531486e-08, "loss": 0.4821, "step": 3619 }, { "epoch": 2.9029671210906174, "grad_norm": 0.8486894965171814, "learning_rate": 5.4841405750433175e-08, "loss": 0.4506, "step": 3620 }, { "epoch": 2.903769045709703, "grad_norm": 0.9041795134544373, "learning_rate": 5.393949440094415e-08, "loss": 0.4725, "step": 3621 }, { "epoch": 2.904570970328789, "grad_norm": 0.8865926265716553, "learning_rate": 5.304504094634677e-08, "loss": 0.5029, "step": 3622 }, { "epoch": 2.9053728949478748, "grad_norm": 0.8497272729873657, "learning_rate": 5.2158046057333434e-08, "loss": 0.4617, "step": 3623 }, { "epoch": 2.9061748195669606, "grad_norm": 0.8616043925285339, "learning_rate": 5.1278510399004334e-08, "loss": 0.4697, "step": 3624 }, { "epoch": 2.9069767441860463, "grad_norm": 0.8522729873657227, "learning_rate": 5.040643463086303e-08, "loss": 0.4575, "step": 3625 }, { "epoch": 2.9077786688051326, "grad_norm": 0.8534807562828064, "learning_rate": 4.954181940682201e-08, "loss": 0.4719, "step": 3626 }, { "epoch": 2.908580593424218, "grad_norm": 0.8463496565818787, "learning_rate": 4.8684665375201553e-08, "loss": 0.4726, "step": 3627 }, { "epoch": 2.909382518043304, "grad_norm": 0.8361274003982544, "learning_rate": 4.7834973178721986e-08, "loss": 0.4646, "step": 3628 }, { "epoch": 2.9101844426623895, "grad_norm": 0.8507230877876282, "learning_rate": 4.6992743454513654e-08, "loss": 0.4596, "step": 3629 }, { "epoch": 2.9109863672814758, "grad_norm": 0.8449583649635315, "learning_rate": 4.615797683410694e-08, "loss": 0.4887, "step": 3630 }, { "epoch": 2.911788291900561, "grad_norm": 0.8460783362388611, "learning_rate": 4.533067394344115e-08, "loss": 0.4702, "step": 3631 }, { "epoch": 2.9125902165196473, "grad_norm": 0.89262855052948, "learning_rate": 4.4510835402853394e-08, "loss": 0.478, "step": 3632 }, { "epoch": 2.9133921411387327, "grad_norm": 0.8806936144828796, "learning_rate": 4.369846182708748e-08, "loss": 0.4603, "step": 3633 }, { "epoch": 2.914194065757819, "grad_norm": 0.9069940447807312, "learning_rate": 4.289355382529059e-08, "loss": 0.4593, "step": 3634 }, { "epoch": 2.9149959903769047, "grad_norm": 0.9094383716583252, "learning_rate": 4.2096112001006604e-08, "loss": 0.4879, "step": 3635 }, { "epoch": 2.9157979149959905, "grad_norm": 0.8786039352416992, "learning_rate": 4.1306136952187214e-08, "loss": 0.4808, "step": 3636 }, { "epoch": 2.9165998396150763, "grad_norm": 0.9403489828109741, "learning_rate": 4.052362927118303e-08, "loss": 0.4875, "step": 3637 }, { "epoch": 2.917401764234162, "grad_norm": 0.7999061346054077, "learning_rate": 3.974858954474248e-08, "loss": 0.45, "step": 3638 }, { "epoch": 2.918203688853248, "grad_norm": 0.8469072580337524, "learning_rate": 3.898101835401846e-08, "loss": 0.4521, "step": 3639 }, { "epoch": 2.9190056134723337, "grad_norm": 0.8923320770263672, "learning_rate": 3.82209162745617e-08, "loss": 0.48, "step": 3640 }, { "epoch": 2.9198075380914195, "grad_norm": 0.8438341021537781, "learning_rate": 3.746828387632184e-08, "loss": 0.4469, "step": 3641 }, { "epoch": 2.9206094627105053, "grad_norm": 0.8810524940490723, "learning_rate": 3.672312172365078e-08, "loss": 0.4609, "step": 3642 }, { "epoch": 2.921411387329591, "grad_norm": 0.902431309223175, "learning_rate": 3.598543037529378e-08, "loss": 0.4814, "step": 3643 }, { "epoch": 2.922213311948677, "grad_norm": 0.8674122095108032, "learning_rate": 3.525521038439728e-08, "loss": 0.465, "step": 3644 }, { "epoch": 2.9230152365677626, "grad_norm": 0.8674834370613098, "learning_rate": 3.4532462298506596e-08, "loss": 0.4554, "step": 3645 }, { "epoch": 2.9238171611868484, "grad_norm": 0.8510974049568176, "learning_rate": 3.3817186659560466e-08, "loss": 0.4579, "step": 3646 }, { "epoch": 2.924619085805934, "grad_norm": 0.8944957852363586, "learning_rate": 3.3109384003899844e-08, "loss": 0.4764, "step": 3647 }, { "epoch": 2.92542101042502, "grad_norm": 0.8609873056411743, "learning_rate": 3.2409054862256875e-08, "loss": 0.4763, "step": 3648 }, { "epoch": 2.926222935044106, "grad_norm": 0.8213399648666382, "learning_rate": 3.17161997597637e-08, "loss": 0.4574, "step": 3649 }, { "epoch": 2.9270248596631916, "grad_norm": 0.8279301524162292, "learning_rate": 3.103081921594586e-08, "loss": 0.467, "step": 3650 }, { "epoch": 2.9278267842822774, "grad_norm": 0.8031629323959351, "learning_rate": 3.03529137447256e-08, "loss": 0.4496, "step": 3651 }, { "epoch": 2.928628708901363, "grad_norm": 0.8145686984062195, "learning_rate": 2.968248385441852e-08, "loss": 0.4587, "step": 3652 }, { "epoch": 2.929430633520449, "grad_norm": 0.8566755652427673, "learning_rate": 2.9019530047736944e-08, "loss": 0.4765, "step": 3653 }, { "epoch": 2.9302325581395348, "grad_norm": 0.8937522768974304, "learning_rate": 2.836405282178656e-08, "loss": 0.4975, "step": 3654 }, { "epoch": 2.9310344827586206, "grad_norm": 0.8584054708480835, "learning_rate": 2.7716052668064208e-08, "loss": 0.4558, "step": 3655 }, { "epoch": 2.9318364073777063, "grad_norm": 0.8949685096740723, "learning_rate": 2.707553007246455e-08, "loss": 0.491, "step": 3656 }, { "epoch": 2.932638331996792, "grad_norm": 0.8435682654380798, "learning_rate": 2.6442485515273397e-08, "loss": 0.4682, "step": 3657 }, { "epoch": 2.933440256615878, "grad_norm": 0.8290677070617676, "learning_rate": 2.581691947116771e-08, "loss": 0.4771, "step": 3658 }, { "epoch": 2.934242181234964, "grad_norm": 0.8999835252761841, "learning_rate": 2.5198832409218944e-08, "loss": 0.4779, "step": 3659 }, { "epoch": 2.9350441058540495, "grad_norm": 0.8274721503257751, "learning_rate": 2.458822479288969e-08, "loss": 0.4586, "step": 3660 }, { "epoch": 2.9358460304731357, "grad_norm": 0.8735244274139404, "learning_rate": 2.3985097080033715e-08, "loss": 0.4738, "step": 3661 }, { "epoch": 2.936647955092221, "grad_norm": 0.8940444588661194, "learning_rate": 2.3389449722898137e-08, "loss": 0.4773, "step": 3662 }, { "epoch": 2.9374498797113073, "grad_norm": 0.9149758219718933, "learning_rate": 2.2801283168119028e-08, "loss": 0.4798, "step": 3663 }, { "epoch": 2.9382518043303927, "grad_norm": 0.8782206773757935, "learning_rate": 2.222059785672359e-08, "loss": 0.4672, "step": 3664 }, { "epoch": 2.939053728949479, "grad_norm": 0.8763145804405212, "learning_rate": 2.1647394224129092e-08, "loss": 0.483, "step": 3665 }, { "epoch": 2.9398556535685647, "grad_norm": 0.8692938089370728, "learning_rate": 2.108167270014394e-08, "loss": 0.4818, "step": 3666 }, { "epoch": 2.9406575781876505, "grad_norm": 0.8743387460708618, "learning_rate": 2.052343370896437e-08, "loss": 0.4554, "step": 3667 }, { "epoch": 2.9414595028067363, "grad_norm": 0.8664737343788147, "learning_rate": 1.9972677669177766e-08, "loss": 0.4854, "step": 3668 }, { "epoch": 2.942261427425822, "grad_norm": 0.8775107860565186, "learning_rate": 1.942940499376045e-08, "loss": 0.4664, "step": 3669 }, { "epoch": 2.943063352044908, "grad_norm": 0.8614839315414429, "learning_rate": 1.889361609007434e-08, "loss": 0.4388, "step": 3670 }, { "epoch": 2.9438652766639937, "grad_norm": 0.9102433323860168, "learning_rate": 1.836531135987474e-08, "loss": 0.4858, "step": 3671 }, { "epoch": 2.9446672012830795, "grad_norm": 0.8594423532485962, "learning_rate": 1.7844491199301428e-08, "loss": 0.4674, "step": 3672 }, { "epoch": 2.9454691259021653, "grad_norm": 0.9068805575370789, "learning_rate": 1.733115599888202e-08, "loss": 0.4674, "step": 3673 }, { "epoch": 2.946271050521251, "grad_norm": 0.9371825456619263, "learning_rate": 1.682530614353528e-08, "loss": 0.5031, "step": 3674 }, { "epoch": 2.947072975140337, "grad_norm": 0.9196876287460327, "learning_rate": 1.6326942012562242e-08, "loss": 0.4942, "step": 3675 }, { "epoch": 2.9478748997594226, "grad_norm": 0.7983100414276123, "learning_rate": 1.5836063979656202e-08, "loss": 0.4588, "step": 3676 }, { "epoch": 2.9486768243785084, "grad_norm": 0.8527657389640808, "learning_rate": 1.535267241289051e-08, "loss": 0.4615, "step": 3677 }, { "epoch": 2.949478748997594, "grad_norm": 0.8166870474815369, "learning_rate": 1.4876767674730786e-08, "loss": 0.4497, "step": 3678 }, { "epoch": 2.95028067361668, "grad_norm": 0.9895662665367126, "learning_rate": 1.4408350122027126e-08, "loss": 0.4559, "step": 3679 }, { "epoch": 2.951082598235766, "grad_norm": 0.8738000988960266, "learning_rate": 1.3947420106013021e-08, "loss": 0.4756, "step": 3680 }, { "epoch": 2.9518845228548516, "grad_norm": 0.8730235695838928, "learning_rate": 1.3493977972312e-08, "loss": 0.4423, "step": 3681 }, { "epoch": 2.9526864474739374, "grad_norm": 0.9113731980323792, "learning_rate": 1.3048024060928754e-08, "loss": 0.4899, "step": 3682 }, { "epoch": 2.953488372093023, "grad_norm": 0.8570337891578674, "learning_rate": 1.2609558706253578e-08, "loss": 0.4661, "step": 3683 }, { "epoch": 2.954290296712109, "grad_norm": 0.8690351843833923, "learning_rate": 1.2178582237065695e-08, "loss": 0.4826, "step": 3684 }, { "epoch": 2.9550922213311948, "grad_norm": 0.8450669050216675, "learning_rate": 1.1755094976523273e-08, "loss": 0.4757, "step": 3685 }, { "epoch": 2.9558941459502805, "grad_norm": 0.8436766266822815, "learning_rate": 1.1339097242173414e-08, "loss": 0.449, "step": 3686 }, { "epoch": 2.9566960705693663, "grad_norm": 0.9269471168518066, "learning_rate": 1.0930589345944376e-08, "loss": 0.4733, "step": 3687 }, { "epoch": 2.957497995188452, "grad_norm": 0.8610197305679321, "learning_rate": 1.0529571594150023e-08, "loss": 0.4396, "step": 3688 }, { "epoch": 2.958299919807538, "grad_norm": 0.879664957523346, "learning_rate": 1.013604428748538e-08, "loss": 0.4536, "step": 3689 }, { "epoch": 2.959101844426624, "grad_norm": 0.9061784148216248, "learning_rate": 9.750007721032184e-09, "loss": 0.489, "step": 3690 }, { "epoch": 2.9599037690457095, "grad_norm": 0.8848870992660522, "learning_rate": 9.371462184254443e-09, "loss": 0.4883, "step": 3691 }, { "epoch": 2.9607056936647957, "grad_norm": 0.8711914420127869, "learning_rate": 9.000407960996216e-09, "loss": 0.4915, "step": 3692 }, { "epoch": 2.961507618283881, "grad_norm": 0.8504834175109863, "learning_rate": 8.636845329488274e-09, "loss": 0.4564, "step": 3693 }, { "epoch": 2.9623095429029673, "grad_norm": 0.8460233807563782, "learning_rate": 8.280774562342552e-09, "loss": 0.4749, "step": 3694 }, { "epoch": 2.9631114675220527, "grad_norm": 0.9107170104980469, "learning_rate": 7.932195926552144e-09, "loss": 0.4831, "step": 3695 }, { "epoch": 2.963913392141139, "grad_norm": 0.9115839600563049, "learning_rate": 7.591109683492415e-09, "loss": 0.5077, "step": 3696 }, { "epoch": 2.9647153167602243, "grad_norm": 0.849886953830719, "learning_rate": 7.257516088923222e-09, "loss": 0.4558, "step": 3697 }, { "epoch": 2.9655172413793105, "grad_norm": 0.8572622537612915, "learning_rate": 6.9314153929833646e-09, "loss": 0.4509, "step": 3698 }, { "epoch": 2.9663191659983963, "grad_norm": 0.8848903179168701, "learning_rate": 6.612807840195024e-09, "loss": 0.4814, "step": 3699 }, { "epoch": 2.967121090617482, "grad_norm": 0.811222493648529, "learning_rate": 6.301693669459319e-09, "loss": 0.4582, "step": 3700 }, { "epoch": 2.967923015236568, "grad_norm": 0.8343945741653442, "learning_rate": 5.998073114062975e-09, "loss": 0.4534, "step": 3701 }, { "epoch": 2.9687249398556537, "grad_norm": 0.8955221176147461, "learning_rate": 5.701946401668324e-09, "loss": 0.4938, "step": 3702 }, { "epoch": 2.9695268644747395, "grad_norm": 0.9045392274856567, "learning_rate": 5.413313754322192e-09, "loss": 0.4815, "step": 3703 }, { "epoch": 2.9703287890938253, "grad_norm": 0.8312162756919861, "learning_rate": 5.132175388452565e-09, "loss": 0.4714, "step": 3704 }, { "epoch": 2.971130713712911, "grad_norm": 0.8867566585540771, "learning_rate": 4.858531514864151e-09, "loss": 0.4735, "step": 3705 }, { "epoch": 2.971932638331997, "grad_norm": 0.9006369709968567, "learning_rate": 4.592382338746148e-09, "loss": 0.4656, "step": 3706 }, { "epoch": 2.9727345629510826, "grad_norm": 0.8595499396324158, "learning_rate": 4.3337280596655876e-09, "loss": 0.4582, "step": 3707 }, { "epoch": 2.9735364875701684, "grad_norm": 0.927030622959137, "learning_rate": 4.082568871570658e-09, "loss": 0.4734, "step": 3708 }, { "epoch": 2.974338412189254, "grad_norm": 0.8537117838859558, "learning_rate": 3.838904962788492e-09, "loss": 0.4759, "step": 3709 }, { "epoch": 2.97514033680834, "grad_norm": 0.8765459656715393, "learning_rate": 3.602736516027383e-09, "loss": 0.4711, "step": 3710 }, { "epoch": 2.975942261427426, "grad_norm": 0.8582831025123596, "learning_rate": 3.374063708373454e-09, "loss": 0.4654, "step": 3711 }, { "epoch": 2.9767441860465116, "grad_norm": 0.9093928337097168, "learning_rate": 3.15288671129399e-09, "loss": 0.4866, "step": 3712 }, { "epoch": 2.9775461106655974, "grad_norm": 0.9072393178939819, "learning_rate": 2.9392056906352162e-09, "loss": 0.4787, "step": 3713 }, { "epoch": 2.978348035284683, "grad_norm": 0.8780279159545898, "learning_rate": 2.7330208066222996e-09, "loss": 0.4809, "step": 3714 }, { "epoch": 2.979149959903769, "grad_norm": 0.8489347100257874, "learning_rate": 2.5343322138593472e-09, "loss": 0.4603, "step": 3715 }, { "epoch": 2.9799518845228548, "grad_norm": 0.9047530889511108, "learning_rate": 2.3431400613305176e-09, "loss": 0.5009, "step": 3716 }, { "epoch": 2.9807538091419405, "grad_norm": 0.8335652947425842, "learning_rate": 2.1594444923978e-09, "loss": 0.454, "step": 3717 }, { "epoch": 2.9815557337610263, "grad_norm": 0.8720241189002991, "learning_rate": 1.983245644802123e-09, "loss": 0.471, "step": 3718 }, { "epoch": 2.982357658380112, "grad_norm": 0.8841453790664673, "learning_rate": 1.8145436506633585e-09, "loss": 0.4726, "step": 3719 }, { "epoch": 2.983159582999198, "grad_norm": 0.8820040822029114, "learning_rate": 1.6533386364814274e-09, "loss": 0.4732, "step": 3720 }, { "epoch": 2.9839615076182837, "grad_norm": 0.8319164514541626, "learning_rate": 1.4996307231307517e-09, "loss": 0.4638, "step": 3721 }, { "epoch": 2.9847634322373695, "grad_norm": 0.8456513285636902, "learning_rate": 1.3534200258691343e-09, "loss": 0.4903, "step": 3722 }, { "epoch": 2.9855653568564557, "grad_norm": 0.8635490536689758, "learning_rate": 1.2147066543288787e-09, "loss": 0.4643, "step": 3723 }, { "epoch": 2.986367281475541, "grad_norm": 0.8873840570449829, "learning_rate": 1.0834907125223392e-09, "loss": 0.4919, "step": 3724 }, { "epoch": 2.9871692060946273, "grad_norm": 0.8440971970558167, "learning_rate": 9.59772298840811e-10, "loss": 0.4576, "step": 3725 }, { "epoch": 2.9879711307137127, "grad_norm": 0.8713566064834595, "learning_rate": 8.435515060500888e-10, "loss": 0.4615, "step": 3726 }, { "epoch": 2.988773055332799, "grad_norm": 0.877994954586029, "learning_rate": 7.348284212993495e-10, "loss": 0.4763, "step": 3727 }, { "epoch": 2.9895749799518843, "grad_norm": 0.8891599178314209, "learning_rate": 6.336031261111597e-10, "loss": 0.4877, "step": 3728 }, { "epoch": 2.9903769045709705, "grad_norm": 0.8762668967247009, "learning_rate": 5.398756963881368e-10, "loss": 0.4856, "step": 3729 }, { "epoch": 2.9911788291900563, "grad_norm": 0.8617690205574036, "learning_rate": 4.5364620240961885e-10, "loss": 0.4847, "step": 3730 }, { "epoch": 2.991980753809142, "grad_norm": 0.8446850180625916, "learning_rate": 3.749147088349947e-10, "loss": 0.4549, "step": 3731 }, { "epoch": 2.992782678428228, "grad_norm": 0.9086197018623352, "learning_rate": 3.0368127469815324e-10, "loss": 0.4741, "step": 3732 }, { "epoch": 2.9935846030473137, "grad_norm": 0.9534429907798767, "learning_rate": 2.399459534130344e-10, "loss": 0.4584, "step": 3733 }, { "epoch": 2.9943865276663995, "grad_norm": 0.8938235640525818, "learning_rate": 1.8370879277140874e-10, "loss": 0.4728, "step": 3734 }, { "epoch": 2.9951884522854852, "grad_norm": 0.8239015936851501, "learning_rate": 1.3496983493954673e-10, "loss": 0.4659, "step": 3735 }, { "epoch": 2.995990376904571, "grad_norm": 0.8396766781806946, "learning_rate": 9.372911646599037e-11, "loss": 0.4637, "step": 3736 }, { "epoch": 2.996792301523657, "grad_norm": 0.8573855757713318, "learning_rate": 5.998666827378153e-11, "loss": 0.4648, "step": 3737 }, { "epoch": 2.9975942261427426, "grad_norm": 0.8772425055503845, "learning_rate": 3.3742515662682496e-11, "loss": 0.4768, "step": 3738 }, { "epoch": 2.9983961507618284, "grad_norm": 0.9464996457099915, "learning_rate": 1.4996678313616842e-11, "loss": 0.4722, "step": 3739 }, { "epoch": 2.999198075380914, "grad_norm": 0.9083192944526672, "learning_rate": 3.749170280897829e-12, "loss": 0.4938, "step": 3740 }, { "epoch": 3.0, "grad_norm": 0.7624624967575073, "learning_rate": 0.0, "loss": 0.3898, "step": 3741 }, { "epoch": 3.0, "step": 3741, "total_flos": 7.432135506978144e+18, "train_loss": 0.7324594869090414, "train_runtime": 79144.443, "train_samples_per_second": 18.139, "train_steps_per_second": 0.047 } ], "logging_steps": 1.0, "max_steps": 3741, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 32860.0, "total_flos": 7.432135506978144e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }