{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": -4135, "global_step": 4135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002418379685610641, "grad_norm": 93.15248531529272, "learning_rate": 2.3809523809523807e-08, "loss": 0.32383036613464355, "step": 1 }, { "epoch": 0.0004836759371221282, "grad_norm": 15.840045478805159, "learning_rate": 4.7619047619047613e-08, "loss": 0.16184215247631073, "step": 2 }, { "epoch": 0.0007255139056831923, "grad_norm": 17.574925974175475, "learning_rate": 7.142857142857142e-08, "loss": 0.2632005512714386, "step": 3 }, { "epoch": 0.0009673518742442564, "grad_norm": 49.201258842939524, "learning_rate": 9.523809523809523e-08, "loss": 0.33832237124443054, "step": 4 }, { "epoch": 0.0012091898428053204, "grad_norm": 32.97155213426597, "learning_rate": 1.1904761904761903e-07, "loss": 0.37205514311790466, "step": 5 }, { "epoch": 0.0014510278113663846, "grad_norm": 21.67822530421112, "learning_rate": 1.4285714285714285e-07, "loss": 0.21760626137256622, "step": 6 }, { "epoch": 0.0016928657799274486, "grad_norm": 33.578534218051466, "learning_rate": 1.6666666666666665e-07, "loss": 0.2320365458726883, "step": 7 }, { "epoch": 0.0019347037484885128, "grad_norm": 25.06012430548027, "learning_rate": 1.9047619047619045e-07, "loss": 0.45992279052734375, "step": 8 }, { "epoch": 0.002176541717049577, "grad_norm": 43.20167773366461, "learning_rate": 2.1428571428571426e-07, "loss": 0.3295772969722748, "step": 9 }, { "epoch": 0.0024183796856106408, "grad_norm": 19.646063170583307, "learning_rate": 2.3809523809523806e-07, "loss": 0.27988728880882263, "step": 10 }, { "epoch": 0.002660217654171705, "grad_norm": 43.53208360296801, "learning_rate": 2.619047619047619e-07, "loss": 0.3287881910800934, "step": 11 }, { "epoch": 0.002902055622732769, "grad_norm": 54.401109762713375, "learning_rate": 2.857142857142857e-07, "loss": 0.30849191546440125, "step": 12 }, { "epoch": 0.003143893591293833, "grad_norm": 26.009067924888775, "learning_rate": 3.095238095238095e-07, "loss": 0.32605499029159546, "step": 13 }, { "epoch": 0.003385731559854897, "grad_norm": 34.03497165378152, "learning_rate": 3.333333333333333e-07, "loss": 0.3992941081523895, "step": 14 }, { "epoch": 0.0036275695284159614, "grad_norm": 98.96370862487689, "learning_rate": 3.5714285714285716e-07, "loss": 0.29103413224220276, "step": 15 }, { "epoch": 0.0038694074969770256, "grad_norm": 32.7787737971883, "learning_rate": 3.809523809523809e-07, "loss": 0.32470962405204773, "step": 16 }, { "epoch": 0.004111245465538089, "grad_norm": 47.15980469418834, "learning_rate": 4.0476190476190476e-07, "loss": 0.32555505633354187, "step": 17 }, { "epoch": 0.004353083434099154, "grad_norm": 32.440861014731546, "learning_rate": 4.285714285714285e-07, "loss": 0.40953752398490906, "step": 18 }, { "epoch": 0.004594921402660218, "grad_norm": 40.81171247107882, "learning_rate": 4.5238095238095237e-07, "loss": 0.28687429428100586, "step": 19 }, { "epoch": 0.0048367593712212815, "grad_norm": 29.388766431331227, "learning_rate": 4.761904761904761e-07, "loss": 0.261170893907547, "step": 20 }, { "epoch": 0.005078597339782346, "grad_norm": 18.282621234329213, "learning_rate": 5e-07, "loss": 0.27888137102127075, "step": 21 }, { "epoch": 0.00532043530834341, "grad_norm": 12.344145893180226, "learning_rate": 5.238095238095238e-07, "loss": 0.2543157637119293, "step": 22 }, { "epoch": 0.005562273276904474, "grad_norm": 32.66303095460167, "learning_rate": 5.476190476190477e-07, "loss": 0.26570364832878113, "step": 23 }, { "epoch": 0.005804111245465538, "grad_norm": 21.544812186301154, "learning_rate": 5.714285714285714e-07, "loss": 0.23033533990383148, "step": 24 }, { "epoch": 0.006045949214026602, "grad_norm": 91.62447309256105, "learning_rate": 5.952380952380952e-07, "loss": 0.22947613894939423, "step": 25 }, { "epoch": 0.006287787182587666, "grad_norm": 48.173020808095636, "learning_rate": 6.19047619047619e-07, "loss": 0.2255934774875641, "step": 26 }, { "epoch": 0.0065296251511487305, "grad_norm": 16.617837462265904, "learning_rate": 6.428571428571429e-07, "loss": 0.1932579129934311, "step": 27 }, { "epoch": 0.006771463119709794, "grad_norm": 57.687528989426184, "learning_rate": 6.666666666666666e-07, "loss": 0.21009349822998047, "step": 28 }, { "epoch": 0.007013301088270859, "grad_norm": 12.922110847461749, "learning_rate": 6.904761904761904e-07, "loss": 0.14761732518672943, "step": 29 }, { "epoch": 0.007255139056831923, "grad_norm": 25.209200909836724, "learning_rate": 7.142857142857143e-07, "loss": 0.1940431147813797, "step": 30 }, { "epoch": 0.0074969770253929865, "grad_norm": 23.75704220591051, "learning_rate": 7.380952380952381e-07, "loss": 0.1254955381155014, "step": 31 }, { "epoch": 0.007738814993954051, "grad_norm": 34.93333239679947, "learning_rate": 7.619047619047618e-07, "loss": 0.15958736836910248, "step": 32 }, { "epoch": 0.007980652962515114, "grad_norm": 18.88857234496105, "learning_rate": 7.857142857142856e-07, "loss": 0.19912497699260712, "step": 33 }, { "epoch": 0.008222490931076179, "grad_norm": 44.20942004018036, "learning_rate": 8.095238095238095e-07, "loss": 0.22939424216747284, "step": 34 }, { "epoch": 0.008464328899637243, "grad_norm": 171.65916243344424, "learning_rate": 8.333333333333333e-07, "loss": 0.27353784441947937, "step": 35 }, { "epoch": 0.008706166868198308, "grad_norm": 19.01320241142847, "learning_rate": 8.57142857142857e-07, "loss": 0.21060191094875336, "step": 36 }, { "epoch": 0.00894800483675937, "grad_norm": 17.02943872422652, "learning_rate": 8.809523809523809e-07, "loss": 0.11522521823644638, "step": 37 }, { "epoch": 0.009189842805320435, "grad_norm": 12.744282657281806, "learning_rate": 9.047619047619047e-07, "loss": 0.12396448105573654, "step": 38 }, { "epoch": 0.0094316807738815, "grad_norm": 27.87755336987451, "learning_rate": 9.285714285714285e-07, "loss": 0.12505869567394257, "step": 39 }, { "epoch": 0.009673518742442563, "grad_norm": 12.897511758273916, "learning_rate": 9.523809523809522e-07, "loss": 0.12626582384109497, "step": 40 }, { "epoch": 0.009915356711003628, "grad_norm": 35.29032702460954, "learning_rate": 9.761904761904762e-07, "loss": 0.13134591281414032, "step": 41 }, { "epoch": 0.010157194679564692, "grad_norm": 12.732072341728706, "learning_rate": 1e-06, "loss": 0.15075750648975372, "step": 42 }, { "epoch": 0.010399032648125755, "grad_norm": 17.278524036262816, "learning_rate": 9.999998527157717e-07, "loss": 0.15133531391620636, "step": 43 }, { "epoch": 0.01064087061668682, "grad_norm": 56.6505873335376, "learning_rate": 9.999994108631737e-07, "loss": 0.1261449158191681, "step": 44 }, { "epoch": 0.010882708585247884, "grad_norm": 19.970359348042074, "learning_rate": 9.999986744424663e-07, "loss": 0.20098748803138733, "step": 45 }, { "epoch": 0.011124546553808947, "grad_norm": 65.76202446334035, "learning_rate": 9.999976434540833e-07, "loss": 0.15544195473194122, "step": 46 }, { "epoch": 0.011366384522370012, "grad_norm": 62.127765482535175, "learning_rate": 9.999963178986321e-07, "loss": 0.11217515915632248, "step": 47 }, { "epoch": 0.011608222490931077, "grad_norm": 14.710396651727057, "learning_rate": 9.999946977768938e-07, "loss": 0.10717935860157013, "step": 48 }, { "epoch": 0.01185006045949214, "grad_norm": 26.948167878418072, "learning_rate": 9.999927830898227e-07, "loss": 0.1413966417312622, "step": 49 }, { "epoch": 0.012091898428053204, "grad_norm": 7.8417070568690725, "learning_rate": 9.999905738385467e-07, "loss": 0.06797324866056442, "step": 50 }, { "epoch": 0.012333736396614269, "grad_norm": 38.81327006644171, "learning_rate": 9.999880700243677e-07, "loss": 0.11355441063642502, "step": 51 }, { "epoch": 0.012575574365175332, "grad_norm": 18.908753808210637, "learning_rate": 9.999852716487605e-07, "loss": 0.11116082966327667, "step": 52 }, { "epoch": 0.012817412333736396, "grad_norm": 14.107705441927731, "learning_rate": 9.999821787133736e-07, "loss": 0.10473835468292236, "step": 53 }, { "epoch": 0.013059250302297461, "grad_norm": 53.728982827336885, "learning_rate": 9.999787912200295e-07, "loss": 0.10859884321689606, "step": 54 }, { "epoch": 0.013301088270858524, "grad_norm": 7.902962594361869, "learning_rate": 9.99975109170724e-07, "loss": 0.06653588265180588, "step": 55 }, { "epoch": 0.013542926239419589, "grad_norm": 44.43680191495179, "learning_rate": 9.999711325676258e-07, "loss": 0.10317238420248032, "step": 56 }, { "epoch": 0.013784764207980653, "grad_norm": 12.544773869806932, "learning_rate": 9.999668614130782e-07, "loss": 0.17407210171222687, "step": 57 }, { "epoch": 0.014026602176541718, "grad_norm": 22.428882438851517, "learning_rate": 9.99962295709597e-07, "loss": 0.1242809072136879, "step": 58 }, { "epoch": 0.01426844014510278, "grad_norm": 7.125402444073081, "learning_rate": 9.999574354598726e-07, "loss": 0.05168929323554039, "step": 59 }, { "epoch": 0.014510278113663845, "grad_norm": 18.280497437934876, "learning_rate": 9.999522806667679e-07, "loss": 0.08527190238237381, "step": 60 }, { "epoch": 0.01475211608222491, "grad_norm": 9.620986660635186, "learning_rate": 9.999468313333198e-07, "loss": 0.10409373044967651, "step": 61 }, { "epoch": 0.014993954050785973, "grad_norm": 10.136384887645606, "learning_rate": 9.99941087462739e-07, "loss": 0.18378473818302155, "step": 62 }, { "epoch": 0.015235792019347038, "grad_norm": 12.342705308446263, "learning_rate": 9.99935049058409e-07, "loss": 0.2070598155260086, "step": 63 }, { "epoch": 0.015477629987908102, "grad_norm": 7.472682960543552, "learning_rate": 9.999287161238878e-07, "loss": 0.09224314987659454, "step": 64 }, { "epoch": 0.015719467956469165, "grad_norm": 14.090639892163448, "learning_rate": 9.99922088662906e-07, "loss": 0.1092061772942543, "step": 65 }, { "epoch": 0.015961305925030228, "grad_norm": 10.02213120081886, "learning_rate": 9.99915166679368e-07, "loss": 0.06808570772409439, "step": 66 }, { "epoch": 0.016203143893591294, "grad_norm": 24.758956363507885, "learning_rate": 9.99907950177352e-07, "loss": 0.0813119038939476, "step": 67 }, { "epoch": 0.016444981862152357, "grad_norm": 14.96714078822161, "learning_rate": 9.999004391611094e-07, "loss": 0.09322303533554077, "step": 68 }, { "epoch": 0.016686819830713424, "grad_norm": 7.62091448086726, "learning_rate": 9.998926336350653e-07, "loss": 0.06087801605463028, "step": 69 }, { "epoch": 0.016928657799274487, "grad_norm": 14.356367005236352, "learning_rate": 9.99884533603818e-07, "loss": 0.07776658236980438, "step": 70 }, { "epoch": 0.01717049576783555, "grad_norm": 24.60802469344685, "learning_rate": 9.998761390721402e-07, "loss": 0.1109154000878334, "step": 71 }, { "epoch": 0.017412333736396616, "grad_norm": 27.837631662550425, "learning_rate": 9.998674500449766e-07, "loss": 0.11855783313512802, "step": 72 }, { "epoch": 0.01765417170495768, "grad_norm": 7.4689527193541725, "learning_rate": 9.998584665274466e-07, "loss": 0.07796009629964828, "step": 73 }, { "epoch": 0.01789600967351874, "grad_norm": 35.99790132723795, "learning_rate": 9.998491885248426e-07, "loss": 0.20714294910430908, "step": 74 }, { "epoch": 0.018137847642079808, "grad_norm": 13.956199450619955, "learning_rate": 9.998396160426308e-07, "loss": 0.09796114265918732, "step": 75 }, { "epoch": 0.01837968561064087, "grad_norm": 8.809472403115675, "learning_rate": 9.998297490864507e-07, "loss": 0.12643121182918549, "step": 76 }, { "epoch": 0.018621523579201934, "grad_norm": 7.318718724265448, "learning_rate": 9.998195876621152e-07, "loss": 0.07691281288862228, "step": 77 }, { "epoch": 0.018863361547763, "grad_norm": 16.569164682712415, "learning_rate": 9.998091317756106e-07, "loss": 0.15347960591316223, "step": 78 }, { "epoch": 0.019105199516324063, "grad_norm": 54.19745296578447, "learning_rate": 9.99798381433097e-07, "loss": 0.1490813046693802, "step": 79 }, { "epoch": 0.019347037484885126, "grad_norm": 24.000806588404746, "learning_rate": 9.99787336640908e-07, "loss": 0.06701283901929855, "step": 80 }, { "epoch": 0.019588875453446192, "grad_norm": 7.6149282305705785, "learning_rate": 9.997759974055503e-07, "loss": 0.18617689609527588, "step": 81 }, { "epoch": 0.019830713422007255, "grad_norm": 22.6192200874238, "learning_rate": 9.997643637337041e-07, "loss": 0.09171732515096664, "step": 82 }, { "epoch": 0.020072551390568318, "grad_norm": 10.80623076575082, "learning_rate": 9.997524356322236e-07, "loss": 0.09009187668561935, "step": 83 }, { "epoch": 0.020314389359129385, "grad_norm": 4.944011782998469, "learning_rate": 9.99740213108136e-07, "loss": 0.04918142035603523, "step": 84 }, { "epoch": 0.020556227327690448, "grad_norm": 13.673325853552186, "learning_rate": 9.997276961686417e-07, "loss": 0.04075977951288223, "step": 85 }, { "epoch": 0.02079806529625151, "grad_norm": 12.906254672936486, "learning_rate": 9.997148848211154e-07, "loss": 0.18937762081623077, "step": 86 }, { "epoch": 0.021039903264812577, "grad_norm": 7.583477419406845, "learning_rate": 9.997017790731042e-07, "loss": 0.06415397673845291, "step": 87 }, { "epoch": 0.02128174123337364, "grad_norm": 14.771671327037655, "learning_rate": 9.996883789323295e-07, "loss": 0.03749893233180046, "step": 88 }, { "epoch": 0.021523579201934703, "grad_norm": 11.676184588697472, "learning_rate": 9.99674684406686e-07, "loss": 0.05170537903904915, "step": 89 }, { "epoch": 0.02176541717049577, "grad_norm": 16.763377663784976, "learning_rate": 9.996606955042412e-07, "loss": 0.08109436184167862, "step": 90 }, { "epoch": 0.022007255139056832, "grad_norm": 25.855248823992174, "learning_rate": 9.996464122332368e-07, "loss": 0.05320333316922188, "step": 91 }, { "epoch": 0.022249093107617895, "grad_norm": 4.886241645556846, "learning_rate": 9.996318346020875e-07, "loss": 0.06915604323148727, "step": 92 }, { "epoch": 0.02249093107617896, "grad_norm": 4.946607654602435, "learning_rate": 9.996169626193815e-07, "loss": 0.04084423556923866, "step": 93 }, { "epoch": 0.022732769044740024, "grad_norm": 16.534423686585935, "learning_rate": 9.996017962938805e-07, "loss": 0.09178774058818817, "step": 94 }, { "epoch": 0.022974607013301087, "grad_norm": 7.797142136325584, "learning_rate": 9.995863356345194e-07, "loss": 0.06262782961130142, "step": 95 }, { "epoch": 0.023216444981862153, "grad_norm": 11.46971430113489, "learning_rate": 9.995705806504067e-07, "loss": 0.06733354181051254, "step": 96 }, { "epoch": 0.023458282950423216, "grad_norm": 8.086317058103512, "learning_rate": 9.995545313508244e-07, "loss": 0.056669432669878006, "step": 97 }, { "epoch": 0.02370012091898428, "grad_norm": 14.614087477805336, "learning_rate": 9.995381877452275e-07, "loss": 0.07947435230016708, "step": 98 }, { "epoch": 0.023941958887545346, "grad_norm": 8.686875966814055, "learning_rate": 9.99521549843245e-07, "loss": 0.0822877511382103, "step": 99 }, { "epoch": 0.02418379685610641, "grad_norm": 13.71236973136494, "learning_rate": 9.995046176546784e-07, "loss": 0.09890566766262054, "step": 100 }, { "epoch": 0.02442563482466747, "grad_norm": 20.533190626139568, "learning_rate": 9.994873911895035e-07, "loss": 0.12081976979970932, "step": 101 }, { "epoch": 0.024667472793228538, "grad_norm": 44.743780070989764, "learning_rate": 9.994698704578688e-07, "loss": 0.047381915152072906, "step": 102 }, { "epoch": 0.0249093107617896, "grad_norm": 26.786900835662067, "learning_rate": 9.994520554700965e-07, "loss": 0.09361269325017929, "step": 103 }, { "epoch": 0.025151148730350664, "grad_norm": 11.399616872368417, "learning_rate": 9.994339462366818e-07, "loss": 0.06938806921243668, "step": 104 }, { "epoch": 0.02539298669891173, "grad_norm": 11.165833173810382, "learning_rate": 9.99415542768294e-07, "loss": 0.060668885707855225, "step": 105 }, { "epoch": 0.025634824667472793, "grad_norm": 31.42466703181445, "learning_rate": 9.99396845075775e-07, "loss": 0.10382001847028732, "step": 106 }, { "epoch": 0.025876662636033856, "grad_norm": 24.30209355174527, "learning_rate": 9.993778531701405e-07, "loss": 0.075384221971035, "step": 107 }, { "epoch": 0.026118500604594922, "grad_norm": 10.470961854503003, "learning_rate": 9.993585670625789e-07, "loss": 0.06691765785217285, "step": 108 }, { "epoch": 0.026360338573155985, "grad_norm": 6.465898158611976, "learning_rate": 9.993389867644526e-07, "loss": 0.03924579545855522, "step": 109 }, { "epoch": 0.026602176541717048, "grad_norm": 5.927250882251877, "learning_rate": 9.993191122872973e-07, "loss": 0.050991762429475784, "step": 110 }, { "epoch": 0.026844014510278114, "grad_norm": 10.877698331474807, "learning_rate": 9.992989436428213e-07, "loss": 0.06766051799058914, "step": 111 }, { "epoch": 0.027085852478839177, "grad_norm": 8.195447408927965, "learning_rate": 9.992784808429072e-07, "loss": 0.041602086275815964, "step": 112 }, { "epoch": 0.027327690447400244, "grad_norm": 13.395890845705653, "learning_rate": 9.992577238996104e-07, "loss": 0.07241940498352051, "step": 113 }, { "epoch": 0.027569528415961306, "grad_norm": 5.603993643499542, "learning_rate": 9.99236672825159e-07, "loss": 0.06484944373369217, "step": 114 }, { "epoch": 0.02781136638452237, "grad_norm": 37.550033395607855, "learning_rate": 9.992153276319553e-07, "loss": 0.17962154746055603, "step": 115 }, { "epoch": 0.028053204353083436, "grad_norm": 7.407116121229213, "learning_rate": 9.991936883325746e-07, "loss": 0.043460097163915634, "step": 116 }, { "epoch": 0.0282950423216445, "grad_norm": 12.774158780190465, "learning_rate": 9.991717549397654e-07, "loss": 0.09650229662656784, "step": 117 }, { "epoch": 0.02853688029020556, "grad_norm": 2.8325279721765426, "learning_rate": 9.991495274664495e-07, "loss": 0.027905050665140152, "step": 118 }, { "epoch": 0.028778718258766628, "grad_norm": 4.4818887739437665, "learning_rate": 9.991270059257216e-07, "loss": 0.08270382881164551, "step": 119 }, { "epoch": 0.02902055622732769, "grad_norm": 5.654226538772649, "learning_rate": 9.991041903308505e-07, "loss": 0.09950055927038193, "step": 120 }, { "epoch": 0.029262394195888754, "grad_norm": 8.533175144184618, "learning_rate": 9.990810806952773e-07, "loss": 0.06005362421274185, "step": 121 }, { "epoch": 0.02950423216444982, "grad_norm": 17.556700520188805, "learning_rate": 9.99057677032617e-07, "loss": 0.04972727224230766, "step": 122 }, { "epoch": 0.029746070133010883, "grad_norm": 9.364429424553238, "learning_rate": 9.990339793566574e-07, "loss": 0.044771697372198105, "step": 123 }, { "epoch": 0.029987908101571946, "grad_norm": 41.787445781203864, "learning_rate": 9.990099876813597e-07, "loss": 0.043104421347379684, "step": 124 }, { "epoch": 0.030229746070133012, "grad_norm": 11.312044656965108, "learning_rate": 9.989857020208583e-07, "loss": 0.09098125249147415, "step": 125 }, { "epoch": 0.030471584038694075, "grad_norm": 24.482182974231907, "learning_rate": 9.989611223894608e-07, "loss": 0.0465354360640049, "step": 126 }, { "epoch": 0.030713422007255138, "grad_norm": 6.857354071914509, "learning_rate": 9.989362488016477e-07, "loss": 0.02690097689628601, "step": 127 }, { "epoch": 0.030955259975816204, "grad_norm": 14.069773148725517, "learning_rate": 9.989110812720735e-07, "loss": 0.14198385179042816, "step": 128 }, { "epoch": 0.031197097944377267, "grad_norm": 11.733210560280927, "learning_rate": 9.98885619815565e-07, "loss": 0.15882451832294464, "step": 129 }, { "epoch": 0.03143893591293833, "grad_norm": 5.026088873906866, "learning_rate": 9.988598644471224e-07, "loss": 0.0975576788187027, "step": 130 }, { "epoch": 0.0316807738814994, "grad_norm": 66.38005011749833, "learning_rate": 9.988338151819192e-07, "loss": 0.07375521957874298, "step": 131 }, { "epoch": 0.031922611850060456, "grad_norm": 19.74062784196232, "learning_rate": 9.98807472035302e-07, "loss": 0.055760350078344345, "step": 132 }, { "epoch": 0.03216444981862152, "grad_norm": 6.510010529854281, "learning_rate": 9.987808350227906e-07, "loss": 0.05819067358970642, "step": 133 }, { "epoch": 0.03240628778718259, "grad_norm": 9.074963933736294, "learning_rate": 9.98753904160078e-07, "loss": 0.054221149533987045, "step": 134 }, { "epoch": 0.032648125755743655, "grad_norm": 12.795482755969832, "learning_rate": 9.987266794630297e-07, "loss": 0.11743482202291489, "step": 135 }, { "epoch": 0.032889963724304715, "grad_norm": 5.659749827373747, "learning_rate": 9.98699160947685e-07, "loss": 0.04678014665842056, "step": 136 }, { "epoch": 0.03313180169286578, "grad_norm": 8.69226714150847, "learning_rate": 9.986713486302564e-07, "loss": 0.035213325172662735, "step": 137 }, { "epoch": 0.03337363966142685, "grad_norm": 10.770938151812956, "learning_rate": 9.986432425271285e-07, "loss": 0.08532987534999847, "step": 138 }, { "epoch": 0.03361547762998791, "grad_norm": 8.89997987173703, "learning_rate": 9.986148426548601e-07, "loss": 0.06571108847856522, "step": 139 }, { "epoch": 0.03385731559854897, "grad_norm": 4.449084611160744, "learning_rate": 9.985861490301826e-07, "loss": 0.04590371623635292, "step": 140 }, { "epoch": 0.03409915356711004, "grad_norm": 8.147743548124886, "learning_rate": 9.985571616700003e-07, "loss": 0.05265605449676514, "step": 141 }, { "epoch": 0.0343409915356711, "grad_norm": 27.79028308426094, "learning_rate": 9.985278805913907e-07, "loss": 0.06932403147220612, "step": 142 }, { "epoch": 0.034582829504232165, "grad_norm": 8.471416199480874, "learning_rate": 9.984983058116047e-07, "loss": 0.10655541718006134, "step": 143 }, { "epoch": 0.03482466747279323, "grad_norm": 7.16891144860415, "learning_rate": 9.984684373480658e-07, "loss": 0.07407259196043015, "step": 144 }, { "epoch": 0.03506650544135429, "grad_norm": 8.870742107767464, "learning_rate": 9.984382752183703e-07, "loss": 0.058111466467380524, "step": 145 }, { "epoch": 0.03530834340991536, "grad_norm": 22.892332377154517, "learning_rate": 9.98407819440288e-07, "loss": 0.08340444415807724, "step": 146 }, { "epoch": 0.035550181378476424, "grad_norm": 6.896097759513516, "learning_rate": 9.983770700317617e-07, "loss": 0.07887491583824158, "step": 147 }, { "epoch": 0.03579201934703748, "grad_norm": 6.980228278824066, "learning_rate": 9.983460270109066e-07, "loss": 0.041064582765102386, "step": 148 }, { "epoch": 0.03603385731559855, "grad_norm": 9.865092724563203, "learning_rate": 9.983146903960116e-07, "loss": 0.03461770713329315, "step": 149 }, { "epoch": 0.036275695284159616, "grad_norm": 106.44041170529061, "learning_rate": 9.982830602055383e-07, "loss": 0.06780921667814255, "step": 150 }, { "epoch": 0.036517533252720676, "grad_norm": 9.133770737969277, "learning_rate": 9.982511364581212e-07, "loss": 0.049321409314870834, "step": 151 }, { "epoch": 0.03675937122128174, "grad_norm": 34.429600659680005, "learning_rate": 9.982189191725674e-07, "loss": 0.07608553022146225, "step": 152 }, { "epoch": 0.03700120918984281, "grad_norm": 5.895951235445973, "learning_rate": 9.981864083678578e-07, "loss": 0.06844236701726913, "step": 153 }, { "epoch": 0.03724304715840387, "grad_norm": 9.043461703882476, "learning_rate": 9.981536040631453e-07, "loss": 0.028822874650359154, "step": 154 }, { "epoch": 0.037484885126964934, "grad_norm": 13.235766465568082, "learning_rate": 9.981205062777564e-07, "loss": 0.0506402961909771, "step": 155 }, { "epoch": 0.037726723095526, "grad_norm": 9.215686728397692, "learning_rate": 9.9808711503119e-07, "loss": 0.05572427436709404, "step": 156 }, { "epoch": 0.03796856106408706, "grad_norm": 14.215372965313712, "learning_rate": 9.980534303431183e-07, "loss": 0.046799708157777786, "step": 157 }, { "epoch": 0.038210399032648126, "grad_norm": 16.748842231613803, "learning_rate": 9.980194522333862e-07, "loss": 0.06463446468114853, "step": 158 }, { "epoch": 0.03845223700120919, "grad_norm": 13.176353565530528, "learning_rate": 9.979851807220114e-07, "loss": 0.07123716920614243, "step": 159 }, { "epoch": 0.03869407496977025, "grad_norm": 4.918834804509347, "learning_rate": 9.979506158291844e-07, "loss": 0.031029725447297096, "step": 160 }, { "epoch": 0.03893591293833132, "grad_norm": 8.339526227338407, "learning_rate": 9.979157575752689e-07, "loss": 0.04357690364122391, "step": 161 }, { "epoch": 0.039177750906892385, "grad_norm": 4.2425340140245105, "learning_rate": 9.97880605980801e-07, "loss": 0.10264042764902115, "step": 162 }, { "epoch": 0.039419588875453444, "grad_norm": 16.854342071304192, "learning_rate": 9.978451610664898e-07, "loss": 0.051643241196870804, "step": 163 }, { "epoch": 0.03966142684401451, "grad_norm": 10.727583447667019, "learning_rate": 9.978094228532172e-07, "loss": 0.07448288053274155, "step": 164 }, { "epoch": 0.03990326481257558, "grad_norm": 4.0596640827120005, "learning_rate": 9.977733913620381e-07, "loss": 0.01595469005405903, "step": 165 }, { "epoch": 0.040145102781136636, "grad_norm": 11.76501373285768, "learning_rate": 9.977370666141798e-07, "loss": 0.05386677011847496, "step": 166 }, { "epoch": 0.0403869407496977, "grad_norm": 44.25163832630999, "learning_rate": 9.977004486310424e-07, "loss": 0.057693686336278915, "step": 167 }, { "epoch": 0.04062877871825877, "grad_norm": 18.162752517958044, "learning_rate": 9.976635374341992e-07, "loss": 0.04143417254090309, "step": 168 }, { "epoch": 0.04087061668681983, "grad_norm": 7.2050116755363725, "learning_rate": 9.97626333045396e-07, "loss": 0.02324373461306095, "step": 169 }, { "epoch": 0.041112454655380895, "grad_norm": 5.627793508339779, "learning_rate": 9.97588835486551e-07, "loss": 0.044494546949863434, "step": 170 }, { "epoch": 0.04135429262394196, "grad_norm": 9.695834723607408, "learning_rate": 9.975510447797555e-07, "loss": 0.03704506903886795, "step": 171 }, { "epoch": 0.04159613059250302, "grad_norm": 12.139958858505112, "learning_rate": 9.975129609472733e-07, "loss": 0.026762187480926514, "step": 172 }, { "epoch": 0.04183796856106409, "grad_norm": 7.980829427083269, "learning_rate": 9.974745840115414e-07, "loss": 0.05307764932513237, "step": 173 }, { "epoch": 0.042079806529625154, "grad_norm": 6.15248520910639, "learning_rate": 9.974359139951685e-07, "loss": 0.022393105551600456, "step": 174 }, { "epoch": 0.04232164449818621, "grad_norm": 25.94999641014139, "learning_rate": 9.97396950920937e-07, "loss": 0.052264124155044556, "step": 175 }, { "epoch": 0.04256348246674728, "grad_norm": 4.703395103545103, "learning_rate": 9.97357694811801e-07, "loss": 0.054454635828733444, "step": 176 }, { "epoch": 0.042805320435308346, "grad_norm": 21.97571839612801, "learning_rate": 9.973181456908884e-07, "loss": 0.03179094195365906, "step": 177 }, { "epoch": 0.043047158403869405, "grad_norm": 8.037858746039333, "learning_rate": 9.972783035814984e-07, "loss": 0.06844225525856018, "step": 178 }, { "epoch": 0.04328899637243047, "grad_norm": 32.137306667723976, "learning_rate": 9.97238168507104e-07, "loss": 0.10325445979833603, "step": 179 }, { "epoch": 0.04353083434099154, "grad_norm": 4.043203155242854, "learning_rate": 9.971977404913496e-07, "loss": 0.02358928695321083, "step": 180 }, { "epoch": 0.0437726723095526, "grad_norm": 27.223671439278167, "learning_rate": 9.971570195580535e-07, "loss": 0.04346121475100517, "step": 181 }, { "epoch": 0.044014510278113664, "grad_norm": 57.351496424519226, "learning_rate": 9.971160057312055e-07, "loss": 0.02945083938539028, "step": 182 }, { "epoch": 0.04425634824667473, "grad_norm": 13.06192694762561, "learning_rate": 9.970746990349685e-07, "loss": 0.038820188492536545, "step": 183 }, { "epoch": 0.04449818621523579, "grad_norm": 9.05236281345655, "learning_rate": 9.970330994936777e-07, "loss": 0.05093064904212952, "step": 184 }, { "epoch": 0.044740024183796856, "grad_norm": 14.693815886650894, "learning_rate": 9.969912071318412e-07, "loss": 0.04141543433070183, "step": 185 }, { "epoch": 0.04498186215235792, "grad_norm": 25.751380389823588, "learning_rate": 9.96949021974139e-07, "loss": 0.034665703773498535, "step": 186 }, { "epoch": 0.04522370012091898, "grad_norm": 5.321169171992921, "learning_rate": 9.96906544045424e-07, "loss": 0.020367706194519997, "step": 187 }, { "epoch": 0.04546553808948005, "grad_norm": 31.63266022533824, "learning_rate": 9.968637733707217e-07, "loss": 0.055141814053058624, "step": 188 }, { "epoch": 0.045707376058041115, "grad_norm": 88.16427035055284, "learning_rate": 9.968207099752299e-07, "loss": 0.06207519397139549, "step": 189 }, { "epoch": 0.045949214026602174, "grad_norm": 9.7820620097177, "learning_rate": 9.967773538843185e-07, "loss": 0.026699448004364967, "step": 190 }, { "epoch": 0.04619105199516324, "grad_norm": 7.528668457203264, "learning_rate": 9.967337051235303e-07, "loss": 0.047702934592962265, "step": 191 }, { "epoch": 0.04643288996372431, "grad_norm": 5.982577717536571, "learning_rate": 9.966897637185808e-07, "loss": 0.05481648072600365, "step": 192 }, { "epoch": 0.046674727932285366, "grad_norm": 38.666522688272316, "learning_rate": 9.966455296953569e-07, "loss": 0.043204642832279205, "step": 193 }, { "epoch": 0.04691656590084643, "grad_norm": 5.253996286610243, "learning_rate": 9.966010030799189e-07, "loss": 0.026525957509875298, "step": 194 }, { "epoch": 0.0471584038694075, "grad_norm": 4.98087227670462, "learning_rate": 9.96556183898499e-07, "loss": 0.04633742943406105, "step": 195 }, { "epoch": 0.04740024183796856, "grad_norm": 4.054948911006663, "learning_rate": 9.965110721775017e-07, "loss": 0.03144947439432144, "step": 196 }, { "epoch": 0.047642079806529625, "grad_norm": 7.210895611645975, "learning_rate": 9.96465667943504e-07, "loss": 0.04177483916282654, "step": 197 }, { "epoch": 0.04788391777509069, "grad_norm": 16.663050758641724, "learning_rate": 9.964199712232553e-07, "loss": 0.09656596183776855, "step": 198 }, { "epoch": 0.04812575574365175, "grad_norm": 2.9926953322733545, "learning_rate": 9.963739820436772e-07, "loss": 0.06673910468816757, "step": 199 }, { "epoch": 0.04836759371221282, "grad_norm": 8.172511693184147, "learning_rate": 9.963277004318638e-07, "loss": 0.03805834427475929, "step": 200 }, { "epoch": 0.04860943168077388, "grad_norm": 90.53310738921327, "learning_rate": 9.96281126415081e-07, "loss": 0.0706912949681282, "step": 201 }, { "epoch": 0.04885126964933494, "grad_norm": 5.134537840244266, "learning_rate": 9.962342600207674e-07, "loss": 0.03454097732901573, "step": 202 }, { "epoch": 0.04909310761789601, "grad_norm": 4.194395039717693, "learning_rate": 9.961871012765338e-07, "loss": 0.02843826450407505, "step": 203 }, { "epoch": 0.049334945586457075, "grad_norm": 12.823951003971999, "learning_rate": 9.96139650210163e-07, "loss": 0.03216351196169853, "step": 204 }, { "epoch": 0.049576783555018135, "grad_norm": 8.040780206167396, "learning_rate": 9.9609190684961e-07, "loss": 0.11281435936689377, "step": 205 }, { "epoch": 0.0498186215235792, "grad_norm": 21.198988576635962, "learning_rate": 9.96043871223003e-07, "loss": 0.08905129879713058, "step": 206 }, { "epoch": 0.05006045949214027, "grad_norm": 7.488509351546926, "learning_rate": 9.959955433586404e-07, "loss": 0.0361047089099884, "step": 207 }, { "epoch": 0.05030229746070133, "grad_norm": 29.280931206091353, "learning_rate": 9.95946923284995e-07, "loss": 0.03586842864751816, "step": 208 }, { "epoch": 0.05054413542926239, "grad_norm": 29.135397209367667, "learning_rate": 9.9589801103071e-07, "loss": 0.05030377581715584, "step": 209 }, { "epoch": 0.05078597339782346, "grad_norm": 6.22137170526195, "learning_rate": 9.958488066246015e-07, "loss": 0.023552769795060158, "step": 210 }, { "epoch": 0.05102781136638452, "grad_norm": 7.118206207409155, "learning_rate": 9.957993100956579e-07, "loss": 0.045635491609573364, "step": 211 }, { "epoch": 0.051269649334945586, "grad_norm": 12.746032897568698, "learning_rate": 9.957495214730391e-07, "loss": 0.06984677910804749, "step": 212 }, { "epoch": 0.05151148730350665, "grad_norm": 5.192649020734254, "learning_rate": 9.956994407860778e-07, "loss": 0.04235006123781204, "step": 213 }, { "epoch": 0.05175332527206771, "grad_norm": 14.201533140222923, "learning_rate": 9.956490680642781e-07, "loss": 0.04894381761550903, "step": 214 }, { "epoch": 0.05199516324062878, "grad_norm": 7.4552840467240475, "learning_rate": 9.955984033373165e-07, "loss": 0.046008504927158356, "step": 215 }, { "epoch": 0.052237001209189844, "grad_norm": 6.794006864109208, "learning_rate": 9.955474466350416e-07, "loss": 0.03359410911798477, "step": 216 }, { "epoch": 0.052478839177750904, "grad_norm": 17.224082271315055, "learning_rate": 9.954961979874737e-07, "loss": 0.02827048860490322, "step": 217 }, { "epoch": 0.05272067714631197, "grad_norm": 8.982931991555482, "learning_rate": 9.954446574248052e-07, "loss": 0.023162707686424255, "step": 218 }, { "epoch": 0.052962515114873036, "grad_norm": 9.732480597788726, "learning_rate": 9.953928249774007e-07, "loss": 0.05730158090591431, "step": 219 }, { "epoch": 0.053204353083434096, "grad_norm": 11.360734471474256, "learning_rate": 9.953407006757966e-07, "loss": 0.037422217428684235, "step": 220 }, { "epoch": 0.05344619105199516, "grad_norm": 13.525692792279234, "learning_rate": 9.95288284550701e-07, "loss": 0.026011312380433083, "step": 221 }, { "epoch": 0.05368802902055623, "grad_norm": 12.833651962487961, "learning_rate": 9.952355766329946e-07, "loss": 0.06225277855992317, "step": 222 }, { "epoch": 0.05392986698911729, "grad_norm": 7.559154807878858, "learning_rate": 9.951825769537294e-07, "loss": 0.05326172336935997, "step": 223 }, { "epoch": 0.054171704957678354, "grad_norm": 8.129710108901694, "learning_rate": 9.951292855441294e-07, "loss": 0.03519377112388611, "step": 224 }, { "epoch": 0.05441354292623942, "grad_norm": 10.41706221510073, "learning_rate": 9.950757024355905e-07, "loss": 0.0424572117626667, "step": 225 }, { "epoch": 0.05465538089480049, "grad_norm": 2.6579521269227455, "learning_rate": 9.950218276596808e-07, "loss": 0.019740602001547813, "step": 226 }, { "epoch": 0.05489721886336155, "grad_norm": 5.435527197135125, "learning_rate": 9.949676612481393e-07, "loss": 0.03586394712328911, "step": 227 }, { "epoch": 0.05513905683192261, "grad_norm": 25.52941215441115, "learning_rate": 9.94913203232878e-07, "loss": 0.05218319222331047, "step": 228 }, { "epoch": 0.05538089480048368, "grad_norm": 5.461427631306327, "learning_rate": 9.9485845364598e-07, "loss": 0.05214390158653259, "step": 229 }, { "epoch": 0.05562273276904474, "grad_norm": 4.010024445392408, "learning_rate": 9.948034125197003e-07, "loss": 0.07325726002454758, "step": 230 }, { "epoch": 0.055864570737605805, "grad_norm": 11.914323604439943, "learning_rate": 9.947480798864653e-07, "loss": 0.0395016185939312, "step": 231 }, { "epoch": 0.05610640870616687, "grad_norm": 13.567394355634844, "learning_rate": 9.946924557788741e-07, "loss": 0.03938199207186699, "step": 232 }, { "epoch": 0.05634824667472793, "grad_norm": 6.61131501361442, "learning_rate": 9.946365402296967e-07, "loss": 0.02819916047155857, "step": 233 }, { "epoch": 0.056590084643289, "grad_norm": 2.545558098080551, "learning_rate": 9.945803332718746e-07, "loss": 0.02472972683608532, "step": 234 }, { "epoch": 0.056831922611850064, "grad_norm": 4.02244711418921, "learning_rate": 9.945238349385218e-07, "loss": 0.03314167261123657, "step": 235 }, { "epoch": 0.05707376058041112, "grad_norm": 9.102212184381498, "learning_rate": 9.944670452629236e-07, "loss": 0.05485197529196739, "step": 236 }, { "epoch": 0.05731559854897219, "grad_norm": 8.207065546134245, "learning_rate": 9.944099642785367e-07, "loss": 0.018048608675599098, "step": 237 }, { "epoch": 0.057557436517533256, "grad_norm": 8.02671039764219, "learning_rate": 9.943525920189898e-07, "loss": 0.024160346016287804, "step": 238 }, { "epoch": 0.057799274486094315, "grad_norm": 2.6399532157275436, "learning_rate": 9.942949285180828e-07, "loss": 0.02260797657072544, "step": 239 }, { "epoch": 0.05804111245465538, "grad_norm": 3.4019893235932526, "learning_rate": 9.942369738097873e-07, "loss": 0.02205321192741394, "step": 240 }, { "epoch": 0.05828295042321645, "grad_norm": 18.563730994728363, "learning_rate": 9.94178727928247e-07, "loss": 0.11086203902959824, "step": 241 }, { "epoch": 0.05852478839177751, "grad_norm": 5.9329521162588845, "learning_rate": 9.941201909077766e-07, "loss": 0.04817016050219536, "step": 242 }, { "epoch": 0.058766626360338574, "grad_norm": 5.157903601457872, "learning_rate": 9.940613627828621e-07, "loss": 0.019627710804343224, "step": 243 }, { "epoch": 0.05900846432889964, "grad_norm": 4.0046923039445135, "learning_rate": 9.940022435881614e-07, "loss": 0.08007845282554626, "step": 244 }, { "epoch": 0.0592503022974607, "grad_norm": 3.9820109788960307, "learning_rate": 9.93942833358504e-07, "loss": 0.023032398894429207, "step": 245 }, { "epoch": 0.059492140266021766, "grad_norm": 6.264687186496765, "learning_rate": 9.938831321288906e-07, "loss": 0.05389104038476944, "step": 246 }, { "epoch": 0.05973397823458283, "grad_norm": 6.384145785317052, "learning_rate": 9.938231399344932e-07, "loss": 0.03322941064834595, "step": 247 }, { "epoch": 0.05997581620314389, "grad_norm": 10.18210998783532, "learning_rate": 9.937628568106555e-07, "loss": 0.07890744507312775, "step": 248 }, { "epoch": 0.06021765417170496, "grad_norm": 6.279427601322591, "learning_rate": 9.937022827928929e-07, "loss": 0.039677996188402176, "step": 249 }, { "epoch": 0.060459492140266025, "grad_norm": 27.975800437607752, "learning_rate": 9.936414179168912e-07, "loss": 0.024234838783740997, "step": 250 }, { "epoch": 0.060701330108827084, "grad_norm": 8.81522449370049, "learning_rate": 9.935802622185085e-07, "loss": 0.049865465611219406, "step": 251 }, { "epoch": 0.06094316807738815, "grad_norm": 8.844157074556753, "learning_rate": 9.93518815733774e-07, "loss": 0.018404770642518997, "step": 252 }, { "epoch": 0.06118500604594922, "grad_norm": 7.556336926569462, "learning_rate": 9.934570784988876e-07, "loss": 0.07107749581336975, "step": 253 }, { "epoch": 0.061426844014510276, "grad_norm": 30.518689766696028, "learning_rate": 9.933950505502213e-07, "loss": 0.08673776686191559, "step": 254 }, { "epoch": 0.06166868198307134, "grad_norm": 10.54811071351192, "learning_rate": 9.93332731924318e-07, "loss": 0.06307865679264069, "step": 255 }, { "epoch": 0.06191051995163241, "grad_norm": 5.692715183466282, "learning_rate": 9.93270122657892e-07, "loss": 0.02054356411099434, "step": 256 }, { "epoch": 0.06215235792019347, "grad_norm": 5.315373884192119, "learning_rate": 9.932072227878288e-07, "loss": 0.03766556829214096, "step": 257 }, { "epoch": 0.062394195888754535, "grad_norm": 3.753355727814219, "learning_rate": 9.931440323511849e-07, "loss": 0.028750255703926086, "step": 258 }, { "epoch": 0.0626360338573156, "grad_norm": 4.861589243986976, "learning_rate": 9.93080551385188e-07, "loss": 0.02375703491270542, "step": 259 }, { "epoch": 0.06287787182587666, "grad_norm": 21.49324581682189, "learning_rate": 9.93016779927237e-07, "loss": 0.02640252187848091, "step": 260 }, { "epoch": 0.06311970979443772, "grad_norm": 6.099287792561969, "learning_rate": 9.929527180149023e-07, "loss": 0.01994096301496029, "step": 261 }, { "epoch": 0.0633615477629988, "grad_norm": 12.11524417162216, "learning_rate": 9.928883656859253e-07, "loss": 0.03500045835971832, "step": 262 }, { "epoch": 0.06360338573155985, "grad_norm": 22.095938799880557, "learning_rate": 9.92823722978218e-07, "loss": 0.024420056492090225, "step": 263 }, { "epoch": 0.06384522370012091, "grad_norm": 3.8059860784013884, "learning_rate": 9.927587899298638e-07, "loss": 0.03408743813633919, "step": 264 }, { "epoch": 0.06408706166868199, "grad_norm": 3.4916486992638944, "learning_rate": 9.926935665791173e-07, "loss": 0.026157325133681297, "step": 265 }, { "epoch": 0.06432889963724304, "grad_norm": 33.35657065935332, "learning_rate": 9.926280529644037e-07, "loss": 0.03836845979094505, "step": 266 }, { "epoch": 0.0645707376058041, "grad_norm": 6.627630834749875, "learning_rate": 9.9256224912432e-07, "loss": 0.04632845148444176, "step": 267 }, { "epoch": 0.06481257557436518, "grad_norm": 9.870303156047024, "learning_rate": 9.924961550976334e-07, "loss": 0.07144607603549957, "step": 268 }, { "epoch": 0.06505441354292624, "grad_norm": 4.941552311885013, "learning_rate": 9.924297709232822e-07, "loss": 0.018702132627367973, "step": 269 }, { "epoch": 0.06529625151148731, "grad_norm": 5.140972925885148, "learning_rate": 9.92363096640376e-07, "loss": 0.02702120505273342, "step": 270 }, { "epoch": 0.06553808948004837, "grad_norm": 2.7423036595168666, "learning_rate": 9.922961322881947e-07, "loss": 0.03003646247088909, "step": 271 }, { "epoch": 0.06577992744860943, "grad_norm": 9.717536903146597, "learning_rate": 9.9222887790619e-07, "loss": 0.0295526385307312, "step": 272 }, { "epoch": 0.0660217654171705, "grad_norm": 5.564258980689012, "learning_rate": 9.921613335339836e-07, "loss": 0.07489899545907974, "step": 273 }, { "epoch": 0.06626360338573156, "grad_norm": 4.345188100496648, "learning_rate": 9.920934992113684e-07, "loss": 0.030385857447981834, "step": 274 }, { "epoch": 0.06650544135429262, "grad_norm": 13.272587006547226, "learning_rate": 9.920253749783081e-07, "loss": 0.03441377729177475, "step": 275 }, { "epoch": 0.0667472793228537, "grad_norm": 4.524492250764648, "learning_rate": 9.919569608749373e-07, "loss": 0.029446378350257874, "step": 276 }, { "epoch": 0.06698911729141475, "grad_norm": 9.458121219628968, "learning_rate": 9.918882569415613e-07, "loss": 0.030893921852111816, "step": 277 }, { "epoch": 0.06723095525997581, "grad_norm": 7.4321366056155, "learning_rate": 9.918192632186561e-07, "loss": 0.04126463457942009, "step": 278 }, { "epoch": 0.06747279322853689, "grad_norm": 4.288101538207599, "learning_rate": 9.917499797468684e-07, "loss": 0.02330179698765278, "step": 279 }, { "epoch": 0.06771463119709795, "grad_norm": 18.171714247150575, "learning_rate": 9.916804065670158e-07, "loss": 0.05857769772410393, "step": 280 }, { "epoch": 0.067956469165659, "grad_norm": 5.97813454180041, "learning_rate": 9.916105437200862e-07, "loss": 0.018233908340334892, "step": 281 }, { "epoch": 0.06819830713422008, "grad_norm": 9.772161799853937, "learning_rate": 9.915403912472382e-07, "loss": 0.012509405612945557, "step": 282 }, { "epoch": 0.06844014510278114, "grad_norm": 10.427268143497107, "learning_rate": 9.914699491898018e-07, "loss": 0.0529322512447834, "step": 283 }, { "epoch": 0.0686819830713422, "grad_norm": 6.37307602618607, "learning_rate": 9.913992175892765e-07, "loss": 0.046117234975099564, "step": 284 }, { "epoch": 0.06892382103990327, "grad_norm": 6.624192067661226, "learning_rate": 9.913281964873331e-07, "loss": 0.022856518626213074, "step": 285 }, { "epoch": 0.06916565900846433, "grad_norm": 32.46737976217815, "learning_rate": 9.912568859258129e-07, "loss": 0.04309385269880295, "step": 286 }, { "epoch": 0.06940749697702539, "grad_norm": 10.595553569029315, "learning_rate": 9.911852859467273e-07, "loss": 0.029378950595855713, "step": 287 }, { "epoch": 0.06964933494558646, "grad_norm": 8.635671488397705, "learning_rate": 9.911133965922588e-07, "loss": 0.02571406401693821, "step": 288 }, { "epoch": 0.06989117291414752, "grad_norm": 11.621209297813248, "learning_rate": 9.910412179047598e-07, "loss": 0.06967755407094955, "step": 289 }, { "epoch": 0.07013301088270858, "grad_norm": 12.421736956194017, "learning_rate": 9.909687499267533e-07, "loss": 0.040945541113615036, "step": 290 }, { "epoch": 0.07037484885126966, "grad_norm": 19.147616506688053, "learning_rate": 9.908959927009335e-07, "loss": 0.06241339445114136, "step": 291 }, { "epoch": 0.07061668681983072, "grad_norm": 11.195359388189722, "learning_rate": 9.908229462701636e-07, "loss": 0.04782486706972122, "step": 292 }, { "epoch": 0.07085852478839177, "grad_norm": 9.082312982422518, "learning_rate": 9.907496106774786e-07, "loss": 0.02872578613460064, "step": 293 }, { "epoch": 0.07110036275695285, "grad_norm": 13.774084988438771, "learning_rate": 9.906759859660827e-07, "loss": 0.04545127972960472, "step": 294 }, { "epoch": 0.07134220072551391, "grad_norm": 11.534837420109305, "learning_rate": 9.906020721793512e-07, "loss": 0.02711383067071438, "step": 295 }, { "epoch": 0.07158403869407497, "grad_norm": 6.314426593508443, "learning_rate": 9.905278693608293e-07, "loss": 0.013873882591724396, "step": 296 }, { "epoch": 0.07182587666263604, "grad_norm": 7.041369394086983, "learning_rate": 9.904533775542328e-07, "loss": 0.020124081522226334, "step": 297 }, { "epoch": 0.0720677146311971, "grad_norm": 19.72190632049391, "learning_rate": 9.903785968034475e-07, "loss": 0.038079991936683655, "step": 298 }, { "epoch": 0.07230955259975816, "grad_norm": 4.222994206835045, "learning_rate": 9.903035271525293e-07, "loss": 0.020969517529010773, "step": 299 }, { "epoch": 0.07255139056831923, "grad_norm": 25.781426322236392, "learning_rate": 9.902281686457048e-07, "loss": 0.029340846464037895, "step": 300 }, { "epoch": 0.07279322853688029, "grad_norm": 7.25742957246357, "learning_rate": 9.901525213273705e-07, "loss": 0.026628529652953148, "step": 301 }, { "epoch": 0.07303506650544135, "grad_norm": 6.434459301554406, "learning_rate": 9.900765852420927e-07, "loss": 0.030225664377212524, "step": 302 }, { "epoch": 0.07327690447400242, "grad_norm": 2.8208763597729116, "learning_rate": 9.900003604346082e-07, "loss": 0.016172997653484344, "step": 303 }, { "epoch": 0.07351874244256348, "grad_norm": 5.719013303665639, "learning_rate": 9.899238469498244e-07, "loss": 0.02513575553894043, "step": 304 }, { "epoch": 0.07376058041112454, "grad_norm": 6.349896501966209, "learning_rate": 9.898470448328174e-07, "loss": 0.0347142331302166, "step": 305 }, { "epoch": 0.07400241837968562, "grad_norm": 14.36939202336481, "learning_rate": 9.897699541288347e-07, "loss": 0.12189729511737823, "step": 306 }, { "epoch": 0.07424425634824668, "grad_norm": 5.140578510629546, "learning_rate": 9.896925748832931e-07, "loss": 0.031401559710502625, "step": 307 }, { "epoch": 0.07448609431680774, "grad_norm": 3.001820610305707, "learning_rate": 9.896149071417798e-07, "loss": 0.019327307119965553, "step": 308 }, { "epoch": 0.07472793228536881, "grad_norm": 5.307862246702578, "learning_rate": 9.895369509500512e-07, "loss": 0.02602509595453739, "step": 309 }, { "epoch": 0.07496977025392987, "grad_norm": 7.136969558889285, "learning_rate": 9.89458706354035e-07, "loss": 0.043480999767780304, "step": 310 }, { "epoch": 0.07521160822249093, "grad_norm": 7.6419333793202044, "learning_rate": 9.89380173399827e-07, "loss": 0.021763667464256287, "step": 311 }, { "epoch": 0.075453446191052, "grad_norm": 5.5176069640642815, "learning_rate": 9.893013521336947e-07, "loss": 0.02694801799952984, "step": 312 }, { "epoch": 0.07569528415961306, "grad_norm": 4.546071250565106, "learning_rate": 9.892222426020742e-07, "loss": 0.02984328381717205, "step": 313 }, { "epoch": 0.07593712212817412, "grad_norm": 3.7079533016859947, "learning_rate": 9.891428448515717e-07, "loss": 0.014596203342080116, "step": 314 }, { "epoch": 0.0761789600967352, "grad_norm": 11.483367204650506, "learning_rate": 9.890631589289639e-07, "loss": 0.03168027848005295, "step": 315 }, { "epoch": 0.07642079806529625, "grad_norm": 5.899867631009102, "learning_rate": 9.88983184881196e-07, "loss": 0.027452481910586357, "step": 316 }, { "epoch": 0.07666263603385731, "grad_norm": 9.532713523587951, "learning_rate": 9.889029227553844e-07, "loss": 0.06169246509671211, "step": 317 }, { "epoch": 0.07690447400241839, "grad_norm": 11.204290003511849, "learning_rate": 9.88822372598814e-07, "loss": 0.024766117334365845, "step": 318 }, { "epoch": 0.07714631197097944, "grad_norm": 5.550552438839093, "learning_rate": 9.8874153445894e-07, "loss": 0.0654192715883255, "step": 319 }, { "epoch": 0.0773881499395405, "grad_norm": 4.694721454062142, "learning_rate": 9.886604083833871e-07, "loss": 0.030094563961029053, "step": 320 }, { "epoch": 0.07762998790810158, "grad_norm": 8.786258641522886, "learning_rate": 9.885789944199498e-07, "loss": 0.0352611243724823, "step": 321 }, { "epoch": 0.07787182587666264, "grad_norm": 6.959921896294265, "learning_rate": 9.884972926165918e-07, "loss": 0.03490905091166496, "step": 322 }, { "epoch": 0.0781136638452237, "grad_norm": 11.842641158474354, "learning_rate": 9.88415303021447e-07, "loss": 0.03374776989221573, "step": 323 }, { "epoch": 0.07835550181378477, "grad_norm": 2.3979589652506634, "learning_rate": 9.883330256828182e-07, "loss": 0.026812219992280006, "step": 324 }, { "epoch": 0.07859733978234583, "grad_norm": 4.284454280977983, "learning_rate": 9.882504606491781e-07, "loss": 0.06769489496946335, "step": 325 }, { "epoch": 0.07883917775090689, "grad_norm": 9.067765996717405, "learning_rate": 9.881676079691688e-07, "loss": 0.03671472519636154, "step": 326 }, { "epoch": 0.07908101571946796, "grad_norm": 12.689396737034297, "learning_rate": 9.88084467691602e-07, "loss": 0.06947670131921768, "step": 327 }, { "epoch": 0.07932285368802902, "grad_norm": 3.061732465641028, "learning_rate": 9.880010398654586e-07, "loss": 0.07499926537275314, "step": 328 }, { "epoch": 0.07956469165659008, "grad_norm": 2.8288148417445553, "learning_rate": 9.879173245398891e-07, "loss": 0.02287144772708416, "step": 329 }, { "epoch": 0.07980652962515115, "grad_norm": 29.880753936041323, "learning_rate": 9.878333217642132e-07, "loss": 0.07482564449310303, "step": 330 }, { "epoch": 0.08004836759371221, "grad_norm": 8.993448301403573, "learning_rate": 9.8774903158792e-07, "loss": 0.03814808651804924, "step": 331 }, { "epoch": 0.08029020556227327, "grad_norm": 3.5367485616752146, "learning_rate": 9.87664454060668e-07, "loss": 0.03705501928925514, "step": 332 }, { "epoch": 0.08053204353083435, "grad_norm": 4.976817752868921, "learning_rate": 9.875795892322849e-07, "loss": 0.03067578189074993, "step": 333 }, { "epoch": 0.0807738814993954, "grad_norm": 4.7381492557849265, "learning_rate": 9.874944371527678e-07, "loss": 0.030120229348540306, "step": 334 }, { "epoch": 0.08101571946795647, "grad_norm": 8.238609762897346, "learning_rate": 9.87408997872283e-07, "loss": 0.04120826721191406, "step": 335 }, { "epoch": 0.08125755743651754, "grad_norm": 11.55131354067174, "learning_rate": 9.873232714411658e-07, "loss": 0.05676756054162979, "step": 336 }, { "epoch": 0.0814993954050786, "grad_norm": 8.000581116428448, "learning_rate": 9.872372579099207e-07, "loss": 0.051289357244968414, "step": 337 }, { "epoch": 0.08174123337363966, "grad_norm": 3.809834358658782, "learning_rate": 9.871509573292215e-07, "loss": 0.03123457543551922, "step": 338 }, { "epoch": 0.08198307134220073, "grad_norm": 2.0700284451643682, "learning_rate": 9.870643697499114e-07, "loss": 0.02433883026242256, "step": 339 }, { "epoch": 0.08222490931076179, "grad_norm": 8.057702732797532, "learning_rate": 9.869774952230018e-07, "loss": 0.012565597891807556, "step": 340 }, { "epoch": 0.08246674727932285, "grad_norm": 39.45705704772604, "learning_rate": 9.868903337996743e-07, "loss": 0.04444601759314537, "step": 341 }, { "epoch": 0.08270858524788392, "grad_norm": 4.734128565055762, "learning_rate": 9.868028855312783e-07, "loss": 0.019376171752810478, "step": 342 }, { "epoch": 0.08295042321644498, "grad_norm": 9.703116986083451, "learning_rate": 9.867151504693332e-07, "loss": 0.05342843756079674, "step": 343 }, { "epoch": 0.08319226118500604, "grad_norm": 4.061550569165729, "learning_rate": 9.866271286655269e-07, "loss": 0.02582485042512417, "step": 344 }, { "epoch": 0.08343409915356712, "grad_norm": 4.886111210300933, "learning_rate": 9.86538820171716e-07, "loss": 0.02685128152370453, "step": 345 }, { "epoch": 0.08367593712212817, "grad_norm": 16.770868545688387, "learning_rate": 9.864502250399266e-07, "loss": 0.04925503209233284, "step": 346 }, { "epoch": 0.08391777509068923, "grad_norm": 5.590026206189532, "learning_rate": 9.863613433223533e-07, "loss": 0.12441708892583847, "step": 347 }, { "epoch": 0.08415961305925031, "grad_norm": 4.414601274500806, "learning_rate": 9.862721750713597e-07, "loss": 0.027532577514648438, "step": 348 }, { "epoch": 0.08440145102781137, "grad_norm": 8.89302515549137, "learning_rate": 9.861827203394779e-07, "loss": 0.05551127344369888, "step": 349 }, { "epoch": 0.08464328899637243, "grad_norm": 29.113949668412527, "learning_rate": 9.86092979179409e-07, "loss": 0.030084583908319473, "step": 350 }, { "epoch": 0.0848851269649335, "grad_norm": 1.6553864959434665, "learning_rate": 9.86002951644023e-07, "loss": 0.01683752052485943, "step": 351 }, { "epoch": 0.08512696493349456, "grad_norm": 3.8747455952189713, "learning_rate": 9.859126377863584e-07, "loss": 0.020497018471360207, "step": 352 }, { "epoch": 0.08536880290205562, "grad_norm": 11.63209594351076, "learning_rate": 9.858220376596224e-07, "loss": 0.04884575679898262, "step": 353 }, { "epoch": 0.08561064087061669, "grad_norm": 5.902203239675242, "learning_rate": 9.85731151317191e-07, "loss": 0.012627688236534595, "step": 354 }, { "epoch": 0.08585247883917775, "grad_norm": 10.483291372661508, "learning_rate": 9.856399788126082e-07, "loss": 0.024865511804819107, "step": 355 }, { "epoch": 0.08609431680773881, "grad_norm": 4.3972382455504855, "learning_rate": 9.855485201995876e-07, "loss": 0.039173420518636703, "step": 356 }, { "epoch": 0.08633615477629988, "grad_norm": 4.240559082835588, "learning_rate": 9.854567755320107e-07, "loss": 0.022094106301665306, "step": 357 }, { "epoch": 0.08657799274486094, "grad_norm": 8.949525478748045, "learning_rate": 9.853647448639278e-07, "loss": 0.015154085122048855, "step": 358 }, { "epoch": 0.086819830713422, "grad_norm": 3.668958960616967, "learning_rate": 9.852724282495573e-07, "loss": 0.013824285939335823, "step": 359 }, { "epoch": 0.08706166868198308, "grad_norm": 23.39642260053767, "learning_rate": 9.851798257432863e-07, "loss": 0.016401369124650955, "step": 360 }, { "epoch": 0.08730350665054414, "grad_norm": 5.458798247505017, "learning_rate": 9.850869373996708e-07, "loss": 0.01395796425640583, "step": 361 }, { "epoch": 0.0875453446191052, "grad_norm": 13.413624925407651, "learning_rate": 9.849937632734343e-07, "loss": 0.10338475555181503, "step": 362 }, { "epoch": 0.08778718258766627, "grad_norm": 20.911299227363713, "learning_rate": 9.849003034194694e-07, "loss": 0.03764844685792923, "step": 363 }, { "epoch": 0.08802902055622733, "grad_norm": 6.407380297096483, "learning_rate": 9.848065578928365e-07, "loss": 0.029034171253442764, "step": 364 }, { "epoch": 0.08827085852478839, "grad_norm": 3.002574083898521, "learning_rate": 9.847125267487648e-07, "loss": 0.019366033375263214, "step": 365 }, { "epoch": 0.08851269649334946, "grad_norm": 3.8256773431528326, "learning_rate": 9.846182100426511e-07, "loss": 0.06213868409395218, "step": 366 }, { "epoch": 0.08875453446191052, "grad_norm": 35.32674112771976, "learning_rate": 9.845236078300614e-07, "loss": 0.01555687841027975, "step": 367 }, { "epoch": 0.08899637243047158, "grad_norm": 5.057943150092516, "learning_rate": 9.844287201667292e-07, "loss": 0.02650228515267372, "step": 368 }, { "epoch": 0.08923821039903265, "grad_norm": 2.435801004796271, "learning_rate": 9.84333547108556e-07, "loss": 0.025700662285089493, "step": 369 }, { "epoch": 0.08948004836759371, "grad_norm": 3.165852864345665, "learning_rate": 9.842380887116122e-07, "loss": 0.01857210509479046, "step": 370 }, { "epoch": 0.08972188633615477, "grad_norm": 3.0124767234935694, "learning_rate": 9.841423450321356e-07, "loss": 0.02835632860660553, "step": 371 }, { "epoch": 0.08996372430471584, "grad_norm": 4.369638747325174, "learning_rate": 9.840463161265322e-07, "loss": 0.06070256233215332, "step": 372 }, { "epoch": 0.0902055622732769, "grad_norm": 2.6610147624255522, "learning_rate": 9.839500020513765e-07, "loss": 0.015178239904344082, "step": 373 }, { "epoch": 0.09044740024183796, "grad_norm": 11.610425887522696, "learning_rate": 9.838534028634107e-07, "loss": 0.022438807412981987, "step": 374 }, { "epoch": 0.09068923821039904, "grad_norm": 7.650898762312626, "learning_rate": 9.837565186195445e-07, "loss": 0.037190522998571396, "step": 375 }, { "epoch": 0.0909310761789601, "grad_norm": 9.88715988271303, "learning_rate": 9.836593493768564e-07, "loss": 0.07098021358251572, "step": 376 }, { "epoch": 0.09117291414752116, "grad_norm": 12.970630785297063, "learning_rate": 9.835618951925924e-07, "loss": 0.03698275238275528, "step": 377 }, { "epoch": 0.09141475211608223, "grad_norm": 2.3315309469716583, "learning_rate": 9.83464156124166e-07, "loss": 0.019901392981410027, "step": 378 }, { "epoch": 0.09165659008464329, "grad_norm": 6.746383670745028, "learning_rate": 9.833661322291592e-07, "loss": 0.028359079733490944, "step": 379 }, { "epoch": 0.09189842805320435, "grad_norm": 5.5445953938245385, "learning_rate": 9.832678235653213e-07, "loss": 0.05323168635368347, "step": 380 }, { "epoch": 0.09214026602176542, "grad_norm": 34.39642022555908, "learning_rate": 9.831692301905699e-07, "loss": 0.05484229326248169, "step": 381 }, { "epoch": 0.09238210399032648, "grad_norm": 15.09189862607399, "learning_rate": 9.830703521629897e-07, "loss": 0.05163722112774849, "step": 382 }, { "epoch": 0.09262394195888754, "grad_norm": 15.786318000137834, "learning_rate": 9.829711895408334e-07, "loss": 0.04381785914301872, "step": 383 }, { "epoch": 0.09286577992744861, "grad_norm": 3.0540882901264754, "learning_rate": 9.828717423825213e-07, "loss": 0.015355822630226612, "step": 384 }, { "epoch": 0.09310761789600967, "grad_norm": 9.465970600601606, "learning_rate": 9.827720107466418e-07, "loss": 0.018094878643751144, "step": 385 }, { "epoch": 0.09334945586457073, "grad_norm": 7.747123015200206, "learning_rate": 9.826719946919499e-07, "loss": 0.05744027718901634, "step": 386 }, { "epoch": 0.0935912938331318, "grad_norm": 63.59603754756123, "learning_rate": 9.825716942773691e-07, "loss": 0.0543791763484478, "step": 387 }, { "epoch": 0.09383313180169287, "grad_norm": 7.4052239483868085, "learning_rate": 9.824711095619903e-07, "loss": 0.045304279774427414, "step": 388 }, { "epoch": 0.09407496977025392, "grad_norm": 9.539363137374634, "learning_rate": 9.823702406050712e-07, "loss": 0.024721182882785797, "step": 389 }, { "epoch": 0.094316807738815, "grad_norm": 8.715952664906194, "learning_rate": 9.822690874660376e-07, "loss": 0.012875239364802837, "step": 390 }, { "epoch": 0.09455864570737606, "grad_norm": 2.269665619019178, "learning_rate": 9.821676502044827e-07, "loss": 0.024601202458143234, "step": 391 }, { "epoch": 0.09480048367593712, "grad_norm": 11.81203887989774, "learning_rate": 9.820659288801667e-07, "loss": 0.02182074822485447, "step": 392 }, { "epoch": 0.09504232164449819, "grad_norm": 5.5425886639933335, "learning_rate": 9.819639235530174e-07, "loss": 0.011581333354115486, "step": 393 }, { "epoch": 0.09528415961305925, "grad_norm": 2.292087191493124, "learning_rate": 9.818616342831303e-07, "loss": 0.02469540201127529, "step": 394 }, { "epoch": 0.09552599758162031, "grad_norm": 3.404063428343881, "learning_rate": 9.817590611307672e-07, "loss": 0.018912067636847496, "step": 395 }, { "epoch": 0.09576783555018138, "grad_norm": 6.497930100114627, "learning_rate": 9.81656204156358e-07, "loss": 0.023566508665680885, "step": 396 }, { "epoch": 0.09600967351874244, "grad_norm": 4.067904527192611, "learning_rate": 9.815530634205e-07, "loss": 0.023528341203927994, "step": 397 }, { "epoch": 0.0962515114873035, "grad_norm": 21.561097210135404, "learning_rate": 9.814496389839561e-07, "loss": 0.01704517751932144, "step": 398 }, { "epoch": 0.09649334945586457, "grad_norm": 6.184682555022278, "learning_rate": 9.813459309076587e-07, "loss": 0.023208025842905045, "step": 399 }, { "epoch": 0.09673518742442563, "grad_norm": 3.5658117638389717, "learning_rate": 9.81241939252705e-07, "loss": 0.027810359373688698, "step": 400 }, { "epoch": 0.0969770253929867, "grad_norm": 9.805053024528853, "learning_rate": 9.81137664080361e-07, "loss": 0.03773852437734604, "step": 401 }, { "epoch": 0.09721886336154777, "grad_norm": 3.379612744166051, "learning_rate": 9.810331054520589e-07, "loss": 0.0331646092236042, "step": 402 }, { "epoch": 0.09746070133010883, "grad_norm": 719.5662319802774, "learning_rate": 9.80928263429398e-07, "loss": 0.03313680365681648, "step": 403 }, { "epoch": 0.09770253929866989, "grad_norm": 3.205408163762604, "learning_rate": 9.808231380741447e-07, "loss": 0.01744227670133114, "step": 404 }, { "epoch": 0.09794437726723096, "grad_norm": 2.0564587541312562, "learning_rate": 9.80717729448232e-07, "loss": 0.030079562216997147, "step": 405 }, { "epoch": 0.09818621523579202, "grad_norm": 5.690325282293782, "learning_rate": 9.806120376137602e-07, "loss": 0.021610481664538383, "step": 406 }, { "epoch": 0.09842805320435308, "grad_norm": 5.857536559555554, "learning_rate": 9.805060626329961e-07, "loss": 0.020212190225720406, "step": 407 }, { "epoch": 0.09866989117291415, "grad_norm": 4.838726575127383, "learning_rate": 9.803998045683736e-07, "loss": 0.031572550535202026, "step": 408 }, { "epoch": 0.09891172914147521, "grad_norm": 3.604072157670048, "learning_rate": 9.802932634824934e-07, "loss": 0.022763855755329132, "step": 409 }, { "epoch": 0.09915356711003627, "grad_norm": 1.9717181978810243, "learning_rate": 9.801864394381225e-07, "loss": 0.017710024490952492, "step": 410 }, { "epoch": 0.09939540507859734, "grad_norm": 3.738129472373839, "learning_rate": 9.800793324981949e-07, "loss": 0.03971096873283386, "step": 411 }, { "epoch": 0.0996372430471584, "grad_norm": 42.11301985189815, "learning_rate": 9.799719427258115e-07, "loss": 0.026198817417025566, "step": 412 }, { "epoch": 0.09987908101571946, "grad_norm": 4.830234511370726, "learning_rate": 9.798642701842393e-07, "loss": 0.014492957852780819, "step": 413 }, { "epoch": 0.10012091898428054, "grad_norm": 63.95145881246363, "learning_rate": 9.797563149369123e-07, "loss": 0.09400873631238937, "step": 414 }, { "epoch": 0.1003627569528416, "grad_norm": 3.686281501445722, "learning_rate": 9.796480770474312e-07, "loss": 0.01889420859515667, "step": 415 }, { "epoch": 0.10060459492140265, "grad_norm": 5.142699918210833, "learning_rate": 9.795395565795624e-07, "loss": 0.018650909885764122, "step": 416 }, { "epoch": 0.10084643288996373, "grad_norm": 6.867638228338733, "learning_rate": 9.794307535972397e-07, "loss": 0.06561310589313507, "step": 417 }, { "epoch": 0.10108827085852479, "grad_norm": 7.060897544652469, "learning_rate": 9.793216681645625e-07, "loss": 0.034246526658535004, "step": 418 }, { "epoch": 0.10133010882708585, "grad_norm": 4.9823196774931535, "learning_rate": 9.792123003457978e-07, "loss": 0.08141250908374786, "step": 419 }, { "epoch": 0.10157194679564692, "grad_norm": 3.3381469952203853, "learning_rate": 9.791026502053777e-07, "loss": 0.016312887892127037, "step": 420 }, { "epoch": 0.10181378476420798, "grad_norm": 5.891340976532417, "learning_rate": 9.78992717807901e-07, "loss": 0.0257174801081419, "step": 421 }, { "epoch": 0.10205562273276904, "grad_norm": 4.28352983009039, "learning_rate": 9.788825032181337e-07, "loss": 0.02070876769721508, "step": 422 }, { "epoch": 0.10229746070133011, "grad_norm": 1.610954829600795, "learning_rate": 9.787720065010064e-07, "loss": 0.011124279350042343, "step": 423 }, { "epoch": 0.10253929866989117, "grad_norm": 4.3399370197652525, "learning_rate": 9.786612277216171e-07, "loss": 0.016579262912273407, "step": 424 }, { "epoch": 0.10278113663845223, "grad_norm": 2.778130888507815, "learning_rate": 9.785501669452298e-07, "loss": 0.02096121571958065, "step": 425 }, { "epoch": 0.1030229746070133, "grad_norm": 10.565642931975189, "learning_rate": 9.784388242372743e-07, "loss": 0.04197466000914574, "step": 426 }, { "epoch": 0.10326481257557436, "grad_norm": 2.358616657122161, "learning_rate": 9.783271996633472e-07, "loss": 0.016930047422647476, "step": 427 }, { "epoch": 0.10350665054413542, "grad_norm": 2.9472184554103884, "learning_rate": 9.782152932892098e-07, "loss": 0.018854551017284393, "step": 428 }, { "epoch": 0.1037484885126965, "grad_norm": 7.764141956413922, "learning_rate": 9.78103105180791e-07, "loss": 0.07317142933607101, "step": 429 }, { "epoch": 0.10399032648125756, "grad_norm": 7.412425994818137, "learning_rate": 9.779906354041847e-07, "loss": 0.034490495920181274, "step": 430 }, { "epoch": 0.10423216444981862, "grad_norm": 35.880145527190805, "learning_rate": 9.778778840256511e-07, "loss": 0.02026946283876896, "step": 431 }, { "epoch": 0.10447400241837969, "grad_norm": 8.032423618903463, "learning_rate": 9.777648511116161e-07, "loss": 0.03615846112370491, "step": 432 }, { "epoch": 0.10471584038694075, "grad_norm": 6.6069521364300074, "learning_rate": 9.776515367286715e-07, "loss": 0.049956198781728745, "step": 433 }, { "epoch": 0.10495767835550181, "grad_norm": 4.782992588142874, "learning_rate": 9.77537940943575e-07, "loss": 0.017256900668144226, "step": 434 }, { "epoch": 0.10519951632406288, "grad_norm": 3.796634853746054, "learning_rate": 9.774240638232503e-07, "loss": 0.03184620663523674, "step": 435 }, { "epoch": 0.10544135429262394, "grad_norm": 5.711697414133154, "learning_rate": 9.773099054347862e-07, "loss": 0.05217091366648674, "step": 436 }, { "epoch": 0.105683192261185, "grad_norm": 6.535714268291782, "learning_rate": 9.77195465845438e-07, "loss": 0.06959414482116699, "step": 437 }, { "epoch": 0.10592503022974607, "grad_norm": 2.6332117187191404, "learning_rate": 9.77080745122626e-07, "loss": 0.013384995050728321, "step": 438 }, { "epoch": 0.10616686819830713, "grad_norm": 2.4578246027123516, "learning_rate": 9.769657433339369e-07, "loss": 0.021600518375635147, "step": 439 }, { "epoch": 0.10640870616686819, "grad_norm": 7.42331486734156, "learning_rate": 9.768504605471219e-07, "loss": 0.03035934641957283, "step": 440 }, { "epoch": 0.10665054413542926, "grad_norm": 3.415507522619151, "learning_rate": 9.767348968300987e-07, "loss": 0.04282107576727867, "step": 441 }, { "epoch": 0.10689238210399032, "grad_norm": 5.29464790952193, "learning_rate": 9.7661905225095e-07, "loss": 0.07458814233541489, "step": 442 }, { "epoch": 0.10713422007255138, "grad_norm": 15.342600968580305, "learning_rate": 9.765029268779244e-07, "loss": 0.020159859210252762, "step": 443 }, { "epoch": 0.10737605804111246, "grad_norm": 2.0260239135486624, "learning_rate": 9.763865207794354e-07, "loss": 0.01714084856212139, "step": 444 }, { "epoch": 0.10761789600967352, "grad_norm": 6.442732515417552, "learning_rate": 9.762698340240618e-07, "loss": 0.026070594787597656, "step": 445 }, { "epoch": 0.10785973397823458, "grad_norm": 6.396624370841246, "learning_rate": 9.761528666805486e-07, "loss": 0.028188152238726616, "step": 446 }, { "epoch": 0.10810157194679565, "grad_norm": 6.240320769006397, "learning_rate": 9.760356188178055e-07, "loss": 0.026898115873336792, "step": 447 }, { "epoch": 0.10834340991535671, "grad_norm": 8.611758679658589, "learning_rate": 9.759180905049073e-07, "loss": 0.01951686479151249, "step": 448 }, { "epoch": 0.10858524788391777, "grad_norm": 7.869172023186797, "learning_rate": 9.758002818110945e-07, "loss": 0.021912116557359695, "step": 449 }, { "epoch": 0.10882708585247884, "grad_norm": 7.981083123086744, "learning_rate": 9.756821928057725e-07, "loss": 0.038386691361665726, "step": 450 }, { "epoch": 0.1090689238210399, "grad_norm": 3.119481227726249, "learning_rate": 9.755638235585117e-07, "loss": 0.05295968055725098, "step": 451 }, { "epoch": 0.10931076178960097, "grad_norm": 3.496687602379166, "learning_rate": 9.75445174139048e-07, "loss": 0.034567367285490036, "step": 452 }, { "epoch": 0.10955259975816203, "grad_norm": 11.543059418415899, "learning_rate": 9.753262446172824e-07, "loss": 0.030379200354218483, "step": 453 }, { "epoch": 0.1097944377267231, "grad_norm": 4.257438197955587, "learning_rate": 9.752070350632801e-07, "loss": 0.026634875684976578, "step": 454 }, { "epoch": 0.11003627569528417, "grad_norm": 2.319563898429766, "learning_rate": 9.750875455472722e-07, "loss": 0.018501795828342438, "step": 455 }, { "epoch": 0.11027811366384523, "grad_norm": 3.4898006659936796, "learning_rate": 9.749677761396541e-07, "loss": 0.033804941922426224, "step": 456 }, { "epoch": 0.11051995163240629, "grad_norm": 8.690763694792334, "learning_rate": 9.74847726910987e-07, "loss": 0.035311199724674225, "step": 457 }, { "epoch": 0.11076178960096736, "grad_norm": 1.8780182554688116, "learning_rate": 9.747273979319956e-07, "loss": 0.009340774267911911, "step": 458 }, { "epoch": 0.11100362756952842, "grad_norm": 17.16121349163015, "learning_rate": 9.746067892735706e-07, "loss": 0.02411702089011669, "step": 459 }, { "epoch": 0.11124546553808948, "grad_norm": 15.091767444685956, "learning_rate": 9.744859010067667e-07, "loss": 0.01724426820874214, "step": 460 }, { "epoch": 0.11148730350665055, "grad_norm": 4.4676736603422516, "learning_rate": 9.74364733202804e-07, "loss": 0.1081286072731018, "step": 461 }, { "epoch": 0.11172914147521161, "grad_norm": 2.729550492046388, "learning_rate": 9.742432859330666e-07, "loss": 0.006442164070904255, "step": 462 }, { "epoch": 0.11197097944377267, "grad_norm": 4.460899814796831, "learning_rate": 9.74121559269104e-07, "loss": 0.0197848342359066, "step": 463 }, { "epoch": 0.11221281741233374, "grad_norm": 5.082103313011029, "learning_rate": 9.739995532826294e-07, "loss": 0.015606023371219635, "step": 464 }, { "epoch": 0.1124546553808948, "grad_norm": 9.129564515116808, "learning_rate": 9.73877268045521e-07, "loss": 0.014806831255555153, "step": 465 }, { "epoch": 0.11269649334945586, "grad_norm": 26.805705607724903, "learning_rate": 9.73754703629822e-07, "loss": 0.05611666664481163, "step": 466 }, { "epoch": 0.11293833131801694, "grad_norm": 7.552506770647806, "learning_rate": 9.736318601077392e-07, "loss": 0.02693885564804077, "step": 467 }, { "epoch": 0.113180169286578, "grad_norm": 22.54835114712933, "learning_rate": 9.735087375516447e-07, "loss": 0.07121674716472626, "step": 468 }, { "epoch": 0.11342200725513905, "grad_norm": 2.8350666656832577, "learning_rate": 9.73385336034074e-07, "loss": 0.022883227095007896, "step": 469 }, { "epoch": 0.11366384522370013, "grad_norm": 4.855655116396765, "learning_rate": 9.73261655627728e-07, "loss": 0.0391194149851799, "step": 470 }, { "epoch": 0.11390568319226119, "grad_norm": 1.3903943396832645, "learning_rate": 9.73137696405471e-07, "loss": 0.01752437837421894, "step": 471 }, { "epoch": 0.11414752116082225, "grad_norm": 4.803653222008973, "learning_rate": 9.730134584403321e-07, "loss": 0.015718940645456314, "step": 472 }, { "epoch": 0.11438935912938332, "grad_norm": 3.238454807674394, "learning_rate": 9.728889418055047e-07, "loss": 0.04734649136662483, "step": 473 }, { "epoch": 0.11463119709794438, "grad_norm": 4.107135758895328, "learning_rate": 9.727641465743457e-07, "loss": 0.07490003108978271, "step": 474 }, { "epoch": 0.11487303506650544, "grad_norm": 4.952747413037892, "learning_rate": 9.726390728203771e-07, "loss": 0.0775594636797905, "step": 475 }, { "epoch": 0.11511487303506651, "grad_norm": 5.7447313802974325, "learning_rate": 9.72513720617284e-07, "loss": 0.023147892206907272, "step": 476 }, { "epoch": 0.11535671100362757, "grad_norm": 25.13699505609902, "learning_rate": 9.72388090038916e-07, "loss": 0.045793939381837845, "step": 477 }, { "epoch": 0.11559854897218863, "grad_norm": 5.723265549434063, "learning_rate": 9.72262181159287e-07, "loss": 0.07690295577049255, "step": 478 }, { "epoch": 0.1158403869407497, "grad_norm": 4.205466597746218, "learning_rate": 9.721359940525746e-07, "loss": 0.05775188282132149, "step": 479 }, { "epoch": 0.11608222490931076, "grad_norm": 4.502216908827216, "learning_rate": 9.7200952879312e-07, "loss": 0.04044659063220024, "step": 480 }, { "epoch": 0.11632406287787182, "grad_norm": 11.689759501748497, "learning_rate": 9.718827854554287e-07, "loss": 0.018377501517534256, "step": 481 }, { "epoch": 0.1165659008464329, "grad_norm": 10.83254558207877, "learning_rate": 9.7175576411417e-07, "loss": 0.016555333510041237, "step": 482 }, { "epoch": 0.11680773881499396, "grad_norm": 4.175556606937638, "learning_rate": 9.716284648441766e-07, "loss": 0.025641947984695435, "step": 483 }, { "epoch": 0.11704957678355501, "grad_norm": 151.90306138817533, "learning_rate": 9.715008877204454e-07, "loss": 0.013313176110386848, "step": 484 }, { "epoch": 0.11729141475211609, "grad_norm": 8.548432645848722, "learning_rate": 9.713730328181368e-07, "loss": 0.014611166901886463, "step": 485 }, { "epoch": 0.11753325272067715, "grad_norm": 9.649227343585702, "learning_rate": 9.712449002125747e-07, "loss": 0.04556228965520859, "step": 486 }, { "epoch": 0.11777509068923821, "grad_norm": 4.317106681353593, "learning_rate": 9.711164899792468e-07, "loss": 0.020457817241549492, "step": 487 }, { "epoch": 0.11801692865779928, "grad_norm": 22.625702518487575, "learning_rate": 9.709878021938042e-07, "loss": 0.043352097272872925, "step": 488 }, { "epoch": 0.11825876662636034, "grad_norm": 4.4308386933616255, "learning_rate": 9.70858836932062e-07, "loss": 0.015177628956735134, "step": 489 }, { "epoch": 0.1185006045949214, "grad_norm": 10.15314222211219, "learning_rate": 9.70729594269998e-07, "loss": 0.051018644124269485, "step": 490 }, { "epoch": 0.11874244256348247, "grad_norm": 36.604750079870925, "learning_rate": 9.70600074283754e-07, "loss": 0.025173118337988853, "step": 491 }, { "epoch": 0.11898428053204353, "grad_norm": 2.372530795432482, "learning_rate": 9.704702770496352e-07, "loss": 0.007204992230981588, "step": 492 }, { "epoch": 0.11922611850060459, "grad_norm": 8.390267042560158, "learning_rate": 9.703402026441095e-07, "loss": 0.018323242664337158, "step": 493 }, { "epoch": 0.11946795646916566, "grad_norm": 11.859070703462328, "learning_rate": 9.702098511438085e-07, "loss": 0.03343012556433678, "step": 494 }, { "epoch": 0.11970979443772672, "grad_norm": 2.3701347140015896, "learning_rate": 9.700792226255277e-07, "loss": 0.02026558853685856, "step": 495 }, { "epoch": 0.11995163240628778, "grad_norm": 8.208226378071727, "learning_rate": 9.699483171662248e-07, "loss": 0.012539234943687916, "step": 496 }, { "epoch": 0.12019347037484886, "grad_norm": 6.08319859382965, "learning_rate": 9.698171348430209e-07, "loss": 0.01263208407908678, "step": 497 }, { "epoch": 0.12043530834340992, "grad_norm": 6.725181659812907, "learning_rate": 9.696856757332006e-07, "loss": 0.04903299733996391, "step": 498 }, { "epoch": 0.12067714631197098, "grad_norm": 4.60417630448475, "learning_rate": 9.69553939914211e-07, "loss": 0.024863271042704582, "step": 499 }, { "epoch": 0.12091898428053205, "grad_norm": 2.264159543049261, "learning_rate": 9.69421927463663e-07, "loss": 0.03104342892765999, "step": 500 }, { "epoch": 0.12116082224909311, "grad_norm": 1.7324115754378002, "learning_rate": 9.692896384593297e-07, "loss": 0.028360387310385704, "step": 501 }, { "epoch": 0.12140266021765417, "grad_norm": 5.025467287578985, "learning_rate": 9.691570729791473e-07, "loss": 0.041185274720191956, "step": 502 }, { "epoch": 0.12164449818621524, "grad_norm": 3.210678303865848, "learning_rate": 9.690242311012156e-07, "loss": 0.026405055075883865, "step": 503 }, { "epoch": 0.1218863361547763, "grad_norm": 5.297030542404307, "learning_rate": 9.68891112903796e-07, "loss": 0.03461233526468277, "step": 504 }, { "epoch": 0.12212817412333736, "grad_norm": 2.4465306012957395, "learning_rate": 9.687577184653136e-07, "loss": 0.027733081951737404, "step": 505 }, { "epoch": 0.12237001209189843, "grad_norm": 3.545823530482947, "learning_rate": 9.68624047864356e-07, "loss": 0.04620419070124626, "step": 506 }, { "epoch": 0.12261185006045949, "grad_norm": 3.7352267648002666, "learning_rate": 9.684901011796736e-07, "loss": 0.018964117392897606, "step": 507 }, { "epoch": 0.12285368802902055, "grad_norm": 2.2029706927513297, "learning_rate": 9.683558784901792e-07, "loss": 0.0160403810441494, "step": 508 }, { "epoch": 0.12309552599758163, "grad_norm": 3.2502681027142755, "learning_rate": 9.682213798749482e-07, "loss": 0.01568601094186306, "step": 509 }, { "epoch": 0.12333736396614269, "grad_norm": 8.052280967365531, "learning_rate": 9.68086605413219e-07, "loss": 0.02046944759786129, "step": 510 }, { "epoch": 0.12357920193470374, "grad_norm": 13.257240271088154, "learning_rate": 9.679515551843919e-07, "loss": 0.05149371549487114, "step": 511 }, { "epoch": 0.12382103990326482, "grad_norm": 4.575305696098738, "learning_rate": 9.678162292680304e-07, "loss": 0.006055945996195078, "step": 512 }, { "epoch": 0.12406287787182588, "grad_norm": 6.751262553615797, "learning_rate": 9.676806277438597e-07, "loss": 0.09099238365888596, "step": 513 }, { "epoch": 0.12430471584038694, "grad_norm": 52.93909701651898, "learning_rate": 9.675447506917675e-07, "loss": 0.017082834616303444, "step": 514 }, { "epoch": 0.12454655380894801, "grad_norm": 10.432961837499679, "learning_rate": 9.674085981918044e-07, "loss": 0.06910183280706406, "step": 515 }, { "epoch": 0.12478839177750907, "grad_norm": 3.6461631465551556, "learning_rate": 9.672721703241825e-07, "loss": 0.02198006585240364, "step": 516 }, { "epoch": 0.12503022974607014, "grad_norm": 9.04445053364083, "learning_rate": 9.671354671692766e-07, "loss": 0.027238911017775536, "step": 517 }, { "epoch": 0.1252720677146312, "grad_norm": 4.8891777771865526, "learning_rate": 9.669984888076237e-07, "loss": 0.022918811067938805, "step": 518 }, { "epoch": 0.12551390568319226, "grad_norm": 3.012738295548066, "learning_rate": 9.668612353199228e-07, "loss": 0.02461833693087101, "step": 519 }, { "epoch": 0.12575574365175332, "grad_norm": 2.153338256731026, "learning_rate": 9.667237067870347e-07, "loss": 0.04350479319691658, "step": 520 }, { "epoch": 0.12599758162031438, "grad_norm": 4.494806349248698, "learning_rate": 9.66585903289983e-07, "loss": 0.05558193475008011, "step": 521 }, { "epoch": 0.12623941958887544, "grad_norm": 2.242068665070364, "learning_rate": 9.664478249099523e-07, "loss": 0.01778293028473854, "step": 522 }, { "epoch": 0.12648125755743653, "grad_norm": 3.5276264657629786, "learning_rate": 9.663094717282903e-07, "loss": 0.01141910906881094, "step": 523 }, { "epoch": 0.1267230955259976, "grad_norm": 1.7204249918847914, "learning_rate": 9.661708438265053e-07, "loss": 0.005802286323159933, "step": 524 }, { "epoch": 0.12696493349455865, "grad_norm": 7.244401372780891, "learning_rate": 9.660319412862686e-07, "loss": 0.0187513567507267, "step": 525 }, { "epoch": 0.1272067714631197, "grad_norm": 2.8475360559801297, "learning_rate": 9.658927641894126e-07, "loss": 0.03049096278846264, "step": 526 }, { "epoch": 0.12744860943168076, "grad_norm": 2.7103442601962873, "learning_rate": 9.65753312617932e-07, "loss": 0.03841012343764305, "step": 527 }, { "epoch": 0.12769044740024182, "grad_norm": 5.136297060804826, "learning_rate": 9.656135866539821e-07, "loss": 0.033001724630594254, "step": 528 }, { "epoch": 0.1279322853688029, "grad_norm": 3.8732405236016616, "learning_rate": 9.654735863798814e-07, "loss": 0.011299367062747478, "step": 529 }, { "epoch": 0.12817412333736397, "grad_norm": 8.727540386953864, "learning_rate": 9.653333118781088e-07, "loss": 0.05022061988711357, "step": 530 }, { "epoch": 0.12841596130592503, "grad_norm": 3.1971173054382556, "learning_rate": 9.651927632313054e-07, "loss": 0.031003041192889214, "step": 531 }, { "epoch": 0.1286577992744861, "grad_norm": 7.331167951999255, "learning_rate": 9.650519405222736e-07, "loss": 0.06904685497283936, "step": 532 }, { "epoch": 0.12889963724304715, "grad_norm": 9.722251913161752, "learning_rate": 9.649108438339771e-07, "loss": 0.1275792270898819, "step": 533 }, { "epoch": 0.1291414752116082, "grad_norm": 17.775044572352606, "learning_rate": 9.647694732495412e-07, "loss": 0.06274550408124924, "step": 534 }, { "epoch": 0.1293833131801693, "grad_norm": 18.511577685157125, "learning_rate": 9.646278288522526e-07, "loss": 0.008330057375133038, "step": 535 }, { "epoch": 0.12962515114873036, "grad_norm": 6.461083079429915, "learning_rate": 9.644859107255593e-07, "loss": 0.03958391025662422, "step": 536 }, { "epoch": 0.12986698911729141, "grad_norm": 2.342117882274959, "learning_rate": 9.6434371895307e-07, "loss": 0.037806954234838486, "step": 537 }, { "epoch": 0.13010882708585247, "grad_norm": 7.6847119776942785, "learning_rate": 9.64201253618556e-07, "loss": 0.052417218685150146, "step": 538 }, { "epoch": 0.13035066505441353, "grad_norm": 9.722627041974201, "learning_rate": 9.640585148059483e-07, "loss": 0.01657881774008274, "step": 539 }, { "epoch": 0.13059250302297462, "grad_norm": 7.558384897032162, "learning_rate": 9.639155025993397e-07, "loss": 0.021040335297584534, "step": 540 }, { "epoch": 0.13083434099153568, "grad_norm": 12.790551836348975, "learning_rate": 9.637722170829841e-07, "loss": 0.010376832447946072, "step": 541 }, { "epoch": 0.13107617896009674, "grad_norm": 4.408946486229192, "learning_rate": 9.63628658341296e-07, "loss": 0.018874987959861755, "step": 542 }, { "epoch": 0.1313180169286578, "grad_norm": 5.444077786717753, "learning_rate": 9.634848264588515e-07, "loss": 0.0306665301322937, "step": 543 }, { "epoch": 0.13155985489721886, "grad_norm": 1.452903467722926, "learning_rate": 9.63340721520387e-07, "loss": 0.006252019200474024, "step": 544 }, { "epoch": 0.13180169286577992, "grad_norm": 6.315950213477566, "learning_rate": 9.631963436108005e-07, "loss": 0.034929435700178146, "step": 545 }, { "epoch": 0.132043530834341, "grad_norm": 3.1164514732654154, "learning_rate": 9.630516928151496e-07, "loss": 0.02888059988617897, "step": 546 }, { "epoch": 0.13228536880290206, "grad_norm": 5.278761081741741, "learning_rate": 9.62906769218654e-07, "loss": 0.025352075695991516, "step": 547 }, { "epoch": 0.13252720677146312, "grad_norm": 17.1331747409248, "learning_rate": 9.627615729066934e-07, "loss": 0.060754358768463135, "step": 548 }, { "epoch": 0.13276904474002418, "grad_norm": 4.6375929793027675, "learning_rate": 9.62616103964808e-07, "loss": 0.009134539403021336, "step": 549 }, { "epoch": 0.13301088270858524, "grad_norm": 36.42398003514108, "learning_rate": 9.624703624786996e-07, "loss": 0.02149037830531597, "step": 550 }, { "epoch": 0.1332527206771463, "grad_norm": 9.39976552173674, "learning_rate": 9.62324348534229e-07, "loss": 0.022730333730578423, "step": 551 }, { "epoch": 0.1334945586457074, "grad_norm": 3.3027949899912414, "learning_rate": 9.621780622174195e-07, "loss": 0.04321328550577164, "step": 552 }, { "epoch": 0.13373639661426845, "grad_norm": 3.954605775852571, "learning_rate": 9.620315036144528e-07, "loss": 0.054517995566129684, "step": 553 }, { "epoch": 0.1339782345828295, "grad_norm": 7.749745976573085, "learning_rate": 9.618846728116724e-07, "loss": 0.00873743649572134, "step": 554 }, { "epoch": 0.13422007255139057, "grad_norm": 4.179783731471594, "learning_rate": 9.617375698955818e-07, "loss": 0.034900542348623276, "step": 555 }, { "epoch": 0.13446191051995163, "grad_norm": 6.1710972226000065, "learning_rate": 9.615901949528446e-07, "loss": 0.012266584672033787, "step": 556 }, { "epoch": 0.1347037484885127, "grad_norm": 7.3112029382731345, "learning_rate": 9.614425480702849e-07, "loss": 0.061410773545503616, "step": 557 }, { "epoch": 0.13494558645707377, "grad_norm": 4.0514031313976275, "learning_rate": 9.612946293348867e-07, "loss": 0.030741123482584953, "step": 558 }, { "epoch": 0.13518742442563483, "grad_norm": 2.267878345885637, "learning_rate": 9.611464388337948e-07, "loss": 0.00821173470467329, "step": 559 }, { "epoch": 0.1354292623941959, "grad_norm": 16.445904223944364, "learning_rate": 9.609979766543135e-07, "loss": 0.02879643440246582, "step": 560 }, { "epoch": 0.13567110036275695, "grad_norm": 153.13344674693226, "learning_rate": 9.608492428839071e-07, "loss": 0.02343338169157505, "step": 561 }, { "epoch": 0.135912938331318, "grad_norm": 10.624924589266227, "learning_rate": 9.607002376102005e-07, "loss": 0.08276867121458054, "step": 562 }, { "epoch": 0.13615477629987907, "grad_norm": 3.740283192310781, "learning_rate": 9.605509609209782e-07, "loss": 0.022497251629829407, "step": 563 }, { "epoch": 0.13639661426844016, "grad_norm": 2.8376522828612356, "learning_rate": 9.604014129041844e-07, "loss": 0.03105173446238041, "step": 564 }, { "epoch": 0.13663845223700122, "grad_norm": 6.490762497496724, "learning_rate": 9.602515936479234e-07, "loss": 0.018408438190817833, "step": 565 }, { "epoch": 0.13688029020556228, "grad_norm": 4.187217384682305, "learning_rate": 9.601015032404595e-07, "loss": 0.015344900079071522, "step": 566 }, { "epoch": 0.13712212817412334, "grad_norm": 6.421992184239142, "learning_rate": 9.599511417702162e-07, "loss": 0.015658317133784294, "step": 567 }, { "epoch": 0.1373639661426844, "grad_norm": 2.5663326751077005, "learning_rate": 9.59800509325777e-07, "loss": 0.012731212191283703, "step": 568 }, { "epoch": 0.13760580411124546, "grad_norm": 4.48541888263211, "learning_rate": 9.596496059958854e-07, "loss": 0.03144587203860283, "step": 569 }, { "epoch": 0.13784764207980654, "grad_norm": 2.7057311443654513, "learning_rate": 9.594984318694438e-07, "loss": 0.010856274515390396, "step": 570 }, { "epoch": 0.1380894800483676, "grad_norm": 5.457154395286791, "learning_rate": 9.593469870355143e-07, "loss": 0.014700107276439667, "step": 571 }, { "epoch": 0.13833131801692866, "grad_norm": 11.115909069006214, "learning_rate": 9.59195271583319e-07, "loss": 0.01404012180864811, "step": 572 }, { "epoch": 0.13857315598548972, "grad_norm": 5.832557399475971, "learning_rate": 9.59043285602239e-07, "loss": 0.02931492030620575, "step": 573 }, { "epoch": 0.13881499395405078, "grad_norm": 3.8210692038339036, "learning_rate": 9.588910291818146e-07, "loss": 0.02064582332968712, "step": 574 }, { "epoch": 0.13905683192261184, "grad_norm": 9.248507916265147, "learning_rate": 9.58738502411746e-07, "loss": 0.028401434421539307, "step": 575 }, { "epoch": 0.13929866989117293, "grad_norm": 2.1890530184708976, "learning_rate": 9.585857053818922e-07, "loss": 0.039540715515613556, "step": 576 }, { "epoch": 0.139540507859734, "grad_norm": 3.567695072195247, "learning_rate": 9.584326381822716e-07, "loss": 0.014658181928098202, "step": 577 }, { "epoch": 0.13978234582829505, "grad_norm": 4.315309109891371, "learning_rate": 9.582793009030617e-07, "loss": 0.033838290721178055, "step": 578 }, { "epoch": 0.1400241837968561, "grad_norm": 6.040240463014487, "learning_rate": 9.58125693634599e-07, "loss": 0.023242270573973656, "step": 579 }, { "epoch": 0.14026602176541716, "grad_norm": 15.988014541771918, "learning_rate": 9.579718164673797e-07, "loss": 0.017756471410393715, "step": 580 }, { "epoch": 0.14050785973397822, "grad_norm": 6.690566901997597, "learning_rate": 9.57817669492058e-07, "loss": 0.057492222636938095, "step": 581 }, { "epoch": 0.1407496977025393, "grad_norm": 2.556354642541278, "learning_rate": 9.576632527994478e-07, "loss": 0.01360743772238493, "step": 582 }, { "epoch": 0.14099153567110037, "grad_norm": 13.746096160054806, "learning_rate": 9.575085664805217e-07, "loss": 0.022793641313910484, "step": 583 }, { "epoch": 0.14123337363966143, "grad_norm": 3.5924274208032836, "learning_rate": 9.57353610626411e-07, "loss": 0.01741703413426876, "step": 584 }, { "epoch": 0.1414752116082225, "grad_norm": 7.067088516216049, "learning_rate": 9.571983853284058e-07, "loss": 0.11926671117544174, "step": 585 }, { "epoch": 0.14171704957678355, "grad_norm": 4.541963813524804, "learning_rate": 9.570428906779555e-07, "loss": 0.07686582207679749, "step": 586 }, { "epoch": 0.1419588875453446, "grad_norm": 6.802974390582444, "learning_rate": 9.568871267666673e-07, "loss": 0.018661638721823692, "step": 587 }, { "epoch": 0.1422007255139057, "grad_norm": 2.41756984027508, "learning_rate": 9.56731093686308e-07, "loss": 0.023855572566390038, "step": 588 }, { "epoch": 0.14244256348246676, "grad_norm": 4.1827777972733555, "learning_rate": 9.565747915288016e-07, "loss": 0.07231690734624863, "step": 589 }, { "epoch": 0.14268440145102781, "grad_norm": 2.6610491582586224, "learning_rate": 9.564182203862323e-07, "loss": 0.012444592081010342, "step": 590 }, { "epoch": 0.14292623941958887, "grad_norm": 3.5070490431041588, "learning_rate": 9.562613803508416e-07, "loss": 0.03140707686543465, "step": 591 }, { "epoch": 0.14316807738814993, "grad_norm": 61.95936027328553, "learning_rate": 9.561042715150297e-07, "loss": 0.017223281785845757, "step": 592 }, { "epoch": 0.143409915356711, "grad_norm": 2.4079049444515075, "learning_rate": 9.559468939713551e-07, "loss": 0.009137618355453014, "step": 593 }, { "epoch": 0.14365175332527208, "grad_norm": 2.920344762593032, "learning_rate": 9.55789247812535e-07, "loss": 0.026969129219651222, "step": 594 }, { "epoch": 0.14389359129383314, "grad_norm": 3.3140630681362877, "learning_rate": 9.556313331314445e-07, "loss": 0.013478273525834084, "step": 595 }, { "epoch": 0.1441354292623942, "grad_norm": 1.9525894097729384, "learning_rate": 9.55473150021117e-07, "loss": 0.02529832534492016, "step": 596 }, { "epoch": 0.14437726723095526, "grad_norm": 3.111618925566648, "learning_rate": 9.55314698574744e-07, "loss": 0.026422295719385147, "step": 597 }, { "epoch": 0.14461910519951632, "grad_norm": 4.583171594345442, "learning_rate": 9.55155978885675e-07, "loss": 0.020298639312386513, "step": 598 }, { "epoch": 0.14486094316807738, "grad_norm": 14.30112810967442, "learning_rate": 9.549969910474173e-07, "loss": 0.0315251350402832, "step": 599 }, { "epoch": 0.14510278113663846, "grad_norm": 13.361644123770954, "learning_rate": 9.54837735153637e-07, "loss": 0.030747825279831886, "step": 600 }, { "epoch": 0.14534461910519952, "grad_norm": 12.598333481418946, "learning_rate": 9.54678211298158e-07, "loss": 0.04699411615729332, "step": 601 }, { "epoch": 0.14558645707376058, "grad_norm": 2.8549579922645636, "learning_rate": 9.545184195749608e-07, "loss": 0.018079591915011406, "step": 602 }, { "epoch": 0.14582829504232164, "grad_norm": 6.7044640306705645, "learning_rate": 9.543583600781848e-07, "loss": 0.07067277282476425, "step": 603 }, { "epoch": 0.1460701330108827, "grad_norm": 2.3720277835877805, "learning_rate": 9.541980329021273e-07, "loss": 0.014254671521484852, "step": 604 }, { "epoch": 0.14631197097944376, "grad_norm": 3.2546138914903424, "learning_rate": 9.54037438141243e-07, "loss": 0.012652777135372162, "step": 605 }, { "epoch": 0.14655380894800485, "grad_norm": 10.70905236638256, "learning_rate": 9.538765758901437e-07, "loss": 0.04643530398607254, "step": 606 }, { "epoch": 0.1467956469165659, "grad_norm": 5.955704855106703, "learning_rate": 9.537154462435999e-07, "loss": 0.02673325501382351, "step": 607 }, { "epoch": 0.14703748488512697, "grad_norm": 3.736269150592858, "learning_rate": 9.535540492965384e-07, "loss": 0.03302062302827835, "step": 608 }, { "epoch": 0.14727932285368803, "grad_norm": 3.663493148035699, "learning_rate": 9.533923851440446e-07, "loss": 0.013011819683015347, "step": 609 }, { "epoch": 0.1475211608222491, "grad_norm": 7.08301418701625, "learning_rate": 9.532304538813607e-07, "loss": 0.027599578723311424, "step": 610 }, { "epoch": 0.14776299879081015, "grad_norm": 5.482948982176569, "learning_rate": 9.530682556038863e-07, "loss": 0.03238585218787193, "step": 611 }, { "epoch": 0.14800483675937123, "grad_norm": 3.573815260700814, "learning_rate": 9.529057904071782e-07, "loss": 0.03803853318095207, "step": 612 }, { "epoch": 0.1482466747279323, "grad_norm": 2.415435641812021, "learning_rate": 9.52743058386951e-07, "loss": 0.01630382426083088, "step": 613 }, { "epoch": 0.14848851269649335, "grad_norm": 2.9826971218668095, "learning_rate": 9.525800596390761e-07, "loss": 0.023435739800333977, "step": 614 }, { "epoch": 0.1487303506650544, "grad_norm": 5.210112766176275, "learning_rate": 9.524167942595817e-07, "loss": 0.02265503816306591, "step": 615 }, { "epoch": 0.14897218863361547, "grad_norm": 19.567221985929542, "learning_rate": 9.522532623446539e-07, "loss": 0.04036083444952965, "step": 616 }, { "epoch": 0.14921402660217653, "grad_norm": 3.9415939012089134, "learning_rate": 9.520894639906354e-07, "loss": 0.06394623965024948, "step": 617 }, { "epoch": 0.14945586457073762, "grad_norm": 1.7018369787329712, "learning_rate": 9.519253992940255e-07, "loss": 0.013493633829057217, "step": 618 }, { "epoch": 0.14969770253929868, "grad_norm": 3.9142753954384983, "learning_rate": 9.517610683514809e-07, "loss": 0.06221342831850052, "step": 619 }, { "epoch": 0.14993954050785974, "grad_norm": 2.958897365839819, "learning_rate": 9.515964712598151e-07, "loss": 0.008025716058909893, "step": 620 }, { "epoch": 0.1501813784764208, "grad_norm": 9.11535088824535, "learning_rate": 9.514316081159983e-07, "loss": 0.01104850135743618, "step": 621 }, { "epoch": 0.15042321644498186, "grad_norm": 3.8567098582155817, "learning_rate": 9.512664790171577e-07, "loss": 0.025747006759047508, "step": 622 }, { "epoch": 0.15066505441354291, "grad_norm": 1.652412522577135, "learning_rate": 9.511010840605764e-07, "loss": 0.01808718964457512, "step": 623 }, { "epoch": 0.150906892382104, "grad_norm": 2.688872653465709, "learning_rate": 9.50935423343695e-07, "loss": 0.039501726627349854, "step": 624 }, { "epoch": 0.15114873035066506, "grad_norm": 3.7829046374073254, "learning_rate": 9.507694969641104e-07, "loss": 0.047850366681814194, "step": 625 }, { "epoch": 0.15139056831922612, "grad_norm": 6.502051128001511, "learning_rate": 9.50603305019576e-07, "loss": 0.02279713563621044, "step": 626 }, { "epoch": 0.15163240628778718, "grad_norm": 4.405065513724163, "learning_rate": 9.504368476080014e-07, "loss": 0.013287733308970928, "step": 627 }, { "epoch": 0.15187424425634824, "grad_norm": 7.914672881648259, "learning_rate": 9.502701248274528e-07, "loss": 0.02400103397667408, "step": 628 }, { "epoch": 0.1521160822249093, "grad_norm": 4.913745896342223, "learning_rate": 9.50103136776153e-07, "loss": 0.010175581090152264, "step": 629 }, { "epoch": 0.1523579201934704, "grad_norm": 8.55249389800114, "learning_rate": 9.499358835524806e-07, "loss": 0.020296242088079453, "step": 630 }, { "epoch": 0.15259975816203145, "grad_norm": 4.9319645614784315, "learning_rate": 9.497683652549706e-07, "loss": 0.01182286161929369, "step": 631 }, { "epoch": 0.1528415961305925, "grad_norm": 21.82626292648906, "learning_rate": 9.496005819823146e-07, "loss": 0.019947471097111702, "step": 632 }, { "epoch": 0.15308343409915356, "grad_norm": 4.14550150524215, "learning_rate": 9.494325338333593e-07, "loss": 0.041394639760255814, "step": 633 }, { "epoch": 0.15332527206771462, "grad_norm": 7.853715521291297, "learning_rate": 9.492642209071086e-07, "loss": 0.028140434995293617, "step": 634 }, { "epoch": 0.15356711003627568, "grad_norm": 15.043930931462342, "learning_rate": 9.490956433027218e-07, "loss": 0.011774892918765545, "step": 635 }, { "epoch": 0.15380894800483677, "grad_norm": 3.499161306117305, "learning_rate": 9.489268011195138e-07, "loss": 0.020619451999664307, "step": 636 }, { "epoch": 0.15405078597339783, "grad_norm": 2.4164259528007, "learning_rate": 9.487576944569561e-07, "loss": 0.008711284957826138, "step": 637 }, { "epoch": 0.1542926239419589, "grad_norm": 3.0876323943496593, "learning_rate": 9.485883234146757e-07, "loss": 0.014994703233242035, "step": 638 }, { "epoch": 0.15453446191051995, "grad_norm": 4.552390026895866, "learning_rate": 9.484186880924552e-07, "loss": 0.045589298009872437, "step": 639 }, { "epoch": 0.154776299879081, "grad_norm": 4.00181763432952, "learning_rate": 9.482487885902329e-07, "loss": 0.04632509499788284, "step": 640 }, { "epoch": 0.15501813784764207, "grad_norm": 5.173573208661614, "learning_rate": 9.480786250081032e-07, "loss": 0.04112405329942703, "step": 641 }, { "epoch": 0.15525997581620316, "grad_norm": 9.184474091660226, "learning_rate": 9.479081974463156e-07, "loss": 0.016404205933213234, "step": 642 }, { "epoch": 0.15550181378476421, "grad_norm": 5.15859660621892, "learning_rate": 9.477375060052753e-07, "loss": 0.03162645921111107, "step": 643 }, { "epoch": 0.15574365175332527, "grad_norm": 4.00263045658556, "learning_rate": 9.475665507855426e-07, "loss": 0.01095670834183693, "step": 644 }, { "epoch": 0.15598548972188633, "grad_norm": 14.716626491221513, "learning_rate": 9.47395331887834e-07, "loss": 0.01592683419585228, "step": 645 }, { "epoch": 0.1562273276904474, "grad_norm": 1.5339416229934193, "learning_rate": 9.472238494130207e-07, "loss": 0.0069520557299256325, "step": 646 }, { "epoch": 0.15646916565900845, "grad_norm": 3.1037034991308166, "learning_rate": 9.470521034621293e-07, "loss": 0.015410803258419037, "step": 647 }, { "epoch": 0.15671100362756954, "grad_norm": 2.9271743332453637, "learning_rate": 9.468800941363416e-07, "loss": 0.012890768237411976, "step": 648 }, { "epoch": 0.1569528415961306, "grad_norm": 3.334477975456722, "learning_rate": 9.467078215369948e-07, "loss": 0.00933604221791029, "step": 649 }, { "epoch": 0.15719467956469166, "grad_norm": 1.664724239718666, "learning_rate": 9.46535285765581e-07, "loss": 0.013111586682498455, "step": 650 }, { "epoch": 0.15743651753325272, "grad_norm": 5.521408918217965, "learning_rate": 9.463624869237476e-07, "loss": 0.10192456096410751, "step": 651 }, { "epoch": 0.15767835550181378, "grad_norm": 2.072950846670766, "learning_rate": 9.461894251132964e-07, "loss": 0.016820868477225304, "step": 652 }, { "epoch": 0.15792019347037484, "grad_norm": 7.357120536956791, "learning_rate": 9.460161004361846e-07, "loss": 0.013320199213922024, "step": 653 }, { "epoch": 0.15816203143893592, "grad_norm": 5.034784577172547, "learning_rate": 9.458425129945244e-07, "loss": 0.01127737294882536, "step": 654 }, { "epoch": 0.15840386940749698, "grad_norm": 7.084237970075643, "learning_rate": 9.456686628905823e-07, "loss": 0.051295824348926544, "step": 655 }, { "epoch": 0.15864570737605804, "grad_norm": 2.4672956872185523, "learning_rate": 9.4549455022678e-07, "loss": 0.01519786100834608, "step": 656 }, { "epoch": 0.1588875453446191, "grad_norm": 4.067326076909411, "learning_rate": 9.453201751056936e-07, "loss": 0.045611679553985596, "step": 657 }, { "epoch": 0.15912938331318016, "grad_norm": 13.957034465558886, "learning_rate": 9.451455376300539e-07, "loss": 0.04380093887448311, "step": 658 }, { "epoch": 0.15937122128174122, "grad_norm": 5.302896338396183, "learning_rate": 9.449706379027464e-07, "loss": 0.027953267097473145, "step": 659 }, { "epoch": 0.1596130592503023, "grad_norm": 6.073453725286618, "learning_rate": 9.447954760268108e-07, "loss": 0.019679641351103783, "step": 660 }, { "epoch": 0.15985489721886337, "grad_norm": 3.9207689220360074, "learning_rate": 9.446200521054414e-07, "loss": 0.03612281754612923, "step": 661 }, { "epoch": 0.16009673518742443, "grad_norm": 28.75086220480843, "learning_rate": 9.444443662419874e-07, "loss": 0.02052314020693302, "step": 662 }, { "epoch": 0.1603385731559855, "grad_norm": 3.879202089902048, "learning_rate": 9.442684185399512e-07, "loss": 0.010841729119420052, "step": 663 }, { "epoch": 0.16058041112454655, "grad_norm": 8.547817864298157, "learning_rate": 9.440922091029905e-07, "loss": 0.052131157368421555, "step": 664 }, { "epoch": 0.1608222490931076, "grad_norm": 5.614914751623719, "learning_rate": 9.439157380349165e-07, "loss": 0.02016458846628666, "step": 665 }, { "epoch": 0.1610640870616687, "grad_norm": 2.635340613583152, "learning_rate": 9.437390054396951e-07, "loss": 0.030630996450781822, "step": 666 }, { "epoch": 0.16130592503022975, "grad_norm": 4.134647760995435, "learning_rate": 9.435620114214458e-07, "loss": 0.05541041120886803, "step": 667 }, { "epoch": 0.1615477629987908, "grad_norm": 4.133877230349663, "learning_rate": 9.433847560844423e-07, "loss": 0.03603570908308029, "step": 668 }, { "epoch": 0.16178960096735187, "grad_norm": 2.7844260507933707, "learning_rate": 9.432072395331125e-07, "loss": 0.01713135652244091, "step": 669 }, { "epoch": 0.16203143893591293, "grad_norm": 20.167493980177998, "learning_rate": 9.430294618720376e-07, "loss": 0.016138741746544838, "step": 670 }, { "epoch": 0.162273276904474, "grad_norm": 17.613296539888484, "learning_rate": 9.428514232059533e-07, "loss": 0.021677454933524132, "step": 671 }, { "epoch": 0.16251511487303508, "grad_norm": 6.265060623619476, "learning_rate": 9.426731236397485e-07, "loss": 0.02605392411351204, "step": 672 }, { "epoch": 0.16275695284159614, "grad_norm": 2.242902275253643, "learning_rate": 9.424945632784663e-07, "loss": 0.02023914083838463, "step": 673 }, { "epoch": 0.1629987908101572, "grad_norm": 8.3906797879277, "learning_rate": 9.423157422273029e-07, "loss": 0.05018920823931694, "step": 674 }, { "epoch": 0.16324062877871826, "grad_norm": 13.144992492700913, "learning_rate": 9.421366605916085e-07, "loss": 0.016601594164967537, "step": 675 }, { "epoch": 0.16348246674727931, "grad_norm": 7.088736571964898, "learning_rate": 9.41957318476887e-07, "loss": 0.015173153951764107, "step": 676 }, { "epoch": 0.16372430471584037, "grad_norm": 6.725078674996813, "learning_rate": 9.417777159887951e-07, "loss": 0.036866556853055954, "step": 677 }, { "epoch": 0.16396614268440146, "grad_norm": 11.74049767856659, "learning_rate": 9.415978532331433e-07, "loss": 0.021860918030142784, "step": 678 }, { "epoch": 0.16420798065296252, "grad_norm": 5.098330479077531, "learning_rate": 9.414177303158955e-07, "loss": 0.02347620204091072, "step": 679 }, { "epoch": 0.16444981862152358, "grad_norm": 2.543101851619819, "learning_rate": 9.412373473431688e-07, "loss": 0.010130825452506542, "step": 680 }, { "epoch": 0.16469165659008464, "grad_norm": 3.4572461113841673, "learning_rate": 9.410567044212332e-07, "loss": 0.014628860168159008, "step": 681 }, { "epoch": 0.1649334945586457, "grad_norm": 3.204709782745601, "learning_rate": 9.408758016565125e-07, "loss": 0.030411487445235252, "step": 682 }, { "epoch": 0.16517533252720676, "grad_norm": 9.71174984248544, "learning_rate": 9.406946391555829e-07, "loss": 0.013124734163284302, "step": 683 }, { "epoch": 0.16541717049576785, "grad_norm": 5.853287976421975, "learning_rate": 9.405132170251739e-07, "loss": 0.027661556378006935, "step": 684 }, { "epoch": 0.1656590084643289, "grad_norm": 2.0931924282783205, "learning_rate": 9.403315353721682e-07, "loss": 0.030865734443068504, "step": 685 }, { "epoch": 0.16590084643288996, "grad_norm": 0.7645090023305381, "learning_rate": 9.401495943036011e-07, "loss": 0.009887042455375195, "step": 686 }, { "epoch": 0.16614268440145102, "grad_norm": 11.51262976803338, "learning_rate": 9.399673939266607e-07, "loss": 0.030398553237318993, "step": 687 }, { "epoch": 0.16638452237001208, "grad_norm": 8.380260973568026, "learning_rate": 9.397849343486881e-07, "loss": 0.00982047151774168, "step": 688 }, { "epoch": 0.16662636033857314, "grad_norm": 3.2526502913915087, "learning_rate": 9.396022156771768e-07, "loss": 0.034936364740133286, "step": 689 }, { "epoch": 0.16686819830713423, "grad_norm": 6.316125097496445, "learning_rate": 9.394192380197732e-07, "loss": 0.01936674304306507, "step": 690 }, { "epoch": 0.1671100362756953, "grad_norm": 3.20063758175202, "learning_rate": 9.392360014842761e-07, "loss": 0.03343018889427185, "step": 691 }, { "epoch": 0.16735187424425635, "grad_norm": 3.4077918822974396, "learning_rate": 9.390525061786372e-07, "loss": 0.029391450807452202, "step": 692 }, { "epoch": 0.1675937122128174, "grad_norm": 1.9915305642312084, "learning_rate": 9.3886875221096e-07, "loss": 0.024091903120279312, "step": 693 }, { "epoch": 0.16783555018137847, "grad_norm": 2.763706446589059, "learning_rate": 9.38684739689501e-07, "loss": 0.03284630924463272, "step": 694 }, { "epoch": 0.16807738814993953, "grad_norm": 15.218980663351768, "learning_rate": 9.385004687226687e-07, "loss": 0.018346723169088364, "step": 695 }, { "epoch": 0.16831922611850061, "grad_norm": 2.3744828595235554, "learning_rate": 9.383159394190239e-07, "loss": 0.011685955338180065, "step": 696 }, { "epoch": 0.16856106408706167, "grad_norm": 5.5010043386636, "learning_rate": 9.381311518872794e-07, "loss": 0.00605169078335166, "step": 697 }, { "epoch": 0.16880290205562273, "grad_norm": 5.523227827724723, "learning_rate": 9.379461062363009e-07, "loss": 0.04047609120607376, "step": 698 }, { "epoch": 0.1690447400241838, "grad_norm": 1.6010694954919575, "learning_rate": 9.377608025751051e-07, "loss": 0.004210449289530516, "step": 699 }, { "epoch": 0.16928657799274485, "grad_norm": 13.608179730147974, "learning_rate": 9.375752410128616e-07, "loss": 0.017998594790697098, "step": 700 }, { "epoch": 0.1695284159613059, "grad_norm": 6.16528323790479, "learning_rate": 9.373894216588913e-07, "loss": 0.012960485182702541, "step": 701 }, { "epoch": 0.169770253929867, "grad_norm": 5.509713891444982, "learning_rate": 9.372033446226673e-07, "loss": 0.010757483541965485, "step": 702 }, { "epoch": 0.17001209189842806, "grad_norm": 0.8371889494757829, "learning_rate": 9.370170100138146e-07, "loss": 0.0035183043219149113, "step": 703 }, { "epoch": 0.17025392986698912, "grad_norm": 4.740179789113618, "learning_rate": 9.368304179421096e-07, "loss": 0.03548162803053856, "step": 704 }, { "epoch": 0.17049576783555018, "grad_norm": 4.6521861253195995, "learning_rate": 9.366435685174807e-07, "loss": 0.025674426928162575, "step": 705 }, { "epoch": 0.17073760580411124, "grad_norm": 1.9493257777323152, "learning_rate": 9.364564618500079e-07, "loss": 0.020836899057030678, "step": 706 }, { "epoch": 0.1709794437726723, "grad_norm": 4.687979705943294, "learning_rate": 9.362690980499224e-07, "loss": 0.01929955556988716, "step": 707 }, { "epoch": 0.17122128174123338, "grad_norm": 2.432351144897001, "learning_rate": 9.360814772276071e-07, "loss": 0.02240988425910473, "step": 708 }, { "epoch": 0.17146311970979444, "grad_norm": 5.798988182540858, "learning_rate": 9.358935994935966e-07, "loss": 0.018372613936662674, "step": 709 }, { "epoch": 0.1717049576783555, "grad_norm": 3.9390646501816935, "learning_rate": 9.357054649585765e-07, "loss": 0.02717004530131817, "step": 710 }, { "epoch": 0.17194679564691656, "grad_norm": 4.139015590832012, "learning_rate": 9.355170737333838e-07, "loss": 0.02087303251028061, "step": 711 }, { "epoch": 0.17218863361547762, "grad_norm": 9.533932314811723, "learning_rate": 9.353284259290067e-07, "loss": 0.05906255170702934, "step": 712 }, { "epoch": 0.17243047158403868, "grad_norm": 5.325412282385707, "learning_rate": 9.351395216565846e-07, "loss": 0.01801144890487194, "step": 713 }, { "epoch": 0.17267230955259977, "grad_norm": 10.18949018835177, "learning_rate": 9.34950361027408e-07, "loss": 0.035379596054553986, "step": 714 }, { "epoch": 0.17291414752116083, "grad_norm": 1.3479068979832554, "learning_rate": 9.347609441529184e-07, "loss": 0.021276522427797318, "step": 715 }, { "epoch": 0.1731559854897219, "grad_norm": 3.395006551321706, "learning_rate": 9.345712711447083e-07, "loss": 0.013283729553222656, "step": 716 }, { "epoch": 0.17339782345828295, "grad_norm": 6.773568451342424, "learning_rate": 9.34381342114521e-07, "loss": 0.005675783380866051, "step": 717 }, { "epoch": 0.173639661426844, "grad_norm": 3.3730247151461654, "learning_rate": 9.341911571742507e-07, "loss": 0.02526015415787697, "step": 718 }, { "epoch": 0.17388149939540506, "grad_norm": 14.464823863031032, "learning_rate": 9.340007164359425e-07, "loss": 0.04995402321219444, "step": 719 }, { "epoch": 0.17412333736396615, "grad_norm": 1.276890899800039, "learning_rate": 9.338100200117917e-07, "loss": 0.004832831211388111, "step": 720 }, { "epoch": 0.1743651753325272, "grad_norm": 7.335011235333526, "learning_rate": 9.336190680141451e-07, "loss": 0.03556312248110771, "step": 721 }, { "epoch": 0.17460701330108827, "grad_norm": 3.4147675785362037, "learning_rate": 9.334278605554995e-07, "loss": 0.01802273839712143, "step": 722 }, { "epoch": 0.17484885126964933, "grad_norm": 3.4879268452756276, "learning_rate": 9.332363977485019e-07, "loss": 0.01612120307981968, "step": 723 }, { "epoch": 0.1750906892382104, "grad_norm": 3.3214783449212635, "learning_rate": 9.330446797059504e-07, "loss": 0.020245863124728203, "step": 724 }, { "epoch": 0.17533252720677148, "grad_norm": 5.917393845774513, "learning_rate": 9.32852706540793e-07, "loss": 0.01744142174720764, "step": 725 }, { "epoch": 0.17557436517533254, "grad_norm": 7.09091491269787, "learning_rate": 9.326604783661285e-07, "loss": 0.03987488895654678, "step": 726 }, { "epoch": 0.1758162031438936, "grad_norm": 4.616415470604285, "learning_rate": 9.324679952952053e-07, "loss": 0.022275660187005997, "step": 727 }, { "epoch": 0.17605804111245466, "grad_norm": 21.36211275789391, "learning_rate": 9.322752574414223e-07, "loss": 0.052244704216718674, "step": 728 }, { "epoch": 0.17629987908101571, "grad_norm": 2.7489157327859557, "learning_rate": 9.320822649183286e-07, "loss": 0.013843178749084473, "step": 729 }, { "epoch": 0.17654171704957677, "grad_norm": 6.172210428568702, "learning_rate": 9.318890178396231e-07, "loss": 0.01648416742682457, "step": 730 }, { "epoch": 0.17678355501813786, "grad_norm": 4.653642351199997, "learning_rate": 9.316955163191551e-07, "loss": 0.013724415563046932, "step": 731 }, { "epoch": 0.17702539298669892, "grad_norm": 3.2549559314438197, "learning_rate": 9.31501760470923e-07, "loss": 0.008761520497500896, "step": 732 }, { "epoch": 0.17726723095525998, "grad_norm": 2.1053781910679086, "learning_rate": 9.31307750409076e-07, "loss": 0.031080881133675575, "step": 733 }, { "epoch": 0.17750906892382104, "grad_norm": 5.845456381794855, "learning_rate": 9.311134862479122e-07, "loss": 0.027506202459335327, "step": 734 }, { "epoch": 0.1777509068923821, "grad_norm": 5.157024573719814, "learning_rate": 9.309189681018801e-07, "loss": 0.035425540059804916, "step": 735 }, { "epoch": 0.17799274486094316, "grad_norm": 5.919997279675177, "learning_rate": 9.307241960855772e-07, "loss": 0.09313102066516876, "step": 736 }, { "epoch": 0.17823458282950425, "grad_norm": 5.890419683838841, "learning_rate": 9.305291703137512e-07, "loss": 0.017935046926140785, "step": 737 }, { "epoch": 0.1784764207980653, "grad_norm": 1.8556714735568967, "learning_rate": 9.303338909012988e-07, "loss": 0.010827980004251003, "step": 738 }, { "epoch": 0.17871825876662636, "grad_norm": 2.3481222242745567, "learning_rate": 9.301383579632662e-07, "loss": 0.01748673804104328, "step": 739 }, { "epoch": 0.17896009673518742, "grad_norm": 4.639910512479043, "learning_rate": 9.299425716148495e-07, "loss": 0.032452765852212906, "step": 740 }, { "epoch": 0.17920193470374848, "grad_norm": 4.136314808056502, "learning_rate": 9.297465319713933e-07, "loss": 0.030842140316963196, "step": 741 }, { "epoch": 0.17944377267230954, "grad_norm": 2.7116798835102944, "learning_rate": 9.295502391483918e-07, "loss": 0.01020167674869299, "step": 742 }, { "epoch": 0.17968561064087063, "grad_norm": 2.1295528154519823, "learning_rate": 9.293536932614885e-07, "loss": 0.0111354636028409, "step": 743 }, { "epoch": 0.1799274486094317, "grad_norm": 1.8610912548400604, "learning_rate": 9.291568944264757e-07, "loss": 0.011614768765866756, "step": 744 }, { "epoch": 0.18016928657799275, "grad_norm": 4.902662023139264, "learning_rate": 9.28959842759295e-07, "loss": 0.02020816132426262, "step": 745 }, { "epoch": 0.1804111245465538, "grad_norm": 5.819112210902513, "learning_rate": 9.287625383760367e-07, "loss": 0.03291354700922966, "step": 746 }, { "epoch": 0.18065296251511487, "grad_norm": 7.178272594528483, "learning_rate": 9.285649813929401e-07, "loss": 0.045835286378860474, "step": 747 }, { "epoch": 0.18089480048367593, "grad_norm": 3.40728410154825, "learning_rate": 9.283671719263933e-07, "loss": 0.02105989120900631, "step": 748 }, { "epoch": 0.18113663845223701, "grad_norm": 1.8780397730071032, "learning_rate": 9.281691100929332e-07, "loss": 0.007778009865432978, "step": 749 }, { "epoch": 0.18137847642079807, "grad_norm": 6.968076922963094, "learning_rate": 9.279707960092455e-07, "loss": 0.02905704639852047, "step": 750 }, { "epoch": 0.18162031438935913, "grad_norm": 3.376507952933423, "learning_rate": 9.277722297921639e-07, "loss": 0.036070894449949265, "step": 751 }, { "epoch": 0.1818621523579202, "grad_norm": 3.182218319716474, "learning_rate": 9.275734115586716e-07, "loss": 0.01036971714347601, "step": 752 }, { "epoch": 0.18210399032648125, "grad_norm": 3.249875435542532, "learning_rate": 9.273743414258995e-07, "loss": 0.013624699786305428, "step": 753 }, { "epoch": 0.1823458282950423, "grad_norm": 1.8980963150845067, "learning_rate": 9.271750195111272e-07, "loss": 0.006147967651486397, "step": 754 }, { "epoch": 0.1825876662636034, "grad_norm": 3.2368209091800386, "learning_rate": 9.269754459317826e-07, "loss": 0.006910301279276609, "step": 755 }, { "epoch": 0.18282950423216446, "grad_norm": 4.942558496916007, "learning_rate": 9.267756208054418e-07, "loss": 0.03168981894850731, "step": 756 }, { "epoch": 0.18307134220072552, "grad_norm": 19.702989320021842, "learning_rate": 9.265755442498292e-07, "loss": 0.04030637815594673, "step": 757 }, { "epoch": 0.18331318016928658, "grad_norm": 9.668776460058915, "learning_rate": 9.263752163828173e-07, "loss": 0.040524955838918686, "step": 758 }, { "epoch": 0.18355501813784764, "grad_norm": 37.80862700958107, "learning_rate": 9.261746373224266e-07, "loss": 0.015602539293467999, "step": 759 }, { "epoch": 0.1837968561064087, "grad_norm": 2.873244479386213, "learning_rate": 9.259738071868257e-07, "loss": 0.011531451717019081, "step": 760 }, { "epoch": 0.18403869407496978, "grad_norm": 2.290732027910652, "learning_rate": 9.257727260943311e-07, "loss": 0.005669614300131798, "step": 761 }, { "epoch": 0.18428053204353084, "grad_norm": 10.421764430936195, "learning_rate": 9.255713941634069e-07, "loss": 0.02358885481953621, "step": 762 }, { "epoch": 0.1845223700120919, "grad_norm": 5.819022101058639, "learning_rate": 9.253698115126654e-07, "loss": 0.0642046257853508, "step": 763 }, { "epoch": 0.18476420798065296, "grad_norm": 9.030498280065855, "learning_rate": 9.251679782608661e-07, "loss": 0.01578792929649353, "step": 764 }, { "epoch": 0.18500604594921402, "grad_norm": 3.0408444144319224, "learning_rate": 9.249658945269167e-07, "loss": 0.009135051630437374, "step": 765 }, { "epoch": 0.18524788391777508, "grad_norm": 2.6447986500065763, "learning_rate": 9.247635604298721e-07, "loss": 0.007242071907967329, "step": 766 }, { "epoch": 0.18548972188633617, "grad_norm": 2.917244646342416, "learning_rate": 9.245609760889346e-07, "loss": 0.01385722029954195, "step": 767 }, { "epoch": 0.18573155985489723, "grad_norm": 6.3130068799739885, "learning_rate": 9.243581416234543e-07, "loss": 0.03518155962228775, "step": 768 }, { "epoch": 0.1859733978234583, "grad_norm": 7.454666450890998, "learning_rate": 9.241550571529285e-07, "loss": 0.03335906192660332, "step": 769 }, { "epoch": 0.18621523579201935, "grad_norm": 5.917261091833194, "learning_rate": 9.239517227970017e-07, "loss": 0.0417647548019886, "step": 770 }, { "epoch": 0.1864570737605804, "grad_norm": 5.169249442740347, "learning_rate": 9.237481386754656e-07, "loss": 0.03588370233774185, "step": 771 }, { "epoch": 0.18669891172914146, "grad_norm": 3.010859034508118, "learning_rate": 9.235443049082593e-07, "loss": 0.040776122361421585, "step": 772 }, { "epoch": 0.18694074969770255, "grad_norm": 3.035097728278783, "learning_rate": 9.233402216154687e-07, "loss": 0.014247170649468899, "step": 773 }, { "epoch": 0.1871825876662636, "grad_norm": 3.201060693453548, "learning_rate": 9.231358889173267e-07, "loss": 0.03344593569636345, "step": 774 }, { "epoch": 0.18742442563482467, "grad_norm": 4.076402517635225, "learning_rate": 9.229313069342132e-07, "loss": 0.024121014401316643, "step": 775 }, { "epoch": 0.18766626360338573, "grad_norm": 6.659401672679489, "learning_rate": 9.227264757866553e-07, "loss": 0.015275041572749615, "step": 776 }, { "epoch": 0.1879081015719468, "grad_norm": 5.773647279308916, "learning_rate": 9.225213955953263e-07, "loss": 0.015066809020936489, "step": 777 }, { "epoch": 0.18814993954050785, "grad_norm": 0.8792700091120489, "learning_rate": 9.223160664810466e-07, "loss": 0.003343848278746009, "step": 778 }, { "epoch": 0.18839177750906894, "grad_norm": 1.8099767815886243, "learning_rate": 9.221104885647832e-07, "loss": 0.019549110904335976, "step": 779 }, { "epoch": 0.18863361547763, "grad_norm": 5.196586162948917, "learning_rate": 9.219046619676498e-07, "loss": 0.02076437883079052, "step": 780 }, { "epoch": 0.18887545344619106, "grad_norm": 8.859738049163774, "learning_rate": 9.216985868109061e-07, "loss": 0.009672491811215878, "step": 781 }, { "epoch": 0.18911729141475211, "grad_norm": 2.526750361172611, "learning_rate": 9.214922632159588e-07, "loss": 0.012746541760861874, "step": 782 }, { "epoch": 0.18935912938331317, "grad_norm": 2.5594743252874466, "learning_rate": 9.212856913043606e-07, "loss": 0.022056808695197105, "step": 783 }, { "epoch": 0.18960096735187423, "grad_norm": 3.5482995082520588, "learning_rate": 9.210788711978109e-07, "loss": 0.01415701862424612, "step": 784 }, { "epoch": 0.18984280532043532, "grad_norm": 3.11304057024173, "learning_rate": 9.208718030181547e-07, "loss": 0.042600568383932114, "step": 785 }, { "epoch": 0.19008464328899638, "grad_norm": 3.2262272276593884, "learning_rate": 9.206644868873838e-07, "loss": 0.010915132239460945, "step": 786 }, { "epoch": 0.19032648125755744, "grad_norm": 3.5975095740470566, "learning_rate": 9.204569229276357e-07, "loss": 0.04140724614262581, "step": 787 }, { "epoch": 0.1905683192261185, "grad_norm": 16.508588140676146, "learning_rate": 9.202491112611939e-07, "loss": 0.02077258937060833, "step": 788 }, { "epoch": 0.19081015719467956, "grad_norm": 2.9493877623923384, "learning_rate": 9.20041052010488e-07, "loss": 0.004496427718549967, "step": 789 }, { "epoch": 0.19105199516324062, "grad_norm": 5.0911520689082215, "learning_rate": 9.198327452980934e-07, "loss": 0.007394782733172178, "step": 790 }, { "epoch": 0.1912938331318017, "grad_norm": 10.881697836717644, "learning_rate": 9.196241912467313e-07, "loss": 0.09845380485057831, "step": 791 }, { "epoch": 0.19153567110036276, "grad_norm": 94.15642057596781, "learning_rate": 9.194153899792685e-07, "loss": 0.43355169892311096, "step": 792 }, { "epoch": 0.19177750906892382, "grad_norm": 5.441669489893294, "learning_rate": 9.192063416187177e-07, "loss": 0.010980519466102123, "step": 793 }, { "epoch": 0.19201934703748488, "grad_norm": 4.340885164800289, "learning_rate": 9.189970462882367e-07, "loss": 0.005974770989269018, "step": 794 }, { "epoch": 0.19226118500604594, "grad_norm": 5.06899423388278, "learning_rate": 9.187875041111294e-07, "loss": 0.05371871218085289, "step": 795 }, { "epoch": 0.192503022974607, "grad_norm": 3.5785900202220233, "learning_rate": 9.185777152108447e-07, "loss": 0.014065495692193508, "step": 796 }, { "epoch": 0.1927448609431681, "grad_norm": 4.9539464629952885, "learning_rate": 9.18367679710977e-07, "loss": 0.02919766679406166, "step": 797 }, { "epoch": 0.19298669891172915, "grad_norm": 5.9723836606367655, "learning_rate": 9.18157397735266e-07, "loss": 0.021522758528590202, "step": 798 }, { "epoch": 0.1932285368802902, "grad_norm": 4.930583060832127, "learning_rate": 9.179468694075966e-07, "loss": 0.04512212425470352, "step": 799 }, { "epoch": 0.19347037484885127, "grad_norm": 1.546853426830051, "learning_rate": 9.177360948519987e-07, "loss": 0.012360385619103909, "step": 800 }, { "epoch": 0.19371221281741233, "grad_norm": 3.724802217179081, "learning_rate": 9.175250741926474e-07, "loss": 0.00836030300706625, "step": 801 }, { "epoch": 0.1939540507859734, "grad_norm": 2.4824692689456915, "learning_rate": 9.173138075538628e-07, "loss": 0.02868085540831089, "step": 802 }, { "epoch": 0.19419588875453447, "grad_norm": 2.941280419379869, "learning_rate": 9.171022950601098e-07, "loss": 0.02272948995232582, "step": 803 }, { "epoch": 0.19443772672309553, "grad_norm": 2.703681035498651, "learning_rate": 9.168905368359985e-07, "loss": 0.01318676769733429, "step": 804 }, { "epoch": 0.1946795646916566, "grad_norm": 2.0991155021146994, "learning_rate": 9.16678533006283e-07, "loss": 0.025973305106163025, "step": 805 }, { "epoch": 0.19492140266021765, "grad_norm": 51.723718217934326, "learning_rate": 9.16466283695863e-07, "loss": 0.04484055936336517, "step": 806 }, { "epoch": 0.1951632406287787, "grad_norm": 4.564178383760068, "learning_rate": 9.162537890297821e-07, "loss": 0.03409678861498833, "step": 807 }, { "epoch": 0.19540507859733977, "grad_norm": 7.672311192912355, "learning_rate": 9.160410491332291e-07, "loss": 0.0612114854156971, "step": 808 }, { "epoch": 0.19564691656590086, "grad_norm": 2.2874737191667776, "learning_rate": 9.158280641315365e-07, "loss": 0.02324586734175682, "step": 809 }, { "epoch": 0.19588875453446192, "grad_norm": 5.036922977037612, "learning_rate": 9.156148341501821e-07, "loss": 0.01223505474627018, "step": 810 }, { "epoch": 0.19613059250302298, "grad_norm": 4.695759194235436, "learning_rate": 9.154013593147873e-07, "loss": 0.012695850804448128, "step": 811 }, { "epoch": 0.19637243047158404, "grad_norm": 2.792281382957911, "learning_rate": 9.151876397511179e-07, "loss": 0.04708218574523926, "step": 812 }, { "epoch": 0.1966142684401451, "grad_norm": 13.399310011176206, "learning_rate": 9.149736755850841e-07, "loss": 0.016337504610419273, "step": 813 }, { "epoch": 0.19685610640870616, "grad_norm": 5.137813734501256, "learning_rate": 9.147594669427401e-07, "loss": 0.02212003618478775, "step": 814 }, { "epoch": 0.19709794437726724, "grad_norm": 2.1620798404487007, "learning_rate": 9.145450139502842e-07, "loss": 0.013183534145355225, "step": 815 }, { "epoch": 0.1973397823458283, "grad_norm": 3.795496761097269, "learning_rate": 9.143303167340584e-07, "loss": 0.030852442607283592, "step": 816 }, { "epoch": 0.19758162031438936, "grad_norm": 8.851387916054334, "learning_rate": 9.141153754205488e-07, "loss": 0.041915521025657654, "step": 817 }, { "epoch": 0.19782345828295042, "grad_norm": 1.5534192646116134, "learning_rate": 9.139001901363853e-07, "loss": 0.007406414952129126, "step": 818 }, { "epoch": 0.19806529625151148, "grad_norm": 6.37539241950499, "learning_rate": 9.136847610083416e-07, "loss": 0.03378522768616676, "step": 819 }, { "epoch": 0.19830713422007254, "grad_norm": 11.387177891635655, "learning_rate": 9.134690881633346e-07, "loss": 0.04338116571307182, "step": 820 }, { "epoch": 0.19854897218863363, "grad_norm": 3.5945560898505873, "learning_rate": 9.132531717284256e-07, "loss": 0.015269242227077484, "step": 821 }, { "epoch": 0.1987908101571947, "grad_norm": 1.9468535296064606, "learning_rate": 9.130370118308186e-07, "loss": 0.01230927836149931, "step": 822 }, { "epoch": 0.19903264812575575, "grad_norm": 4.4164701815038505, "learning_rate": 9.128206085978616e-07, "loss": 0.006912349257618189, "step": 823 }, { "epoch": 0.1992744860943168, "grad_norm": 4.704568116479574, "learning_rate": 9.126039621570456e-07, "loss": 0.02310245670378208, "step": 824 }, { "epoch": 0.19951632406287786, "grad_norm": 1.9581863245626625, "learning_rate": 9.12387072636005e-07, "loss": 0.008169001899659634, "step": 825 }, { "epoch": 0.19975816203143892, "grad_norm": 3.924511881755719, "learning_rate": 9.121699401625175e-07, "loss": 0.02790946140885353, "step": 826 }, { "epoch": 0.2, "grad_norm": 1.7692398486036611, "learning_rate": 9.119525648645037e-07, "loss": 0.036151379346847534, "step": 827 }, { "epoch": 0.20024183796856107, "grad_norm": 2.032835536851501, "learning_rate": 9.117349468700277e-07, "loss": 0.028338799253106117, "step": 828 }, { "epoch": 0.20048367593712213, "grad_norm": 6.21002072990631, "learning_rate": 9.11517086307296e-07, "loss": 0.012655536644160748, "step": 829 }, { "epoch": 0.2007255139056832, "grad_norm": 2.7653223062887697, "learning_rate": 9.112989833046585e-07, "loss": 0.019468870013952255, "step": 830 }, { "epoch": 0.20096735187424425, "grad_norm": 2.62121527996545, "learning_rate": 9.110806379906077e-07, "loss": 0.011605354025959969, "step": 831 }, { "epoch": 0.2012091898428053, "grad_norm": 1.0470150688105078, "learning_rate": 9.108620504937787e-07, "loss": 0.009929433465003967, "step": 832 }, { "epoch": 0.2014510278113664, "grad_norm": 5.211200025132618, "learning_rate": 9.106432209429497e-07, "loss": 0.022353636100888252, "step": 833 }, { "epoch": 0.20169286577992745, "grad_norm": 4.791876224784322, "learning_rate": 9.104241494670411e-07, "loss": 0.046993937343358994, "step": 834 }, { "epoch": 0.20193470374848851, "grad_norm": 4.573273516923591, "learning_rate": 9.10204836195116e-07, "loss": 0.016786515712738037, "step": 835 }, { "epoch": 0.20217654171704957, "grad_norm": 7.1554372245420375, "learning_rate": 9.099852812563802e-07, "loss": 0.024023516103625298, "step": 836 }, { "epoch": 0.20241837968561063, "grad_norm": 2.6395923561237122, "learning_rate": 9.097654847801812e-07, "loss": 0.011726949363946915, "step": 837 }, { "epoch": 0.2026602176541717, "grad_norm": 2.990600291803656, "learning_rate": 9.095454468960097e-07, "loss": 0.044847290962934494, "step": 838 }, { "epoch": 0.20290205562273278, "grad_norm": 2.598292968931722, "learning_rate": 9.093251677334977e-07, "loss": 0.012488218955695629, "step": 839 }, { "epoch": 0.20314389359129384, "grad_norm": 8.005014230221798, "learning_rate": 9.0910464742242e-07, "loss": 0.055350687354803085, "step": 840 }, { "epoch": 0.2033857315598549, "grad_norm": 2.4433150310200813, "learning_rate": 9.088838860926931e-07, "loss": 0.004158677067607641, "step": 841 }, { "epoch": 0.20362756952841596, "grad_norm": 6.122352881629879, "learning_rate": 9.086628838743759e-07, "loss": 0.03064233437180519, "step": 842 }, { "epoch": 0.20386940749697702, "grad_norm": 4.174239087048624, "learning_rate": 9.084416408976689e-07, "loss": 0.017161859199404716, "step": 843 }, { "epoch": 0.20411124546553808, "grad_norm": 3.5248495668353876, "learning_rate": 9.082201572929144e-07, "loss": 0.011214976198971272, "step": 844 }, { "epoch": 0.20435308343409916, "grad_norm": 6.490333073932099, "learning_rate": 9.079984331905966e-07, "loss": 0.01631051115691662, "step": 845 }, { "epoch": 0.20459492140266022, "grad_norm": 3.7866753775082405, "learning_rate": 9.077764687213412e-07, "loss": 0.026730084791779518, "step": 846 }, { "epoch": 0.20483675937122128, "grad_norm": 9.830519121261295, "learning_rate": 9.07554264015916e-07, "loss": 0.04579637944698334, "step": 847 }, { "epoch": 0.20507859733978234, "grad_norm": 6.438154017552263, "learning_rate": 9.073318192052297e-07, "loss": 0.014545273967087269, "step": 848 }, { "epoch": 0.2053204353083434, "grad_norm": 2.7112499226837743, "learning_rate": 9.07109134420333e-07, "loss": 0.016243180260062218, "step": 849 }, { "epoch": 0.20556227327690446, "grad_norm": 3.0662074524265077, "learning_rate": 9.068862097924174e-07, "loss": 0.00787436030805111, "step": 850 }, { "epoch": 0.20580411124546555, "grad_norm": 3.1233972543683426, "learning_rate": 9.066630454528163e-07, "loss": 0.008761375211179256, "step": 851 }, { "epoch": 0.2060459492140266, "grad_norm": 5.138787258932528, "learning_rate": 9.064396415330041e-07, "loss": 0.019886482506990433, "step": 852 }, { "epoch": 0.20628778718258767, "grad_norm": 7.775780025411949, "learning_rate": 9.062159981645961e-07, "loss": 0.038792070001363754, "step": 853 }, { "epoch": 0.20652962515114873, "grad_norm": 3.140329093163038, "learning_rate": 9.059921154793488e-07, "loss": 0.03223837539553642, "step": 854 }, { "epoch": 0.2067714631197098, "grad_norm": 5.0103887698247584, "learning_rate": 9.0576799360916e-07, "loss": 0.006293226964771748, "step": 855 }, { "epoch": 0.20701330108827085, "grad_norm": 9.178094031686221, "learning_rate": 9.055436326860681e-07, "loss": 0.023373819887638092, "step": 856 }, { "epoch": 0.20725513905683193, "grad_norm": 2.623234945023674, "learning_rate": 9.053190328422522e-07, "loss": 0.018974794074892998, "step": 857 }, { "epoch": 0.207496977025393, "grad_norm": 2.898279843084022, "learning_rate": 9.050941942100327e-07, "loss": 0.026009852066636086, "step": 858 }, { "epoch": 0.20773881499395405, "grad_norm": 2.9494497868030103, "learning_rate": 9.0486911692187e-07, "loss": 0.012966277077794075, "step": 859 }, { "epoch": 0.2079806529625151, "grad_norm": 1.7519189994501034, "learning_rate": 9.046438011103657e-07, "loss": 0.010824098251760006, "step": 860 }, { "epoch": 0.20822249093107617, "grad_norm": 1.2939031467733706, "learning_rate": 9.044182469082613e-07, "loss": 0.013142078183591366, "step": 861 }, { "epoch": 0.20846432889963723, "grad_norm": 7.574914645261984, "learning_rate": 9.041924544484396e-07, "loss": 0.030948055908083916, "step": 862 }, { "epoch": 0.20870616686819832, "grad_norm": 5.08060508012066, "learning_rate": 9.039664238639229e-07, "loss": 0.0256259236484766, "step": 863 }, { "epoch": 0.20894800483675938, "grad_norm": 1.9662330041357405, "learning_rate": 9.037401552878743e-07, "loss": 0.011711347848176956, "step": 864 }, { "epoch": 0.20918984280532044, "grad_norm": 5.1330962098841555, "learning_rate": 9.03513648853597e-07, "loss": 0.008260424248874187, "step": 865 }, { "epoch": 0.2094316807738815, "grad_norm": 5.1103226657065886, "learning_rate": 9.032869046945341e-07, "loss": 0.014957589097321033, "step": 866 }, { "epoch": 0.20967351874244256, "grad_norm": 42.0019103904027, "learning_rate": 9.030599229442693e-07, "loss": 0.023158175870776176, "step": 867 }, { "epoch": 0.20991535671100361, "grad_norm": 3.156828371039067, "learning_rate": 9.028327037365257e-07, "loss": 0.02299170382320881, "step": 868 }, { "epoch": 0.2101571946795647, "grad_norm": 1.9798307513797169, "learning_rate": 9.026052472051667e-07, "loss": 0.01415456272661686, "step": 869 }, { "epoch": 0.21039903264812576, "grad_norm": 0.916336134038163, "learning_rate": 9.02377553484195e-07, "loss": 0.007062762510031462, "step": 870 }, { "epoch": 0.21064087061668682, "grad_norm": 9.527828578754296, "learning_rate": 9.021496227077536e-07, "loss": 0.028822803869843483, "step": 871 }, { "epoch": 0.21088270858524788, "grad_norm": 12.421502670330735, "learning_rate": 9.019214550101249e-07, "loss": 0.007179912179708481, "step": 872 }, { "epoch": 0.21112454655380894, "grad_norm": 8.357592823608663, "learning_rate": 9.01693050525731e-07, "loss": 0.0809902474284172, "step": 873 }, { "epoch": 0.21136638452237, "grad_norm": 2.3687226044164826, "learning_rate": 9.014644093891332e-07, "loss": 0.011187906377017498, "step": 874 }, { "epoch": 0.21160822249093109, "grad_norm": 3.729275001064658, "learning_rate": 9.012355317350327e-07, "loss": 0.011984048411250114, "step": 875 }, { "epoch": 0.21185006045949215, "grad_norm": 2.652974246842107, "learning_rate": 9.010064176982697e-07, "loss": 0.01709573343396187, "step": 876 }, { "epoch": 0.2120918984280532, "grad_norm": 27.16571771886645, "learning_rate": 9.007770674138235e-07, "loss": 0.0251733660697937, "step": 877 }, { "epoch": 0.21233373639661426, "grad_norm": 2.67632343478251, "learning_rate": 9.005474810168133e-07, "loss": 0.012327193282544613, "step": 878 }, { "epoch": 0.21257557436517532, "grad_norm": 2.086676464160876, "learning_rate": 9.003176586424963e-07, "loss": 0.022257141768932343, "step": 879 }, { "epoch": 0.21281741233373638, "grad_norm": 1.789764043605636, "learning_rate": 9.000876004262698e-07, "loss": 0.01784614473581314, "step": 880 }, { "epoch": 0.21305925030229747, "grad_norm": 4.768731100963848, "learning_rate": 8.998573065036694e-07, "loss": 0.05580144003033638, "step": 881 }, { "epoch": 0.21330108827085853, "grad_norm": 2.3199174119476345, "learning_rate": 8.996267770103698e-07, "loss": 0.006888490170240402, "step": 882 }, { "epoch": 0.2135429262394196, "grad_norm": 3.0287661572791915, "learning_rate": 8.993960120821846e-07, "loss": 0.023463940247893333, "step": 883 }, { "epoch": 0.21378476420798065, "grad_norm": 1.5418324105837007, "learning_rate": 8.991650118550657e-07, "loss": 0.0110985292121768, "step": 884 }, { "epoch": 0.2140266021765417, "grad_norm": 27.403588416537435, "learning_rate": 8.989337764651038e-07, "loss": 0.031464822590351105, "step": 885 }, { "epoch": 0.21426844014510277, "grad_norm": 6.766814319947673, "learning_rate": 8.987023060485285e-07, "loss": 0.01711445115506649, "step": 886 }, { "epoch": 0.21451027811366385, "grad_norm": 7.616182230774814, "learning_rate": 8.984706007417073e-07, "loss": 0.0071349055506289005, "step": 887 }, { "epoch": 0.21475211608222491, "grad_norm": 5.20096355891099, "learning_rate": 8.982386606811465e-07, "loss": 0.04569293186068535, "step": 888 }, { "epoch": 0.21499395405078597, "grad_norm": 5.042219721301896, "learning_rate": 8.980064860034905e-07, "loss": 0.03423666954040527, "step": 889 }, { "epoch": 0.21523579201934703, "grad_norm": 5.4673054808825094, "learning_rate": 8.977740768455219e-07, "loss": 0.012615196406841278, "step": 890 }, { "epoch": 0.2154776299879081, "grad_norm": 82.6393110266803, "learning_rate": 8.975414333441618e-07, "loss": 0.015704896301031113, "step": 891 }, { "epoch": 0.21571946795646915, "grad_norm": 5.680986947285909, "learning_rate": 8.973085556364687e-07, "loss": 0.009642481803894043, "step": 892 }, { "epoch": 0.21596130592503024, "grad_norm": 3.180849967389516, "learning_rate": 8.970754438596396e-07, "loss": 0.011432381346821785, "step": 893 }, { "epoch": 0.2162031438935913, "grad_norm": 3.7493224774863294, "learning_rate": 8.968420981510094e-07, "loss": 0.015706835314631462, "step": 894 }, { "epoch": 0.21644498186215236, "grad_norm": 6.352084398880548, "learning_rate": 8.966085186480504e-07, "loss": 0.042479731142520905, "step": 895 }, { "epoch": 0.21668681983071342, "grad_norm": 5.563744268300665, "learning_rate": 8.963747054883732e-07, "loss": 0.022634953260421753, "step": 896 }, { "epoch": 0.21692865779927448, "grad_norm": 3.5637983660098, "learning_rate": 8.961406588097256e-07, "loss": 0.023439748212695122, "step": 897 }, { "epoch": 0.21717049576783554, "grad_norm": 2.424490694958443, "learning_rate": 8.95906378749993e-07, "loss": 0.012779595330357552, "step": 898 }, { "epoch": 0.21741233373639662, "grad_norm": 2.5997281622604858, "learning_rate": 8.956718654471986e-07, "loss": 0.041031431406736374, "step": 899 }, { "epoch": 0.21765417170495768, "grad_norm": 2.011660800387648, "learning_rate": 8.954371190395029e-07, "loss": 0.026048360392451286, "step": 900 }, { "epoch": 0.21789600967351874, "grad_norm": 3.036253578180798, "learning_rate": 8.952021396652036e-07, "loss": 0.027280539274215698, "step": 901 }, { "epoch": 0.2181378476420798, "grad_norm": 1.98021428729512, "learning_rate": 8.949669274627358e-07, "loss": 0.012193228118121624, "step": 902 }, { "epoch": 0.21837968561064086, "grad_norm": 3.430745863367769, "learning_rate": 8.947314825706716e-07, "loss": 0.006302003748714924, "step": 903 }, { "epoch": 0.21862152357920195, "grad_norm": 1.499592446966477, "learning_rate": 8.944958051277203e-07, "loss": 0.007558544632047415, "step": 904 }, { "epoch": 0.218863361547763, "grad_norm": 2.417982677999586, "learning_rate": 8.94259895272728e-07, "loss": 0.035255566239356995, "step": 905 }, { "epoch": 0.21910519951632407, "grad_norm": 5.661436394335544, "learning_rate": 8.940237531446781e-07, "loss": 0.02427239716053009, "step": 906 }, { "epoch": 0.21934703748488513, "grad_norm": 1.9871674473907561, "learning_rate": 8.937873788826909e-07, "loss": 0.04839847609400749, "step": 907 }, { "epoch": 0.2195888754534462, "grad_norm": 4.735714079631378, "learning_rate": 8.935507726260226e-07, "loss": 0.02677212469279766, "step": 908 }, { "epoch": 0.21983071342200725, "grad_norm": 2.712305409291389, "learning_rate": 8.933139345140672e-07, "loss": 0.003505978500470519, "step": 909 }, { "epoch": 0.22007255139056833, "grad_norm": 1.9692671568681421, "learning_rate": 8.930768646863545e-07, "loss": 0.03458719700574875, "step": 910 }, { "epoch": 0.2203143893591294, "grad_norm": 1.3933056747051473, "learning_rate": 8.928395632825511e-07, "loss": 0.006721515208482742, "step": 911 }, { "epoch": 0.22055622732769045, "grad_norm": 3.2539727327149555, "learning_rate": 8.926020304424601e-07, "loss": 0.039818741381168365, "step": 912 }, { "epoch": 0.2207980652962515, "grad_norm": 2.158240932384968, "learning_rate": 8.923642663060211e-07, "loss": 0.020172124728560448, "step": 913 }, { "epoch": 0.22103990326481257, "grad_norm": 2.3213166518814647, "learning_rate": 8.921262710133093e-07, "loss": 0.007455683778971434, "step": 914 }, { "epoch": 0.22128174123337363, "grad_norm": 2.8937797835331898, "learning_rate": 8.918880447045366e-07, "loss": 0.016819855198264122, "step": 915 }, { "epoch": 0.22152357920193472, "grad_norm": 16.085491944900163, "learning_rate": 8.916495875200511e-07, "loss": 0.007310065906494856, "step": 916 }, { "epoch": 0.22176541717049578, "grad_norm": 4.323607829727195, "learning_rate": 8.914108996003366e-07, "loss": 0.020515481010079384, "step": 917 }, { "epoch": 0.22200725513905684, "grad_norm": 4.023266384291401, "learning_rate": 8.91171981086013e-07, "loss": 0.05073723942041397, "step": 918 }, { "epoch": 0.2222490931076179, "grad_norm": 76.5463932645085, "learning_rate": 8.909328321178358e-07, "loss": 0.01917901262640953, "step": 919 }, { "epoch": 0.22249093107617895, "grad_norm": 3.56462188982899, "learning_rate": 8.90693452836697e-07, "loss": 0.02682018093764782, "step": 920 }, { "epoch": 0.22273276904474001, "grad_norm": 2.440592275537394, "learning_rate": 8.904538433836233e-07, "loss": 0.014482020400464535, "step": 921 }, { "epoch": 0.2229746070133011, "grad_norm": 4.196529137777159, "learning_rate": 8.902140038997777e-07, "loss": 0.023056721314787865, "step": 922 }, { "epoch": 0.22321644498186216, "grad_norm": 2.282127258536766, "learning_rate": 8.899739345264585e-07, "loss": 0.029440706595778465, "step": 923 }, { "epoch": 0.22345828295042322, "grad_norm": 6.860239223178805, "learning_rate": 8.897336354050993e-07, "loss": 0.011869669891893864, "step": 924 }, { "epoch": 0.22370012091898428, "grad_norm": 1.9797404889170023, "learning_rate": 8.894931066772692e-07, "loss": 0.019226307049393654, "step": 925 }, { "epoch": 0.22394195888754534, "grad_norm": 6.565934039731522, "learning_rate": 8.892523484846726e-07, "loss": 0.007483083289116621, "step": 926 }, { "epoch": 0.2241837968561064, "grad_norm": 1.2096875341559652, "learning_rate": 8.89011360969149e-07, "loss": 0.008708451874554157, "step": 927 }, { "epoch": 0.22442563482466749, "grad_norm": 1.5075186730193886, "learning_rate": 8.887701442726731e-07, "loss": 0.014901070855557919, "step": 928 }, { "epoch": 0.22466747279322855, "grad_norm": 3.807705902936661, "learning_rate": 8.885286985373546e-07, "loss": 0.006977120880037546, "step": 929 }, { "epoch": 0.2249093107617896, "grad_norm": 17.17035721841635, "learning_rate": 8.88287023905438e-07, "loss": 0.026540666818618774, "step": 930 }, { "epoch": 0.22515114873035066, "grad_norm": 4.371478468862625, "learning_rate": 8.880451205193027e-07, "loss": 0.037028033286333084, "step": 931 }, { "epoch": 0.22539298669891172, "grad_norm": 3.298709878497828, "learning_rate": 8.878029885214631e-07, "loss": 0.01099879015237093, "step": 932 }, { "epoch": 0.22563482466747278, "grad_norm": 1.6206421358216727, "learning_rate": 8.87560628054568e-07, "loss": 0.013733753934502602, "step": 933 }, { "epoch": 0.22587666263603387, "grad_norm": 3.212116050229232, "learning_rate": 8.873180392614008e-07, "loss": 0.023878443986177444, "step": 934 }, { "epoch": 0.22611850060459493, "grad_norm": 5.653596118580956, "learning_rate": 8.870752222848797e-07, "loss": 0.009316230192780495, "step": 935 }, { "epoch": 0.226360338573156, "grad_norm": 2.8311685270772533, "learning_rate": 8.868321772680569e-07, "loss": 0.020487887784838676, "step": 936 }, { "epoch": 0.22660217654171705, "grad_norm": 2.1357694308044195, "learning_rate": 8.865889043541194e-07, "loss": 0.012441153638064861, "step": 937 }, { "epoch": 0.2268440145102781, "grad_norm": 1.3377399945457782, "learning_rate": 8.863454036863881e-07, "loss": 0.01494542509317398, "step": 938 }, { "epoch": 0.22708585247883917, "grad_norm": 7.989551009962045, "learning_rate": 8.861016754083184e-07, "loss": 0.033682119101285934, "step": 939 }, { "epoch": 0.22732769044740025, "grad_norm": 1.595242610306766, "learning_rate": 8.858577196634995e-07, "loss": 0.004622145555913448, "step": 940 }, { "epoch": 0.22756952841596131, "grad_norm": 11.163772404408432, "learning_rate": 8.85613536595655e-07, "loss": 0.023054949939250946, "step": 941 }, { "epoch": 0.22781136638452237, "grad_norm": 7.927606609089143, "learning_rate": 8.853691263486416e-07, "loss": 0.02861538901925087, "step": 942 }, { "epoch": 0.22805320435308343, "grad_norm": 6.013330659209385, "learning_rate": 8.851244890664509e-07, "loss": 0.011515039019286633, "step": 943 }, { "epoch": 0.2282950423216445, "grad_norm": 11.014614197800336, "learning_rate": 8.848796248932076e-07, "loss": 0.023126093670725822, "step": 944 }, { "epoch": 0.22853688029020555, "grad_norm": 1.3826186146939916, "learning_rate": 8.846345339731702e-07, "loss": 0.007610830012708902, "step": 945 }, { "epoch": 0.22877871825876664, "grad_norm": 2.1485885141827654, "learning_rate": 8.843892164507308e-07, "loss": 0.01307370699942112, "step": 946 }, { "epoch": 0.2290205562273277, "grad_norm": 5.247043395418285, "learning_rate": 8.84143672470415e-07, "loss": 0.019437169656157494, "step": 947 }, { "epoch": 0.22926239419588876, "grad_norm": 1.4560872047389943, "learning_rate": 8.83897902176882e-07, "loss": 0.002937205135822296, "step": 948 }, { "epoch": 0.22950423216444982, "grad_norm": 2.9714238590294615, "learning_rate": 8.836519057149239e-07, "loss": 0.006574505474418402, "step": 949 }, { "epoch": 0.22974607013301088, "grad_norm": 4.057591105082499, "learning_rate": 8.834056832294663e-07, "loss": 0.019280394539237022, "step": 950 }, { "epoch": 0.22998790810157194, "grad_norm": 7.171370467125943, "learning_rate": 8.83159234865568e-07, "loss": 0.03883705660700798, "step": 951 }, { "epoch": 0.23022974607013302, "grad_norm": 4.258900426623611, "learning_rate": 8.829125607684212e-07, "loss": 0.022054146975278854, "step": 952 }, { "epoch": 0.23047158403869408, "grad_norm": 21.642391181406115, "learning_rate": 8.826656610833503e-07, "loss": 0.027334047481417656, "step": 953 }, { "epoch": 0.23071342200725514, "grad_norm": 11.27715336545314, "learning_rate": 8.82418535955813e-07, "loss": 0.05251508951187134, "step": 954 }, { "epoch": 0.2309552599758162, "grad_norm": 3.4626654260093246, "learning_rate": 8.821711855313999e-07, "loss": 0.021601121872663498, "step": 955 }, { "epoch": 0.23119709794437726, "grad_norm": 2.4532064280153714, "learning_rate": 8.819236099558345e-07, "loss": 0.03959675505757332, "step": 956 }, { "epoch": 0.23143893591293832, "grad_norm": 3.9151211603045524, "learning_rate": 8.816758093749724e-07, "loss": 0.009413468651473522, "step": 957 }, { "epoch": 0.2316807738814994, "grad_norm": 1.164249477295749, "learning_rate": 8.814277839348021e-07, "loss": 0.012233803980052471, "step": 958 }, { "epoch": 0.23192261185006047, "grad_norm": 1.1201929867535954, "learning_rate": 8.811795337814449e-07, "loss": 0.00893858727067709, "step": 959 }, { "epoch": 0.23216444981862153, "grad_norm": 1.8079756823804998, "learning_rate": 8.809310590611536e-07, "loss": 0.03234201669692993, "step": 960 }, { "epoch": 0.23240628778718259, "grad_norm": 5.026916117973118, "learning_rate": 8.806823599203144e-07, "loss": 0.016306325793266296, "step": 961 }, { "epoch": 0.23264812575574365, "grad_norm": 1.9803514140884984, "learning_rate": 8.804334365054446e-07, "loss": 0.01445599552243948, "step": 962 }, { "epoch": 0.2328899637243047, "grad_norm": 7.344833100593055, "learning_rate": 8.801842889631944e-07, "loss": 0.023506632074713707, "step": 963 }, { "epoch": 0.2331318016928658, "grad_norm": 25.479842090311646, "learning_rate": 8.799349174403459e-07, "loss": 0.017283841967582703, "step": 964 }, { "epoch": 0.23337363966142685, "grad_norm": 10.55607229774118, "learning_rate": 8.79685322083813e-07, "loss": 0.006339074112474918, "step": 965 }, { "epoch": 0.2336154776299879, "grad_norm": 1.460201483207436, "learning_rate": 8.794355030406415e-07, "loss": 0.016876645386219025, "step": 966 }, { "epoch": 0.23385731559854897, "grad_norm": 35.85764192835169, "learning_rate": 8.791854604580091e-07, "loss": 0.02414657734334469, "step": 967 }, { "epoch": 0.23409915356711003, "grad_norm": 1.8351959506877775, "learning_rate": 8.78935194483225e-07, "loss": 0.06131653115153313, "step": 968 }, { "epoch": 0.2343409915356711, "grad_norm": 1.6386970810247479, "learning_rate": 8.786847052637304e-07, "loss": 0.006733150687068701, "step": 969 }, { "epoch": 0.23458282950423218, "grad_norm": 5.247798857390303, "learning_rate": 8.784339929470974e-07, "loss": 0.023865539580583572, "step": 970 }, { "epoch": 0.23482466747279324, "grad_norm": 4.037540351436122, "learning_rate": 8.781830576810299e-07, "loss": 0.012210238724946976, "step": 971 }, { "epoch": 0.2350665054413543, "grad_norm": 1.6612319195639407, "learning_rate": 8.779318996133633e-07, "loss": 0.0068552191369235516, "step": 972 }, { "epoch": 0.23530834340991535, "grad_norm": 1.9976630610741963, "learning_rate": 8.776805188920642e-07, "loss": 0.03216392546892166, "step": 973 }, { "epoch": 0.23555018137847641, "grad_norm": 7.42139293200359, "learning_rate": 8.7742891566523e-07, "loss": 0.028237570077180862, "step": 974 }, { "epoch": 0.23579201934703747, "grad_norm": 49.76520177034033, "learning_rate": 8.771770900810895e-07, "loss": 0.01503857970237732, "step": 975 }, { "epoch": 0.23603385731559856, "grad_norm": 4.7352663120010305, "learning_rate": 8.769250422880026e-07, "loss": 0.01224495843052864, "step": 976 }, { "epoch": 0.23627569528415962, "grad_norm": 4.363861708776634, "learning_rate": 8.766727724344597e-07, "loss": 0.011571191251277924, "step": 977 }, { "epoch": 0.23651753325272068, "grad_norm": 3.804723865945963, "learning_rate": 8.764202806690826e-07, "loss": 0.009626737795770168, "step": 978 }, { "epoch": 0.23675937122128174, "grad_norm": 4.950167162194494, "learning_rate": 8.761675671406231e-07, "loss": 0.03842759132385254, "step": 979 }, { "epoch": 0.2370012091898428, "grad_norm": 8.832287864412127, "learning_rate": 8.759146319979645e-07, "loss": 0.015189612284302711, "step": 980 }, { "epoch": 0.23724304715840386, "grad_norm": 4.5418454373532295, "learning_rate": 8.756614753901201e-07, "loss": 0.024082517251372337, "step": 981 }, { "epoch": 0.23748488512696495, "grad_norm": 3.742589003926713, "learning_rate": 8.754080974662337e-07, "loss": 0.009183892048895359, "step": 982 }, { "epoch": 0.237726723095526, "grad_norm": 1.8432434995476887, "learning_rate": 8.751544983755796e-07, "loss": 0.01675874926149845, "step": 983 }, { "epoch": 0.23796856106408706, "grad_norm": 1.98360511027857, "learning_rate": 8.749006782675625e-07, "loss": 0.011232723481953144, "step": 984 }, { "epoch": 0.23821039903264812, "grad_norm": 2.3460652881750286, "learning_rate": 8.746466372917169e-07, "loss": 0.006962924730032682, "step": 985 }, { "epoch": 0.23845223700120918, "grad_norm": 2.8450662590135125, "learning_rate": 8.743923755977083e-07, "loss": 0.011697755195200443, "step": 986 }, { "epoch": 0.23869407496977024, "grad_norm": 2.711038776226019, "learning_rate": 8.74137893335331e-07, "loss": 0.013769849203526974, "step": 987 }, { "epoch": 0.23893591293833133, "grad_norm": 1.7811205798317165, "learning_rate": 8.738831906545103e-07, "loss": 0.010223246179521084, "step": 988 }, { "epoch": 0.2391777509068924, "grad_norm": 3.0486065416905177, "learning_rate": 8.736282677053009e-07, "loss": 0.0575220100581646, "step": 989 }, { "epoch": 0.23941958887545345, "grad_norm": 1.8450177628732392, "learning_rate": 8.733731246378871e-07, "loss": 0.009257408790290356, "step": 990 }, { "epoch": 0.2396614268440145, "grad_norm": 8.300839412668111, "learning_rate": 8.731177616025834e-07, "loss": 0.009060042910277843, "step": 991 }, { "epoch": 0.23990326481257557, "grad_norm": 3.5629384521693335, "learning_rate": 8.728621787498332e-07, "loss": 0.015111899934709072, "step": 992 }, { "epoch": 0.24014510278113663, "grad_norm": 16.446411962385604, "learning_rate": 8.726063762302103e-07, "loss": 0.038188621401786804, "step": 993 }, { "epoch": 0.24038694074969771, "grad_norm": 1.663797574654503, "learning_rate": 8.72350354194417e-07, "loss": 0.00589290214702487, "step": 994 }, { "epoch": 0.24062877871825877, "grad_norm": 4.245579951822779, "learning_rate": 8.720941127932854e-07, "loss": 0.015974385663866997, "step": 995 }, { "epoch": 0.24087061668681983, "grad_norm": 2.552656010961041, "learning_rate": 8.71837652177777e-07, "loss": 0.026212329044938087, "step": 996 }, { "epoch": 0.2411124546553809, "grad_norm": 2.453105641354933, "learning_rate": 8.715809724989821e-07, "loss": 0.014425763860344887, "step": 997 }, { "epoch": 0.24135429262394195, "grad_norm": 7.011497947865286, "learning_rate": 8.7132407390812e-07, "loss": 0.0322682186961174, "step": 998 }, { "epoch": 0.241596130592503, "grad_norm": 3.8878178033707695, "learning_rate": 8.710669565565393e-07, "loss": 0.028755972161889076, "step": 999 }, { "epoch": 0.2418379685610641, "grad_norm": 8.447420995045073, "learning_rate": 8.708096205957173e-07, "loss": 0.04023744538426399, "step": 1000 }, { "epoch": 0.24207980652962516, "grad_norm": 6.223514011328545, "learning_rate": 8.705520661772601e-07, "loss": 0.011853156611323357, "step": 1001 }, { "epoch": 0.24232164449818622, "grad_norm": 2.3146710875597756, "learning_rate": 8.702942934529025e-07, "loss": 0.008948910050094128, "step": 1002 }, { "epoch": 0.24256348246674728, "grad_norm": 6.665184161493666, "learning_rate": 8.700363025745081e-07, "loss": 0.01915164291858673, "step": 1003 }, { "epoch": 0.24280532043530834, "grad_norm": 2.4728962823377914, "learning_rate": 8.697780936940685e-07, "loss": 0.010452628135681152, "step": 1004 }, { "epoch": 0.2430471584038694, "grad_norm": 3.0099927494044048, "learning_rate": 8.695196669637044e-07, "loss": 0.005895719863474369, "step": 1005 }, { "epoch": 0.24328899637243048, "grad_norm": 1.7515926420009966, "learning_rate": 8.692610225356644e-07, "loss": 0.014225788414478302, "step": 1006 }, { "epoch": 0.24353083434099154, "grad_norm": 1.1144819315849714, "learning_rate": 8.690021605623254e-07, "loss": 0.004136219155043364, "step": 1007 }, { "epoch": 0.2437726723095526, "grad_norm": 4.1699478030153925, "learning_rate": 8.687430811961928e-07, "loss": 0.013017073273658752, "step": 1008 }, { "epoch": 0.24401451027811366, "grad_norm": 3.1794410108266695, "learning_rate": 8.684837845898995e-07, "loss": 0.02179432474076748, "step": 1009 }, { "epoch": 0.24425634824667472, "grad_norm": 8.56635090531929, "learning_rate": 8.682242708962069e-07, "loss": 0.01148330420255661, "step": 1010 }, { "epoch": 0.24449818621523578, "grad_norm": 3.7200769813509686, "learning_rate": 8.67964540268004e-07, "loss": 0.020296430215239525, "step": 1011 }, { "epoch": 0.24474002418379687, "grad_norm": 5.924571886514265, "learning_rate": 8.677045928583079e-07, "loss": 0.009604903869330883, "step": 1012 }, { "epoch": 0.24498186215235793, "grad_norm": 1.2876423638947072, "learning_rate": 8.67444428820263e-07, "loss": 0.020582273602485657, "step": 1013 }, { "epoch": 0.24522370012091899, "grad_norm": 1.3540607770000608, "learning_rate": 8.671840483071415e-07, "loss": 0.00930364616215229, "step": 1014 }, { "epoch": 0.24546553808948005, "grad_norm": 2.9128328210112384, "learning_rate": 8.669234514723433e-07, "loss": 0.03683369234204292, "step": 1015 }, { "epoch": 0.2457073760580411, "grad_norm": 1.9095226231408533, "learning_rate": 8.666626384693958e-07, "loss": 0.009303120896220207, "step": 1016 }, { "epoch": 0.24594921402660216, "grad_norm": 13.175416238018641, "learning_rate": 8.664016094519532e-07, "loss": 0.037822067737579346, "step": 1017 }, { "epoch": 0.24619105199516325, "grad_norm": 10.897432902889697, "learning_rate": 8.661403645737974e-07, "loss": 0.031492721289396286, "step": 1018 }, { "epoch": 0.2464328899637243, "grad_norm": 2.033383474420265, "learning_rate": 8.658789039888377e-07, "loss": 0.0061507681384682655, "step": 1019 }, { "epoch": 0.24667472793228537, "grad_norm": 3.153835747855278, "learning_rate": 8.656172278511098e-07, "loss": 0.016044849529862404, "step": 1020 }, { "epoch": 0.24691656590084643, "grad_norm": 2.621488586079077, "learning_rate": 8.65355336314777e-07, "loss": 0.020553767681121826, "step": 1021 }, { "epoch": 0.2471584038694075, "grad_norm": 4.0555178939262255, "learning_rate": 8.650932295341291e-07, "loss": 0.014769394882023335, "step": 1022 }, { "epoch": 0.24740024183796855, "grad_norm": 4.7960051378567226, "learning_rate": 8.648309076635831e-07, "loss": 0.040414538234472275, "step": 1023 }, { "epoch": 0.24764207980652964, "grad_norm": 2.4762988887988238, "learning_rate": 8.645683708576825e-07, "loss": 0.013556013815104961, "step": 1024 }, { "epoch": 0.2478839177750907, "grad_norm": 2.738616137765918, "learning_rate": 8.643056192710971e-07, "loss": 0.015275374054908752, "step": 1025 }, { "epoch": 0.24812575574365175, "grad_norm": 8.028323274765112, "learning_rate": 8.640426530586239e-07, "loss": 0.019398555159568787, "step": 1026 }, { "epoch": 0.24836759371221281, "grad_norm": 0.9132273446385285, "learning_rate": 8.637794723751859e-07, "loss": 0.004943373147398233, "step": 1027 }, { "epoch": 0.24860943168077387, "grad_norm": 16.876071348234902, "learning_rate": 8.635160773758323e-07, "loss": 0.014536959119141102, "step": 1028 }, { "epoch": 0.24885126964933493, "grad_norm": 2.678670649481825, "learning_rate": 8.632524682157394e-07, "loss": 0.006316587328910828, "step": 1029 }, { "epoch": 0.24909310761789602, "grad_norm": 2.320687760482069, "learning_rate": 8.629886450502084e-07, "loss": 0.01202085055410862, "step": 1030 }, { "epoch": 0.24933494558645708, "grad_norm": 3.155100566432322, "learning_rate": 8.627246080346678e-07, "loss": 0.028269857168197632, "step": 1031 }, { "epoch": 0.24957678355501814, "grad_norm": 3.0488408197701675, "learning_rate": 8.624603573246714e-07, "loss": 0.023942478001117706, "step": 1032 }, { "epoch": 0.2498186215235792, "grad_norm": 2.908328308025281, "learning_rate": 8.621958930758988e-07, "loss": 0.0059004733338952065, "step": 1033 }, { "epoch": 0.2500604594921403, "grad_norm": 0.9636395778429391, "learning_rate": 8.61931215444156e-07, "loss": 0.003175706136971712, "step": 1034 }, { "epoch": 0.2503022974607013, "grad_norm": 3.9664494759137168, "learning_rate": 8.616663245853742e-07, "loss": 0.013031295500695705, "step": 1035 }, { "epoch": 0.2505441354292624, "grad_norm": 4.16806956488742, "learning_rate": 8.614012206556103e-07, "loss": 0.03311019763350487, "step": 1036 }, { "epoch": 0.25078597339782344, "grad_norm": 3.2175714568514904, "learning_rate": 8.61135903811047e-07, "loss": 0.03160218521952629, "step": 1037 }, { "epoch": 0.2510278113663845, "grad_norm": 48.704834257753184, "learning_rate": 8.608703742079921e-07, "loss": 0.004345900844782591, "step": 1038 }, { "epoch": 0.2512696493349456, "grad_norm": 7.500633363266828, "learning_rate": 8.606046320028789e-07, "loss": 0.018113259226083755, "step": 1039 }, { "epoch": 0.25151148730350664, "grad_norm": 2.1949226593533786, "learning_rate": 8.60338677352266e-07, "loss": 0.019482016563415527, "step": 1040 }, { "epoch": 0.25175332527206773, "grad_norm": 2.080346840414865, "learning_rate": 8.600725104128372e-07, "loss": 0.021603507921099663, "step": 1041 }, { "epoch": 0.25199516324062876, "grad_norm": 4.977749622794939, "learning_rate": 8.598061313414011e-07, "loss": 0.023811014369130135, "step": 1042 }, { "epoch": 0.25223700120918985, "grad_norm": 2.0119042331575034, "learning_rate": 8.595395402948915e-07, "loss": 0.004433663096278906, "step": 1043 }, { "epoch": 0.2524788391777509, "grad_norm": 5.791587852511591, "learning_rate": 8.59272737430367e-07, "loss": 0.024252191185951233, "step": 1044 }, { "epoch": 0.25272067714631197, "grad_norm": 3.144270442363101, "learning_rate": 8.59005722905011e-07, "loss": 0.029158253222703934, "step": 1045 }, { "epoch": 0.25296251511487305, "grad_norm": 5.063789706074738, "learning_rate": 8.587384968761317e-07, "loss": 0.025739679113030434, "step": 1046 }, { "epoch": 0.2532043530834341, "grad_norm": 35.43161725948775, "learning_rate": 8.584710595011619e-07, "loss": 0.024076228961348534, "step": 1047 }, { "epoch": 0.2534461910519952, "grad_norm": 4.892897573928452, "learning_rate": 8.582034109376586e-07, "loss": 0.022234510630369186, "step": 1048 }, { "epoch": 0.2536880290205562, "grad_norm": 6.240982572421941, "learning_rate": 8.579355513433035e-07, "loss": 0.007532754447311163, "step": 1049 }, { "epoch": 0.2539298669891173, "grad_norm": 1.9342051885331963, "learning_rate": 8.576674808759028e-07, "loss": 0.004951328504830599, "step": 1050 }, { "epoch": 0.2541717049576784, "grad_norm": 3.507853647217064, "learning_rate": 8.573991996933862e-07, "loss": 0.01264230627566576, "step": 1051 }, { "epoch": 0.2544135429262394, "grad_norm": 1.6540254473237619, "learning_rate": 8.571307079538087e-07, "loss": 0.0045938752591609955, "step": 1052 }, { "epoch": 0.2546553808948005, "grad_norm": 3.2656537681607793, "learning_rate": 8.568620058153482e-07, "loss": 0.018903877586126328, "step": 1053 }, { "epoch": 0.25489721886336153, "grad_norm": 3.9802764800907005, "learning_rate": 8.565930934363072e-07, "loss": 0.011859888210892677, "step": 1054 }, { "epoch": 0.2551390568319226, "grad_norm": 1.5320313029893151, "learning_rate": 8.563239709751123e-07, "loss": 0.02443360909819603, "step": 1055 }, { "epoch": 0.25538089480048365, "grad_norm": 5.776465941250084, "learning_rate": 8.560546385903126e-07, "loss": 0.015102291479706764, "step": 1056 }, { "epoch": 0.25562273276904474, "grad_norm": 3.3642278553389593, "learning_rate": 8.557850964405826e-07, "loss": 0.04585913196206093, "step": 1057 }, { "epoch": 0.2558645707376058, "grad_norm": 2.3510315025906263, "learning_rate": 8.55515344684719e-07, "loss": 0.018079159781336784, "step": 1058 }, { "epoch": 0.25610640870616685, "grad_norm": 2.039372733832651, "learning_rate": 8.552453834816429e-07, "loss": 0.011166229844093323, "step": 1059 }, { "epoch": 0.25634824667472794, "grad_norm": 3.95254190780604, "learning_rate": 8.54975212990398e-07, "loss": 0.02501397393643856, "step": 1060 }, { "epoch": 0.256590084643289, "grad_norm": 17.70980369228024, "learning_rate": 8.547048333701521e-07, "loss": 0.0078081414103507996, "step": 1061 }, { "epoch": 0.25683192261185006, "grad_norm": 5.047634184434183, "learning_rate": 8.544342447801955e-07, "loss": 0.011632937006652355, "step": 1062 }, { "epoch": 0.25707376058041115, "grad_norm": 1.0337478366293844, "learning_rate": 8.541634473799422e-07, "loss": 0.003346068086102605, "step": 1063 }, { "epoch": 0.2573155985489722, "grad_norm": 1.972767177991402, "learning_rate": 8.538924413289287e-07, "loss": 0.009519078768789768, "step": 1064 }, { "epoch": 0.25755743651753327, "grad_norm": 1.8073662252893254, "learning_rate": 8.536212267868148e-07, "loss": 0.010517914779484272, "step": 1065 }, { "epoch": 0.2577992744860943, "grad_norm": 2.1215222236087223, "learning_rate": 8.53349803913383e-07, "loss": 0.010038149543106556, "step": 1066 }, { "epoch": 0.2580411124546554, "grad_norm": 5.541605991339491, "learning_rate": 8.530781728685385e-07, "loss": 0.009725183248519897, "step": 1067 }, { "epoch": 0.2582829504232164, "grad_norm": 1.3958349329196305, "learning_rate": 8.528063338123093e-07, "loss": 0.006786813028156757, "step": 1068 }, { "epoch": 0.2585247883917775, "grad_norm": 3.3929079678291147, "learning_rate": 8.525342869048455e-07, "loss": 0.005011742934584618, "step": 1069 }, { "epoch": 0.2587666263603386, "grad_norm": 2.820352740102394, "learning_rate": 8.522620323064203e-07, "loss": 0.002866075374186039, "step": 1070 }, { "epoch": 0.2590084643288996, "grad_norm": 2.3929895628272253, "learning_rate": 8.519895701774289e-07, "loss": 0.015853123739361763, "step": 1071 }, { "epoch": 0.2592503022974607, "grad_norm": 8.38255532205695, "learning_rate": 8.517169006783885e-07, "loss": 0.011613796465098858, "step": 1072 }, { "epoch": 0.25949214026602174, "grad_norm": 2.3092010552138627, "learning_rate": 8.514440239699392e-07, "loss": 0.006239986512809992, "step": 1073 }, { "epoch": 0.25973397823458283, "grad_norm": 2.1006394545229248, "learning_rate": 8.511709402128424e-07, "loss": 0.00993583258241415, "step": 1074 }, { "epoch": 0.2599758162031439, "grad_norm": 5.404291709049542, "learning_rate": 8.508976495679821e-07, "loss": 0.019139055162668228, "step": 1075 }, { "epoch": 0.26021765417170495, "grad_norm": 4.1552540878369495, "learning_rate": 8.506241521963635e-07, "loss": 0.008295689709484577, "step": 1076 }, { "epoch": 0.26045949214026604, "grad_norm": 3.441463619516618, "learning_rate": 8.503504482591144e-07, "loss": 0.0209818035364151, "step": 1077 }, { "epoch": 0.26070133010882707, "grad_norm": 3.656403444018257, "learning_rate": 8.500765379174837e-07, "loss": 0.016514437273144722, "step": 1078 }, { "epoch": 0.26094316807738815, "grad_norm": 1.5206559748320208, "learning_rate": 8.498024213328422e-07, "loss": 0.005771522875875235, "step": 1079 }, { "epoch": 0.26118500604594924, "grad_norm": 3.3574170876676717, "learning_rate": 8.49528098666682e-07, "loss": 0.020434817299246788, "step": 1080 }, { "epoch": 0.2614268440145103, "grad_norm": 4.861248346644821, "learning_rate": 8.492535700806166e-07, "loss": 0.025809383019804955, "step": 1081 }, { "epoch": 0.26166868198307136, "grad_norm": 2.4016726995639237, "learning_rate": 8.489788357363813e-07, "loss": 0.049428630620241165, "step": 1082 }, { "epoch": 0.2619105199516324, "grad_norm": 3.9831383901760566, "learning_rate": 8.487038957958319e-07, "loss": 0.008094598539173603, "step": 1083 }, { "epoch": 0.2621523579201935, "grad_norm": 26.808790917088373, "learning_rate": 8.484287504209457e-07, "loss": 0.016179142519831657, "step": 1084 }, { "epoch": 0.2623941958887545, "grad_norm": 1.8567849152410714, "learning_rate": 8.481533997738212e-07, "loss": 0.013193446211516857, "step": 1085 }, { "epoch": 0.2626360338573156, "grad_norm": 1.452071418174104, "learning_rate": 8.478778440166775e-07, "loss": 0.021837595850229263, "step": 1086 }, { "epoch": 0.2628778718258767, "grad_norm": 2.347224959260151, "learning_rate": 8.476020833118545e-07, "loss": 0.015368503518402576, "step": 1087 }, { "epoch": 0.2631197097944377, "grad_norm": 2.043432208307028, "learning_rate": 8.473261178218134e-07, "loss": 0.009730644524097443, "step": 1088 }, { "epoch": 0.2633615477629988, "grad_norm": 1.6599338407275783, "learning_rate": 8.470499477091353e-07, "loss": 0.036183759570121765, "step": 1089 }, { "epoch": 0.26360338573155984, "grad_norm": 3.363635374718553, "learning_rate": 8.467735731365224e-07, "loss": 0.00896710529923439, "step": 1090 }, { "epoch": 0.2638452237001209, "grad_norm": 4.636830338921694, "learning_rate": 8.464969942667971e-07, "loss": 0.011018102057278156, "step": 1091 }, { "epoch": 0.264087061668682, "grad_norm": 6.0828145482112665, "learning_rate": 8.462202112629022e-07, "loss": 0.018171275034546852, "step": 1092 }, { "epoch": 0.26432889963724304, "grad_norm": 3.7495402845008416, "learning_rate": 8.45943224287901e-07, "loss": 0.0204568300396204, "step": 1093 }, { "epoch": 0.26457073760580413, "grad_norm": 9.248538800834128, "learning_rate": 8.456660335049763e-07, "loss": 0.010199195705354214, "step": 1094 }, { "epoch": 0.26481257557436516, "grad_norm": 3.6982472845242147, "learning_rate": 8.453886390774319e-07, "loss": 0.058933328837156296, "step": 1095 }, { "epoch": 0.26505441354292625, "grad_norm": 2.334570301837053, "learning_rate": 8.451110411686908e-07, "loss": 0.0324026457965374, "step": 1096 }, { "epoch": 0.2652962515114873, "grad_norm": 8.525370240742559, "learning_rate": 8.448332399422962e-07, "loss": 0.012286088429391384, "step": 1097 }, { "epoch": 0.26553808948004837, "grad_norm": 1.6573198053292433, "learning_rate": 8.445552355619112e-07, "loss": 0.008803880773484707, "step": 1098 }, { "epoch": 0.26577992744860945, "grad_norm": 3.138731122089847, "learning_rate": 8.442770281913184e-07, "loss": 0.018278637900948524, "step": 1099 }, { "epoch": 0.2660217654171705, "grad_norm": 2.9151140740965475, "learning_rate": 8.439986179944199e-07, "loss": 0.01191879902034998, "step": 1100 }, { "epoch": 0.2662636033857316, "grad_norm": 4.026860085420234, "learning_rate": 8.437200051352376e-07, "loss": 0.017988141626119614, "step": 1101 }, { "epoch": 0.2665054413542926, "grad_norm": 2.06711142930644, "learning_rate": 8.434411897779125e-07, "loss": 0.0151355667039752, "step": 1102 }, { "epoch": 0.2667472793228537, "grad_norm": 9.878743389964255, "learning_rate": 8.431621720867051e-07, "loss": 0.00858235452324152, "step": 1103 }, { "epoch": 0.2669891172914148, "grad_norm": 7.8422359214299915, "learning_rate": 8.428829522259949e-07, "loss": 0.018245143815875053, "step": 1104 }, { "epoch": 0.2672309552599758, "grad_norm": 2.746817042857407, "learning_rate": 8.426035303602808e-07, "loss": 0.007114742882549763, "step": 1105 }, { "epoch": 0.2674727932285369, "grad_norm": 2.802154283459584, "learning_rate": 8.423239066541802e-07, "loss": 0.0039283884689211845, "step": 1106 }, { "epoch": 0.26771463119709793, "grad_norm": 3.2749165720835256, "learning_rate": 8.420440812724301e-07, "loss": 0.008842743933200836, "step": 1107 }, { "epoch": 0.267956469165659, "grad_norm": 3.587916529701098, "learning_rate": 8.41764054379886e-07, "loss": 0.014011022634804249, "step": 1108 }, { "epoch": 0.26819830713422005, "grad_norm": 4.244032728331932, "learning_rate": 8.414838261415218e-07, "loss": 0.028602654114365578, "step": 1109 }, { "epoch": 0.26844014510278114, "grad_norm": 1.4275234594250517, "learning_rate": 8.412033967224303e-07, "loss": 0.007051246706396341, "step": 1110 }, { "epoch": 0.2686819830713422, "grad_norm": 2.54334760974789, "learning_rate": 8.409227662878232e-07, "loss": 0.021948030218482018, "step": 1111 }, { "epoch": 0.26892382103990325, "grad_norm": 2.712747318484591, "learning_rate": 8.4064193500303e-07, "loss": 0.009601816534996033, "step": 1112 }, { "epoch": 0.26916565900846434, "grad_norm": 3.3993816137203794, "learning_rate": 8.403609030334987e-07, "loss": 0.018290294334292412, "step": 1113 }, { "epoch": 0.2694074969770254, "grad_norm": 1.492333128170293, "learning_rate": 8.400796705447955e-07, "loss": 0.004836448933929205, "step": 1114 }, { "epoch": 0.26964933494558646, "grad_norm": 2.932739342617909, "learning_rate": 8.397982377026052e-07, "loss": 0.010896782390773296, "step": 1115 }, { "epoch": 0.26989117291414755, "grad_norm": 1.8407272976405236, "learning_rate": 8.3951660467273e-07, "loss": 0.005594284273684025, "step": 1116 }, { "epoch": 0.2701330108827086, "grad_norm": 1.8986413976125311, "learning_rate": 8.392347716210905e-07, "loss": 0.017214396968483925, "step": 1117 }, { "epoch": 0.27037484885126967, "grad_norm": 2.8765515596508564, "learning_rate": 8.389527387137247e-07, "loss": 0.01295419316738844, "step": 1118 }, { "epoch": 0.2706166868198307, "grad_norm": 11.761665497397438, "learning_rate": 8.38670506116789e-07, "loss": 0.020533299073576927, "step": 1119 }, { "epoch": 0.2708585247883918, "grad_norm": 6.868952332750728, "learning_rate": 8.383880739965567e-07, "loss": 0.014714876189827919, "step": 1120 }, { "epoch": 0.2711003627569528, "grad_norm": 3.2341744910053376, "learning_rate": 8.381054425194189e-07, "loss": 0.06464789062738419, "step": 1121 }, { "epoch": 0.2713422007255139, "grad_norm": 2.383390021968528, "learning_rate": 8.378226118518848e-07, "loss": 0.0061810496263206005, "step": 1122 }, { "epoch": 0.271584038694075, "grad_norm": 2.813152338765994, "learning_rate": 8.375395821605797e-07, "loss": 0.01617908664047718, "step": 1123 }, { "epoch": 0.271825876662636, "grad_norm": 1.5112056864574384, "learning_rate": 8.372563536122474e-07, "loss": 0.006804091390222311, "step": 1124 }, { "epoch": 0.2720677146311971, "grad_norm": 2.9056104356861545, "learning_rate": 8.369729263737477e-07, "loss": 0.003655471606180072, "step": 1125 }, { "epoch": 0.27230955259975814, "grad_norm": 1.8033239327518613, "learning_rate": 8.366893006120587e-07, "loss": 0.02708449773490429, "step": 1126 }, { "epoch": 0.27255139056831923, "grad_norm": 3.4996204744031116, "learning_rate": 8.364054764942744e-07, "loss": 0.0063239349983632565, "step": 1127 }, { "epoch": 0.2727932285368803, "grad_norm": 4.599091046081529, "learning_rate": 8.361214541876061e-07, "loss": 0.017597293481230736, "step": 1128 }, { "epoch": 0.27303506650544135, "grad_norm": 1.4439841276624383, "learning_rate": 8.358372338593819e-07, "loss": 0.007838691584765911, "step": 1129 }, { "epoch": 0.27327690447400244, "grad_norm": 2.4306385724064947, "learning_rate": 8.355528156770465e-07, "loss": 0.021251877769827843, "step": 1130 }, { "epoch": 0.27351874244256347, "grad_norm": 0.990770828667606, "learning_rate": 8.352681998081612e-07, "loss": 0.003595961956307292, "step": 1131 }, { "epoch": 0.27376058041112455, "grad_norm": 5.713926558264533, "learning_rate": 8.349833864204034e-07, "loss": 0.01043116208165884, "step": 1132 }, { "epoch": 0.2740024183796856, "grad_norm": 2.7767347929942368, "learning_rate": 8.346983756815676e-07, "loss": 0.016523662954568863, "step": 1133 }, { "epoch": 0.2742442563482467, "grad_norm": 5.471920255589119, "learning_rate": 8.344131677595639e-07, "loss": 0.017174184322357178, "step": 1134 }, { "epoch": 0.27448609431680776, "grad_norm": 3.638713662374391, "learning_rate": 8.34127762822419e-07, "loss": 0.010497295297682285, "step": 1135 }, { "epoch": 0.2747279322853688, "grad_norm": 2.8513324866208163, "learning_rate": 8.338421610382752e-07, "loss": 0.014612573198974133, "step": 1136 }, { "epoch": 0.2749697702539299, "grad_norm": 8.773056176425452, "learning_rate": 8.335563625753913e-07, "loss": 0.01753518544137478, "step": 1137 }, { "epoch": 0.2752116082224909, "grad_norm": 2.9607469820546357, "learning_rate": 8.332703676021416e-07, "loss": 0.006233035586774349, "step": 1138 }, { "epoch": 0.275453446191052, "grad_norm": 4.948736832515568, "learning_rate": 8.329841762870165e-07, "loss": 0.02688823640346527, "step": 1139 }, { "epoch": 0.2756952841596131, "grad_norm": 27.61126249747951, "learning_rate": 8.326977887986214e-07, "loss": 0.024400876834988594, "step": 1140 }, { "epoch": 0.2759371221281741, "grad_norm": 6.823356234562905, "learning_rate": 8.324112053056783e-07, "loss": 0.02995014749467373, "step": 1141 }, { "epoch": 0.2761789600967352, "grad_norm": 1.44018684100534, "learning_rate": 8.321244259770237e-07, "loss": 0.00679543474689126, "step": 1142 }, { "epoch": 0.27642079806529624, "grad_norm": 1.6509117829249775, "learning_rate": 8.318374509816102e-07, "loss": 0.011053415946662426, "step": 1143 }, { "epoch": 0.2766626360338573, "grad_norm": 2.4889172207359276, "learning_rate": 8.31550280488505e-07, "loss": 0.021143333986401558, "step": 1144 }, { "epoch": 0.27690447400241835, "grad_norm": 4.004987893669622, "learning_rate": 8.312629146668911e-07, "loss": 0.019728533923625946, "step": 1145 }, { "epoch": 0.27714631197097944, "grad_norm": 2.8340176466575646, "learning_rate": 8.309753536860664e-07, "loss": 0.029660066589713097, "step": 1146 }, { "epoch": 0.27738814993954053, "grad_norm": 2.1317657498245977, "learning_rate": 8.306875977154435e-07, "loss": 0.020073115825653076, "step": 1147 }, { "epoch": 0.27762998790810156, "grad_norm": 3.7738551572439136, "learning_rate": 8.3039964692455e-07, "loss": 0.0450403206050396, "step": 1148 }, { "epoch": 0.27787182587666265, "grad_norm": 7.1782985429287125, "learning_rate": 8.301115014830286e-07, "loss": 0.009700867347419262, "step": 1149 }, { "epoch": 0.2781136638452237, "grad_norm": 6.502192513696911, "learning_rate": 8.298231615606361e-07, "loss": 0.005448566749691963, "step": 1150 }, { "epoch": 0.27835550181378477, "grad_norm": 8.871838922534279, "learning_rate": 8.295346273272445e-07, "loss": 0.049016065895557404, "step": 1151 }, { "epoch": 0.27859733978234585, "grad_norm": 1.242727713321784, "learning_rate": 8.292458989528398e-07, "loss": 0.005588111933320761, "step": 1152 }, { "epoch": 0.2788391777509069, "grad_norm": 6.392672409770251, "learning_rate": 8.289569766075227e-07, "loss": 0.06454899162054062, "step": 1153 }, { "epoch": 0.279081015719468, "grad_norm": 2.10007388967958, "learning_rate": 8.286678604615076e-07, "loss": 0.011077611707150936, "step": 1154 }, { "epoch": 0.279322853688029, "grad_norm": 26.1959867916843, "learning_rate": 8.28378550685124e-07, "loss": 0.046103838831186295, "step": 1155 }, { "epoch": 0.2795646916565901, "grad_norm": 1.5499758222819604, "learning_rate": 8.280890474488146e-07, "loss": 0.017637433484196663, "step": 1156 }, { "epoch": 0.2798065296251511, "grad_norm": 7.988936280266139, "learning_rate": 8.277993509231366e-07, "loss": 0.020673861727118492, "step": 1157 }, { "epoch": 0.2800483675937122, "grad_norm": 1.0876097336069936, "learning_rate": 8.275094612787611e-07, "loss": 0.0030573694966733456, "step": 1158 }, { "epoch": 0.2802902055622733, "grad_norm": 1.3790138823446787, "learning_rate": 8.272193786864725e-07, "loss": 0.005894965026527643, "step": 1159 }, { "epoch": 0.28053204353083433, "grad_norm": 7.031085044869187, "learning_rate": 8.269291033171692e-07, "loss": 0.014063261449337006, "step": 1160 }, { "epoch": 0.2807738814993954, "grad_norm": 4.105733202804526, "learning_rate": 8.266386353418633e-07, "loss": 0.03135467320680618, "step": 1161 }, { "epoch": 0.28101571946795645, "grad_norm": 2.097582090888825, "learning_rate": 8.263479749316799e-07, "loss": 0.01710064709186554, "step": 1162 }, { "epoch": 0.28125755743651754, "grad_norm": 1.485190862525185, "learning_rate": 8.260571222578583e-07, "loss": 0.01316691655665636, "step": 1163 }, { "epoch": 0.2814993954050786, "grad_norm": 4.1216172537498625, "learning_rate": 8.257660774917501e-07, "loss": 0.024812057614326477, "step": 1164 }, { "epoch": 0.28174123337363965, "grad_norm": 2.740379454059501, "learning_rate": 8.254748408048206e-07, "loss": 0.007421509828418493, "step": 1165 }, { "epoch": 0.28198307134220074, "grad_norm": 11.716378008070553, "learning_rate": 8.25183412368648e-07, "loss": 0.009088258258998394, "step": 1166 }, { "epoch": 0.2822249093107618, "grad_norm": 4.516565365499813, "learning_rate": 8.248917923549239e-07, "loss": 0.0385836623609066, "step": 1167 }, { "epoch": 0.28246674727932286, "grad_norm": 3.610650544541896, "learning_rate": 8.245999809354521e-07, "loss": 0.012490455992519855, "step": 1168 }, { "epoch": 0.2827085852478839, "grad_norm": 7.39033739478866, "learning_rate": 8.243079782821496e-07, "loss": 0.005155600141733885, "step": 1169 }, { "epoch": 0.282950423216445, "grad_norm": 2.5959276634930726, "learning_rate": 8.240157845670459e-07, "loss": 0.016192618757486343, "step": 1170 }, { "epoch": 0.28319226118500607, "grad_norm": 5.746223481586789, "learning_rate": 8.237233999622831e-07, "loss": 0.058168888092041016, "step": 1171 }, { "epoch": 0.2834340991535671, "grad_norm": 2.3571612859567113, "learning_rate": 8.234308246401159e-07, "loss": 0.01073279045522213, "step": 1172 }, { "epoch": 0.2836759371221282, "grad_norm": 4.316200663413216, "learning_rate": 8.231380587729109e-07, "loss": 0.02144620195031166, "step": 1173 }, { "epoch": 0.2839177750906892, "grad_norm": 6.564903219378214, "learning_rate": 8.228451025331475e-07, "loss": 0.02324174903333187, "step": 1174 }, { "epoch": 0.2841596130592503, "grad_norm": 19.083861796737487, "learning_rate": 8.225519560934171e-07, "loss": 0.02455965243279934, "step": 1175 }, { "epoch": 0.2844014510278114, "grad_norm": 12.28333416402207, "learning_rate": 8.22258619626423e-07, "loss": 0.01761504076421261, "step": 1176 }, { "epoch": 0.2846432889963724, "grad_norm": 6.209561125266415, "learning_rate": 8.219650933049804e-07, "loss": 0.006885793060064316, "step": 1177 }, { "epoch": 0.2848851269649335, "grad_norm": 6.849727264921907, "learning_rate": 8.216713773020169e-07, "loss": 0.015291281044483185, "step": 1178 }, { "epoch": 0.28512696493349454, "grad_norm": 3.5789352769120426, "learning_rate": 8.21377471790571e-07, "loss": 0.012369215488433838, "step": 1179 }, { "epoch": 0.28536880290205563, "grad_norm": 22.48083629915069, "learning_rate": 8.210833769437934e-07, "loss": 0.048770368099212646, "step": 1180 }, { "epoch": 0.28561064087061666, "grad_norm": 3.4993164162898256, "learning_rate": 8.207890929349465e-07, "loss": 0.013881337828934193, "step": 1181 }, { "epoch": 0.28585247883917775, "grad_norm": 3.2646168371179893, "learning_rate": 8.204946199374034e-07, "loss": 0.030780255794525146, "step": 1182 }, { "epoch": 0.28609431680773884, "grad_norm": 2.73452283840813, "learning_rate": 8.201999581246496e-07, "loss": 0.016695739701390266, "step": 1183 }, { "epoch": 0.28633615477629987, "grad_norm": 4.580389137991269, "learning_rate": 8.199051076702808e-07, "loss": 0.015613299794495106, "step": 1184 }, { "epoch": 0.28657799274486095, "grad_norm": 2.6541639938444592, "learning_rate": 8.196100687480042e-07, "loss": 0.029403293505311012, "step": 1185 }, { "epoch": 0.286819830713422, "grad_norm": 2.3376065984322647, "learning_rate": 8.193148415316384e-07, "loss": 0.015310831367969513, "step": 1186 }, { "epoch": 0.2870616686819831, "grad_norm": 2.7359212496541585, "learning_rate": 8.190194261951128e-07, "loss": 0.016647974029183388, "step": 1187 }, { "epoch": 0.28730350665054416, "grad_norm": 11.468895359359784, "learning_rate": 8.187238229124671e-07, "loss": 0.02797735296189785, "step": 1188 }, { "epoch": 0.2875453446191052, "grad_norm": 1.3119400150732652, "learning_rate": 8.184280318578522e-07, "loss": 0.005063540767878294, "step": 1189 }, { "epoch": 0.2877871825876663, "grad_norm": 0.6477118170180913, "learning_rate": 8.181320532055298e-07, "loss": 0.003973125945776701, "step": 1190 }, { "epoch": 0.2880290205562273, "grad_norm": 1.7659436763900669, "learning_rate": 8.178358871298716e-07, "loss": 0.030795935541391373, "step": 1191 }, { "epoch": 0.2882708585247884, "grad_norm": 1.552123476373154, "learning_rate": 8.175395338053599e-07, "loss": 0.010729352943599224, "step": 1192 }, { "epoch": 0.28851269649334943, "grad_norm": 1.9637897398503437, "learning_rate": 8.172429934065877e-07, "loss": 0.01992015354335308, "step": 1193 }, { "epoch": 0.2887545344619105, "grad_norm": 2.8431540514874496, "learning_rate": 8.169462661082573e-07, "loss": 0.041662365198135376, "step": 1194 }, { "epoch": 0.2889963724304716, "grad_norm": 34.726730670938316, "learning_rate": 8.166493520851825e-07, "loss": 0.020636258646845818, "step": 1195 }, { "epoch": 0.28923821039903264, "grad_norm": 0.9246966557738614, "learning_rate": 8.163522515122857e-07, "loss": 0.008195842616260052, "step": 1196 }, { "epoch": 0.2894800483675937, "grad_norm": 0.7612665750632744, "learning_rate": 8.160549645646001e-07, "loss": 0.005069775972515345, "step": 1197 }, { "epoch": 0.28972188633615475, "grad_norm": 8.516815869455616, "learning_rate": 8.157574914172683e-07, "loss": 0.02412748895585537, "step": 1198 }, { "epoch": 0.28996372430471584, "grad_norm": 5.192172222625709, "learning_rate": 8.154598322455428e-07, "loss": 0.06442272663116455, "step": 1199 }, { "epoch": 0.29020556227327693, "grad_norm": 2.523050232376882, "learning_rate": 8.151619872247855e-07, "loss": 0.017779530957341194, "step": 1200 }, { "epoch": 0.29044740024183796, "grad_norm": 2.9470427684010065, "learning_rate": 8.148639565304679e-07, "loss": 0.03142794221639633, "step": 1201 }, { "epoch": 0.29068923821039905, "grad_norm": 1.9472200985875099, "learning_rate": 8.145657403381709e-07, "loss": 0.014949159696698189, "step": 1202 }, { "epoch": 0.2909310761789601, "grad_norm": 2.5820331517176993, "learning_rate": 8.142673388235849e-07, "loss": 0.019605727866292, "step": 1203 }, { "epoch": 0.29117291414752117, "grad_norm": 1.5838196186448696, "learning_rate": 8.139687521625089e-07, "loss": 0.02760450541973114, "step": 1204 }, { "epoch": 0.2914147521160822, "grad_norm": 2.1692136164074194, "learning_rate": 8.136699805308515e-07, "loss": 0.00955783762037754, "step": 1205 }, { "epoch": 0.2916565900846433, "grad_norm": 1.1144218213564072, "learning_rate": 8.133710241046301e-07, "loss": 0.0033802760299295187, "step": 1206 }, { "epoch": 0.2918984280532044, "grad_norm": 2.1195858743514284, "learning_rate": 8.130718830599708e-07, "loss": 0.005764669273048639, "step": 1207 }, { "epoch": 0.2921402660217654, "grad_norm": 2.6461097105896707, "learning_rate": 8.12772557573109e-07, "loss": 0.015863219276070595, "step": 1208 }, { "epoch": 0.2923821039903265, "grad_norm": 3.781707754361775, "learning_rate": 8.124730478203881e-07, "loss": 0.010230970568954945, "step": 1209 }, { "epoch": 0.2926239419588875, "grad_norm": 7.976981206590927, "learning_rate": 8.121733539782603e-07, "loss": 0.01374492235481739, "step": 1210 }, { "epoch": 0.2928657799274486, "grad_norm": 1.8659193156299543, "learning_rate": 8.118734762232864e-07, "loss": 0.013011514209210873, "step": 1211 }, { "epoch": 0.2931076178960097, "grad_norm": 1.963913750385719, "learning_rate": 8.115734147321356e-07, "loss": 0.007561424281448126, "step": 1212 }, { "epoch": 0.29334945586457073, "grad_norm": 2.692962869994427, "learning_rate": 8.112731696815849e-07, "loss": 0.025827307254076004, "step": 1213 }, { "epoch": 0.2935912938331318, "grad_norm": 3.262675691856723, "learning_rate": 8.1097274124852e-07, "loss": 0.02309076115489006, "step": 1214 }, { "epoch": 0.29383313180169285, "grad_norm": 2.5431540504656884, "learning_rate": 8.106721296099344e-07, "loss": 0.01807442121207714, "step": 1215 }, { "epoch": 0.29407496977025394, "grad_norm": 1.2682521407253864, "learning_rate": 8.103713349429292e-07, "loss": 0.009617877192795277, "step": 1216 }, { "epoch": 0.29431680773881497, "grad_norm": 2.811205632129044, "learning_rate": 8.10070357424714e-07, "loss": 0.010294451378285885, "step": 1217 }, { "epoch": 0.29455864570737605, "grad_norm": 1.5380944553041758, "learning_rate": 8.097691972326055e-07, "loss": 0.016931751742959023, "step": 1218 }, { "epoch": 0.29480048367593714, "grad_norm": 11.40449284324151, "learning_rate": 8.094678545440284e-07, "loss": 0.009399942122399807, "step": 1219 }, { "epoch": 0.2950423216444982, "grad_norm": 2.9832780571534148, "learning_rate": 8.09166329536515e-07, "loss": 0.012795144692063332, "step": 1220 }, { "epoch": 0.29528415961305926, "grad_norm": 1.6150203755433914, "learning_rate": 8.088646223877044e-07, "loss": 0.009483342058956623, "step": 1221 }, { "epoch": 0.2955259975816203, "grad_norm": 7.172479253737797, "learning_rate": 8.085627332753436e-07, "loss": 0.008616764098405838, "step": 1222 }, { "epoch": 0.2957678355501814, "grad_norm": 3.8539864921605527, "learning_rate": 8.082606623772868e-07, "loss": 0.03874536603689194, "step": 1223 }, { "epoch": 0.29600967351874247, "grad_norm": 3.1272984252149816, "learning_rate": 8.079584098714949e-07, "loss": 0.010154720395803452, "step": 1224 }, { "epoch": 0.2962515114873035, "grad_norm": 2.6174975969365395, "learning_rate": 8.076559759360362e-07, "loss": 0.008825707249343395, "step": 1225 }, { "epoch": 0.2964933494558646, "grad_norm": 7.597526975260206, "learning_rate": 8.073533607490853e-07, "loss": 0.02311410941183567, "step": 1226 }, { "epoch": 0.2967351874244256, "grad_norm": 7.680465092846154, "learning_rate": 8.070505644889245e-07, "loss": 0.018639184534549713, "step": 1227 }, { "epoch": 0.2969770253929867, "grad_norm": 4.151471111432867, "learning_rate": 8.067475873339419e-07, "loss": 0.023444315418601036, "step": 1228 }, { "epoch": 0.29721886336154774, "grad_norm": 2.0192481714998456, "learning_rate": 8.064444294626327e-07, "loss": 0.011197858490049839, "step": 1229 }, { "epoch": 0.2974607013301088, "grad_norm": 2.4332214643960737, "learning_rate": 8.061410910535981e-07, "loss": 0.015443935059010983, "step": 1230 }, { "epoch": 0.2977025392986699, "grad_norm": 2.6702847381732484, "learning_rate": 8.058375722855463e-07, "loss": 0.01438971608877182, "step": 1231 }, { "epoch": 0.29794437726723094, "grad_norm": 4.541483142257224, "learning_rate": 8.055338733372914e-07, "loss": 0.027957770973443985, "step": 1232 }, { "epoch": 0.29818621523579203, "grad_norm": 2.370443749635524, "learning_rate": 8.052299943877534e-07, "loss": 0.016515083611011505, "step": 1233 }, { "epoch": 0.29842805320435306, "grad_norm": 1.8624100841946059, "learning_rate": 8.049259356159588e-07, "loss": 0.007335782982409, "step": 1234 }, { "epoch": 0.29866989117291415, "grad_norm": 2.004299792076631, "learning_rate": 8.046216972010398e-07, "loss": 0.004618487786501646, "step": 1235 }, { "epoch": 0.29891172914147524, "grad_norm": 1.0032636057167608, "learning_rate": 8.043172793222344e-07, "loss": 0.0043844496831297874, "step": 1236 }, { "epoch": 0.29915356711003627, "grad_norm": 10.63121143728974, "learning_rate": 8.040126821588865e-07, "loss": 0.016143925487995148, "step": 1237 }, { "epoch": 0.29939540507859735, "grad_norm": 3.494885064578341, "learning_rate": 8.037079058904456e-07, "loss": 0.010835436172783375, "step": 1238 }, { "epoch": 0.2996372430471584, "grad_norm": 0.8689936753782352, "learning_rate": 8.034029506964666e-07, "loss": 0.0017616910627111793, "step": 1239 }, { "epoch": 0.2998790810157195, "grad_norm": 4.7364626719933804, "learning_rate": 8.030978167566099e-07, "loss": 0.013968029990792274, "step": 1240 }, { "epoch": 0.3001209189842805, "grad_norm": 3.3222924626701418, "learning_rate": 8.027925042506407e-07, "loss": 0.013182911090552807, "step": 1241 }, { "epoch": 0.3003627569528416, "grad_norm": 3.6980189261742527, "learning_rate": 8.024870133584305e-07, "loss": 0.01698405295610428, "step": 1242 }, { "epoch": 0.3006045949214027, "grad_norm": 1.3110441704886722, "learning_rate": 8.021813442599552e-07, "loss": 0.031204137951135635, "step": 1243 }, { "epoch": 0.3008464328899637, "grad_norm": 5.696963352643144, "learning_rate": 8.018754971352955e-07, "loss": 0.02950156293809414, "step": 1244 }, { "epoch": 0.3010882708585248, "grad_norm": 6.683027044216851, "learning_rate": 8.015694721646371e-07, "loss": 0.046460509300231934, "step": 1245 }, { "epoch": 0.30133010882708583, "grad_norm": 3.0672295264465324, "learning_rate": 8.01263269528271e-07, "loss": 0.013416960835456848, "step": 1246 }, { "epoch": 0.3015719467956469, "grad_norm": 2.5735418309903726, "learning_rate": 8.009568894065922e-07, "loss": 0.027108145877718925, "step": 1247 }, { "epoch": 0.301813784764208, "grad_norm": 5.115079681174397, "learning_rate": 8.006503319801007e-07, "loss": 0.007508100010454655, "step": 1248 }, { "epoch": 0.30205562273276904, "grad_norm": 2.303186944171613, "learning_rate": 8.003435974294007e-07, "loss": 0.01922021619975567, "step": 1249 }, { "epoch": 0.3022974607013301, "grad_norm": 50.9004517185539, "learning_rate": 8.000366859352008e-07, "loss": 0.024732818827033043, "step": 1250 }, { "epoch": 0.30253929866989115, "grad_norm": 4.212899462800772, "learning_rate": 7.99729597678314e-07, "loss": 0.024084944278001785, "step": 1251 }, { "epoch": 0.30278113663845224, "grad_norm": 4.043447168871215, "learning_rate": 7.994223328396572e-07, "loss": 0.017770281061530113, "step": 1252 }, { "epoch": 0.3030229746070133, "grad_norm": 5.1281559816581295, "learning_rate": 7.991148916002517e-07, "loss": 0.020978569984436035, "step": 1253 }, { "epoch": 0.30326481257557436, "grad_norm": 7.992152437205583, "learning_rate": 7.988072741412222e-07, "loss": 0.01983039453625679, "step": 1254 }, { "epoch": 0.30350665054413545, "grad_norm": 2.590420451451821, "learning_rate": 7.984994806437976e-07, "loss": 0.013816729187965393, "step": 1255 }, { "epoch": 0.3037484885126965, "grad_norm": 2.3601969967193726, "learning_rate": 7.981915112893107e-07, "loss": 0.013097424991428852, "step": 1256 }, { "epoch": 0.30399032648125757, "grad_norm": 1.6887002593435179, "learning_rate": 7.978833662591972e-07, "loss": 0.012841167859733105, "step": 1257 }, { "epoch": 0.3042321644498186, "grad_norm": 1.450015760980142, "learning_rate": 7.975750457349967e-07, "loss": 0.012992605566978455, "step": 1258 }, { "epoch": 0.3044740024183797, "grad_norm": 2.330345646311456, "learning_rate": 7.972665498983526e-07, "loss": 0.008896143175661564, "step": 1259 }, { "epoch": 0.3047158403869408, "grad_norm": 2.922511291588568, "learning_rate": 7.96957878931011e-07, "loss": 0.004657222423702478, "step": 1260 }, { "epoch": 0.3049576783555018, "grad_norm": 2.4274382931639287, "learning_rate": 7.966490330148211e-07, "loss": 0.01744846999645233, "step": 1261 }, { "epoch": 0.3051995163240629, "grad_norm": 20.14442300330125, "learning_rate": 7.963400123317358e-07, "loss": 0.006683265324681997, "step": 1262 }, { "epoch": 0.3054413542926239, "grad_norm": 9.320925193528483, "learning_rate": 7.960308170638103e-07, "loss": 0.008280586451292038, "step": 1263 }, { "epoch": 0.305683192261185, "grad_norm": 2.744386283667109, "learning_rate": 7.957214473932032e-07, "loss": 0.008875984698534012, "step": 1264 }, { "epoch": 0.3059250302297461, "grad_norm": 2.1018719099534433, "learning_rate": 7.954119035021754e-07, "loss": 0.010309344157576561, "step": 1265 }, { "epoch": 0.30616686819830713, "grad_norm": 1.8512569113194555, "learning_rate": 7.951021855730907e-07, "loss": 0.010412185452878475, "step": 1266 }, { "epoch": 0.3064087061668682, "grad_norm": 3.088094980341554, "learning_rate": 7.947922937884154e-07, "loss": 0.01065523736178875, "step": 1267 }, { "epoch": 0.30665054413542925, "grad_norm": 11.564303806295506, "learning_rate": 7.944822283307182e-07, "loss": 0.033017776906490326, "step": 1268 }, { "epoch": 0.30689238210399034, "grad_norm": 4.9536915020287084, "learning_rate": 7.9417198938267e-07, "loss": 0.016681084409356117, "step": 1269 }, { "epoch": 0.30713422007255137, "grad_norm": 5.010695369393982, "learning_rate": 7.93861577127044e-07, "loss": 0.012210358865559101, "step": 1270 }, { "epoch": 0.30737605804111245, "grad_norm": 1.170222556090013, "learning_rate": 7.935509917467158e-07, "loss": 0.004962825682014227, "step": 1271 }, { "epoch": 0.30761789600967354, "grad_norm": 3.4052288114476443, "learning_rate": 7.932402334246624e-07, "loss": 0.016338277608156204, "step": 1272 }, { "epoch": 0.3078597339782346, "grad_norm": 2.122069472224564, "learning_rate": 7.929293023439632e-07, "loss": 0.005152618046849966, "step": 1273 }, { "epoch": 0.30810157194679566, "grad_norm": 2.026787216089439, "learning_rate": 7.92618198687799e-07, "loss": 0.059396397322416306, "step": 1274 }, { "epoch": 0.3083434099153567, "grad_norm": 9.621922802878672, "learning_rate": 7.923069226394525e-07, "loss": 0.005775091238319874, "step": 1275 }, { "epoch": 0.3085852478839178, "grad_norm": 19.106864392826818, "learning_rate": 7.919954743823081e-07, "loss": 0.015401679091155529, "step": 1276 }, { "epoch": 0.30882708585247887, "grad_norm": 3.2058711187634183, "learning_rate": 7.916838540998512e-07, "loss": 0.013060635887086391, "step": 1277 }, { "epoch": 0.3090689238210399, "grad_norm": 5.184639423379562, "learning_rate": 7.913720619756688e-07, "loss": 0.0062162550166249275, "step": 1278 }, { "epoch": 0.309310761789601, "grad_norm": 8.282045791181076, "learning_rate": 7.910600981934494e-07, "loss": 0.022461170330643654, "step": 1279 }, { "epoch": 0.309552599758162, "grad_norm": 3.1191799446690687, "learning_rate": 7.907479629369823e-07, "loss": 0.011023943312466145, "step": 1280 }, { "epoch": 0.3097944377267231, "grad_norm": 1.7895123306596772, "learning_rate": 7.904356563901577e-07, "loss": 0.010283281095325947, "step": 1281 }, { "epoch": 0.31003627569528414, "grad_norm": 1.4101545654459322, "learning_rate": 7.901231787369672e-07, "loss": 0.015210716985166073, "step": 1282 }, { "epoch": 0.3102781136638452, "grad_norm": 9.65068406184299, "learning_rate": 7.898105301615028e-07, "loss": 0.03314661234617233, "step": 1283 }, { "epoch": 0.3105199516324063, "grad_norm": 6.360276476971433, "learning_rate": 7.894977108479572e-07, "loss": 0.0046968418173491955, "step": 1284 }, { "epoch": 0.31076178960096734, "grad_norm": 1.5362847842561178, "learning_rate": 7.891847209806242e-07, "loss": 0.0073564439080655575, "step": 1285 }, { "epoch": 0.31100362756952843, "grad_norm": 1.4334851443046142, "learning_rate": 7.88871560743897e-07, "loss": 0.008497885428369045, "step": 1286 }, { "epoch": 0.31124546553808946, "grad_norm": 4.2746748777309564, "learning_rate": 7.885582303222705e-07, "loss": 0.016627585515379906, "step": 1287 }, { "epoch": 0.31148730350665055, "grad_norm": 3.842460060689104, "learning_rate": 7.882447299003388e-07, "loss": 0.012038460932672024, "step": 1288 }, { "epoch": 0.31172914147521164, "grad_norm": 6.488757509078934, "learning_rate": 7.879310596627968e-07, "loss": 0.022565701976418495, "step": 1289 }, { "epoch": 0.31197097944377267, "grad_norm": 2.4778261687336016, "learning_rate": 7.876172197944391e-07, "loss": 0.03653290495276451, "step": 1290 }, { "epoch": 0.31221281741233375, "grad_norm": 7.284103158285164, "learning_rate": 7.873032104801605e-07, "loss": 0.046658288687467575, "step": 1291 }, { "epoch": 0.3124546553808948, "grad_norm": 1.96449389675638, "learning_rate": 7.869890319049555e-07, "loss": 0.024757161736488342, "step": 1292 }, { "epoch": 0.3126964933494559, "grad_norm": 21.989720737508584, "learning_rate": 7.86674684253918e-07, "loss": 0.027985742315649986, "step": 1293 }, { "epoch": 0.3129383313180169, "grad_norm": 3.19352332377942, "learning_rate": 7.863601677122419e-07, "loss": 0.022157488390803337, "step": 1294 }, { "epoch": 0.313180169286578, "grad_norm": 4.747344081634697, "learning_rate": 7.860454824652208e-07, "loss": 0.023970596492290497, "step": 1295 }, { "epoch": 0.3134220072551391, "grad_norm": 1.8399943824692995, "learning_rate": 7.85730628698247e-07, "loss": 0.008593042381107807, "step": 1296 }, { "epoch": 0.3136638452237001, "grad_norm": 4.449780684793254, "learning_rate": 7.854156065968127e-07, "loss": 0.014531642198562622, "step": 1297 }, { "epoch": 0.3139056831922612, "grad_norm": 3.264921200571362, "learning_rate": 7.85100416346509e-07, "loss": 0.018031904473900795, "step": 1298 }, { "epoch": 0.31414752116082223, "grad_norm": 1.7905408283537465, "learning_rate": 7.847850581330262e-07, "loss": 0.008832322433590889, "step": 1299 }, { "epoch": 0.3143893591293833, "grad_norm": 1.9443607237054288, "learning_rate": 7.844695321421534e-07, "loss": 0.008665508590638638, "step": 1300 }, { "epoch": 0.3146311970979444, "grad_norm": 1.1481413593034888, "learning_rate": 7.841538385597783e-07, "loss": 0.011701459065079689, "step": 1301 }, { "epoch": 0.31487303506650544, "grad_norm": 1.8404824211802413, "learning_rate": 7.838379775718883e-07, "loss": 0.029103362932801247, "step": 1302 }, { "epoch": 0.3151148730350665, "grad_norm": 1.2455457882072785, "learning_rate": 7.835219493645683e-07, "loss": 0.006307606585323811, "step": 1303 }, { "epoch": 0.31535671100362755, "grad_norm": 2.4551699819974018, "learning_rate": 7.832057541240022e-07, "loss": 0.007977108471095562, "step": 1304 }, { "epoch": 0.31559854897218864, "grad_norm": 3.5349575430131877, "learning_rate": 7.828893920364723e-07, "loss": 0.011890890076756477, "step": 1305 }, { "epoch": 0.3158403869407497, "grad_norm": 1.9934173163021571, "learning_rate": 7.825728632883595e-07, "loss": 0.008108674548566341, "step": 1306 }, { "epoch": 0.31608222490931076, "grad_norm": 3.2130096517635227, "learning_rate": 7.822561680661421e-07, "loss": 0.010842596180737019, "step": 1307 }, { "epoch": 0.31632406287787185, "grad_norm": 2.1979135217734864, "learning_rate": 7.819393065563973e-07, "loss": 0.018828600645065308, "step": 1308 }, { "epoch": 0.3165659008464329, "grad_norm": 10.006560161029075, "learning_rate": 7.816222789457997e-07, "loss": 0.03485524281859398, "step": 1309 }, { "epoch": 0.31680773881499397, "grad_norm": 1.4473659951254472, "learning_rate": 7.813050854211221e-07, "loss": 0.024257998913526535, "step": 1310 }, { "epoch": 0.317049576783555, "grad_norm": 1.5158086424383157, "learning_rate": 7.809877261692348e-07, "loss": 0.017905935645103455, "step": 1311 }, { "epoch": 0.3172914147521161, "grad_norm": 3.791606567216987, "learning_rate": 7.806702013771061e-07, "loss": 0.01698991097509861, "step": 1312 }, { "epoch": 0.3175332527206772, "grad_norm": 0.8976170380894505, "learning_rate": 7.803525112318011e-07, "loss": 0.010542910546064377, "step": 1313 }, { "epoch": 0.3177750906892382, "grad_norm": 1.8720311502106015, "learning_rate": 7.800346559204833e-07, "loss": 0.012788956053555012, "step": 1314 }, { "epoch": 0.3180169286577993, "grad_norm": 1.9279042457594682, "learning_rate": 7.797166356304127e-07, "loss": 0.017697783187031746, "step": 1315 }, { "epoch": 0.3182587666263603, "grad_norm": 2.8957889884269017, "learning_rate": 7.793984505489468e-07, "loss": 0.0448978990316391, "step": 1316 }, { "epoch": 0.3185006045949214, "grad_norm": 1.0844615454310023, "learning_rate": 7.790801008635404e-07, "loss": 0.003082212759181857, "step": 1317 }, { "epoch": 0.31874244256348244, "grad_norm": 2.723599172022723, "learning_rate": 7.787615867617447e-07, "loss": 0.016581807285547256, "step": 1318 }, { "epoch": 0.31898428053204353, "grad_norm": 2.3472975484894936, "learning_rate": 7.784429084312085e-07, "loss": 0.006419782526791096, "step": 1319 }, { "epoch": 0.3192261185006046, "grad_norm": 3.069857678786319, "learning_rate": 7.781240660596766e-07, "loss": 0.013557967729866505, "step": 1320 }, { "epoch": 0.31946795646916565, "grad_norm": 23.83322972318915, "learning_rate": 7.778050598349911e-07, "loss": 0.013558143749833107, "step": 1321 }, { "epoch": 0.31970979443772674, "grad_norm": 2.877666951202255, "learning_rate": 7.774858899450903e-07, "loss": 0.014265838079154491, "step": 1322 }, { "epoch": 0.31995163240628777, "grad_norm": 2.2445979463795327, "learning_rate": 7.771665565780088e-07, "loss": 0.014116495847702026, "step": 1323 }, { "epoch": 0.32019347037484885, "grad_norm": 3.5951129263724617, "learning_rate": 7.768470599218777e-07, "loss": 0.014860107563436031, "step": 1324 }, { "epoch": 0.32043530834340994, "grad_norm": 1.6035714427147663, "learning_rate": 7.765274001649245e-07, "loss": 0.007832847535610199, "step": 1325 }, { "epoch": 0.320677146311971, "grad_norm": 7.127192605619788, "learning_rate": 7.762075774954723e-07, "loss": 0.021748384460806847, "step": 1326 }, { "epoch": 0.32091898428053206, "grad_norm": 1.4209958783745638, "learning_rate": 7.758875921019406e-07, "loss": 0.008590337820351124, "step": 1327 }, { "epoch": 0.3211608222490931, "grad_norm": 5.652616852842849, "learning_rate": 7.755674441728444e-07, "loss": 0.01384627353399992, "step": 1328 }, { "epoch": 0.3214026602176542, "grad_norm": 1.9779150120918618, "learning_rate": 7.75247133896795e-07, "loss": 0.011640580371022224, "step": 1329 }, { "epoch": 0.3216444981862152, "grad_norm": 1.598284205154043, "learning_rate": 7.749266614624988e-07, "loss": 0.010935445316135883, "step": 1330 }, { "epoch": 0.3218863361547763, "grad_norm": 2.8852309461596075, "learning_rate": 7.746060270587577e-07, "loss": 0.0205752681940794, "step": 1331 }, { "epoch": 0.3221281741233374, "grad_norm": 8.970761589716034, "learning_rate": 7.742852308744699e-07, "loss": 0.014910170808434486, "step": 1332 }, { "epoch": 0.3223700120918984, "grad_norm": 4.997626946593833, "learning_rate": 7.739642730986278e-07, "loss": 0.019525324925780296, "step": 1333 }, { "epoch": 0.3226118500604595, "grad_norm": 4.671082894663025, "learning_rate": 7.736431539203194e-07, "loss": 0.010776530019938946, "step": 1334 }, { "epoch": 0.32285368802902054, "grad_norm": 4.720513943028035, "learning_rate": 7.733218735287281e-07, "loss": 0.004823227412998676, "step": 1335 }, { "epoch": 0.3230955259975816, "grad_norm": 5.947091087672842, "learning_rate": 7.730004321131322e-07, "loss": 0.029360106214880943, "step": 1336 }, { "epoch": 0.3233373639661427, "grad_norm": 2.0767174522358847, "learning_rate": 7.726788298629042e-07, "loss": 0.012040652334690094, "step": 1337 }, { "epoch": 0.32357920193470374, "grad_norm": 2.302821626128724, "learning_rate": 7.723570669675124e-07, "loss": 0.016249703243374825, "step": 1338 }, { "epoch": 0.32382103990326483, "grad_norm": 1.2277831421119259, "learning_rate": 7.720351436165185e-07, "loss": 0.003490907372906804, "step": 1339 }, { "epoch": 0.32406287787182586, "grad_norm": 2.600236295893817, "learning_rate": 7.717130599995803e-07, "loss": 0.022905787453055382, "step": 1340 }, { "epoch": 0.32430471584038695, "grad_norm": 1.0167562500181186, "learning_rate": 7.713908163064484e-07, "loss": 0.01372973620891571, "step": 1341 }, { "epoch": 0.324546553808948, "grad_norm": 10.974953736530097, "learning_rate": 7.710684127269689e-07, "loss": 0.0572776161134243, "step": 1342 }, { "epoch": 0.32478839177750907, "grad_norm": 3.1685485757778125, "learning_rate": 7.707458494510815e-07, "loss": 0.012425360269844532, "step": 1343 }, { "epoch": 0.32503022974607015, "grad_norm": 1.2997574479777774, "learning_rate": 7.704231266688203e-07, "loss": 0.004858993459492922, "step": 1344 }, { "epoch": 0.3252720677146312, "grad_norm": 5.101263386477355, "learning_rate": 7.701002445703129e-07, "loss": 0.016031762585043907, "step": 1345 }, { "epoch": 0.3255139056831923, "grad_norm": 1.0275103548346973, "learning_rate": 7.697772033457812e-07, "loss": 0.008486787788569927, "step": 1346 }, { "epoch": 0.3257557436517533, "grad_norm": 2.676939156809351, "learning_rate": 7.694540031855408e-07, "loss": 0.02549220435321331, "step": 1347 }, { "epoch": 0.3259975816203144, "grad_norm": 1.8046399892916414, "learning_rate": 7.691306442800006e-07, "loss": 0.029893556609749794, "step": 1348 }, { "epoch": 0.3262394195888755, "grad_norm": 2.8660558178674456, "learning_rate": 7.688071268196636e-07, "loss": 0.005847379565238953, "step": 1349 }, { "epoch": 0.3264812575574365, "grad_norm": 2.4330593294901846, "learning_rate": 7.684834509951255e-07, "loss": 0.018998591229319572, "step": 1350 }, { "epoch": 0.3267230955259976, "grad_norm": 8.428141921629287, "learning_rate": 7.681596169970761e-07, "loss": 0.013906656764447689, "step": 1351 }, { "epoch": 0.32696493349455863, "grad_norm": 3.8204209911955678, "learning_rate": 7.678356250162976e-07, "loss": 0.013845192268490791, "step": 1352 }, { "epoch": 0.3272067714631197, "grad_norm": 6.207768759359813, "learning_rate": 7.67511475243666e-07, "loss": 0.0946844145655632, "step": 1353 }, { "epoch": 0.32744860943168075, "grad_norm": 3.626023432171727, "learning_rate": 7.671871678701495e-07, "loss": 0.01133557129651308, "step": 1354 }, { "epoch": 0.32769044740024184, "grad_norm": 1.9174666682354295, "learning_rate": 7.668627030868098e-07, "loss": 0.007394711021333933, "step": 1355 }, { "epoch": 0.3279322853688029, "grad_norm": 1.2335394032588143, "learning_rate": 7.66538081084801e-07, "loss": 0.009655609726905823, "step": 1356 }, { "epoch": 0.32817412333736395, "grad_norm": 2.50507156396789, "learning_rate": 7.662133020553698e-07, "loss": 0.012717822566628456, "step": 1357 }, { "epoch": 0.32841596130592504, "grad_norm": 2.2472619604235446, "learning_rate": 7.658883661898556e-07, "loss": 0.009320741519331932, "step": 1358 }, { "epoch": 0.3286577992744861, "grad_norm": 2.2972837991830337, "learning_rate": 7.655632736796905e-07, "loss": 0.013644428923726082, "step": 1359 }, { "epoch": 0.32889963724304716, "grad_norm": 2.1949817621020786, "learning_rate": 7.652380247163978e-07, "loss": 0.009278311394155025, "step": 1360 }, { "epoch": 0.32914147521160825, "grad_norm": 5.028182164271036, "learning_rate": 7.64912619491594e-07, "loss": 0.02425316348671913, "step": 1361 }, { "epoch": 0.3293833131801693, "grad_norm": 2.132969902627163, "learning_rate": 7.645870581969876e-07, "loss": 0.015952864661812782, "step": 1362 }, { "epoch": 0.32962515114873037, "grad_norm": 1.68233000974487, "learning_rate": 7.642613410243783e-07, "loss": 0.005950865335762501, "step": 1363 }, { "epoch": 0.3298669891172914, "grad_norm": 11.73946616663502, "learning_rate": 7.639354681656585e-07, "loss": 0.031377162784338, "step": 1364 }, { "epoch": 0.3301088270858525, "grad_norm": 1.9899205332025591, "learning_rate": 7.636094398128115e-07, "loss": 0.0074442848563194275, "step": 1365 }, { "epoch": 0.3303506650544135, "grad_norm": 2.048027246304259, "learning_rate": 7.63283256157913e-07, "loss": 0.018140485510230064, "step": 1366 }, { "epoch": 0.3305925030229746, "grad_norm": 1.5272130648251305, "learning_rate": 7.629569173931296e-07, "loss": 0.013962839730083942, "step": 1367 }, { "epoch": 0.3308343409915357, "grad_norm": 9.893230814249218, "learning_rate": 7.626304237107199e-07, "loss": 0.007510361261665821, "step": 1368 }, { "epoch": 0.3310761789600967, "grad_norm": 5.462117165483452, "learning_rate": 7.623037753030328e-07, "loss": 0.01533250417560339, "step": 1369 }, { "epoch": 0.3313180169286578, "grad_norm": 5.875693142384443, "learning_rate": 7.619769723625095e-07, "loss": 0.009749538265168667, "step": 1370 }, { "epoch": 0.33155985489721884, "grad_norm": 2.5583267757125823, "learning_rate": 7.616500150816815e-07, "loss": 0.010509542189538479, "step": 1371 }, { "epoch": 0.33180169286577993, "grad_norm": 7.979186959786023, "learning_rate": 7.61322903653171e-07, "loss": 0.02304808609187603, "step": 1372 }, { "epoch": 0.332043530834341, "grad_norm": 2.315953229962489, "learning_rate": 7.609956382696921e-07, "loss": 0.0075081586837768555, "step": 1373 }, { "epoch": 0.33228536880290205, "grad_norm": 10.28086008414578, "learning_rate": 7.606682191240483e-07, "loss": 0.02124006859958172, "step": 1374 }, { "epoch": 0.33252720677146314, "grad_norm": 2.3649892316166157, "learning_rate": 7.603406464091346e-07, "loss": 0.011302517727017403, "step": 1375 }, { "epoch": 0.33276904474002417, "grad_norm": 4.559750203426981, "learning_rate": 7.600129203179361e-07, "loss": 0.01865907944738865, "step": 1376 }, { "epoch": 0.33301088270858525, "grad_norm": 2.633475882515024, "learning_rate": 7.596850410435284e-07, "loss": 0.017103582620620728, "step": 1377 }, { "epoch": 0.3332527206771463, "grad_norm": 4.33572628911935, "learning_rate": 7.593570087790775e-07, "loss": 0.004823980387300253, "step": 1378 }, { "epoch": 0.3334945586457074, "grad_norm": 31.841262852328438, "learning_rate": 7.59028823717839e-07, "loss": 0.014127634465694427, "step": 1379 }, { "epoch": 0.33373639661426846, "grad_norm": 2.1004557003399205, "learning_rate": 7.587004860531587e-07, "loss": 0.008744359016418457, "step": 1380 }, { "epoch": 0.3339782345828295, "grad_norm": 14.564007596402641, "learning_rate": 7.583719959784729e-07, "loss": 0.030527383089065552, "step": 1381 }, { "epoch": 0.3342200725513906, "grad_norm": 4.962042560984755, "learning_rate": 7.580433536873068e-07, "loss": 0.021341709420084953, "step": 1382 }, { "epoch": 0.3344619105199516, "grad_norm": 3.0049516865909545, "learning_rate": 7.57714559373276e-07, "loss": 0.018445918336510658, "step": 1383 }, { "epoch": 0.3347037484885127, "grad_norm": 1.4949106434210289, "learning_rate": 7.573856132300851e-07, "loss": 0.007409972604364157, "step": 1384 }, { "epoch": 0.3349455864570738, "grad_norm": 2.387539194748717, "learning_rate": 7.570565154515287e-07, "loss": 0.015111877582967281, "step": 1385 }, { "epoch": 0.3351874244256348, "grad_norm": 2.23366644715615, "learning_rate": 7.567272662314903e-07, "loss": 0.0036815123166888952, "step": 1386 }, { "epoch": 0.3354292623941959, "grad_norm": 2.7520449343621296, "learning_rate": 7.563978657639429e-07, "loss": 0.021624112501740456, "step": 1387 }, { "epoch": 0.33567110036275694, "grad_norm": 8.992712622359123, "learning_rate": 7.560683142429482e-07, "loss": 0.033703215420246124, "step": 1388 }, { "epoch": 0.335912938331318, "grad_norm": 7.485151778881501, "learning_rate": 7.557386118626574e-07, "loss": 0.020085882395505905, "step": 1389 }, { "epoch": 0.33615477629987905, "grad_norm": 2.747108103365674, "learning_rate": 7.554087588173103e-07, "loss": 0.024598529562354088, "step": 1390 }, { "epoch": 0.33639661426844014, "grad_norm": 3.9542636608505197, "learning_rate": 7.550787553012353e-07, "loss": 0.012200740166008472, "step": 1391 }, { "epoch": 0.33663845223700123, "grad_norm": 8.81971419565205, "learning_rate": 7.5474860150885e-07, "loss": 0.013578832149505615, "step": 1392 }, { "epoch": 0.33688029020556226, "grad_norm": 1.4069318492976366, "learning_rate": 7.5441829763466e-07, "loss": 0.008460710756480694, "step": 1393 }, { "epoch": 0.33712212817412335, "grad_norm": 1.7408235535794139, "learning_rate": 7.540878438732594e-07, "loss": 0.009568092413246632, "step": 1394 }, { "epoch": 0.3373639661426844, "grad_norm": 5.075767594464229, "learning_rate": 7.53757240419331e-07, "loss": 0.011807812377810478, "step": 1395 }, { "epoch": 0.33760580411124547, "grad_norm": 6.899589982214357, "learning_rate": 7.534264874676453e-07, "loss": 0.013988162390887737, "step": 1396 }, { "epoch": 0.33784764207980655, "grad_norm": 2.2539549188896157, "learning_rate": 7.53095585213061e-07, "loss": 0.007429561112076044, "step": 1397 }, { "epoch": 0.3380894800483676, "grad_norm": 20.423129355480963, "learning_rate": 7.52764533850525e-07, "loss": 0.029422258958220482, "step": 1398 }, { "epoch": 0.3383313180169287, "grad_norm": 1.6581775811108472, "learning_rate": 7.524333335750716e-07, "loss": 0.01488127838820219, "step": 1399 }, { "epoch": 0.3385731559854897, "grad_norm": 17.779289555285036, "learning_rate": 7.521019845818235e-07, "loss": 0.020655421540141106, "step": 1400 }, { "epoch": 0.3388149939540508, "grad_norm": 2.1848786179390767, "learning_rate": 7.517704870659906e-07, "loss": 0.03628822788596153, "step": 1401 }, { "epoch": 0.3390568319226118, "grad_norm": 9.498720008975363, "learning_rate": 7.514388412228699e-07, "loss": 0.005977795924991369, "step": 1402 }, { "epoch": 0.3392986698911729, "grad_norm": 2.472487174214812, "learning_rate": 7.511070472478466e-07, "loss": 0.014567327685654163, "step": 1403 }, { "epoch": 0.339540507859734, "grad_norm": 2.671877831352542, "learning_rate": 7.507751053363927e-07, "loss": 0.003895907197147608, "step": 1404 }, { "epoch": 0.33978234582829503, "grad_norm": 5.061993373934299, "learning_rate": 7.504430156840673e-07, "loss": 0.026924407109618187, "step": 1405 }, { "epoch": 0.3400241837968561, "grad_norm": 0.9665082885968346, "learning_rate": 7.501107784865166e-07, "loss": 0.006993585731834173, "step": 1406 }, { "epoch": 0.34026602176541715, "grad_norm": 2.2057669959042325, "learning_rate": 7.497783939394742e-07, "loss": 0.013533077202737331, "step": 1407 }, { "epoch": 0.34050785973397824, "grad_norm": 2.7038330953626635, "learning_rate": 7.494458622387598e-07, "loss": 0.024984020739793777, "step": 1408 }, { "epoch": 0.3407496977025393, "grad_norm": 1.0821671806870081, "learning_rate": 7.491131835802802e-07, "loss": 0.0019755722023546696, "step": 1409 }, { "epoch": 0.34099153567110035, "grad_norm": 2.6264258771783546, "learning_rate": 7.487803581600285e-07, "loss": 0.0059576560743153095, "step": 1410 }, { "epoch": 0.34123337363966144, "grad_norm": 2.6441533414774567, "learning_rate": 7.484473861740847e-07, "loss": 0.0028989864513278008, "step": 1411 }, { "epoch": 0.3414752116082225, "grad_norm": 6.921943410756075, "learning_rate": 7.481142678186147e-07, "loss": 0.018469903618097305, "step": 1412 }, { "epoch": 0.34171704957678356, "grad_norm": 3.5090247946167956, "learning_rate": 7.47781003289871e-07, "loss": 0.010015692561864853, "step": 1413 }, { "epoch": 0.3419588875453446, "grad_norm": 2.8900236534044215, "learning_rate": 7.474475927841917e-07, "loss": 0.021997548639774323, "step": 1414 }, { "epoch": 0.3422007255139057, "grad_norm": 5.611863217238986, "learning_rate": 7.471140364980015e-07, "loss": 0.02100142277777195, "step": 1415 }, { "epoch": 0.34244256348246677, "grad_norm": 4.383994944013852, "learning_rate": 7.467803346278108e-07, "loss": 0.009770825505256653, "step": 1416 }, { "epoch": 0.3426844014510278, "grad_norm": 6.807015033248777, "learning_rate": 7.464464873702155e-07, "loss": 0.02235579863190651, "step": 1417 }, { "epoch": 0.3429262394195889, "grad_norm": 22.39384124762975, "learning_rate": 7.461124949218974e-07, "loss": 0.01127004623413086, "step": 1418 }, { "epoch": 0.3431680773881499, "grad_norm": 2.572115288847532, "learning_rate": 7.457783574796239e-07, "loss": 0.014910787343978882, "step": 1419 }, { "epoch": 0.343409915356711, "grad_norm": 2.4415575573148525, "learning_rate": 7.454440752402476e-07, "loss": 0.008974961005151272, "step": 1420 }, { "epoch": 0.3436517533252721, "grad_norm": 11.709804784326387, "learning_rate": 7.451096484007062e-07, "loss": 0.01683090440928936, "step": 1421 }, { "epoch": 0.3438935912938331, "grad_norm": 5.845041164479442, "learning_rate": 7.447750771580235e-07, "loss": 0.004925819579511881, "step": 1422 }, { "epoch": 0.3441354292623942, "grad_norm": 1.335225201711825, "learning_rate": 7.444403617093074e-07, "loss": 0.01520239282399416, "step": 1423 }, { "epoch": 0.34437726723095524, "grad_norm": 5.602705053905688, "learning_rate": 7.441055022517511e-07, "loss": 0.010072344914078712, "step": 1424 }, { "epoch": 0.34461910519951633, "grad_norm": 3.777658468690646, "learning_rate": 7.437704989826327e-07, "loss": 0.0038615481462329626, "step": 1425 }, { "epoch": 0.34486094316807736, "grad_norm": 3.6223469229961727, "learning_rate": 7.434353520993152e-07, "loss": 0.013371025212109089, "step": 1426 }, { "epoch": 0.34510278113663845, "grad_norm": 2.6155220576002822, "learning_rate": 7.431000617992459e-07, "loss": 0.01640796847641468, "step": 1427 }, { "epoch": 0.34534461910519954, "grad_norm": 2.894565394946084, "learning_rate": 7.427646282799565e-07, "loss": 0.011647500097751617, "step": 1428 }, { "epoch": 0.34558645707376057, "grad_norm": 1.452393900336079, "learning_rate": 7.424290517390634e-07, "loss": 0.005625549238175154, "step": 1429 }, { "epoch": 0.34582829504232165, "grad_norm": 4.628684838253253, "learning_rate": 7.420933323742672e-07, "loss": 0.0067794956266880035, "step": 1430 }, { "epoch": 0.3460701330108827, "grad_norm": 3.9347655721400567, "learning_rate": 7.417574703833524e-07, "loss": 0.029549945145845413, "step": 1431 }, { "epoch": 0.3463119709794438, "grad_norm": 25.435950416646627, "learning_rate": 7.414214659641878e-07, "loss": 0.03943261131644249, "step": 1432 }, { "epoch": 0.34655380894800486, "grad_norm": 1.7767731760965333, "learning_rate": 7.410853193147262e-07, "loss": 0.013938412070274353, "step": 1433 }, { "epoch": 0.3467956469165659, "grad_norm": 2.7286644738970876, "learning_rate": 7.407490306330037e-07, "loss": 0.019060760736465454, "step": 1434 }, { "epoch": 0.347037484885127, "grad_norm": 4.769486991485195, "learning_rate": 7.404126001171404e-07, "loss": 0.019017044454813004, "step": 1435 }, { "epoch": 0.347279322853688, "grad_norm": 2.2838681320562784, "learning_rate": 7.400760279653401e-07, "loss": 0.008117909543216228, "step": 1436 }, { "epoch": 0.3475211608222491, "grad_norm": 2.98633810540724, "learning_rate": 7.397393143758899e-07, "loss": 0.016767408698797226, "step": 1437 }, { "epoch": 0.34776299879081013, "grad_norm": 3.917176665380855, "learning_rate": 7.394024595471601e-07, "loss": 0.018716245889663696, "step": 1438 }, { "epoch": 0.3480048367593712, "grad_norm": 2.1604124411631824, "learning_rate": 7.390654636776041e-07, "loss": 0.060515593737363815, "step": 1439 }, { "epoch": 0.3482466747279323, "grad_norm": 2.167793769107108, "learning_rate": 7.38728326965759e-07, "loss": 0.014769941568374634, "step": 1440 }, { "epoch": 0.34848851269649334, "grad_norm": 2.83415812856882, "learning_rate": 7.383910496102443e-07, "loss": 0.011281712912023067, "step": 1441 }, { "epoch": 0.3487303506650544, "grad_norm": 8.025334948465884, "learning_rate": 7.380536318097624e-07, "loss": 0.009952614083886147, "step": 1442 }, { "epoch": 0.34897218863361545, "grad_norm": 4.406997988011179, "learning_rate": 7.377160737630989e-07, "loss": 0.029172569513320923, "step": 1443 }, { "epoch": 0.34921402660217654, "grad_norm": 3.9174234349465733, "learning_rate": 7.373783756691214e-07, "loss": 0.01417694240808487, "step": 1444 }, { "epoch": 0.34945586457073763, "grad_norm": 2.8163069972003663, "learning_rate": 7.370405377267805e-07, "loss": 0.017812859266996384, "step": 1445 }, { "epoch": 0.34969770253929866, "grad_norm": 3.3209425582122183, "learning_rate": 7.36702560135109e-07, "loss": 0.00716733280569315, "step": 1446 }, { "epoch": 0.34993954050785975, "grad_norm": 1.153266712693646, "learning_rate": 7.363644430932217e-07, "loss": 0.00824161060154438, "step": 1447 }, { "epoch": 0.3501813784764208, "grad_norm": 3.7604094315445904, "learning_rate": 7.360261868003163e-07, "loss": 0.013567757792770863, "step": 1448 }, { "epoch": 0.35042321644498187, "grad_norm": 1.9386264842028225, "learning_rate": 7.356877914556717e-07, "loss": 0.007869045250117779, "step": 1449 }, { "epoch": 0.35066505441354295, "grad_norm": 1.6432012857066494, "learning_rate": 7.353492572586493e-07, "loss": 0.007242993451654911, "step": 1450 }, { "epoch": 0.350906892382104, "grad_norm": 59.20781943171442, "learning_rate": 7.350105844086919e-07, "loss": 0.008766472339630127, "step": 1451 }, { "epoch": 0.3511487303506651, "grad_norm": 2.6482911238732374, "learning_rate": 7.346717731053243e-07, "loss": 0.01199681218713522, "step": 1452 }, { "epoch": 0.3513905683192261, "grad_norm": 1.14848502633625, "learning_rate": 7.343328235481529e-07, "loss": 0.0043502734042704105, "step": 1453 }, { "epoch": 0.3516324062877872, "grad_norm": 1.917761370081841, "learning_rate": 7.339937359368649e-07, "loss": 0.022120805457234383, "step": 1454 }, { "epoch": 0.3518742442563482, "grad_norm": 3.6358209079438986, "learning_rate": 7.336545104712298e-07, "loss": 0.006588836200535297, "step": 1455 }, { "epoch": 0.3521160822249093, "grad_norm": 0.8996112347319881, "learning_rate": 7.333151473510976e-07, "loss": 0.004015245940536261, "step": 1456 }, { "epoch": 0.3523579201934704, "grad_norm": 2.228172642863167, "learning_rate": 7.329756467764e-07, "loss": 0.012975456193089485, "step": 1457 }, { "epoch": 0.35259975816203143, "grad_norm": 2.052966307901784, "learning_rate": 7.326360089471488e-07, "loss": 0.03434063866734505, "step": 1458 }, { "epoch": 0.3528415961305925, "grad_norm": 9.23652038242849, "learning_rate": 7.322962340634375e-07, "loss": 0.031992923468351364, "step": 1459 }, { "epoch": 0.35308343409915355, "grad_norm": 2.1109553846727467, "learning_rate": 7.319563223254401e-07, "loss": 0.006102907005697489, "step": 1460 }, { "epoch": 0.35332527206771464, "grad_norm": 2.4413484060870307, "learning_rate": 7.316162739334109e-07, "loss": 0.01673380844295025, "step": 1461 }, { "epoch": 0.3535671100362757, "grad_norm": 3.6305290768175325, "learning_rate": 7.31276089087685e-07, "loss": 0.017076464369893074, "step": 1462 }, { "epoch": 0.35380894800483675, "grad_norm": 4.449414204648046, "learning_rate": 7.309357679886779e-07, "loss": 0.012105733156204224, "step": 1463 }, { "epoch": 0.35405078597339784, "grad_norm": 2.0959586657808615, "learning_rate": 7.305953108368856e-07, "loss": 0.012595380656421185, "step": 1464 }, { "epoch": 0.3542926239419589, "grad_norm": 4.3104287600995175, "learning_rate": 7.302547178328836e-07, "loss": 0.013405662961304188, "step": 1465 }, { "epoch": 0.35453446191051996, "grad_norm": 5.0667240995814105, "learning_rate": 7.299139891773276e-07, "loss": 0.020270347595214844, "step": 1466 }, { "epoch": 0.354776299879081, "grad_norm": 3.4583492206512307, "learning_rate": 7.295731250709543e-07, "loss": 0.0116346450522542, "step": 1467 }, { "epoch": 0.3550181378476421, "grad_norm": 3.048407250340678, "learning_rate": 7.292321257145785e-07, "loss": 0.002208976075053215, "step": 1468 }, { "epoch": 0.35525997581620317, "grad_norm": 5.703048049097333, "learning_rate": 7.288909913090958e-07, "loss": 0.030490612611174583, "step": 1469 }, { "epoch": 0.3555018137847642, "grad_norm": 13.578264494526822, "learning_rate": 7.28549722055481e-07, "loss": 0.01690484955906868, "step": 1470 }, { "epoch": 0.3557436517533253, "grad_norm": 4.541351342203318, "learning_rate": 7.282083181547886e-07, "loss": 0.019705265760421753, "step": 1471 }, { "epoch": 0.3559854897218863, "grad_norm": 2.345648078975723, "learning_rate": 7.27866779808152e-07, "loss": 0.006881223060190678, "step": 1472 }, { "epoch": 0.3562273276904474, "grad_norm": 0.8100373845936734, "learning_rate": 7.275251072167843e-07, "loss": 0.003506237408146262, "step": 1473 }, { "epoch": 0.3564691656590085, "grad_norm": 7.522680269783317, "learning_rate": 7.271833005819772e-07, "loss": 0.046729039400815964, "step": 1474 }, { "epoch": 0.3567110036275695, "grad_norm": 1.242873296427993, "learning_rate": 7.268413601051019e-07, "loss": 0.005991392768919468, "step": 1475 }, { "epoch": 0.3569528415961306, "grad_norm": 4.751930006507887, "learning_rate": 7.264992859876078e-07, "loss": 0.020505154505372047, "step": 1476 }, { "epoch": 0.35719467956469164, "grad_norm": 1.048384090019792, "learning_rate": 7.261570784310236e-07, "loss": 0.005121234338730574, "step": 1477 }, { "epoch": 0.35743651753325273, "grad_norm": 1.9426272401391667, "learning_rate": 7.258147376369564e-07, "loss": 0.003318834351375699, "step": 1478 }, { "epoch": 0.35767835550181376, "grad_norm": 4.5354363634353305, "learning_rate": 7.254722638070916e-07, "loss": 0.01005585864186287, "step": 1479 }, { "epoch": 0.35792019347037485, "grad_norm": 2.6882814373388766, "learning_rate": 7.251296571431934e-07, "loss": 0.020660776644945145, "step": 1480 }, { "epoch": 0.35816203143893593, "grad_norm": 1.8820525465254616, "learning_rate": 7.24786917847104e-07, "loss": 0.005130805540829897, "step": 1481 }, { "epoch": 0.35840386940749697, "grad_norm": 5.433819440515617, "learning_rate": 7.244440461207437e-07, "loss": 0.01251339353621006, "step": 1482 }, { "epoch": 0.35864570737605805, "grad_norm": 3.9086141524632856, "learning_rate": 7.24101042166111e-07, "loss": 0.017801156267523766, "step": 1483 }, { "epoch": 0.3588875453446191, "grad_norm": 4.037749355300777, "learning_rate": 7.237579061852821e-07, "loss": 0.00905107893049717, "step": 1484 }, { "epoch": 0.3591293833131802, "grad_norm": 2.7535706577853634, "learning_rate": 7.234146383804111e-07, "loss": 0.003360925940796733, "step": 1485 }, { "epoch": 0.35937122128174126, "grad_norm": 1.6583766871280683, "learning_rate": 7.230712389537296e-07, "loss": 0.004712735302746296, "step": 1486 }, { "epoch": 0.3596130592503023, "grad_norm": 2.99828943987833, "learning_rate": 7.227277081075471e-07, "loss": 0.008782301098108292, "step": 1487 }, { "epoch": 0.3598548972188634, "grad_norm": 1.925598479495292, "learning_rate": 7.223840460442503e-07, "loss": 0.005082964897155762, "step": 1488 }, { "epoch": 0.3600967351874244, "grad_norm": 1.1618971779915037, "learning_rate": 7.220402529663028e-07, "loss": 0.01232729759067297, "step": 1489 }, { "epoch": 0.3603385731559855, "grad_norm": 4.393572388526232, "learning_rate": 7.216963290762463e-07, "loss": 0.011742711067199707, "step": 1490 }, { "epoch": 0.36058041112454653, "grad_norm": 1.1762273352071788, "learning_rate": 7.21352274576699e-07, "loss": 0.01053901668637991, "step": 1491 }, { "epoch": 0.3608222490931076, "grad_norm": 1.6086466764888772, "learning_rate": 7.210080896703558e-07, "loss": 0.00809098407626152, "step": 1492 }, { "epoch": 0.3610640870616687, "grad_norm": 3.8531914643533156, "learning_rate": 7.206637745599891e-07, "loss": 0.02101645991206169, "step": 1493 }, { "epoch": 0.36130592503022974, "grad_norm": 3.831384971732157, "learning_rate": 7.203193294484472e-07, "loss": 0.01571544073522091, "step": 1494 }, { "epoch": 0.3615477629987908, "grad_norm": 1.4889489959846829, "learning_rate": 7.199747545386558e-07, "loss": 0.012026715092360973, "step": 1495 }, { "epoch": 0.36178960096735185, "grad_norm": 2.147950995026723, "learning_rate": 7.196300500336164e-07, "loss": 0.020843904465436935, "step": 1496 }, { "epoch": 0.36203143893591294, "grad_norm": 39.11108478747391, "learning_rate": 7.192852161364074e-07, "loss": 0.017185980454087257, "step": 1497 }, { "epoch": 0.36227327690447403, "grad_norm": 2.796626986011948, "learning_rate": 7.189402530501832e-07, "loss": 0.008734649047255516, "step": 1498 }, { "epoch": 0.36251511487303506, "grad_norm": 1.8594180260091109, "learning_rate": 7.185951609781742e-07, "loss": 0.014711553230881691, "step": 1499 }, { "epoch": 0.36275695284159615, "grad_norm": 5.619279300838797, "learning_rate": 7.182499401236868e-07, "loss": 0.013994961977005005, "step": 1500 }, { "epoch": 0.3629987908101572, "grad_norm": 0.8295591056009695, "learning_rate": 7.179045906901034e-07, "loss": 0.009233195334672928, "step": 1501 }, { "epoch": 0.36324062877871827, "grad_norm": 1.664771562022115, "learning_rate": 7.175591128808822e-07, "loss": 0.03201181814074516, "step": 1502 }, { "epoch": 0.3634824667472793, "grad_norm": 3.2883764455241575, "learning_rate": 7.172135068995567e-07, "loss": 0.018751243129372597, "step": 1503 }, { "epoch": 0.3637243047158404, "grad_norm": 0.6973328450131444, "learning_rate": 7.168677729497363e-07, "loss": 0.0033814848866313696, "step": 1504 }, { "epoch": 0.3639661426844015, "grad_norm": 3.756032097135633, "learning_rate": 7.165219112351057e-07, "loss": 0.013281220570206642, "step": 1505 }, { "epoch": 0.3642079806529625, "grad_norm": 1.6702540314473169, "learning_rate": 7.161759219594247e-07, "loss": 0.002095491625368595, "step": 1506 }, { "epoch": 0.3644498186215236, "grad_norm": 4.3382109961588995, "learning_rate": 7.158298053265283e-07, "loss": 0.005522863008081913, "step": 1507 }, { "epoch": 0.3646916565900846, "grad_norm": 2.229814338145318, "learning_rate": 7.154835615403267e-07, "loss": 0.024479074403643608, "step": 1508 }, { "epoch": 0.3649334945586457, "grad_norm": 18.740724759721665, "learning_rate": 7.151371908048049e-07, "loss": 0.023833245038986206, "step": 1509 }, { "epoch": 0.3651753325272068, "grad_norm": 0.5013912639317742, "learning_rate": 7.147906933240226e-07, "loss": 0.0018347172299399972, "step": 1510 }, { "epoch": 0.36541717049576783, "grad_norm": 2.1185338371151192, "learning_rate": 7.144440693021142e-07, "loss": 0.011699112132191658, "step": 1511 }, { "epoch": 0.3656590084643289, "grad_norm": 1.0069379908440212, "learning_rate": 7.140973189432888e-07, "loss": 0.0060724252834916115, "step": 1512 }, { "epoch": 0.36590084643288995, "grad_norm": 6.6142390753402776, "learning_rate": 7.1375044245183e-07, "loss": 0.033701568841934204, "step": 1513 }, { "epoch": 0.36614268440145104, "grad_norm": 6.224886495936268, "learning_rate": 7.134034400320953e-07, "loss": 0.02807488478720188, "step": 1514 }, { "epoch": 0.36638452237001207, "grad_norm": 1.748500133943345, "learning_rate": 7.130563118885167e-07, "loss": 0.005764966364949942, "step": 1515 }, { "epoch": 0.36662636033857315, "grad_norm": 6.662840606258157, "learning_rate": 7.127090582256002e-07, "loss": 0.013348829932510853, "step": 1516 }, { "epoch": 0.36686819830713424, "grad_norm": 1.792708937486661, "learning_rate": 7.123616792479259e-07, "loss": 0.008946478366851807, "step": 1517 }, { "epoch": 0.3671100362756953, "grad_norm": 2.6727816674042546, "learning_rate": 7.120141751601472e-07, "loss": 0.008617429994046688, "step": 1518 }, { "epoch": 0.36735187424425636, "grad_norm": 2.800540744217865, "learning_rate": 7.11666546166992e-07, "loss": 0.026885230094194412, "step": 1519 }, { "epoch": 0.3675937122128174, "grad_norm": 1.7410175037514686, "learning_rate": 7.113187924732612e-07, "loss": 0.004528599325567484, "step": 1520 }, { "epoch": 0.3678355501813785, "grad_norm": 1.6882147622164296, "learning_rate": 7.109709142838294e-07, "loss": 0.004350464325398207, "step": 1521 }, { "epoch": 0.36807738814993957, "grad_norm": 0.9737964379730866, "learning_rate": 7.106229118036443e-07, "loss": 0.005234187003225088, "step": 1522 }, { "epoch": 0.3683192261185006, "grad_norm": 7.66004390365483, "learning_rate": 7.102747852377271e-07, "loss": 0.022044438868761063, "step": 1523 }, { "epoch": 0.3685610640870617, "grad_norm": 8.210333194188937, "learning_rate": 7.099265347911723e-07, "loss": 0.014523866586387157, "step": 1524 }, { "epoch": 0.3688029020556227, "grad_norm": 3.883321030787277, "learning_rate": 7.095781606691465e-07, "loss": 0.02172071672976017, "step": 1525 }, { "epoch": 0.3690447400241838, "grad_norm": 3.025450595617479, "learning_rate": 7.092296630768902e-07, "loss": 0.0028509784024208784, "step": 1526 }, { "epoch": 0.36928657799274484, "grad_norm": 7.625137152899734, "learning_rate": 7.088810422197162e-07, "loss": 0.010601376183331013, "step": 1527 }, { "epoch": 0.3695284159613059, "grad_norm": 1.3836083090754427, "learning_rate": 7.085322983030098e-07, "loss": 0.011313985101878643, "step": 1528 }, { "epoch": 0.369770253929867, "grad_norm": 3.418853967553588, "learning_rate": 7.081834315322288e-07, "loss": 0.018043747171759605, "step": 1529 }, { "epoch": 0.37001209189842804, "grad_norm": 5.938678234385703, "learning_rate": 7.078344421129036e-07, "loss": 0.03766518458724022, "step": 1530 }, { "epoch": 0.37025392986698913, "grad_norm": 4.155424970272926, "learning_rate": 7.074853302506368e-07, "loss": 0.022100692614912987, "step": 1531 }, { "epoch": 0.37049576783555016, "grad_norm": 1.587065208070596, "learning_rate": 7.071360961511031e-07, "loss": 0.004207621794193983, "step": 1532 }, { "epoch": 0.37073760580411125, "grad_norm": 1.8414945470515574, "learning_rate": 7.067867400200491e-07, "loss": 0.0072647216729819775, "step": 1533 }, { "epoch": 0.37097944377267233, "grad_norm": 2.18134173011481, "learning_rate": 7.064372620632934e-07, "loss": 0.020747726783156395, "step": 1534 }, { "epoch": 0.37122128174123337, "grad_norm": 5.777592471443355, "learning_rate": 7.060876624867263e-07, "loss": 0.021638406440615654, "step": 1535 }, { "epoch": 0.37146311970979445, "grad_norm": 3.0586857687093048, "learning_rate": 7.057379414963101e-07, "loss": 0.009917033836245537, "step": 1536 }, { "epoch": 0.3717049576783555, "grad_norm": 11.368448377463404, "learning_rate": 7.053880992980782e-07, "loss": 0.06550796329975128, "step": 1537 }, { "epoch": 0.3719467956469166, "grad_norm": 2.375530946707416, "learning_rate": 7.050381360981352e-07, "loss": 0.00437023863196373, "step": 1538 }, { "epoch": 0.3721886336154776, "grad_norm": 4.296477269120427, "learning_rate": 7.046880521026579e-07, "loss": 0.024511924013495445, "step": 1539 }, { "epoch": 0.3724304715840387, "grad_norm": 8.80317185275582, "learning_rate": 7.043378475178935e-07, "loss": 0.009623120538890362, "step": 1540 }, { "epoch": 0.3726723095525998, "grad_norm": 6.546629845026227, "learning_rate": 7.039875225501604e-07, "loss": 0.04036131128668785, "step": 1541 }, { "epoch": 0.3729141475211608, "grad_norm": 6.713544459347911, "learning_rate": 7.036370774058477e-07, "loss": 0.005198070779442787, "step": 1542 }, { "epoch": 0.3731559854897219, "grad_norm": 1.117287959559758, "learning_rate": 7.032865122914161e-07, "loss": 0.012003063224256039, "step": 1543 }, { "epoch": 0.37339782345828293, "grad_norm": 4.168214284135485, "learning_rate": 7.029358274133961e-07, "loss": 0.01195963378995657, "step": 1544 }, { "epoch": 0.373639661426844, "grad_norm": 1.4947332038371814, "learning_rate": 7.025850229783892e-07, "loss": 0.009317846968770027, "step": 1545 }, { "epoch": 0.3738814993954051, "grad_norm": 2.133847097782542, "learning_rate": 7.022340991930673e-07, "loss": 0.02108253352344036, "step": 1546 }, { "epoch": 0.37412333736396614, "grad_norm": 2.2493025911705926, "learning_rate": 7.018830562641725e-07, "loss": 0.00821349024772644, "step": 1547 }, { "epoch": 0.3743651753325272, "grad_norm": 1.4109274580534503, "learning_rate": 7.015318943985171e-07, "loss": 0.010265051387250423, "step": 1548 }, { "epoch": 0.37460701330108825, "grad_norm": 5.734458416730326, "learning_rate": 7.011806138029836e-07, "loss": 0.015783358365297318, "step": 1549 }, { "epoch": 0.37484885126964934, "grad_norm": 2.232787284505023, "learning_rate": 7.008292146845243e-07, "loss": 0.01168822031468153, "step": 1550 }, { "epoch": 0.3750906892382104, "grad_norm": 0.9182274898120144, "learning_rate": 7.004776972501613e-07, "loss": 0.00440002977848053, "step": 1551 }, { "epoch": 0.37533252720677146, "grad_norm": 1.4765911955962892, "learning_rate": 7.001260617069868e-07, "loss": 0.010775500908493996, "step": 1552 }, { "epoch": 0.37557436517533255, "grad_norm": 5.128881422517462, "learning_rate": 6.99774308262162e-07, "loss": 0.012062317691743374, "step": 1553 }, { "epoch": 0.3758162031438936, "grad_norm": 4.273972128230354, "learning_rate": 6.994224371229179e-07, "loss": 0.03270552307367325, "step": 1554 }, { "epoch": 0.37605804111245467, "grad_norm": 2.6587254409337953, "learning_rate": 6.990704484965549e-07, "loss": 0.0038235001266002655, "step": 1555 }, { "epoch": 0.3762998790810157, "grad_norm": 1.230545468485897, "learning_rate": 6.987183425904423e-07, "loss": 0.006198114715516567, "step": 1556 }, { "epoch": 0.3765417170495768, "grad_norm": 1.5967189639901487, "learning_rate": 6.98366119612019e-07, "loss": 0.012963324785232544, "step": 1557 }, { "epoch": 0.3767835550181379, "grad_norm": 3.880674935070562, "learning_rate": 6.980137797687921e-07, "loss": 0.07418046146631241, "step": 1558 }, { "epoch": 0.3770253929866989, "grad_norm": 56.805417265886234, "learning_rate": 6.976613232683384e-07, "loss": 0.10726137459278107, "step": 1559 }, { "epoch": 0.37726723095526, "grad_norm": 4.828829753340256, "learning_rate": 6.973087503183029e-07, "loss": 0.03719765692949295, "step": 1560 }, { "epoch": 0.377509068923821, "grad_norm": 2.2892700281022957, "learning_rate": 6.969560611263993e-07, "loss": 0.012028790079057217, "step": 1561 }, { "epoch": 0.3777509068923821, "grad_norm": 1.5218746212649394, "learning_rate": 6.966032559004098e-07, "loss": 0.005659319460391998, "step": 1562 }, { "epoch": 0.37799274486094314, "grad_norm": 1.4485668332682538, "learning_rate": 6.962503348481851e-07, "loss": 0.018100515007972717, "step": 1563 }, { "epoch": 0.37823458282950423, "grad_norm": 4.059610484634677, "learning_rate": 6.958972981776439e-07, "loss": 0.010537019930779934, "step": 1564 }, { "epoch": 0.3784764207980653, "grad_norm": 4.292651821708535, "learning_rate": 6.955441460967732e-07, "loss": 0.021827613934874535, "step": 1565 }, { "epoch": 0.37871825876662635, "grad_norm": 3.699383399914277, "learning_rate": 6.951908788136278e-07, "loss": 0.008535315282642841, "step": 1566 }, { "epoch": 0.37896009673518743, "grad_norm": 8.43359738662322, "learning_rate": 6.948374965363308e-07, "loss": 0.008800366893410683, "step": 1567 }, { "epoch": 0.37920193470374847, "grad_norm": 1.4006297790700404, "learning_rate": 6.944839994730723e-07, "loss": 0.002819209126755595, "step": 1568 }, { "epoch": 0.37944377267230955, "grad_norm": 2.0236537709873836, "learning_rate": 6.941303878321109e-07, "loss": 0.009236504323780537, "step": 1569 }, { "epoch": 0.37968561064087064, "grad_norm": 1.650695012289123, "learning_rate": 6.937766618217722e-07, "loss": 0.008203746750950813, "step": 1570 }, { "epoch": 0.3799274486094317, "grad_norm": 5.068800566760393, "learning_rate": 6.93422821650449e-07, "loss": 0.012058238498866558, "step": 1571 }, { "epoch": 0.38016928657799276, "grad_norm": 1.7717497243924014, "learning_rate": 6.930688675266018e-07, "loss": 0.010967480950057507, "step": 1572 }, { "epoch": 0.3804111245465538, "grad_norm": 1.8333506788578986, "learning_rate": 6.927147996587579e-07, "loss": 0.009651080705225468, "step": 1573 }, { "epoch": 0.3806529625151149, "grad_norm": 2.2193595687377488, "learning_rate": 6.923606182555119e-07, "loss": 0.00823457445949316, "step": 1574 }, { "epoch": 0.3808948004836759, "grad_norm": 9.062367378586206, "learning_rate": 6.92006323525525e-07, "loss": 0.00698061054572463, "step": 1575 }, { "epoch": 0.381136638452237, "grad_norm": 6.646753887466314, "learning_rate": 6.916519156775256e-07, "loss": 0.03813226521015167, "step": 1576 }, { "epoch": 0.3813784764207981, "grad_norm": 2.3474938975852844, "learning_rate": 6.91297394920308e-07, "loss": 0.014025479555130005, "step": 1577 }, { "epoch": 0.3816203143893591, "grad_norm": 1.1285492901331862, "learning_rate": 6.909427614627338e-07, "loss": 0.004027527756989002, "step": 1578 }, { "epoch": 0.3818621523579202, "grad_norm": 4.602315875087632, "learning_rate": 6.905880155137305e-07, "loss": 0.011162626557052135, "step": 1579 }, { "epoch": 0.38210399032648124, "grad_norm": 2.8618353876679383, "learning_rate": 6.902331572822921e-07, "loss": 0.013203958049416542, "step": 1580 }, { "epoch": 0.3823458282950423, "grad_norm": 3.4200979822422277, "learning_rate": 6.898781869774787e-07, "loss": 0.006770502310246229, "step": 1581 }, { "epoch": 0.3825876662636034, "grad_norm": 4.147075588943984, "learning_rate": 6.895231048084164e-07, "loss": 0.02242213301360607, "step": 1582 }, { "epoch": 0.38282950423216444, "grad_norm": 2.8918491009322356, "learning_rate": 6.89167910984297e-07, "loss": 0.0078547652810812, "step": 1583 }, { "epoch": 0.38307134220072553, "grad_norm": 2.2468923176180517, "learning_rate": 6.888126057143787e-07, "loss": 0.010863796807825565, "step": 1584 }, { "epoch": 0.38331318016928656, "grad_norm": 0.96510150890694, "learning_rate": 6.884571892079847e-07, "loss": 0.004894809797406197, "step": 1585 }, { "epoch": 0.38355501813784765, "grad_norm": 1.1946876089326928, "learning_rate": 6.881016616745038e-07, "loss": 0.00640276400372386, "step": 1586 }, { "epoch": 0.3837968561064087, "grad_norm": 2.608657516794566, "learning_rate": 6.877460233233908e-07, "loss": 0.00873494241386652, "step": 1587 }, { "epoch": 0.38403869407496977, "grad_norm": 1.7213365429781773, "learning_rate": 6.873902743641651e-07, "loss": 0.008737324737012386, "step": 1588 }, { "epoch": 0.38428053204353085, "grad_norm": 1.9971867268638828, "learning_rate": 6.870344150064119e-07, "loss": 0.023235592991113663, "step": 1589 }, { "epoch": 0.3845223700120919, "grad_norm": 2.797342136108898, "learning_rate": 6.866784454597805e-07, "loss": 0.01785374991595745, "step": 1590 }, { "epoch": 0.384764207980653, "grad_norm": 3.190321783905076, "learning_rate": 6.863223659339862e-07, "loss": 0.01870199292898178, "step": 1591 }, { "epoch": 0.385006045949214, "grad_norm": 2.621093451109382, "learning_rate": 6.859661766388083e-07, "loss": 0.006504962686449289, "step": 1592 }, { "epoch": 0.3852478839177751, "grad_norm": 2.5062800626463426, "learning_rate": 6.856098777840913e-07, "loss": 0.010416469536721706, "step": 1593 }, { "epoch": 0.3854897218863362, "grad_norm": 2.023446267056306, "learning_rate": 6.852534695797437e-07, "loss": 0.011670251376926899, "step": 1594 }, { "epoch": 0.3857315598548972, "grad_norm": 1.06943847870568, "learning_rate": 6.84896952235739e-07, "loss": 0.003465095302090049, "step": 1595 }, { "epoch": 0.3859733978234583, "grad_norm": 11.48150340376096, "learning_rate": 6.845403259621147e-07, "loss": 0.017374971881508827, "step": 1596 }, { "epoch": 0.38621523579201933, "grad_norm": 14.835414065619622, "learning_rate": 6.841835909689724e-07, "loss": 0.004794074688106775, "step": 1597 }, { "epoch": 0.3864570737605804, "grad_norm": 3.0149430309152754, "learning_rate": 6.838267474664778e-07, "loss": 0.018832748755812645, "step": 1598 }, { "epoch": 0.38669891172914145, "grad_norm": 2.8809000389578467, "learning_rate": 6.834697956648607e-07, "loss": 0.007612995803356171, "step": 1599 }, { "epoch": 0.38694074969770254, "grad_norm": 1.4990410552155342, "learning_rate": 6.831127357744146e-07, "loss": 0.010036607272922993, "step": 1600 }, { "epoch": 0.3871825876662636, "grad_norm": 0.7258425205257875, "learning_rate": 6.827555680054966e-07, "loss": 0.005626137834042311, "step": 1601 }, { "epoch": 0.38742442563482465, "grad_norm": 1.278925762246715, "learning_rate": 6.823982925685274e-07, "loss": 0.0071830907836556435, "step": 1602 }, { "epoch": 0.38766626360338574, "grad_norm": 3.534437594752918, "learning_rate": 6.820409096739912e-07, "loss": 0.013277038931846619, "step": 1603 }, { "epoch": 0.3879081015719468, "grad_norm": 6.167836169337317, "learning_rate": 6.816834195324354e-07, "loss": 0.015909859910607338, "step": 1604 }, { "epoch": 0.38814993954050786, "grad_norm": 0.700708194183873, "learning_rate": 6.813258223544709e-07, "loss": 0.005442066118121147, "step": 1605 }, { "epoch": 0.38839177750906895, "grad_norm": 4.357534734467916, "learning_rate": 6.809681183507709e-07, "loss": 0.010063767433166504, "step": 1606 }, { "epoch": 0.38863361547763, "grad_norm": 1.4518384308718832, "learning_rate": 6.806103077320724e-07, "loss": 0.006281334441155195, "step": 1607 }, { "epoch": 0.38887545344619107, "grad_norm": 5.310666063811739, "learning_rate": 6.802523907091748e-07, "loss": 0.005199764855206013, "step": 1608 }, { "epoch": 0.3891172914147521, "grad_norm": 2.6863199729664777, "learning_rate": 6.798943674929401e-07, "loss": 0.01185989286750555, "step": 1609 }, { "epoch": 0.3893591293833132, "grad_norm": 1.0247979062815686, "learning_rate": 6.79536238294293e-07, "loss": 0.008290622383356094, "step": 1610 }, { "epoch": 0.3896009673518742, "grad_norm": 17.298022177874238, "learning_rate": 6.791780033242207e-07, "loss": 0.01832897774875164, "step": 1611 }, { "epoch": 0.3898428053204353, "grad_norm": 16.621620992220645, "learning_rate": 6.788196627937728e-07, "loss": 0.010525918565690517, "step": 1612 }, { "epoch": 0.3900846432889964, "grad_norm": 0.524732636169819, "learning_rate": 6.784612169140606e-07, "loss": 0.0012774787610396743, "step": 1613 }, { "epoch": 0.3903264812575574, "grad_norm": 3.532293745365397, "learning_rate": 6.781026658962582e-07, "loss": 0.026682788506150246, "step": 1614 }, { "epoch": 0.3905683192261185, "grad_norm": 5.157733262765951, "learning_rate": 6.777440099516008e-07, "loss": 0.013749874196946621, "step": 1615 }, { "epoch": 0.39081015719467954, "grad_norm": 2.7388900199493977, "learning_rate": 6.773852492913861e-07, "loss": 0.013610276393592358, "step": 1616 }, { "epoch": 0.39105199516324063, "grad_norm": 2.43258583107837, "learning_rate": 6.770263841269733e-07, "loss": 0.008155415765941143, "step": 1617 }, { "epoch": 0.3912938331318017, "grad_norm": 16.899500807208543, "learning_rate": 6.76667414669783e-07, "loss": 0.006332975812256336, "step": 1618 }, { "epoch": 0.39153567110036275, "grad_norm": 3.9832958159084018, "learning_rate": 6.763083411312975e-07, "loss": 0.027200255542993546, "step": 1619 }, { "epoch": 0.39177750906892383, "grad_norm": 1.8057486308849402, "learning_rate": 6.759491637230601e-07, "loss": 0.010866872034966946, "step": 1620 }, { "epoch": 0.39201934703748487, "grad_norm": 2.157860857172876, "learning_rate": 6.755898826566755e-07, "loss": 0.02812168374657631, "step": 1621 }, { "epoch": 0.39226118500604595, "grad_norm": 3.8273513875593532, "learning_rate": 6.752304981438095e-07, "loss": 0.014776592142879963, "step": 1622 }, { "epoch": 0.39250302297460704, "grad_norm": 1.8900938302962493, "learning_rate": 6.748710103961887e-07, "loss": 0.007027371320873499, "step": 1623 }, { "epoch": 0.3927448609431681, "grad_norm": 3.2466089733168055, "learning_rate": 6.745114196256007e-07, "loss": 0.00859600305557251, "step": 1624 }, { "epoch": 0.39298669891172916, "grad_norm": 1.3011172271666296, "learning_rate": 6.741517260438937e-07, "loss": 0.007943603210151196, "step": 1625 }, { "epoch": 0.3932285368802902, "grad_norm": 5.002541977703654, "learning_rate": 6.737919298629764e-07, "loss": 0.006480695214122534, "step": 1626 }, { "epoch": 0.3934703748488513, "grad_norm": 2.186239675932205, "learning_rate": 6.734320312948181e-07, "loss": 0.010236294940114021, "step": 1627 }, { "epoch": 0.3937122128174123, "grad_norm": 19.067060048395902, "learning_rate": 6.730720305514481e-07, "loss": 0.018968360498547554, "step": 1628 }, { "epoch": 0.3939540507859734, "grad_norm": 11.774285020349605, "learning_rate": 6.727119278449564e-07, "loss": 0.012185786850750446, "step": 1629 }, { "epoch": 0.3941958887545345, "grad_norm": 1.415185387337471, "learning_rate": 6.723517233874927e-07, "loss": 0.0055025736801326275, "step": 1630 }, { "epoch": 0.3944377267230955, "grad_norm": 1.5027843903454352, "learning_rate": 6.719914173912666e-07, "loss": 0.01406980212777853, "step": 1631 }, { "epoch": 0.3946795646916566, "grad_norm": 1.8033941414421053, "learning_rate": 6.716310100685479e-07, "loss": 0.007517840247601271, "step": 1632 }, { "epoch": 0.39492140266021764, "grad_norm": 0.613400497653759, "learning_rate": 6.712705016316658e-07, "loss": 0.001366928219795227, "step": 1633 }, { "epoch": 0.3951632406287787, "grad_norm": 5.0384453051381675, "learning_rate": 6.70909892293009e-07, "loss": 0.010966573841869831, "step": 1634 }, { "epoch": 0.3954050785973398, "grad_norm": 4.354468439455184, "learning_rate": 6.705491822650259e-07, "loss": 0.027871254831552505, "step": 1635 }, { "epoch": 0.39564691656590084, "grad_norm": 2.901468865087696, "learning_rate": 6.70188371760224e-07, "loss": 0.009536507539451122, "step": 1636 }, { "epoch": 0.39588875453446193, "grad_norm": 4.461743099001216, "learning_rate": 6.698274609911702e-07, "loss": 0.00549303600564599, "step": 1637 }, { "epoch": 0.39613059250302296, "grad_norm": 3.882058467573988, "learning_rate": 6.694664501704903e-07, "loss": 0.019106155261397362, "step": 1638 }, { "epoch": 0.39637243047158405, "grad_norm": 0.7582266212269833, "learning_rate": 6.69105339510869e-07, "loss": 0.007992735132575035, "step": 1639 }, { "epoch": 0.3966142684401451, "grad_norm": 2.7955644784608573, "learning_rate": 6.687441292250499e-07, "loss": 0.008489513769745827, "step": 1640 }, { "epoch": 0.39685610640870617, "grad_norm": 23.998096120113136, "learning_rate": 6.683828195258358e-07, "loss": 0.03436943143606186, "step": 1641 }, { "epoch": 0.39709794437726725, "grad_norm": 1.6218786402728218, "learning_rate": 6.680214106260869e-07, "loss": 0.013410300016403198, "step": 1642 }, { "epoch": 0.3973397823458283, "grad_norm": 2.358462014096726, "learning_rate": 6.676599027387228e-07, "loss": 0.011602184735238552, "step": 1643 }, { "epoch": 0.3975816203143894, "grad_norm": 1.292728753160966, "learning_rate": 6.672982960767213e-07, "loss": 0.0046600536443293095, "step": 1644 }, { "epoch": 0.3978234582829504, "grad_norm": 4.138823435083929, "learning_rate": 6.669365908531181e-07, "loss": 0.011260601691901684, "step": 1645 }, { "epoch": 0.3980652962515115, "grad_norm": 1.0233120952015562, "learning_rate": 6.66574787281007e-07, "loss": 0.0044858441688120365, "step": 1646 }, { "epoch": 0.3983071342200726, "grad_norm": 2.5921580521641143, "learning_rate": 6.6621288557354e-07, "loss": 0.009757465682923794, "step": 1647 }, { "epoch": 0.3985489721886336, "grad_norm": 14.671017729618347, "learning_rate": 6.658508859439267e-07, "loss": 0.015061174519360065, "step": 1648 }, { "epoch": 0.3987908101571947, "grad_norm": 2.258346215549351, "learning_rate": 6.654887886054345e-07, "loss": 0.012620441615581512, "step": 1649 }, { "epoch": 0.39903264812575573, "grad_norm": 1.3567224687083907, "learning_rate": 6.65126593771388e-07, "loss": 0.006277073174715042, "step": 1650 }, { "epoch": 0.3992744860943168, "grad_norm": 41.90755076031199, "learning_rate": 6.6476430165517e-07, "loss": 0.01201782375574112, "step": 1651 }, { "epoch": 0.39951632406287785, "grad_norm": 6.75523094637965, "learning_rate": 6.644019124702199e-07, "loss": 0.02369503304362297, "step": 1652 }, { "epoch": 0.39975816203143894, "grad_norm": 11.259816184428624, "learning_rate": 6.640394264300346e-07, "loss": 0.005480997264385223, "step": 1653 }, { "epoch": 0.4, "grad_norm": 2.022667328509103, "learning_rate": 6.636768437481681e-07, "loss": 0.008738622069358826, "step": 1654 }, { "epoch": 0.40024183796856105, "grad_norm": 1.174083757722893, "learning_rate": 6.63314164638231e-07, "loss": 0.004029365722090006, "step": 1655 }, { "epoch": 0.40048367593712214, "grad_norm": 3.984234304548184, "learning_rate": 6.62951389313891e-07, "loss": 0.007688954472541809, "step": 1656 }, { "epoch": 0.4007255139056832, "grad_norm": 2.7310762196035476, "learning_rate": 6.625885179888727e-07, "loss": 0.006327821407467127, "step": 1657 }, { "epoch": 0.40096735187424426, "grad_norm": 2.743534032259035, "learning_rate": 6.622255508769567e-07, "loss": 0.021789761260151863, "step": 1658 }, { "epoch": 0.40120918984280535, "grad_norm": 2.0443822665091513, "learning_rate": 6.618624881919805e-07, "loss": 0.01917596161365509, "step": 1659 }, { "epoch": 0.4014510278113664, "grad_norm": 7.055503443772253, "learning_rate": 6.614993301478378e-07, "loss": 0.005560680292546749, "step": 1660 }, { "epoch": 0.40169286577992747, "grad_norm": 1.2463667603879316, "learning_rate": 6.611360769584781e-07, "loss": 0.009287933818995953, "step": 1661 }, { "epoch": 0.4019347037484885, "grad_norm": 2.6206326610389428, "learning_rate": 6.607727288379077e-07, "loss": 0.01693003438413143, "step": 1662 }, { "epoch": 0.4021765417170496, "grad_norm": 0.8409161755029589, "learning_rate": 6.60409286000188e-07, "loss": 0.0034992308355867863, "step": 1663 }, { "epoch": 0.4024183796856106, "grad_norm": 2.8154507258055, "learning_rate": 6.600457486594366e-07, "loss": 0.032774463295936584, "step": 1664 }, { "epoch": 0.4026602176541717, "grad_norm": 2.91664273159952, "learning_rate": 6.59682117029827e-07, "loss": 0.008742133155465126, "step": 1665 }, { "epoch": 0.4029020556227328, "grad_norm": 2.719166154728327, "learning_rate": 6.593183913255879e-07, "loss": 0.004566238261759281, "step": 1666 }, { "epoch": 0.4031438935912938, "grad_norm": 2.1952797462244873, "learning_rate": 6.589545717610035e-07, "loss": 0.029636947438120842, "step": 1667 }, { "epoch": 0.4033857315598549, "grad_norm": 4.655733764297393, "learning_rate": 6.585906585504135e-07, "loss": 0.020473066717386246, "step": 1668 }, { "epoch": 0.40362756952841594, "grad_norm": 3.4135859412747993, "learning_rate": 6.582266519082125e-07, "loss": 0.014080837368965149, "step": 1669 }, { "epoch": 0.40386940749697703, "grad_norm": 2.0769405159762706, "learning_rate": 6.578625520488501e-07, "loss": 0.0061830803751945496, "step": 1670 }, { "epoch": 0.4041112454655381, "grad_norm": 2.0723468932533726, "learning_rate": 6.574983591868313e-07, "loss": 0.023261303082108498, "step": 1671 }, { "epoch": 0.40435308343409915, "grad_norm": 6.431392783574317, "learning_rate": 6.571340735367152e-07, "loss": 0.01940784603357315, "step": 1672 }, { "epoch": 0.40459492140266023, "grad_norm": 1.676046023861026, "learning_rate": 6.56769695313116e-07, "loss": 0.00572116207331419, "step": 1673 }, { "epoch": 0.40483675937122127, "grad_norm": 3.59003088126197, "learning_rate": 6.564052247307026e-07, "loss": 0.0025878034066408873, "step": 1674 }, { "epoch": 0.40507859733978235, "grad_norm": 0.6900966490944208, "learning_rate": 6.56040662004198e-07, "loss": 0.0026884821709245443, "step": 1675 }, { "epoch": 0.4053204353083434, "grad_norm": 3.940622094830194, "learning_rate": 6.556760073483794e-07, "loss": 0.019203269854187965, "step": 1676 }, { "epoch": 0.4055622732769045, "grad_norm": 8.058286887594042, "learning_rate": 6.553112609780783e-07, "loss": 0.005365660879760981, "step": 1677 }, { "epoch": 0.40580411124546556, "grad_norm": 7.898044009000426, "learning_rate": 6.549464231081805e-07, "loss": 0.011729891411960125, "step": 1678 }, { "epoch": 0.4060459492140266, "grad_norm": 5.271797884729643, "learning_rate": 6.545814939536253e-07, "loss": 0.008885182440280914, "step": 1679 }, { "epoch": 0.4062877871825877, "grad_norm": 1.652682529436735, "learning_rate": 6.542164737294058e-07, "loss": 0.008625964634120464, "step": 1680 }, { "epoch": 0.4065296251511487, "grad_norm": 1.9537349812790583, "learning_rate": 6.53851362650569e-07, "loss": 0.008783071301877499, "step": 1681 }, { "epoch": 0.4067714631197098, "grad_norm": 1.661017934752942, "learning_rate": 6.534861609322155e-07, "loss": 0.0064249648712575436, "step": 1682 }, { "epoch": 0.4070133010882709, "grad_norm": 2.6399416548901744, "learning_rate": 6.53120868789499e-07, "loss": 0.01490571815520525, "step": 1683 }, { "epoch": 0.4072551390568319, "grad_norm": 1.1810738185996108, "learning_rate": 6.527554864376264e-07, "loss": 0.007142676506191492, "step": 1684 }, { "epoch": 0.407496977025393, "grad_norm": 2.7633770810718423, "learning_rate": 6.523900140918583e-07, "loss": 0.013354124501347542, "step": 1685 }, { "epoch": 0.40773881499395404, "grad_norm": 16.437181990032684, "learning_rate": 6.520244519675075e-07, "loss": 0.01994045451283455, "step": 1686 }, { "epoch": 0.4079806529625151, "grad_norm": 0.9721511003815608, "learning_rate": 6.516588002799406e-07, "loss": 0.0030051060020923615, "step": 1687 }, { "epoch": 0.40822249093107615, "grad_norm": 4.020391990712083, "learning_rate": 6.51293059244576e-07, "loss": 0.009914161637425423, "step": 1688 }, { "epoch": 0.40846432889963724, "grad_norm": 3.887453999138017, "learning_rate": 6.509272290768856e-07, "loss": 0.014283435419201851, "step": 1689 }, { "epoch": 0.40870616686819833, "grad_norm": 2.531490322831718, "learning_rate": 6.505613099923934e-07, "loss": 0.007072829641401768, "step": 1690 }, { "epoch": 0.40894800483675936, "grad_norm": 1.3762919907726963, "learning_rate": 6.501953022066758e-07, "loss": 0.008696655742824078, "step": 1691 }, { "epoch": 0.40918984280532045, "grad_norm": 1.5137185724044238, "learning_rate": 6.498292059353615e-07, "loss": 0.010632907971739769, "step": 1692 }, { "epoch": 0.4094316807738815, "grad_norm": 0.7296975718634382, "learning_rate": 6.494630213941313e-07, "loss": 0.0034569238778203726, "step": 1693 }, { "epoch": 0.40967351874244257, "grad_norm": 0.6263367618551726, "learning_rate": 6.490967487987183e-07, "loss": 0.0011894692433997989, "step": 1694 }, { "epoch": 0.40991535671100365, "grad_norm": 2.3005223474753667, "learning_rate": 6.487303883649067e-07, "loss": 0.008585515432059765, "step": 1695 }, { "epoch": 0.4101571946795647, "grad_norm": 6.103332256712638, "learning_rate": 6.483639403085334e-07, "loss": 0.01936357654631138, "step": 1696 }, { "epoch": 0.41039903264812577, "grad_norm": 2.868716257344589, "learning_rate": 6.479974048454861e-07, "loss": 0.01092293206602335, "step": 1697 }, { "epoch": 0.4106408706166868, "grad_norm": 4.648743106088614, "learning_rate": 6.476307821917049e-07, "loss": 0.010375051759183407, "step": 1698 }, { "epoch": 0.4108827085852479, "grad_norm": 2.6724132875725517, "learning_rate": 6.472640725631801e-07, "loss": 0.024351805448532104, "step": 1699 }, { "epoch": 0.4111245465538089, "grad_norm": 3.6287121784860483, "learning_rate": 6.468972761759543e-07, "loss": 0.014413438737392426, "step": 1700 }, { "epoch": 0.41136638452237, "grad_norm": 2.6292232729045075, "learning_rate": 6.465303932461208e-07, "loss": 0.018253624439239502, "step": 1701 }, { "epoch": 0.4116082224909311, "grad_norm": 3.071202860099024, "learning_rate": 6.461634239898237e-07, "loss": 0.02101549319922924, "step": 1702 }, { "epoch": 0.41185006045949213, "grad_norm": 1.5646659240808167, "learning_rate": 6.457963686232582e-07, "loss": 0.00926982332020998, "step": 1703 }, { "epoch": 0.4120918984280532, "grad_norm": 8.016862163482934, "learning_rate": 6.4542922736267e-07, "loss": 0.017829861491918564, "step": 1704 }, { "epoch": 0.41233373639661425, "grad_norm": 0.8531068875161036, "learning_rate": 6.450620004243559e-07, "loss": 0.005393661558628082, "step": 1705 }, { "epoch": 0.41257557436517533, "grad_norm": 1.9093156454949898, "learning_rate": 6.446946880246626e-07, "loss": 0.009513825178146362, "step": 1706 }, { "epoch": 0.4128174123337364, "grad_norm": 2.5319337894500142, "learning_rate": 6.443272903799875e-07, "loss": 0.00730525329709053, "step": 1707 }, { "epoch": 0.41305925030229745, "grad_norm": 1.9198448582971515, "learning_rate": 6.43959807706778e-07, "loss": 0.014906312339007854, "step": 1708 }, { "epoch": 0.41330108827085854, "grad_norm": 1.5523594056471222, "learning_rate": 6.435922402215319e-07, "loss": 0.012577684596180916, "step": 1709 }, { "epoch": 0.4135429262394196, "grad_norm": 19.002972343830407, "learning_rate": 6.432245881407968e-07, "loss": 0.04736657068133354, "step": 1710 }, { "epoch": 0.41378476420798066, "grad_norm": 4.626471582727805, "learning_rate": 6.428568516811698e-07, "loss": 0.0077191563323140144, "step": 1711 }, { "epoch": 0.4140266021765417, "grad_norm": 1.7754858126057242, "learning_rate": 6.424890310592981e-07, "loss": 0.014525527134537697, "step": 1712 }, { "epoch": 0.4142684401451028, "grad_norm": 3.0777395565771974, "learning_rate": 6.421211264918786e-07, "loss": 0.016485530883073807, "step": 1713 }, { "epoch": 0.41451027811366387, "grad_norm": 1.7186827589641012, "learning_rate": 6.417531381956574e-07, "loss": 0.022348059341311455, "step": 1714 }, { "epoch": 0.4147521160822249, "grad_norm": 2.8314408696369786, "learning_rate": 6.4138506638743e-07, "loss": 0.00915563479065895, "step": 1715 }, { "epoch": 0.414993954050786, "grad_norm": 3.050879686819196, "learning_rate": 6.410169112840409e-07, "loss": 0.01993248425424099, "step": 1716 }, { "epoch": 0.415235792019347, "grad_norm": 0.8890842411139341, "learning_rate": 6.406486731023842e-07, "loss": 0.0045911031775176525, "step": 1717 }, { "epoch": 0.4154776299879081, "grad_norm": 0.9537599782254617, "learning_rate": 6.402803520594022e-07, "loss": 0.0031301197595894337, "step": 1718 }, { "epoch": 0.4157194679564692, "grad_norm": 1.2925136091827991, "learning_rate": 6.399119483720868e-07, "loss": 0.003581733675673604, "step": 1719 }, { "epoch": 0.4159613059250302, "grad_norm": 1.5841553623379816, "learning_rate": 6.39543462257478e-07, "loss": 0.00517108803614974, "step": 1720 }, { "epoch": 0.4162031438935913, "grad_norm": 1.6695935341822337, "learning_rate": 6.391748939326644e-07, "loss": 0.017539212480187416, "step": 1721 }, { "epoch": 0.41644498186215234, "grad_norm": 2.045881649101949, "learning_rate": 6.388062436147837e-07, "loss": 0.014035164378583431, "step": 1722 }, { "epoch": 0.41668681983071343, "grad_norm": 0.7601519606072791, "learning_rate": 6.384375115210211e-07, "loss": 0.005801492836326361, "step": 1723 }, { "epoch": 0.41692865779927446, "grad_norm": 2.013260794545258, "learning_rate": 6.380686978686103e-07, "loss": 0.022782251238822937, "step": 1724 }, { "epoch": 0.41717049576783555, "grad_norm": 2.4959827699603783, "learning_rate": 6.37699802874833e-07, "loss": 0.0066625820472836494, "step": 1725 }, { "epoch": 0.41741233373639663, "grad_norm": 2.2309674969344364, "learning_rate": 6.373308267570189e-07, "loss": 0.012674606405198574, "step": 1726 }, { "epoch": 0.41765417170495767, "grad_norm": 3.136910216381334, "learning_rate": 6.369617697325456e-07, "loss": 0.005183309316635132, "step": 1727 }, { "epoch": 0.41789600967351875, "grad_norm": 2.4244234696291365, "learning_rate": 6.365926320188378e-07, "loss": 0.01987166702747345, "step": 1728 }, { "epoch": 0.4181378476420798, "grad_norm": 2.0739431076552224, "learning_rate": 6.362234138333687e-07, "loss": 0.018022023141384125, "step": 1729 }, { "epoch": 0.4183796856106409, "grad_norm": 1.1346035248552258, "learning_rate": 6.358541153936578e-07, "loss": 0.006383263505995274, "step": 1730 }, { "epoch": 0.41862152357920196, "grad_norm": 2.6436400568433474, "learning_rate": 6.354847369172732e-07, "loss": 0.0042269425466656685, "step": 1731 }, { "epoch": 0.418863361547763, "grad_norm": 3.3501534265753374, "learning_rate": 6.351152786218286e-07, "loss": 0.008866893127560616, "step": 1732 }, { "epoch": 0.4191051995163241, "grad_norm": 0.7993730098665108, "learning_rate": 6.347457407249859e-07, "loss": 0.003461387474089861, "step": 1733 }, { "epoch": 0.4193470374848851, "grad_norm": 0.9792862315068304, "learning_rate": 6.343761234444534e-07, "loss": 0.009950557723641396, "step": 1734 }, { "epoch": 0.4195888754534462, "grad_norm": 1.3727870968544178, "learning_rate": 6.340064269979866e-07, "loss": 0.017046842724084854, "step": 1735 }, { "epoch": 0.41983071342200723, "grad_norm": 3.0966558092613754, "learning_rate": 6.336366516033867e-07, "loss": 0.006341094616800547, "step": 1736 }, { "epoch": 0.4200725513905683, "grad_norm": 4.721787235331892, "learning_rate": 6.332667974785027e-07, "loss": 0.03062724880874157, "step": 1737 }, { "epoch": 0.4203143893591294, "grad_norm": 7.152698488568104, "learning_rate": 6.328968648412289e-07, "loss": 0.02002992480993271, "step": 1738 }, { "epoch": 0.42055622732769044, "grad_norm": 3.7897880068895304, "learning_rate": 6.325268539095066e-07, "loss": 0.03744691610336304, "step": 1739 }, { "epoch": 0.4207980652962515, "grad_norm": 6.158181233505289, "learning_rate": 6.321567649013224e-07, "loss": 0.0193362794816494, "step": 1740 }, { "epoch": 0.42103990326481255, "grad_norm": 4.07538686366426, "learning_rate": 6.317865980347098e-07, "loss": 0.006047854665666819, "step": 1741 }, { "epoch": 0.42128174123337364, "grad_norm": 2.3358263398699317, "learning_rate": 6.314163535277478e-07, "loss": 0.017357412725687027, "step": 1742 }, { "epoch": 0.42152357920193473, "grad_norm": 1.1121945774602637, "learning_rate": 6.31046031598561e-07, "loss": 0.012696200981736183, "step": 1743 }, { "epoch": 0.42176541717049576, "grad_norm": 0.9233704536569531, "learning_rate": 6.306756324653194e-07, "loss": 0.0026466173585504293, "step": 1744 }, { "epoch": 0.42200725513905685, "grad_norm": 0.55740307650951, "learning_rate": 6.303051563462391e-07, "loss": 0.0029347760137170553, "step": 1745 }, { "epoch": 0.4222490931076179, "grad_norm": 6.17105138448542, "learning_rate": 6.299346034595815e-07, "loss": 0.008466918021440506, "step": 1746 }, { "epoch": 0.42249093107617897, "grad_norm": 6.446352156123596, "learning_rate": 6.295639740236526e-07, "loss": 0.011133141815662384, "step": 1747 }, { "epoch": 0.42273276904474, "grad_norm": 4.159258724784237, "learning_rate": 6.29193268256804e-07, "loss": 0.005632133688777685, "step": 1748 }, { "epoch": 0.4229746070133011, "grad_norm": 3.626844647363778, "learning_rate": 6.288224863774321e-07, "loss": 0.006578186992555857, "step": 1749 }, { "epoch": 0.42321644498186217, "grad_norm": 1.747571399624399, "learning_rate": 6.284516286039785e-07, "loss": 0.008761836215853691, "step": 1750 }, { "epoch": 0.4234582829504232, "grad_norm": 2.028798688043136, "learning_rate": 6.280806951549287e-07, "loss": 0.017376719042658806, "step": 1751 }, { "epoch": 0.4237001209189843, "grad_norm": 1.2153312421594844, "learning_rate": 6.277096862488136e-07, "loss": 0.010165602900087833, "step": 1752 }, { "epoch": 0.4239419588875453, "grad_norm": 2.381999093070314, "learning_rate": 6.273386021042082e-07, "loss": 0.023086965084075928, "step": 1753 }, { "epoch": 0.4241837968561064, "grad_norm": 2.703780137124519, "learning_rate": 6.269674429397319e-07, "loss": 0.007703012321144342, "step": 1754 }, { "epoch": 0.4244256348246675, "grad_norm": 0.7139877198000684, "learning_rate": 6.265962089740479e-07, "loss": 0.006879532244056463, "step": 1755 }, { "epoch": 0.42466747279322853, "grad_norm": 5.885947714351874, "learning_rate": 6.262249004258644e-07, "loss": 0.01431161630898714, "step": 1756 }, { "epoch": 0.4249093107617896, "grad_norm": 1.7704337315640917, "learning_rate": 6.258535175139327e-07, "loss": 0.010621647350490093, "step": 1757 }, { "epoch": 0.42515114873035065, "grad_norm": 0.7889164268607204, "learning_rate": 6.254820604570483e-07, "loss": 0.008130754344165325, "step": 1758 }, { "epoch": 0.42539298669891173, "grad_norm": 3.131523293385886, "learning_rate": 6.251105294740502e-07, "loss": 0.02836686186492443, "step": 1759 }, { "epoch": 0.42563482466747277, "grad_norm": 0.5665639828022206, "learning_rate": 6.247389247838209e-07, "loss": 0.0028936348389834166, "step": 1760 }, { "epoch": 0.42587666263603385, "grad_norm": 2.9662845158997886, "learning_rate": 6.243672466052863e-07, "loss": 0.006948649883270264, "step": 1761 }, { "epoch": 0.42611850060459494, "grad_norm": 4.367013112448841, "learning_rate": 6.239954951574162e-07, "loss": 0.02249276451766491, "step": 1762 }, { "epoch": 0.426360338573156, "grad_norm": 0.7018381277867307, "learning_rate": 6.236236706592227e-07, "loss": 0.0009682701784186065, "step": 1763 }, { "epoch": 0.42660217654171706, "grad_norm": 3.5898639964534897, "learning_rate": 6.232517733297617e-07, "loss": 0.009463774971663952, "step": 1764 }, { "epoch": 0.4268440145102781, "grad_norm": 2.3229615612594134, "learning_rate": 6.228798033881313e-07, "loss": 0.011953829787671566, "step": 1765 }, { "epoch": 0.4270858524788392, "grad_norm": 4.194016815590649, "learning_rate": 6.225077610534729e-07, "loss": 0.012056060135364532, "step": 1766 }, { "epoch": 0.42732769044740027, "grad_norm": 1.9349846757104137, "learning_rate": 6.221356465449703e-07, "loss": 0.0161331444978714, "step": 1767 }, { "epoch": 0.4275695284159613, "grad_norm": 2.055767594742814, "learning_rate": 6.2176346008185e-07, "loss": 0.01501503773033619, "step": 1768 }, { "epoch": 0.4278113663845224, "grad_norm": 5.077778394892197, "learning_rate": 6.213912018833807e-07, "loss": 0.01772947795689106, "step": 1769 }, { "epoch": 0.4280532043530834, "grad_norm": 7.527527525845499, "learning_rate": 6.210188721688733e-07, "loss": 0.024327503517270088, "step": 1770 }, { "epoch": 0.4282950423216445, "grad_norm": 5.165652876131487, "learning_rate": 6.206464711576813e-07, "loss": 0.012553234584629536, "step": 1771 }, { "epoch": 0.42853688029020554, "grad_norm": 6.916188996177304, "learning_rate": 6.202739990691997e-07, "loss": 0.006775815039873123, "step": 1772 }, { "epoch": 0.4287787182587666, "grad_norm": 2.521952850756019, "learning_rate": 6.199014561228655e-07, "loss": 0.013670245185494423, "step": 1773 }, { "epoch": 0.4290205562273277, "grad_norm": 1.5582593709060635, "learning_rate": 6.195288425381576e-07, "loss": 0.017903869971632957, "step": 1774 }, { "epoch": 0.42926239419588874, "grad_norm": 2.476595075038915, "learning_rate": 6.191561585345965e-07, "loss": 0.010213037952780724, "step": 1775 }, { "epoch": 0.42950423216444983, "grad_norm": 1.9358516877889969, "learning_rate": 6.187834043317439e-07, "loss": 0.00862963031977415, "step": 1776 }, { "epoch": 0.42974607013301086, "grad_norm": 1.3204541876336515, "learning_rate": 6.184105801492031e-07, "loss": 0.0116881737485528, "step": 1777 }, { "epoch": 0.42998790810157195, "grad_norm": 2.0607230417171483, "learning_rate": 6.180376862066186e-07, "loss": 0.0059061492793262005, "step": 1778 }, { "epoch": 0.43022974607013303, "grad_norm": 1.26165771705688, "learning_rate": 6.176647227236762e-07, "loss": 0.00676583731546998, "step": 1779 }, { "epoch": 0.43047158403869407, "grad_norm": 4.257832566790668, "learning_rate": 6.172916899201023e-07, "loss": 0.048842888325452805, "step": 1780 }, { "epoch": 0.43071342200725515, "grad_norm": 2.005841046232514, "learning_rate": 6.169185880156641e-07, "loss": 0.005919675808399916, "step": 1781 }, { "epoch": 0.4309552599758162, "grad_norm": 1.3044867068929435, "learning_rate": 6.1654541723017e-07, "loss": 0.006556949112564325, "step": 1782 }, { "epoch": 0.43119709794437727, "grad_norm": 1.1706967602955793, "learning_rate": 6.161721777834686e-07, "loss": 0.004501208662986755, "step": 1783 }, { "epoch": 0.4314389359129383, "grad_norm": 2.378398627305407, "learning_rate": 6.15798869895449e-07, "loss": 0.01400691457092762, "step": 1784 }, { "epoch": 0.4316807738814994, "grad_norm": 1.6198067223498822, "learning_rate": 6.154254937860404e-07, "loss": 0.008286637254059315, "step": 1785 }, { "epoch": 0.4319226118500605, "grad_norm": 1.6167541933916045, "learning_rate": 6.150520496752129e-07, "loss": 0.00913685467094183, "step": 1786 }, { "epoch": 0.4321644498186215, "grad_norm": 1.6429663751725254, "learning_rate": 6.146785377829762e-07, "loss": 0.010549936443567276, "step": 1787 }, { "epoch": 0.4324062877871826, "grad_norm": 2.061582624682847, "learning_rate": 6.143049583293795e-07, "loss": 0.002450547879561782, "step": 1788 }, { "epoch": 0.43264812575574363, "grad_norm": 8.188609927431767, "learning_rate": 6.139313115345127e-07, "loss": 0.01934937946498394, "step": 1789 }, { "epoch": 0.4328899637243047, "grad_norm": 1.8106635574612677, "learning_rate": 6.135575976185047e-07, "loss": 0.005089456681162119, "step": 1790 }, { "epoch": 0.4331318016928658, "grad_norm": 1.8224827144597484, "learning_rate": 6.13183816801524e-07, "loss": 0.0078094289638102055, "step": 1791 }, { "epoch": 0.43337363966142683, "grad_norm": 3.8234839722453158, "learning_rate": 6.128099693037792e-07, "loss": 0.007144227158278227, "step": 1792 }, { "epoch": 0.4336154776299879, "grad_norm": 1.0752722000943766, "learning_rate": 6.124360553455169e-07, "loss": 0.004359239246696234, "step": 1793 }, { "epoch": 0.43385731559854895, "grad_norm": 2.136821589959339, "learning_rate": 6.120620751470244e-07, "loss": 0.01346230786293745, "step": 1794 }, { "epoch": 0.43409915356711004, "grad_norm": 0.8184480309615325, "learning_rate": 6.116880289286267e-07, "loss": 0.00790246482938528, "step": 1795 }, { "epoch": 0.4343409915356711, "grad_norm": 4.423855796279789, "learning_rate": 6.113139169106884e-07, "loss": 0.009634779766201973, "step": 1796 }, { "epoch": 0.43458282950423216, "grad_norm": 2.1483023528409126, "learning_rate": 6.109397393136128e-07, "loss": 0.015561344102025032, "step": 1797 }, { "epoch": 0.43482466747279325, "grad_norm": 3.304518808164414, "learning_rate": 6.105654963578417e-07, "loss": 0.02318551577627659, "step": 1798 }, { "epoch": 0.4350665054413543, "grad_norm": 2.7297473573279247, "learning_rate": 6.101911882638553e-07, "loss": 0.012910640798509121, "step": 1799 }, { "epoch": 0.43530834340991537, "grad_norm": 32.50303358557317, "learning_rate": 6.098168152521723e-07, "loss": 0.0095581179484725, "step": 1800 }, { "epoch": 0.4355501813784764, "grad_norm": 1.7398209640039601, "learning_rate": 6.0944237754335e-07, "loss": 0.006983650382608175, "step": 1801 }, { "epoch": 0.4357920193470375, "grad_norm": 0.6986049653464272, "learning_rate": 6.090678753579831e-07, "loss": 0.0035213835071772337, "step": 1802 }, { "epoch": 0.43603385731559857, "grad_norm": 2.84330417130998, "learning_rate": 6.086933089167049e-07, "loss": 0.012556697241961956, "step": 1803 }, { "epoch": 0.4362756952841596, "grad_norm": 1.354977426627327, "learning_rate": 6.08318678440186e-07, "loss": 0.005459917243570089, "step": 1804 }, { "epoch": 0.4365175332527207, "grad_norm": 1.766384944871418, "learning_rate": 6.079439841491355e-07, "loss": 0.009934023022651672, "step": 1805 }, { "epoch": 0.4367593712212817, "grad_norm": 1.3546163083496292, "learning_rate": 6.075692262642992e-07, "loss": 0.0023002922534942627, "step": 1806 }, { "epoch": 0.4370012091898428, "grad_norm": 3.2306402676856414, "learning_rate": 6.071944050064614e-07, "loss": 0.015051290392875671, "step": 1807 }, { "epoch": 0.4372430471584039, "grad_norm": 2.2951147055302803, "learning_rate": 6.068195205964424e-07, "loss": 0.016096649691462517, "step": 1808 }, { "epoch": 0.43748488512696493, "grad_norm": 2.2114985128462523, "learning_rate": 6.064445732551008e-07, "loss": 0.013334403745830059, "step": 1809 }, { "epoch": 0.437726723095526, "grad_norm": 1.9949480414430658, "learning_rate": 6.06069563203332e-07, "loss": 0.015136792324483395, "step": 1810 }, { "epoch": 0.43796856106408705, "grad_norm": 2.514292585942615, "learning_rate": 6.056944906620681e-07, "loss": 0.017859671264886856, "step": 1811 }, { "epoch": 0.43821039903264813, "grad_norm": 1.4592958467546817, "learning_rate": 6.053193558522783e-07, "loss": 0.013865426182746887, "step": 1812 }, { "epoch": 0.43845223700120917, "grad_norm": 2.939710362412622, "learning_rate": 6.049441589949683e-07, "loss": 0.016184719279408455, "step": 1813 }, { "epoch": 0.43869407496977025, "grad_norm": 32.57660946494022, "learning_rate": 6.045689003111805e-07, "loss": 0.028223711997270584, "step": 1814 }, { "epoch": 0.43893591293833134, "grad_norm": 1.270762627217486, "learning_rate": 6.041935800219935e-07, "loss": 0.004713200032711029, "step": 1815 }, { "epoch": 0.4391777509068924, "grad_norm": 2.0701938630065997, "learning_rate": 6.038181983485224e-07, "loss": 0.008993211202323437, "step": 1816 }, { "epoch": 0.43941958887545346, "grad_norm": 20.086019898826812, "learning_rate": 6.034427555119183e-07, "loss": 0.013719858601689339, "step": 1817 }, { "epoch": 0.4396614268440145, "grad_norm": 1.613795330032959, "learning_rate": 6.030672517333688e-07, "loss": 0.008286756463348866, "step": 1818 }, { "epoch": 0.4399032648125756, "grad_norm": 1.0055885316762119, "learning_rate": 6.026916872340966e-07, "loss": 0.004801537375897169, "step": 1819 }, { "epoch": 0.44014510278113667, "grad_norm": 2.937805670227568, "learning_rate": 6.023160622353608e-07, "loss": 0.014194292016327381, "step": 1820 }, { "epoch": 0.4403869407496977, "grad_norm": 2.0430819729049157, "learning_rate": 6.01940376958456e-07, "loss": 0.009982439689338207, "step": 1821 }, { "epoch": 0.4406287787182588, "grad_norm": 6.656387990737421, "learning_rate": 6.015646316247122e-07, "loss": 0.02398604527115822, "step": 1822 }, { "epoch": 0.4408706166868198, "grad_norm": 0.8936992627103647, "learning_rate": 6.011888264554949e-07, "loss": 0.0022067863028496504, "step": 1823 }, { "epoch": 0.4411124546553809, "grad_norm": 1.5208136238044017, "learning_rate": 6.008129616722048e-07, "loss": 0.009509943425655365, "step": 1824 }, { "epoch": 0.44135429262394194, "grad_norm": 0.9784067028624225, "learning_rate": 6.004370374962776e-07, "loss": 0.0092869121581316, "step": 1825 }, { "epoch": 0.441596130592503, "grad_norm": 10.612412015954588, "learning_rate": 6.000610541491839e-07, "loss": 0.009684719145298004, "step": 1826 }, { "epoch": 0.4418379685610641, "grad_norm": 6.184468401365255, "learning_rate": 5.996850118524299e-07, "loss": 0.014527668245136738, "step": 1827 }, { "epoch": 0.44207980652962514, "grad_norm": 3.9121329392467876, "learning_rate": 5.993089108275559e-07, "loss": 0.00937636848539114, "step": 1828 }, { "epoch": 0.44232164449818623, "grad_norm": 5.972851509952166, "learning_rate": 5.989327512961365e-07, "loss": 0.03410334512591362, "step": 1829 }, { "epoch": 0.44256348246674726, "grad_norm": 4.750915218001275, "learning_rate": 5.985565334797815e-07, "loss": 0.019189298152923584, "step": 1830 }, { "epoch": 0.44280532043530835, "grad_norm": 2.410204154471426, "learning_rate": 5.981802576001346e-07, "loss": 0.053553253412246704, "step": 1831 }, { "epoch": 0.44304715840386943, "grad_norm": 1.7286415018794448, "learning_rate": 5.978039238788739e-07, "loss": 0.004595133010298014, "step": 1832 }, { "epoch": 0.44328899637243047, "grad_norm": 2.234688508670272, "learning_rate": 5.974275325377112e-07, "loss": 0.0053517199121415615, "step": 1833 }, { "epoch": 0.44353083434099155, "grad_norm": 1.7334483163700287, "learning_rate": 5.970510837983929e-07, "loss": 0.005878133233636618, "step": 1834 }, { "epoch": 0.4437726723095526, "grad_norm": 2.0782789264060497, "learning_rate": 5.966745778826986e-07, "loss": 0.008361938409507275, "step": 1835 }, { "epoch": 0.44401451027811367, "grad_norm": 1.2576037861906009, "learning_rate": 5.962980150124421e-07, "loss": 0.0026269431691616774, "step": 1836 }, { "epoch": 0.4442563482466747, "grad_norm": 2.3172370065180563, "learning_rate": 5.959213954094701e-07, "loss": 0.006456558592617512, "step": 1837 }, { "epoch": 0.4444981862152358, "grad_norm": 0.47455414082998426, "learning_rate": 5.955447192956634e-07, "loss": 0.0025813214015215635, "step": 1838 }, { "epoch": 0.4447400241837969, "grad_norm": 2.4563167184616037, "learning_rate": 5.951679868929357e-07, "loss": 0.010897601954638958, "step": 1839 }, { "epoch": 0.4449818621523579, "grad_norm": 27.9398636404421, "learning_rate": 5.94791198423234e-07, "loss": 0.026734739542007446, "step": 1840 }, { "epoch": 0.445223700120919, "grad_norm": 4.990440724454938, "learning_rate": 5.944143541085382e-07, "loss": 0.0149461030960083, "step": 1841 }, { "epoch": 0.44546553808948003, "grad_norm": 2.5213098255676463, "learning_rate": 5.940374541708613e-07, "loss": 0.0070090824738144875, "step": 1842 }, { "epoch": 0.4457073760580411, "grad_norm": 2.9086019630696436, "learning_rate": 5.936604988322491e-07, "loss": 0.00556144118309021, "step": 1843 }, { "epoch": 0.4459492140266022, "grad_norm": 1.957730939879953, "learning_rate": 5.932834883147797e-07, "loss": 0.0034361854195594788, "step": 1844 }, { "epoch": 0.44619105199516323, "grad_norm": 2.3821565008334797, "learning_rate": 5.929064228405638e-07, "loss": 0.014507986605167389, "step": 1845 }, { "epoch": 0.4464328899637243, "grad_norm": 0.7702108738675072, "learning_rate": 5.925293026317448e-07, "loss": 0.0044950623996555805, "step": 1846 }, { "epoch": 0.44667472793228535, "grad_norm": 1.5792984474953629, "learning_rate": 5.921521279104983e-07, "loss": 0.007457607891410589, "step": 1847 }, { "epoch": 0.44691656590084644, "grad_norm": 12.401285275211675, "learning_rate": 5.917748988990314e-07, "loss": 0.042038694024086, "step": 1848 }, { "epoch": 0.4471584038694075, "grad_norm": 3.168460620072427, "learning_rate": 5.913976158195839e-07, "loss": 0.023974036797881126, "step": 1849 }, { "epoch": 0.44740024183796856, "grad_norm": 4.3037372917584635, "learning_rate": 5.910202788944272e-07, "loss": 0.011131062172353268, "step": 1850 }, { "epoch": 0.44764207980652965, "grad_norm": 5.117260104553682, "learning_rate": 5.906428883458644e-07, "loss": 0.020897885784506798, "step": 1851 }, { "epoch": 0.4478839177750907, "grad_norm": 1.1683574742018228, "learning_rate": 5.902654443962301e-07, "loss": 0.0065663764253258705, "step": 1852 }, { "epoch": 0.44812575574365177, "grad_norm": 2.917480260764125, "learning_rate": 5.898879472678906e-07, "loss": 0.00765560707077384, "step": 1853 }, { "epoch": 0.4483675937122128, "grad_norm": 0.8005349088440495, "learning_rate": 5.895103971832433e-07, "loss": 0.003852631663903594, "step": 1854 }, { "epoch": 0.4486094316807739, "grad_norm": 4.2610304573593965, "learning_rate": 5.891327943647171e-07, "loss": 0.03388305380940437, "step": 1855 }, { "epoch": 0.44885126964933497, "grad_norm": 2.1583496612173168, "learning_rate": 5.887551390347712e-07, "loss": 0.020057139918208122, "step": 1856 }, { "epoch": 0.449093107617896, "grad_norm": 2.064000654909704, "learning_rate": 5.88377431415897e-07, "loss": 0.008546906523406506, "step": 1857 }, { "epoch": 0.4493349455864571, "grad_norm": 2.7790876027177887, "learning_rate": 5.879996717306156e-07, "loss": 0.014640606008470058, "step": 1858 }, { "epoch": 0.4495767835550181, "grad_norm": 4.535928776694034, "learning_rate": 5.876218602014792e-07, "loss": 0.01400680560618639, "step": 1859 }, { "epoch": 0.4498186215235792, "grad_norm": 2.512625935601332, "learning_rate": 5.872439970510705e-07, "loss": 0.01738062873482704, "step": 1860 }, { "epoch": 0.45006045949214024, "grad_norm": 0.8082631353090648, "learning_rate": 5.868660825020029e-07, "loss": 0.004186998587101698, "step": 1861 }, { "epoch": 0.45030229746070133, "grad_norm": 1.3705667270871662, "learning_rate": 5.864881167769195e-07, "loss": 0.005119276233017445, "step": 1862 }, { "epoch": 0.4505441354292624, "grad_norm": 11.82835979609001, "learning_rate": 5.861101000984939e-07, "loss": 0.016369203105568886, "step": 1863 }, { "epoch": 0.45078597339782345, "grad_norm": 0.8272398984716689, "learning_rate": 5.857320326894299e-07, "loss": 0.00430005369707942, "step": 1864 }, { "epoch": 0.45102781136638453, "grad_norm": 1.4942965366196268, "learning_rate": 5.853539147724606e-07, "loss": 0.00853272620588541, "step": 1865 }, { "epoch": 0.45126964933494557, "grad_norm": 1.048548154442375, "learning_rate": 5.849757465703496e-07, "loss": 0.005496188532561064, "step": 1866 }, { "epoch": 0.45151148730350665, "grad_norm": 1.9508999505280418, "learning_rate": 5.845975283058897e-07, "loss": 0.0038184337317943573, "step": 1867 }, { "epoch": 0.45175332527206774, "grad_norm": 2.086033261515183, "learning_rate": 5.842192602019028e-07, "loss": 0.02124149352312088, "step": 1868 }, { "epoch": 0.4519951632406288, "grad_norm": 1.1861086815897586, "learning_rate": 5.838409424812411e-07, "loss": 0.0064680613577365875, "step": 1869 }, { "epoch": 0.45223700120918986, "grad_norm": 1.1373109325428477, "learning_rate": 5.834625753667855e-07, "loss": 0.005788464564830065, "step": 1870 }, { "epoch": 0.4524788391777509, "grad_norm": 2.1872395148885886, "learning_rate": 5.830841590814457e-07, "loss": 0.014744524843990803, "step": 1871 }, { "epoch": 0.452720677146312, "grad_norm": 2.2894966820817237, "learning_rate": 5.82705693848161e-07, "loss": 0.010422810912132263, "step": 1872 }, { "epoch": 0.452962515114873, "grad_norm": 2.1640639956720316, "learning_rate": 5.823271798898992e-07, "loss": 0.025758827105164528, "step": 1873 }, { "epoch": 0.4532043530834341, "grad_norm": 3.332197141076291, "learning_rate": 5.819486174296566e-07, "loss": 0.008016099222004414, "step": 1874 }, { "epoch": 0.4534461910519952, "grad_norm": 7.668710998957984, "learning_rate": 5.815700066904585e-07, "loss": 0.011422212235629559, "step": 1875 }, { "epoch": 0.4536880290205562, "grad_norm": 1.2113800353602595, "learning_rate": 5.811913478953586e-07, "loss": 0.003383275121450424, "step": 1876 }, { "epoch": 0.4539298669891173, "grad_norm": 7.085983781166983, "learning_rate": 5.808126412674387e-07, "loss": 0.010404628701508045, "step": 1877 }, { "epoch": 0.45417170495767833, "grad_norm": 1.9018170091554583, "learning_rate": 5.804338870298087e-07, "loss": 0.008534080348908901, "step": 1878 }, { "epoch": 0.4544135429262394, "grad_norm": 3.6439691510270484, "learning_rate": 5.800550854056068e-07, "loss": 0.014090088196098804, "step": 1879 }, { "epoch": 0.4546553808948005, "grad_norm": 1.9321019564353128, "learning_rate": 5.79676236617999e-07, "loss": 0.004702141974121332, "step": 1880 }, { "epoch": 0.45489721886336154, "grad_norm": 1.2255786063980014, "learning_rate": 5.792973408901791e-07, "loss": 0.006498456932604313, "step": 1881 }, { "epoch": 0.45513905683192263, "grad_norm": 0.9721546404030347, "learning_rate": 5.789183984453686e-07, "loss": 0.0071668173186481, "step": 1882 }, { "epoch": 0.45538089480048366, "grad_norm": 2.808743614388042, "learning_rate": 5.785394095068163e-07, "loss": 0.01423799991607666, "step": 1883 }, { "epoch": 0.45562273276904475, "grad_norm": 4.122290491798655, "learning_rate": 5.78160374297799e-07, "loss": 0.032029975205659866, "step": 1884 }, { "epoch": 0.4558645707376058, "grad_norm": 1.3613266377578197, "learning_rate": 5.777812930416199e-07, "loss": 0.007352312095463276, "step": 1885 }, { "epoch": 0.45610640870616687, "grad_norm": 1.8896808959711773, "learning_rate": 5.7740216596161e-07, "loss": 0.008404809050261974, "step": 1886 }, { "epoch": 0.45634824667472795, "grad_norm": 1.3026703310063374, "learning_rate": 5.77022993281127e-07, "loss": 0.0022506185341626406, "step": 1887 }, { "epoch": 0.456590084643289, "grad_norm": 3.071583057396184, "learning_rate": 5.766437752235555e-07, "loss": 0.011195621453225613, "step": 1888 }, { "epoch": 0.45683192261185007, "grad_norm": 2.181310871146405, "learning_rate": 5.762645120123069e-07, "loss": 0.014286184683442116, "step": 1889 }, { "epoch": 0.4570737605804111, "grad_norm": 1.6435632762884897, "learning_rate": 5.75885203870819e-07, "loss": 0.014015093445777893, "step": 1890 }, { "epoch": 0.4573155985489722, "grad_norm": 2.6781850529092, "learning_rate": 5.755058510225564e-07, "loss": 0.02353173866868019, "step": 1891 }, { "epoch": 0.4575574365175333, "grad_norm": 2.068909929324589, "learning_rate": 5.751264536910097e-07, "loss": 0.011903389357030392, "step": 1892 }, { "epoch": 0.4577992744860943, "grad_norm": 1.3986515330098646, "learning_rate": 5.747470120996962e-07, "loss": 0.006409552879631519, "step": 1893 }, { "epoch": 0.4580411124546554, "grad_norm": 2.359024772227941, "learning_rate": 5.743675264721586e-07, "loss": 0.015660205855965614, "step": 1894 }, { "epoch": 0.45828295042321643, "grad_norm": 0.8882657279094655, "learning_rate": 5.739879970319661e-07, "loss": 0.0060548619367182255, "step": 1895 }, { "epoch": 0.4585247883917775, "grad_norm": 5.073852854284334, "learning_rate": 5.736084240027134e-07, "loss": 0.007060847710818052, "step": 1896 }, { "epoch": 0.45876662636033855, "grad_norm": 5.500945234149898, "learning_rate": 5.732288076080211e-07, "loss": 0.00822035875171423, "step": 1897 }, { "epoch": 0.45900846432889963, "grad_norm": 2.777820486937661, "learning_rate": 5.728491480715349e-07, "loss": 0.026790393516421318, "step": 1898 }, { "epoch": 0.4592503022974607, "grad_norm": 2.238180053740053, "learning_rate": 5.724694456169266e-07, "loss": 0.02168724127113819, "step": 1899 }, { "epoch": 0.45949214026602175, "grad_norm": 1.4478101195825293, "learning_rate": 5.72089700467893e-07, "loss": 0.009914937429130077, "step": 1900 }, { "epoch": 0.45973397823458284, "grad_norm": 1.83924183847353, "learning_rate": 5.717099128481556e-07, "loss": 0.017816836014389992, "step": 1901 }, { "epoch": 0.4599758162031439, "grad_norm": 1.0239098318906776, "learning_rate": 5.713300829814615e-07, "loss": 0.002489091595634818, "step": 1902 }, { "epoch": 0.46021765417170496, "grad_norm": 9.984705729468079, "learning_rate": 5.709502110915828e-07, "loss": 0.05077647045254707, "step": 1903 }, { "epoch": 0.46045949214026605, "grad_norm": 1.8318410716657962, "learning_rate": 5.705702974023156e-07, "loss": 0.016462402418255806, "step": 1904 }, { "epoch": 0.4607013301088271, "grad_norm": 11.953083875981998, "learning_rate": 5.70190342137481e-07, "loss": 0.06812357157468796, "step": 1905 }, { "epoch": 0.46094316807738817, "grad_norm": 1.530806326721635, "learning_rate": 5.698103455209252e-07, "loss": 0.009111697785556316, "step": 1906 }, { "epoch": 0.4611850060459492, "grad_norm": 1.9554805665572177, "learning_rate": 5.694303077765179e-07, "loss": 0.007038243114948273, "step": 1907 }, { "epoch": 0.4614268440145103, "grad_norm": 1.8850834428562755, "learning_rate": 5.690502291281534e-07, "loss": 0.023184770718216896, "step": 1908 }, { "epoch": 0.4616686819830713, "grad_norm": 1.554055360577552, "learning_rate": 5.686701097997498e-07, "loss": 0.005668935365974903, "step": 1909 }, { "epoch": 0.4619105199516324, "grad_norm": 0.9167530455160081, "learning_rate": 5.682899500152501e-07, "loss": 0.0033146000932902098, "step": 1910 }, { "epoch": 0.4621523579201935, "grad_norm": 10.592604479736739, "learning_rate": 5.679097499986198e-07, "loss": 0.011595881544053555, "step": 1911 }, { "epoch": 0.4623941958887545, "grad_norm": 4.74249920723656, "learning_rate": 5.675295099738492e-07, "loss": 0.013300836086273193, "step": 1912 }, { "epoch": 0.4626360338573156, "grad_norm": 2.0463635762543695, "learning_rate": 5.671492301649514e-07, "loss": 0.007539201062172651, "step": 1913 }, { "epoch": 0.46287787182587664, "grad_norm": 0.5224198857405149, "learning_rate": 5.667689107959634e-07, "loss": 0.0029148366302251816, "step": 1914 }, { "epoch": 0.46311970979443773, "grad_norm": 1.35578656731644, "learning_rate": 5.663885520909456e-07, "loss": 0.013347170315682888, "step": 1915 }, { "epoch": 0.4633615477629988, "grad_norm": 2.055976890872007, "learning_rate": 5.66008154273981e-07, "loss": 0.017800530418753624, "step": 1916 }, { "epoch": 0.46360338573155985, "grad_norm": 3.9991991428032536, "learning_rate": 5.656277175691762e-07, "loss": 0.018659492954611778, "step": 1917 }, { "epoch": 0.46384522370012093, "grad_norm": 2.687707847753613, "learning_rate": 5.652472422006605e-07, "loss": 0.027064144611358643, "step": 1918 }, { "epoch": 0.46408706166868197, "grad_norm": 1.5222958297137394, "learning_rate": 5.648667283925859e-07, "loss": 0.003600421128794551, "step": 1919 }, { "epoch": 0.46432889963724305, "grad_norm": 1.037473759830322, "learning_rate": 5.644861763691272e-07, "loss": 0.003919678274542093, "step": 1920 }, { "epoch": 0.4645707376058041, "grad_norm": 1.9610894511300305, "learning_rate": 5.641055863544817e-07, "loss": 0.0053765843622386456, "step": 1921 }, { "epoch": 0.46481257557436517, "grad_norm": 4.459017420812482, "learning_rate": 5.637249585728689e-07, "loss": 0.031186401844024658, "step": 1922 }, { "epoch": 0.46505441354292626, "grad_norm": 3.451192621247646, "learning_rate": 5.633442932485307e-07, "loss": 0.0046721650287508965, "step": 1923 }, { "epoch": 0.4652962515114873, "grad_norm": 11.71089963540832, "learning_rate": 5.629635906057311e-07, "loss": 0.07488007098436356, "step": 1924 }, { "epoch": 0.4655380894800484, "grad_norm": 3.2929295655281794, "learning_rate": 5.62582850868756e-07, "loss": 0.004942882340401411, "step": 1925 }, { "epoch": 0.4657799274486094, "grad_norm": 3.0937553968363796, "learning_rate": 5.622020742619136e-07, "loss": 0.020114531740546227, "step": 1926 }, { "epoch": 0.4660217654171705, "grad_norm": 0.9882312935347641, "learning_rate": 5.618212610095329e-07, "loss": 0.005522423889487982, "step": 1927 }, { "epoch": 0.4662636033857316, "grad_norm": 5.92423599169935, "learning_rate": 5.614404113359655e-07, "loss": 0.007874765433371067, "step": 1928 }, { "epoch": 0.4665054413542926, "grad_norm": 3.9721568250732724, "learning_rate": 5.610595254655839e-07, "loss": 0.01450191717594862, "step": 1929 }, { "epoch": 0.4667472793228537, "grad_norm": 5.010229110027678, "learning_rate": 5.606786036227819e-07, "loss": 0.008833670057356358, "step": 1930 }, { "epoch": 0.46698911729141473, "grad_norm": 1.640695506708282, "learning_rate": 5.602976460319745e-07, "loss": 0.009047149680554867, "step": 1931 }, { "epoch": 0.4672309552599758, "grad_norm": 2.2253927897761203, "learning_rate": 5.599166529175983e-07, "loss": 0.013949013315141201, "step": 1932 }, { "epoch": 0.46747279322853685, "grad_norm": 5.814177164181354, "learning_rate": 5.595356245041099e-07, "loss": 0.023235460743308067, "step": 1933 }, { "epoch": 0.46771463119709794, "grad_norm": 0.8561485792848212, "learning_rate": 5.591545610159876e-07, "loss": 0.008218116126954556, "step": 1934 }, { "epoch": 0.46795646916565903, "grad_norm": 1.8514583277398726, "learning_rate": 5.587734626777299e-07, "loss": 0.007464256137609482, "step": 1935 }, { "epoch": 0.46819830713422006, "grad_norm": 1.9223441871180151, "learning_rate": 5.583923297138559e-07, "loss": 0.02427242510020733, "step": 1936 }, { "epoch": 0.46844014510278115, "grad_norm": 2.790554497848924, "learning_rate": 5.580111623489049e-07, "loss": 0.012607695534825325, "step": 1937 }, { "epoch": 0.4686819830713422, "grad_norm": 2.4586300271922252, "learning_rate": 5.576299608074368e-07, "loss": 0.00555461598560214, "step": 1938 }, { "epoch": 0.46892382103990327, "grad_norm": 1.1091363823890237, "learning_rate": 5.572487253140314e-07, "loss": 0.0058467513881623745, "step": 1939 }, { "epoch": 0.46916565900846435, "grad_norm": 2.0525157157091005, "learning_rate": 5.568674560932888e-07, "loss": 0.023189255967736244, "step": 1940 }, { "epoch": 0.4694074969770254, "grad_norm": 0.9506699375151619, "learning_rate": 5.564861533698287e-07, "loss": 0.003057472174987197, "step": 1941 }, { "epoch": 0.46964933494558647, "grad_norm": 0.8578571323706659, "learning_rate": 5.561048173682905e-07, "loss": 0.0049394466914236546, "step": 1942 }, { "epoch": 0.4698911729141475, "grad_norm": 10.901172625014171, "learning_rate": 5.557234483133335e-07, "loss": 0.003322698874399066, "step": 1943 }, { "epoch": 0.4701330108827086, "grad_norm": 4.768896378914926, "learning_rate": 5.553420464296361e-07, "loss": 0.015989987179636955, "step": 1944 }, { "epoch": 0.4703748488512696, "grad_norm": 1.5711662402421933, "learning_rate": 5.549606119418963e-07, "loss": 0.011423028074204922, "step": 1945 }, { "epoch": 0.4706166868198307, "grad_norm": 2.721297074489808, "learning_rate": 5.545791450748311e-07, "loss": 0.015080860815942287, "step": 1946 }, { "epoch": 0.4708585247883918, "grad_norm": 3.0712453625444724, "learning_rate": 5.541976460531772e-07, "loss": 0.003940451890230179, "step": 1947 }, { "epoch": 0.47110036275695283, "grad_norm": 2.2664943004336147, "learning_rate": 5.538161151016893e-07, "loss": 0.010864960961043835, "step": 1948 }, { "epoch": 0.4713422007255139, "grad_norm": 1.3354724711794035, "learning_rate": 5.534345524451414e-07, "loss": 0.0054395864717662334, "step": 1949 }, { "epoch": 0.47158403869407495, "grad_norm": 1.9723862485018282, "learning_rate": 5.530529583083261e-07, "loss": 0.021056463941931725, "step": 1950 }, { "epoch": 0.47182587666263603, "grad_norm": 2.6118871039946154, "learning_rate": 5.526713329160549e-07, "loss": 0.013990170322358608, "step": 1951 }, { "epoch": 0.4720677146311971, "grad_norm": 0.9317905789280743, "learning_rate": 5.522896764931572e-07, "loss": 0.006754912436008453, "step": 1952 }, { "epoch": 0.47230955259975815, "grad_norm": 1.3682115927200598, "learning_rate": 5.519079892644809e-07, "loss": 0.0028005142230540514, "step": 1953 }, { "epoch": 0.47255139056831924, "grad_norm": 2.9969666801634993, "learning_rate": 5.51526271454892e-07, "loss": 0.011155798099935055, "step": 1954 }, { "epoch": 0.4727932285368803, "grad_norm": 1.5231866121413309, "learning_rate": 5.511445232892745e-07, "loss": 0.010435210540890694, "step": 1955 }, { "epoch": 0.47303506650544136, "grad_norm": 1.2453677670842644, "learning_rate": 5.507627449925306e-07, "loss": 0.00918258260935545, "step": 1956 }, { "epoch": 0.4732769044740024, "grad_norm": 2.6346213009695156, "learning_rate": 5.503809367895798e-07, "loss": 0.01717308908700943, "step": 1957 }, { "epoch": 0.4735187424425635, "grad_norm": 1.7958488897963054, "learning_rate": 5.499990989053594e-07, "loss": 0.009527313522994518, "step": 1958 }, { "epoch": 0.47376058041112457, "grad_norm": 9.992162859224598, "learning_rate": 5.49617231564824e-07, "loss": 0.013804808259010315, "step": 1959 }, { "epoch": 0.4740024183796856, "grad_norm": 3.283614756010296, "learning_rate": 5.492353349929464e-07, "loss": 0.0037030528765171766, "step": 1960 }, { "epoch": 0.4742442563482467, "grad_norm": 1.8987372832792235, "learning_rate": 5.488534094147153e-07, "loss": 0.017420129850506783, "step": 1961 }, { "epoch": 0.4744860943168077, "grad_norm": 4.229386155952791, "learning_rate": 5.484714550551373e-07, "loss": 0.018803171813488007, "step": 1962 }, { "epoch": 0.4747279322853688, "grad_norm": 5.01886440197233, "learning_rate": 5.48089472139236e-07, "loss": 0.0056609660387039185, "step": 1963 }, { "epoch": 0.4749697702539299, "grad_norm": 2.319760864740171, "learning_rate": 5.477074608920515e-07, "loss": 0.008656710386276245, "step": 1964 }, { "epoch": 0.4752116082224909, "grad_norm": 1.2610743011396932, "learning_rate": 5.473254215386408e-07, "loss": 0.00324251945130527, "step": 1965 }, { "epoch": 0.475453446191052, "grad_norm": 2.313285757481044, "learning_rate": 5.469433543040774e-07, "loss": 0.013987488113343716, "step": 1966 }, { "epoch": 0.47569528415961304, "grad_norm": 12.021492056157935, "learning_rate": 5.46561259413451e-07, "loss": 0.014552563428878784, "step": 1967 }, { "epoch": 0.47593712212817413, "grad_norm": 12.541288437789625, "learning_rate": 5.461791370918681e-07, "loss": 0.006722328253090382, "step": 1968 }, { "epoch": 0.47617896009673516, "grad_norm": 1.0395844321830496, "learning_rate": 5.457969875644509e-07, "loss": 0.007586290594190359, "step": 1969 }, { "epoch": 0.47642079806529625, "grad_norm": 1.686873882410928, "learning_rate": 5.454148110563379e-07, "loss": 0.007571916561573744, "step": 1970 }, { "epoch": 0.47666263603385733, "grad_norm": 22.27206077111556, "learning_rate": 5.450326077926833e-07, "loss": 0.012168431654572487, "step": 1971 }, { "epoch": 0.47690447400241837, "grad_norm": 2.5064451455119303, "learning_rate": 5.44650377998657e-07, "loss": 0.009517057798802853, "step": 1972 }, { "epoch": 0.47714631197097945, "grad_norm": 5.615001571541827, "learning_rate": 5.442681218994448e-07, "loss": 0.011734874919056892, "step": 1973 }, { "epoch": 0.4773881499395405, "grad_norm": 1.515095868363086, "learning_rate": 5.438858397202481e-07, "loss": 0.005408324301242828, "step": 1974 }, { "epoch": 0.47762998790810157, "grad_norm": 2.1842364456221124, "learning_rate": 5.435035316862832e-07, "loss": 0.012782299891114235, "step": 1975 }, { "epoch": 0.47787182587666266, "grad_norm": 1.207033325567084, "learning_rate": 5.43121198022782e-07, "loss": 0.0031050255056470633, "step": 1976 }, { "epoch": 0.4781136638452237, "grad_norm": 2.147204692373527, "learning_rate": 5.427388389549912e-07, "loss": 0.015112968161702156, "step": 1977 }, { "epoch": 0.4783555018137848, "grad_norm": 3.829609300628564, "learning_rate": 5.423564547081728e-07, "loss": 0.015167760662734509, "step": 1978 }, { "epoch": 0.4785973397823458, "grad_norm": 1.5962856584832705, "learning_rate": 5.419740455076035e-07, "loss": 0.006225015036761761, "step": 1979 }, { "epoch": 0.4788391777509069, "grad_norm": 3.693130892315786, "learning_rate": 5.415916115785744e-07, "loss": 0.009282294660806656, "step": 1980 }, { "epoch": 0.479081015719468, "grad_norm": 6.083696351123049, "learning_rate": 5.412091531463918e-07, "loss": 0.018288739025592804, "step": 1981 }, { "epoch": 0.479322853688029, "grad_norm": 0.9583172816236104, "learning_rate": 5.40826670436376e-07, "loss": 0.009205508045852184, "step": 1982 }, { "epoch": 0.4795646916565901, "grad_norm": 4.229271682438242, "learning_rate": 5.404441636738615e-07, "loss": 0.030317524448037148, "step": 1983 }, { "epoch": 0.47980652962515113, "grad_norm": 2.764391435646022, "learning_rate": 5.400616330841973e-07, "loss": 0.0033941783476620913, "step": 1984 }, { "epoch": 0.4800483675937122, "grad_norm": 2.689672244765566, "learning_rate": 5.396790788927463e-07, "loss": 0.02265903912484646, "step": 1985 }, { "epoch": 0.48029020556227325, "grad_norm": 10.95875094710126, "learning_rate": 5.392965013248852e-07, "loss": 0.008603313937783241, "step": 1986 }, { "epoch": 0.48053204353083434, "grad_norm": 0.8126689714490988, "learning_rate": 5.389139006060046e-07, "loss": 0.006542096380144358, "step": 1987 }, { "epoch": 0.48077388149939543, "grad_norm": 1.3344097858423307, "learning_rate": 5.385312769615087e-07, "loss": 0.007109244354069233, "step": 1988 }, { "epoch": 0.48101571946795646, "grad_norm": 2.54520703179211, "learning_rate": 5.381486306168152e-07, "loss": 0.0214229803532362, "step": 1989 }, { "epoch": 0.48125755743651755, "grad_norm": 0.6109621749897395, "learning_rate": 5.377659617973554e-07, "loss": 0.0027828568127006292, "step": 1990 }, { "epoch": 0.4814993954050786, "grad_norm": 0.9530000323977607, "learning_rate": 5.373832707285733e-07, "loss": 0.01100683119148016, "step": 1991 }, { "epoch": 0.48174123337363967, "grad_norm": 3.0883413639559847, "learning_rate": 5.370005576359265e-07, "loss": 0.004609306808561087, "step": 1992 }, { "epoch": 0.48198307134220075, "grad_norm": 0.9293930544268937, "learning_rate": 5.366178227448855e-07, "loss": 0.004753440152853727, "step": 1993 }, { "epoch": 0.4822249093107618, "grad_norm": 2.877389675360317, "learning_rate": 5.362350662809334e-07, "loss": 0.01023457944393158, "step": 1994 }, { "epoch": 0.48246674727932287, "grad_norm": 0.9596062646220822, "learning_rate": 5.358522884695661e-07, "loss": 0.0021883889567106962, "step": 1995 }, { "epoch": 0.4827085852478839, "grad_norm": 2.9979246700711077, "learning_rate": 5.354694895362923e-07, "loss": 0.015410559251904488, "step": 1996 }, { "epoch": 0.482950423216445, "grad_norm": 0.9942426501917152, "learning_rate": 5.35086669706633e-07, "loss": 0.002696515992283821, "step": 1997 }, { "epoch": 0.483192261185006, "grad_norm": 3.148786631671511, "learning_rate": 5.347038292061213e-07, "loss": 0.019875485450029373, "step": 1998 }, { "epoch": 0.4834340991535671, "grad_norm": 1.422368917433921, "learning_rate": 5.343209682603028e-07, "loss": 0.013528150506317616, "step": 1999 }, { "epoch": 0.4836759371221282, "grad_norm": 1.7274864543225044, "learning_rate": 5.339380870947352e-07, "loss": 0.008074997924268246, "step": 2000 }, { "epoch": 0.48391777509068923, "grad_norm": 0.42322889576762257, "learning_rate": 5.335551859349876e-07, "loss": 0.001314452732913196, "step": 2001 }, { "epoch": 0.4841596130592503, "grad_norm": 0.30940704888088405, "learning_rate": 5.331722650066415e-07, "loss": 0.0009103795746341348, "step": 2002 }, { "epoch": 0.48440145102781135, "grad_norm": 1.134443405231329, "learning_rate": 5.327893245352893e-07, "loss": 0.007327725179493427, "step": 2003 }, { "epoch": 0.48464328899637243, "grad_norm": 2.0564031038470927, "learning_rate": 5.324063647465358e-07, "loss": 0.0023825967218726873, "step": 2004 }, { "epoch": 0.4848851269649335, "grad_norm": 3.2335103267357073, "learning_rate": 5.320233858659966e-07, "loss": 0.021195916458964348, "step": 2005 }, { "epoch": 0.48512696493349455, "grad_norm": 3.009298064012461, "learning_rate": 5.316403881192987e-07, "loss": 0.018627211451530457, "step": 2006 }, { "epoch": 0.48536880290205564, "grad_norm": 1.2816614846117704, "learning_rate": 5.312573717320803e-07, "loss": 0.009466508403420448, "step": 2007 }, { "epoch": 0.48561064087061667, "grad_norm": 13.774501769507651, "learning_rate": 5.308743369299904e-07, "loss": 0.020364562049508095, "step": 2008 }, { "epoch": 0.48585247883917776, "grad_norm": 9.147559821679277, "learning_rate": 5.304912839386889e-07, "loss": 0.04643095284700394, "step": 2009 }, { "epoch": 0.4860943168077388, "grad_norm": 9.103192045579847, "learning_rate": 5.301082129838464e-07, "loss": 0.01396886445581913, "step": 2010 }, { "epoch": 0.4863361547762999, "grad_norm": 2.8940771220249673, "learning_rate": 5.297251242911445e-07, "loss": 0.013690831139683723, "step": 2011 }, { "epoch": 0.48657799274486097, "grad_norm": 3.262905529345913, "learning_rate": 5.293420180862745e-07, "loss": 0.01383076049387455, "step": 2012 }, { "epoch": 0.486819830713422, "grad_norm": 1.3290332535730154, "learning_rate": 5.289588945949386e-07, "loss": 0.00304981367662549, "step": 2013 }, { "epoch": 0.4870616686819831, "grad_norm": 2.0744497398856447, "learning_rate": 5.285757540428487e-07, "loss": 0.005607844330370426, "step": 2014 }, { "epoch": 0.4873035066505441, "grad_norm": 6.850477965756761, "learning_rate": 5.281925966557275e-07, "loss": 0.014734542928636074, "step": 2015 }, { "epoch": 0.4875453446191052, "grad_norm": 4.0252385729546, "learning_rate": 5.278094226593069e-07, "loss": 0.004496993962675333, "step": 2016 }, { "epoch": 0.4877871825876663, "grad_norm": 2.1257839065036896, "learning_rate": 5.27426232279329e-07, "loss": 0.01598469913005829, "step": 2017 }, { "epoch": 0.4880290205562273, "grad_norm": 10.095540050686582, "learning_rate": 5.270430257415451e-07, "loss": 0.018000375479459763, "step": 2018 }, { "epoch": 0.4882708585247884, "grad_norm": 1.7032199607369891, "learning_rate": 5.266598032717166e-07, "loss": 0.008633661083877087, "step": 2019 }, { "epoch": 0.48851269649334944, "grad_norm": 5.217597642254164, "learning_rate": 5.262765650956138e-07, "loss": 0.01301747839897871, "step": 2020 }, { "epoch": 0.48875453446191053, "grad_norm": 3.474940412283206, "learning_rate": 5.258933114390165e-07, "loss": 0.009034569375216961, "step": 2021 }, { "epoch": 0.48899637243047156, "grad_norm": 1.119819167693878, "learning_rate": 5.255100425277137e-07, "loss": 0.006117657758295536, "step": 2022 }, { "epoch": 0.48923821039903265, "grad_norm": 3.44283835094608, "learning_rate": 5.251267585875033e-07, "loss": 0.008767258375883102, "step": 2023 }, { "epoch": 0.48948004836759373, "grad_norm": 1.5545108610206357, "learning_rate": 5.247434598441918e-07, "loss": 0.0035298080183565617, "step": 2024 }, { "epoch": 0.48972188633615477, "grad_norm": 1.628318070639983, "learning_rate": 5.243601465235949e-07, "loss": 0.005594469606876373, "step": 2025 }, { "epoch": 0.48996372430471585, "grad_norm": 0.9972195124139659, "learning_rate": 5.239768188515364e-07, "loss": 0.005254193674772978, "step": 2026 }, { "epoch": 0.4902055622732769, "grad_norm": 0.5751861485858396, "learning_rate": 5.235934770538487e-07, "loss": 0.005551420152187347, "step": 2027 }, { "epoch": 0.49044740024183797, "grad_norm": 0.6855076468046716, "learning_rate": 5.23210121356373e-07, "loss": 0.002568476600572467, "step": 2028 }, { "epoch": 0.49068923821039906, "grad_norm": 1.300827761697571, "learning_rate": 5.228267519849577e-07, "loss": 0.011652080342173576, "step": 2029 }, { "epoch": 0.4909310761789601, "grad_norm": 4.246105924159069, "learning_rate": 5.224433691654605e-07, "loss": 0.009162700735032558, "step": 2030 }, { "epoch": 0.4911729141475212, "grad_norm": 1.4282308545469122, "learning_rate": 5.22059973123746e-07, "loss": 0.005793328396975994, "step": 2031 }, { "epoch": 0.4914147521160822, "grad_norm": 2.88352989077531, "learning_rate": 5.216765640856871e-07, "loss": 0.006751646287739277, "step": 2032 }, { "epoch": 0.4916565900846433, "grad_norm": 1.0335672615299876, "learning_rate": 5.212931422771641e-07, "loss": 0.00748414546251297, "step": 2033 }, { "epoch": 0.49189842805320433, "grad_norm": 1.13176208940214, "learning_rate": 5.209097079240651e-07, "loss": 0.0031819369178265333, "step": 2034 }, { "epoch": 0.4921402660217654, "grad_norm": 1.2741707624801542, "learning_rate": 5.205262612522852e-07, "loss": 0.0077537670731544495, "step": 2035 }, { "epoch": 0.4923821039903265, "grad_norm": 2.59169548852771, "learning_rate": 5.20142802487727e-07, "loss": 0.02466355264186859, "step": 2036 }, { "epoch": 0.49262394195888753, "grad_norm": 0.8422944308872788, "learning_rate": 5.197593318563005e-07, "loss": 0.009069286286830902, "step": 2037 }, { "epoch": 0.4928657799274486, "grad_norm": 1.5315750632887697, "learning_rate": 5.193758495839221e-07, "loss": 0.007631488144397736, "step": 2038 }, { "epoch": 0.49310761789600965, "grad_norm": 3.711753015282079, "learning_rate": 5.189923558965156e-07, "loss": 0.008043293841183186, "step": 2039 }, { "epoch": 0.49334945586457074, "grad_norm": 2.8822770950113465, "learning_rate": 5.186088510200112e-07, "loss": 0.010170499794185162, "step": 2040 }, { "epoch": 0.49359129383313183, "grad_norm": 4.516997155740361, "learning_rate": 5.182253351803456e-07, "loss": 0.009494240395724773, "step": 2041 }, { "epoch": 0.49383313180169286, "grad_norm": 1.3888337920191633, "learning_rate": 5.178418086034625e-07, "loss": 0.009332416579127312, "step": 2042 }, { "epoch": 0.49407496977025395, "grad_norm": 1.602577631570022, "learning_rate": 5.174582715153112e-07, "loss": 0.012220828793942928, "step": 2043 }, { "epoch": 0.494316807738815, "grad_norm": 3.211640824516965, "learning_rate": 5.170747241418477e-07, "loss": 0.003848031163215637, "step": 2044 }, { "epoch": 0.49455864570737607, "grad_norm": 9.054029256096012, "learning_rate": 5.16691166709034e-07, "loss": 0.0065882327035069466, "step": 2045 }, { "epoch": 0.4948004836759371, "grad_norm": 1.9061711952159588, "learning_rate": 5.163075994428379e-07, "loss": 0.007450792472809553, "step": 2046 }, { "epoch": 0.4950423216444982, "grad_norm": 2.273135745767965, "learning_rate": 5.159240225692329e-07, "loss": 0.004098345059901476, "step": 2047 }, { "epoch": 0.49528415961305927, "grad_norm": 2.2705624644661593, "learning_rate": 5.155404363141985e-07, "loss": 0.02198759652674198, "step": 2048 }, { "epoch": 0.4955259975816203, "grad_norm": 1.5472353702520536, "learning_rate": 5.151568409037193e-07, "loss": 0.00406441418454051, "step": 2049 }, { "epoch": 0.4957678355501814, "grad_norm": 7.723097255094576, "learning_rate": 5.147732365637858e-07, "loss": 0.004149241838604212, "step": 2050 }, { "epoch": 0.4960096735187424, "grad_norm": 1.2872624473416172, "learning_rate": 5.143896235203932e-07, "loss": 0.010643037967383862, "step": 2051 }, { "epoch": 0.4962515114873035, "grad_norm": 1.1229109931372954, "learning_rate": 5.140060019995422e-07, "loss": 0.01403124537318945, "step": 2052 }, { "epoch": 0.4964933494558646, "grad_norm": 1.4706521413025264, "learning_rate": 5.136223722272385e-07, "loss": 0.005986087955534458, "step": 2053 }, { "epoch": 0.49673518742442563, "grad_norm": 25.511358691353596, "learning_rate": 5.132387344294925e-07, "loss": 0.01873341202735901, "step": 2054 }, { "epoch": 0.4969770253929867, "grad_norm": 0.5537419043853741, "learning_rate": 5.128550888323192e-07, "loss": 0.0023502602707594633, "step": 2055 }, { "epoch": 0.49721886336154775, "grad_norm": 1.6327557446034318, "learning_rate": 5.124714356617388e-07, "loss": 0.012533937580883503, "step": 2056 }, { "epoch": 0.49746070133010883, "grad_norm": 1.8371347209618847, "learning_rate": 5.120877751437752e-07, "loss": 0.008640650659799576, "step": 2057 }, { "epoch": 0.49770253929866987, "grad_norm": 2.368856427123867, "learning_rate": 5.11704107504457e-07, "loss": 0.013656086288392544, "step": 2058 }, { "epoch": 0.49794437726723095, "grad_norm": 0.6245030161828875, "learning_rate": 5.11320432969817e-07, "loss": 0.004901897627860308, "step": 2059 }, { "epoch": 0.49818621523579204, "grad_norm": 2.0106112680009374, "learning_rate": 5.109367517658921e-07, "loss": 0.012660863809287548, "step": 2060 }, { "epoch": 0.49842805320435307, "grad_norm": 1.089442112027129, "learning_rate": 5.105530641187231e-07, "loss": 0.00846066139638424, "step": 2061 }, { "epoch": 0.49866989117291416, "grad_norm": 1.9812581221066952, "learning_rate": 5.101693702543544e-07, "loss": 0.016108958050608635, "step": 2062 }, { "epoch": 0.4989117291414752, "grad_norm": 5.382872791715212, "learning_rate": 5.097856703988343e-07, "loss": 0.01107096578925848, "step": 2063 }, { "epoch": 0.4991535671100363, "grad_norm": 4.176593065627478, "learning_rate": 5.094019647782147e-07, "loss": 0.01118592731654644, "step": 2064 }, { "epoch": 0.49939540507859737, "grad_norm": 4.270364397521984, "learning_rate": 5.090182536185505e-07, "loss": 0.031998783349990845, "step": 2065 }, { "epoch": 0.4996372430471584, "grad_norm": 1.4892872130597752, "learning_rate": 5.086345371459001e-07, "loss": 0.006030125077813864, "step": 2066 }, { "epoch": 0.4998790810157195, "grad_norm": 1.3575402842672137, "learning_rate": 5.082508155863252e-07, "loss": 0.010444999672472477, "step": 2067 }, { "epoch": 0.5001209189842806, "grad_norm": 2.9008547516738985, "learning_rate": 5.078670891658904e-07, "loss": 0.010801920667290688, "step": 2068 }, { "epoch": 0.5003627569528416, "grad_norm": 2.101908288971325, "learning_rate": 5.074833581106629e-07, "loss": 0.004192173946648836, "step": 2069 }, { "epoch": 0.5006045949214026, "grad_norm": 0.7422005384153761, "learning_rate": 5.070996226467128e-07, "loss": 0.0029048908036202192, "step": 2070 }, { "epoch": 0.5008464328899638, "grad_norm": 5.17507432468597, "learning_rate": 5.06715883000113e-07, "loss": 0.02348451316356659, "step": 2071 }, { "epoch": 0.5010882708585248, "grad_norm": 2.876110998142769, "learning_rate": 5.063321393969386e-07, "loss": 0.011338836513459682, "step": 2072 }, { "epoch": 0.5013301088270858, "grad_norm": 1.4931674128229033, "learning_rate": 5.059483920632673e-07, "loss": 0.00888950377702713, "step": 2073 }, { "epoch": 0.5015719467956469, "grad_norm": 3.356329955288112, "learning_rate": 5.055646412251785e-07, "loss": 0.003356030909344554, "step": 2074 }, { "epoch": 0.501813784764208, "grad_norm": 0.48850062130875577, "learning_rate": 5.051808871087541e-07, "loss": 0.0018231210997328162, "step": 2075 }, { "epoch": 0.502055622732769, "grad_norm": 1.8643256959467607, "learning_rate": 5.04797129940078e-07, "loss": 0.01280525978654623, "step": 2076 }, { "epoch": 0.5022974607013301, "grad_norm": 1.5260611403764934, "learning_rate": 5.044133699452355e-07, "loss": 0.007862417958676815, "step": 2077 }, { "epoch": 0.5025392986698912, "grad_norm": 1.3015751421905633, "learning_rate": 5.040296073503139e-07, "loss": 0.008505478501319885, "step": 2078 }, { "epoch": 0.5027811366384523, "grad_norm": 2.4572149144786963, "learning_rate": 5.036458423814019e-07, "loss": 0.01075849961489439, "step": 2079 }, { "epoch": 0.5030229746070133, "grad_norm": 5.488629726138034, "learning_rate": 5.032620752645895e-07, "loss": 0.0013016888406127691, "step": 2080 }, { "epoch": 0.5032648125755743, "grad_norm": 1.735146154400088, "learning_rate": 5.028783062259681e-07, "loss": 0.002225738251581788, "step": 2081 }, { "epoch": 0.5035066505441355, "grad_norm": 5.256666236191183, "learning_rate": 5.024945354916303e-07, "loss": 0.0025484024081379175, "step": 2082 }, { "epoch": 0.5037484885126965, "grad_norm": 1.9021440108221148, "learning_rate": 5.021107632876696e-07, "loss": 0.018630674108862877, "step": 2083 }, { "epoch": 0.5039903264812575, "grad_norm": 0.45583198366231764, "learning_rate": 5.017269898401803e-07, "loss": 0.0018306513084098697, "step": 2084 }, { "epoch": 0.5042321644498187, "grad_norm": 5.262789643734722, "learning_rate": 5.013432153752575e-07, "loss": 0.004207140300422907, "step": 2085 }, { "epoch": 0.5044740024183797, "grad_norm": 2.06312870705596, "learning_rate": 5.00959440118997e-07, "loss": 0.008948764763772488, "step": 2086 }, { "epoch": 0.5047158403869407, "grad_norm": 3.6190472186798766, "learning_rate": 5.00575664297495e-07, "loss": 0.014150175265967846, "step": 2087 }, { "epoch": 0.5049576783555018, "grad_norm": 0.35778187198050154, "learning_rate": 5.001918881368478e-07, "loss": 0.0014636249979957938, "step": 2088 }, { "epoch": 0.5051995163240629, "grad_norm": 2.014248744138177, "learning_rate": 4.998081118631523e-07, "loss": 0.01737532950937748, "step": 2089 }, { "epoch": 0.5054413542926239, "grad_norm": 3.19968783186684, "learning_rate": 4.994243357025051e-07, "loss": 0.018412848934531212, "step": 2090 }, { "epoch": 0.505683192261185, "grad_norm": 0.6582119913293077, "learning_rate": 4.990405598810028e-07, "loss": 0.003032025881111622, "step": 2091 }, { "epoch": 0.5059250302297461, "grad_norm": 11.63884410837311, "learning_rate": 4.986567846247425e-07, "loss": 0.005698408931493759, "step": 2092 }, { "epoch": 0.5061668681983071, "grad_norm": 0.8531385227249054, "learning_rate": 4.982730101598196e-07, "loss": 0.004974285140633583, "step": 2093 }, { "epoch": 0.5064087061668682, "grad_norm": 2.190101114440323, "learning_rate": 4.978892367123304e-07, "loss": 0.008315306156873703, "step": 2094 }, { "epoch": 0.5066505441354293, "grad_norm": 2.3132571270677635, "learning_rate": 4.975054645083697e-07, "loss": 0.025684688240289688, "step": 2095 }, { "epoch": 0.5068923821039903, "grad_norm": 1.5313906772384978, "learning_rate": 4.971216937740319e-07, "loss": 0.005346179008483887, "step": 2096 }, { "epoch": 0.5071342200725514, "grad_norm": 1.1476555314673285, "learning_rate": 4.967379247354106e-07, "loss": 0.004988674074411392, "step": 2097 }, { "epoch": 0.5073760580411124, "grad_norm": 0.9463328306995824, "learning_rate": 4.963541576185982e-07, "loss": 0.005240770522505045, "step": 2098 }, { "epoch": 0.5076178960096736, "grad_norm": 3.7295938549686665, "learning_rate": 4.959703926496861e-07, "loss": 0.008783296681940556, "step": 2099 }, { "epoch": 0.5078597339782346, "grad_norm": 1.6725369824407545, "learning_rate": 4.955866300547646e-07, "loss": 0.006856387946754694, "step": 2100 }, { "epoch": 0.5081015719467956, "grad_norm": 2.8879752843869295, "learning_rate": 4.95202870059922e-07, "loss": 0.008989709429442883, "step": 2101 }, { "epoch": 0.5083434099153568, "grad_norm": 2.5648879312505595, "learning_rate": 4.948191128912457e-07, "loss": 0.009419436566531658, "step": 2102 }, { "epoch": 0.5085852478839178, "grad_norm": 1.9310945090285603, "learning_rate": 4.944353587748216e-07, "loss": 0.012041396461427212, "step": 2103 }, { "epoch": 0.5088270858524788, "grad_norm": 0.5305679910788439, "learning_rate": 4.940516079367326e-07, "loss": 0.0017971761990338564, "step": 2104 }, { "epoch": 0.5090689238210399, "grad_norm": 1.143236209803116, "learning_rate": 4.936678606030613e-07, "loss": 0.00274258223362267, "step": 2105 }, { "epoch": 0.509310761789601, "grad_norm": 5.971204801167459, "learning_rate": 4.93284116999887e-07, "loss": 0.013757756911218166, "step": 2106 }, { "epoch": 0.509552599758162, "grad_norm": 1.9650551960637577, "learning_rate": 4.929003773532871e-07, "loss": 0.011737561784684658, "step": 2107 }, { "epoch": 0.5097944377267231, "grad_norm": 2.051164793211015, "learning_rate": 4.925166418893373e-07, "loss": 0.021147890016436577, "step": 2108 }, { "epoch": 0.5100362756952842, "grad_norm": 1.4181409139467667, "learning_rate": 4.921329108341096e-07, "loss": 0.0076390886679291725, "step": 2109 }, { "epoch": 0.5102781136638452, "grad_norm": 1.555661695157201, "learning_rate": 4.917491844136746e-07, "loss": 0.006989732384681702, "step": 2110 }, { "epoch": 0.5105199516324063, "grad_norm": 2.270346673850833, "learning_rate": 4.913654628540999e-07, "loss": 0.005028528161346912, "step": 2111 }, { "epoch": 0.5107617896009673, "grad_norm": 0.28598607990592617, "learning_rate": 4.909817463814495e-07, "loss": 0.0010731378570199013, "step": 2112 }, { "epoch": 0.5110036275695284, "grad_norm": 1.2515023419064149, "learning_rate": 4.905980352217853e-07, "loss": 0.013069278560578823, "step": 2113 }, { "epoch": 0.5112454655380895, "grad_norm": 71.32223724830037, "learning_rate": 4.902143296011657e-07, "loss": 0.021469319239258766, "step": 2114 }, { "epoch": 0.5114873035066505, "grad_norm": 1.5546042368312842, "learning_rate": 4.898306297456455e-07, "loss": 0.016708074137568474, "step": 2115 }, { "epoch": 0.5117291414752116, "grad_norm": 3.9051446616402856, "learning_rate": 4.89446935881277e-07, "loss": 0.0055262381210923195, "step": 2116 }, { "epoch": 0.5119709794437727, "grad_norm": 5.141377685732552, "learning_rate": 4.890632482341079e-07, "loss": 0.025758642703294754, "step": 2117 }, { "epoch": 0.5122128174123337, "grad_norm": 4.105834456803036, "learning_rate": 4.886795670301829e-07, "loss": 0.022147951647639275, "step": 2118 }, { "epoch": 0.5124546553808949, "grad_norm": 3.007059151823381, "learning_rate": 4.88295892495543e-07, "loss": 0.010106120258569717, "step": 2119 }, { "epoch": 0.5126964933494559, "grad_norm": 1.482911457204492, "learning_rate": 4.879122248562248e-07, "loss": 0.004586155526340008, "step": 2120 }, { "epoch": 0.5129383313180169, "grad_norm": 1.1938876356847772, "learning_rate": 4.875285643382612e-07, "loss": 0.004338033031672239, "step": 2121 }, { "epoch": 0.513180169286578, "grad_norm": 3.7430942511814673, "learning_rate": 4.871449111676808e-07, "loss": 0.008072778582572937, "step": 2122 }, { "epoch": 0.5134220072551391, "grad_norm": 2.8016455105184193, "learning_rate": 4.867612655705075e-07, "loss": 0.002757600275799632, "step": 2123 }, { "epoch": 0.5136638452237001, "grad_norm": 2.2745952679896995, "learning_rate": 4.863776277727616e-07, "loss": 0.010869033634662628, "step": 2124 }, { "epoch": 0.5139056831922612, "grad_norm": 9.317900697719715, "learning_rate": 4.859939980004577e-07, "loss": 0.02182195708155632, "step": 2125 }, { "epoch": 0.5141475211608223, "grad_norm": 6.374757204373877, "learning_rate": 4.856103764796067e-07, "loss": 0.0033937746193259954, "step": 2126 }, { "epoch": 0.5143893591293833, "grad_norm": 8.873951934341298, "learning_rate": 4.852267634362144e-07, "loss": 0.006746204104274511, "step": 2127 }, { "epoch": 0.5146311970979444, "grad_norm": 5.803300231672152, "learning_rate": 4.848431590962806e-07, "loss": 0.013562599197030067, "step": 2128 }, { "epoch": 0.5148730350665054, "grad_norm": 5.583499911816566, "learning_rate": 4.844595636858015e-07, "loss": 0.0041518365032970905, "step": 2129 }, { "epoch": 0.5151148730350665, "grad_norm": 2.425167445003734, "learning_rate": 4.840759774307672e-07, "loss": 0.017879892140626907, "step": 2130 }, { "epoch": 0.5153567110036276, "grad_norm": 0.7118590200411002, "learning_rate": 4.836924005571622e-07, "loss": 0.0022846765350550413, "step": 2131 }, { "epoch": 0.5155985489721886, "grad_norm": 1.8020499642536, "learning_rate": 4.833088332909659e-07, "loss": 0.012738103047013283, "step": 2132 }, { "epoch": 0.5158403869407497, "grad_norm": 1.9292453094761735, "learning_rate": 4.829252758581523e-07, "loss": 0.007991532795131207, "step": 2133 }, { "epoch": 0.5160822249093108, "grad_norm": 0.8689527022813027, "learning_rate": 4.825417284846889e-07, "loss": 0.0058484161272645, "step": 2134 }, { "epoch": 0.5163240628778718, "grad_norm": 2.9754186235080424, "learning_rate": 4.821581913965377e-07, "loss": 0.02083270251750946, "step": 2135 }, { "epoch": 0.5165659008464328, "grad_norm": 1.9640571353238097, "learning_rate": 4.817746648196544e-07, "loss": 0.006881494075059891, "step": 2136 }, { "epoch": 0.516807738814994, "grad_norm": 3.3063706015839123, "learning_rate": 4.813911489799889e-07, "loss": 0.017514921724796295, "step": 2137 }, { "epoch": 0.517049576783555, "grad_norm": 0.7899072116914467, "learning_rate": 4.810076441034845e-07, "loss": 0.0073286923579871655, "step": 2138 }, { "epoch": 0.517291414752116, "grad_norm": 102.65512030918421, "learning_rate": 4.806241504160779e-07, "loss": 0.01940048672258854, "step": 2139 }, { "epoch": 0.5175332527206772, "grad_norm": 4.546583069420043, "learning_rate": 4.802406681436995e-07, "loss": 0.02314862050116062, "step": 2140 }, { "epoch": 0.5177750906892382, "grad_norm": 0.42391164885995103, "learning_rate": 4.79857197512273e-07, "loss": 0.0015039261197671294, "step": 2141 }, { "epoch": 0.5180169286577992, "grad_norm": 1.5553527279402604, "learning_rate": 4.794737387477149e-07, "loss": 0.00815717875957489, "step": 2142 }, { "epoch": 0.5182587666263604, "grad_norm": 2.1595757953219294, "learning_rate": 4.790902920759348e-07, "loss": 0.008983477018773556, "step": 2143 }, { "epoch": 0.5185006045949214, "grad_norm": 3.2831970809611892, "learning_rate": 4.787068577228359e-07, "loss": 0.006305484566837549, "step": 2144 }, { "epoch": 0.5187424425634825, "grad_norm": 9.825886566453367, "learning_rate": 4.783234359143129e-07, "loss": 0.019799377769231796, "step": 2145 }, { "epoch": 0.5189842805320435, "grad_norm": 1.0717219970899254, "learning_rate": 4.779400268762539e-07, "loss": 0.0032959766685962677, "step": 2146 }, { "epoch": 0.5192261185006046, "grad_norm": 1.5890836281697025, "learning_rate": 4.775566308345394e-07, "loss": 0.00827763881534338, "step": 2147 }, { "epoch": 0.5194679564691657, "grad_norm": 9.9745195561091, "learning_rate": 4.771732480150421e-07, "loss": 0.011047981679439545, "step": 2148 }, { "epoch": 0.5197097944377267, "grad_norm": 4.125022002191004, "learning_rate": 4.767898786436272e-07, "loss": 0.008630692027509212, "step": 2149 }, { "epoch": 0.5199516324062878, "grad_norm": 5.151607471941369, "learning_rate": 4.764065229461513e-07, "loss": 0.010904504917562008, "step": 2150 }, { "epoch": 0.5201934703748489, "grad_norm": 1.320136822924213, "learning_rate": 4.7602318114846364e-07, "loss": 0.012002482078969479, "step": 2151 }, { "epoch": 0.5204353083434099, "grad_norm": 1.5373336384707639, "learning_rate": 4.756398534764052e-07, "loss": 0.0035241448786109686, "step": 2152 }, { "epoch": 0.5206771463119709, "grad_norm": 1.4564796877246262, "learning_rate": 4.752565401558082e-07, "loss": 0.0066191959194839, "step": 2153 }, { "epoch": 0.5209189842805321, "grad_norm": 1.5910446186229354, "learning_rate": 4.7487324141249675e-07, "loss": 0.014001238159835339, "step": 2154 }, { "epoch": 0.5211608222490931, "grad_norm": 13.865490057124378, "learning_rate": 4.744899574722863e-07, "loss": 0.006901332177221775, "step": 2155 }, { "epoch": 0.5214026602176541, "grad_norm": 2.1147739075878476, "learning_rate": 4.7410668856098347e-07, "loss": 0.012554625049233437, "step": 2156 }, { "epoch": 0.5216444981862153, "grad_norm": 9.44199549850162, "learning_rate": 4.737234349043864e-07, "loss": 0.0016597937792539597, "step": 2157 }, { "epoch": 0.5218863361547763, "grad_norm": 1.7073024349498476, "learning_rate": 4.7334019672828353e-07, "loss": 0.02590765431523323, "step": 2158 }, { "epoch": 0.5221281741233373, "grad_norm": 1.895082407480454, "learning_rate": 4.729569742584549e-07, "loss": 0.010185874998569489, "step": 2159 }, { "epoch": 0.5223700120918985, "grad_norm": 0.6373746796471914, "learning_rate": 4.7257376772067115e-07, "loss": 0.002777277259156108, "step": 2160 }, { "epoch": 0.5226118500604595, "grad_norm": 1.5229921954225532, "learning_rate": 4.7219057734069314e-07, "loss": 0.008682879619300365, "step": 2161 }, { "epoch": 0.5228536880290205, "grad_norm": 12.297415486754605, "learning_rate": 4.7180740334427246e-07, "loss": 0.014094310812652111, "step": 2162 }, { "epoch": 0.5230955259975816, "grad_norm": 0.5119029405866629, "learning_rate": 4.714242459571513e-07, "loss": 0.001620698720216751, "step": 2163 }, { "epoch": 0.5233373639661427, "grad_norm": 2.1985402443537105, "learning_rate": 4.710411054050615e-07, "loss": 0.003812571754679084, "step": 2164 }, { "epoch": 0.5235792019347038, "grad_norm": 1.6124390111352485, "learning_rate": 4.706579819137256e-07, "loss": 0.009376012720167637, "step": 2165 }, { "epoch": 0.5238210399032648, "grad_norm": 3.3859922950219614, "learning_rate": 4.7027487570885555e-07, "loss": 0.009469578973948956, "step": 2166 }, { "epoch": 0.5240628778718259, "grad_norm": 1.4483333364935367, "learning_rate": 4.698917870161534e-07, "loss": 0.007426024880260229, "step": 2167 }, { "epoch": 0.524304715840387, "grad_norm": 0.23706662482662277, "learning_rate": 4.6950871606131113e-07, "loss": 0.0008404185064136982, "step": 2168 }, { "epoch": 0.524546553808948, "grad_norm": 2.5651427610896294, "learning_rate": 4.6912566307000956e-07, "loss": 0.013288840651512146, "step": 2169 }, { "epoch": 0.524788391777509, "grad_norm": 2.328733724688181, "learning_rate": 4.6874262826791965e-07, "loss": 0.0069128782488405704, "step": 2170 }, { "epoch": 0.5250302297460702, "grad_norm": 17.80394496219189, "learning_rate": 4.6835961188070135e-07, "loss": 0.0022976973559707403, "step": 2171 }, { "epoch": 0.5252720677146312, "grad_norm": 2.877646916783181, "learning_rate": 4.6797661413400335e-07, "loss": 0.013584441505372524, "step": 2172 }, { "epoch": 0.5255139056831922, "grad_norm": 0.500275974996551, "learning_rate": 4.6759363525346405e-07, "loss": 0.0020992092322558165, "step": 2173 }, { "epoch": 0.5257557436517534, "grad_norm": 1.5502504684610607, "learning_rate": 4.6721067546471073e-07, "loss": 0.010188973508775234, "step": 2174 }, { "epoch": 0.5259975816203144, "grad_norm": 2.3427664598078435, "learning_rate": 4.6682773499335857e-07, "loss": 0.018503746017813683, "step": 2175 }, { "epoch": 0.5262394195888754, "grad_norm": 2.402043728565432, "learning_rate": 4.6644481406501246e-07, "loss": 0.007625562138855457, "step": 2176 }, { "epoch": 0.5264812575574365, "grad_norm": 1.8046544755545235, "learning_rate": 4.660619129052648e-07, "loss": 0.009391717612743378, "step": 2177 }, { "epoch": 0.5267230955259976, "grad_norm": 1.4936027271753851, "learning_rate": 4.656790317396971e-07, "loss": 0.011298303492367268, "step": 2178 }, { "epoch": 0.5269649334945586, "grad_norm": 7.270755236592206, "learning_rate": 4.6529617079387877e-07, "loss": 0.040451884269714355, "step": 2179 }, { "epoch": 0.5272067714631197, "grad_norm": 1.6383702168207896, "learning_rate": 4.6491333029336706e-07, "loss": 0.0029080677777528763, "step": 2180 }, { "epoch": 0.5274486094316808, "grad_norm": 1.7751723540987123, "learning_rate": 4.645305104637076e-07, "loss": 0.005842707585543394, "step": 2181 }, { "epoch": 0.5276904474002418, "grad_norm": 5.312059273492731, "learning_rate": 4.64147711530434e-07, "loss": 0.04892439767718315, "step": 2182 }, { "epoch": 0.5279322853688029, "grad_norm": 1.8819210583810586, "learning_rate": 4.6376493371906667e-07, "loss": 0.01416635699570179, "step": 2183 }, { "epoch": 0.528174123337364, "grad_norm": 8.123335909542556, "learning_rate": 4.6338217725511464e-07, "loss": 0.011267605237662792, "step": 2184 }, { "epoch": 0.528415961305925, "grad_norm": 2.141600377812735, "learning_rate": 4.6299944236407344e-07, "loss": 0.0018629418918862939, "step": 2185 }, { "epoch": 0.5286577992744861, "grad_norm": 3.20726474368836, "learning_rate": 4.626167292714267e-07, "loss": 0.008978385478258133, "step": 2186 }, { "epoch": 0.5288996372430471, "grad_norm": 2.4603716624297296, "learning_rate": 4.6223403820264476e-07, "loss": 0.015019935555756092, "step": 2187 }, { "epoch": 0.5291414752116083, "grad_norm": 1.4581927188990227, "learning_rate": 4.618513693831848e-07, "loss": 0.014339059591293335, "step": 2188 }, { "epoch": 0.5293833131801693, "grad_norm": 2.981458895003998, "learning_rate": 4.614687230384913e-07, "loss": 0.015387973748147488, "step": 2189 }, { "epoch": 0.5296251511487303, "grad_norm": 2.188984320739968, "learning_rate": 4.610860993939955e-07, "loss": 0.004472175147384405, "step": 2190 }, { "epoch": 0.5298669891172915, "grad_norm": 1.4660682545897958, "learning_rate": 4.607034986751148e-07, "loss": 0.01410586852580309, "step": 2191 }, { "epoch": 0.5301088270858525, "grad_norm": 1.411506957344901, "learning_rate": 4.6032092110725373e-07, "loss": 0.010911998338997364, "step": 2192 }, { "epoch": 0.5303506650544135, "grad_norm": 1.7922433679791596, "learning_rate": 4.599383669158027e-07, "loss": 0.011475924402475357, "step": 2193 }, { "epoch": 0.5305925030229746, "grad_norm": 1.8197139255632981, "learning_rate": 4.595558363261385e-07, "loss": 0.01112773921340704, "step": 2194 }, { "epoch": 0.5308343409915357, "grad_norm": 3.182480414945562, "learning_rate": 4.5917332956362415e-07, "loss": 0.044217269867658615, "step": 2195 }, { "epoch": 0.5310761789600967, "grad_norm": 1.0581583788982716, "learning_rate": 4.587908468536082e-07, "loss": 0.009640065021812916, "step": 2196 }, { "epoch": 0.5313180169286578, "grad_norm": 1.4797798282109047, "learning_rate": 4.584083884214255e-07, "loss": 0.011117758229374886, "step": 2197 }, { "epoch": 0.5315598548972189, "grad_norm": 1.4768776229452145, "learning_rate": 4.5802595449239663e-07, "loss": 0.008239923045039177, "step": 2198 }, { "epoch": 0.5318016928657799, "grad_norm": 0.5761150622419727, "learning_rate": 4.5764354529182716e-07, "loss": 0.004155202768743038, "step": 2199 }, { "epoch": 0.532043530834341, "grad_norm": 2.707627162946668, "learning_rate": 4.5726116104500867e-07, "loss": 0.007692489307373762, "step": 2200 }, { "epoch": 0.532285368802902, "grad_norm": 0.7318499739641802, "learning_rate": 4.568788019772181e-07, "loss": 0.0021714724134653807, "step": 2201 }, { "epoch": 0.5325272067714631, "grad_norm": 0.9095209077439876, "learning_rate": 4.564964683137168e-07, "loss": 0.002716007409617305, "step": 2202 }, { "epoch": 0.5327690447400242, "grad_norm": 1.444005894356713, "learning_rate": 4.561141602797518e-07, "loss": 0.0057130903005599976, "step": 2203 }, { "epoch": 0.5330108827085852, "grad_norm": 3.9890362039082645, "learning_rate": 4.557318781005552e-07, "loss": 0.01439773477613926, "step": 2204 }, { "epoch": 0.5332527206771464, "grad_norm": 1.773486146642107, "learning_rate": 4.5534962200134304e-07, "loss": 0.006793975830078125, "step": 2205 }, { "epoch": 0.5334945586457074, "grad_norm": 1.1316890397913988, "learning_rate": 4.5496739220731694e-07, "loss": 0.005200402345508337, "step": 2206 }, { "epoch": 0.5337363966142684, "grad_norm": 6.229036280190056, "learning_rate": 4.5458518894366216e-07, "loss": 0.01765989139676094, "step": 2207 }, { "epoch": 0.5339782345828296, "grad_norm": 0.4370824661969023, "learning_rate": 4.54203012435549e-07, "loss": 0.0017072021728381515, "step": 2208 }, { "epoch": 0.5342200725513906, "grad_norm": 0.3512536344617216, "learning_rate": 4.538208629081319e-07, "loss": 0.00138959102332592, "step": 2209 }, { "epoch": 0.5344619105199516, "grad_norm": 2.1994957753870032, "learning_rate": 4.53438740586549e-07, "loss": 0.012127428315579891, "step": 2210 }, { "epoch": 0.5347037484885127, "grad_norm": 9.610092922069745, "learning_rate": 4.5305664569592255e-07, "loss": 0.03493771329522133, "step": 2211 }, { "epoch": 0.5349455864570738, "grad_norm": 2.2265320012397045, "learning_rate": 4.5267457846135923e-07, "loss": 0.006982346065342426, "step": 2212 }, { "epoch": 0.5351874244256348, "grad_norm": 3.8664624534321974, "learning_rate": 4.5229253910794845e-07, "loss": 0.0028108262922614813, "step": 2213 }, { "epoch": 0.5354292623941959, "grad_norm": 1.2709718228906504, "learning_rate": 4.519105278607641e-07, "loss": 0.005377421621233225, "step": 2214 }, { "epoch": 0.535671100362757, "grad_norm": 1.38700969351973, "learning_rate": 4.515285449448627e-07, "loss": 0.009556067176163197, "step": 2215 }, { "epoch": 0.535912938331318, "grad_norm": 2.599315591419496, "learning_rate": 4.511465905852847e-07, "loss": 0.01119113527238369, "step": 2216 }, { "epoch": 0.5361547762998791, "grad_norm": 8.343843969075479, "learning_rate": 4.5076466500705375e-07, "loss": 0.04686035215854645, "step": 2217 }, { "epoch": 0.5363966142684401, "grad_norm": 1.4706249438100831, "learning_rate": 4.503827684351759e-07, "loss": 0.006200333125889301, "step": 2218 }, { "epoch": 0.5366384522370012, "grad_norm": 1.0265402079183938, "learning_rate": 4.500009010946406e-07, "loss": 0.0046156286261975765, "step": 2219 }, { "epoch": 0.5368802902055623, "grad_norm": 24.83133015810998, "learning_rate": 4.496190632104203e-07, "loss": 0.01649009995162487, "step": 2220 }, { "epoch": 0.5371221281741233, "grad_norm": 2.182539938944038, "learning_rate": 4.492372550074694e-07, "loss": 0.012728539295494556, "step": 2221 }, { "epoch": 0.5373639661426844, "grad_norm": 1.8169582067813566, "learning_rate": 4.488554767107254e-07, "loss": 0.007436856627464294, "step": 2222 }, { "epoch": 0.5376058041112455, "grad_norm": 0.7625953085550309, "learning_rate": 4.484737285451081e-07, "loss": 0.0044312807731330395, "step": 2223 }, { "epoch": 0.5378476420798065, "grad_norm": 1.3795095264348278, "learning_rate": 4.480920107355191e-07, "loss": 0.007225909736007452, "step": 2224 }, { "epoch": 0.5380894800483675, "grad_norm": 4.379753447984319, "learning_rate": 4.477103235068429e-07, "loss": 0.004242399707436562, "step": 2225 }, { "epoch": 0.5383313180169287, "grad_norm": 1.948813983394343, "learning_rate": 4.473286670839451e-07, "loss": 0.01539524644613266, "step": 2226 }, { "epoch": 0.5385731559854897, "grad_norm": 2.1939381464344985, "learning_rate": 4.4694704169167385e-07, "loss": 0.01173942256718874, "step": 2227 }, { "epoch": 0.5388149939540507, "grad_norm": 3.0523627113546605, "learning_rate": 4.465654475548588e-07, "loss": 0.0073815020732581615, "step": 2228 }, { "epoch": 0.5390568319226119, "grad_norm": 3.225681598796069, "learning_rate": 4.4618388489831084e-07, "loss": 0.0077844164334237576, "step": 2229 }, { "epoch": 0.5392986698911729, "grad_norm": 1.0409265561283145, "learning_rate": 4.4580235394682274e-07, "loss": 0.0032366805244237185, "step": 2230 }, { "epoch": 0.539540507859734, "grad_norm": 1.9227241744702543, "learning_rate": 4.454208549251688e-07, "loss": 0.009608741849660873, "step": 2231 }, { "epoch": 0.5397823458282951, "grad_norm": 1.3523747612536727, "learning_rate": 4.450393880581036e-07, "loss": 0.00213138316757977, "step": 2232 }, { "epoch": 0.5400241837968561, "grad_norm": 0.6634650658334206, "learning_rate": 4.446579535703639e-07, "loss": 0.0026973355561494827, "step": 2233 }, { "epoch": 0.5402660217654172, "grad_norm": 4.249423152160833, "learning_rate": 4.442765516866665e-07, "loss": 0.04125981405377388, "step": 2234 }, { "epoch": 0.5405078597339782, "grad_norm": 4.195282076616139, "learning_rate": 4.4389518263170943e-07, "loss": 0.010837746784090996, "step": 2235 }, { "epoch": 0.5407496977025393, "grad_norm": 5.829285305974023, "learning_rate": 4.435138466301714e-07, "loss": 0.03168770298361778, "step": 2236 }, { "epoch": 0.5409915356711004, "grad_norm": 1.2566996512518633, "learning_rate": 4.4313254390671117e-07, "loss": 0.007754642516374588, "step": 2237 }, { "epoch": 0.5412333736396614, "grad_norm": 4.3850640282919615, "learning_rate": 4.4275127468596845e-07, "loss": 0.004543952643871307, "step": 2238 }, { "epoch": 0.5414752116082225, "grad_norm": 2.711654976818814, "learning_rate": 4.4237003919256335e-07, "loss": 0.006715252995491028, "step": 2239 }, { "epoch": 0.5417170495767836, "grad_norm": 3.4202887735976595, "learning_rate": 4.4198883765109513e-07, "loss": 0.009204794652760029, "step": 2240 }, { "epoch": 0.5419588875453446, "grad_norm": 2.3425014844235124, "learning_rate": 4.4160767028614415e-07, "loss": 0.01720310002565384, "step": 2241 }, { "epoch": 0.5422007255139056, "grad_norm": 1.8238604883438159, "learning_rate": 4.4122653732227007e-07, "loss": 0.013056701980531216, "step": 2242 }, { "epoch": 0.5424425634824668, "grad_norm": 10.77298247844379, "learning_rate": 4.4084543898401226e-07, "loss": 0.014982372522354126, "step": 2243 }, { "epoch": 0.5426844014510278, "grad_norm": 2.72170804511792, "learning_rate": 4.4046437549589015e-07, "loss": 0.02657724916934967, "step": 2244 }, { "epoch": 0.5429262394195888, "grad_norm": 8.733381905505077, "learning_rate": 4.400833470824018e-07, "loss": 0.016571035608649254, "step": 2245 }, { "epoch": 0.54316807738815, "grad_norm": 0.5314285717449362, "learning_rate": 4.397023539680254e-07, "loss": 0.0013594769407063723, "step": 2246 }, { "epoch": 0.543409915356711, "grad_norm": 0.623716826542851, "learning_rate": 4.3932139637721826e-07, "loss": 0.0020721449982374907, "step": 2247 }, { "epoch": 0.543651753325272, "grad_norm": 2.8143169072614858, "learning_rate": 4.389404745344161e-07, "loss": 0.032020293176174164, "step": 2248 }, { "epoch": 0.5438935912938331, "grad_norm": 1.1263074138060318, "learning_rate": 4.385595886640344e-07, "loss": 0.003855775808915496, "step": 2249 }, { "epoch": 0.5441354292623942, "grad_norm": 7.395954172482127, "learning_rate": 4.38178738990467e-07, "loss": 0.01570163480937481, "step": 2250 }, { "epoch": 0.5443772672309553, "grad_norm": 1.2999117504721502, "learning_rate": 4.377979257380865e-07, "loss": 0.0038300328887999058, "step": 2251 }, { "epoch": 0.5446191051995163, "grad_norm": 4.486302848380811, "learning_rate": 4.3741714913124383e-07, "loss": 0.03744574263691902, "step": 2252 }, { "epoch": 0.5448609431680774, "grad_norm": 5.277729256321808, "learning_rate": 4.37036409394269e-07, "loss": 0.01566879265010357, "step": 2253 }, { "epoch": 0.5451027811366385, "grad_norm": 0.8720054801180613, "learning_rate": 4.3665570675146935e-07, "loss": 0.003953771200031042, "step": 2254 }, { "epoch": 0.5453446191051995, "grad_norm": 121.11192247555012, "learning_rate": 4.3627504142713124e-07, "loss": 0.0021097457502037287, "step": 2255 }, { "epoch": 0.5455864570737606, "grad_norm": 1.5993044440000166, "learning_rate": 4.3589441364551834e-07, "loss": 0.009773158468306065, "step": 2256 }, { "epoch": 0.5458282950423217, "grad_norm": 1.5207496246208283, "learning_rate": 4.3551382363087264e-07, "loss": 0.013898918405175209, "step": 2257 }, { "epoch": 0.5460701330108827, "grad_norm": 0.9904275981047799, "learning_rate": 4.3513327160741406e-07, "loss": 0.004218466579914093, "step": 2258 }, { "epoch": 0.5463119709794437, "grad_norm": 6.775080896575988, "learning_rate": 4.3475275779933953e-07, "loss": 0.01559709943830967, "step": 2259 }, { "epoch": 0.5465538089480049, "grad_norm": 1.954700665661973, "learning_rate": 4.343722824308237e-07, "loss": 0.006707057356834412, "step": 2260 }, { "epoch": 0.5467956469165659, "grad_norm": 0.93847222097301, "learning_rate": 4.3399184572601905e-07, "loss": 0.002807276789098978, "step": 2261 }, { "epoch": 0.5470374848851269, "grad_norm": 56.5279598099948, "learning_rate": 4.3361144790905443e-07, "loss": 0.012656367383897305, "step": 2262 }, { "epoch": 0.5472793228536881, "grad_norm": 2.018886094655754, "learning_rate": 4.332310892040365e-07, "loss": 0.0022940144408494234, "step": 2263 }, { "epoch": 0.5475211608222491, "grad_norm": 11.12704059213866, "learning_rate": 4.3285076983504865e-07, "loss": 0.010682093910872936, "step": 2264 }, { "epoch": 0.5477629987908101, "grad_norm": 1.003871939433547, "learning_rate": 4.324704900261508e-07, "loss": 0.009149372577667236, "step": 2265 }, { "epoch": 0.5480048367593712, "grad_norm": 6.961041038021549, "learning_rate": 4.320902500013802e-07, "loss": 0.007756874896585941, "step": 2266 }, { "epoch": 0.5482466747279323, "grad_norm": 2.545930735051008, "learning_rate": 4.3171004998475003e-07, "loss": 0.007862013764679432, "step": 2267 }, { "epoch": 0.5484885126964933, "grad_norm": 4.052464117292689, "learning_rate": 4.313298902002501e-07, "loss": 0.006793415639549494, "step": 2268 }, { "epoch": 0.5487303506650544, "grad_norm": 2.0466538360246314, "learning_rate": 4.309497708718468e-07, "loss": 0.007937371730804443, "step": 2269 }, { "epoch": 0.5489721886336155, "grad_norm": 4.498640616913888, "learning_rate": 4.3056969222348214e-07, "loss": 0.007838219404220581, "step": 2270 }, { "epoch": 0.5492140266021766, "grad_norm": 1.5400492894563396, "learning_rate": 4.301896544790747e-07, "loss": 0.006152440793812275, "step": 2271 }, { "epoch": 0.5494558645707376, "grad_norm": 1.2657432689688495, "learning_rate": 4.298096578625189e-07, "loss": 0.00823363196104765, "step": 2272 }, { "epoch": 0.5496977025392986, "grad_norm": 0.7558277548280876, "learning_rate": 4.294297025976845e-07, "loss": 0.0036743711680173874, "step": 2273 }, { "epoch": 0.5499395405078598, "grad_norm": 83.82579041260776, "learning_rate": 4.290497889084173e-07, "loss": 0.005966815631836653, "step": 2274 }, { "epoch": 0.5501813784764208, "grad_norm": 1.2403251758962672, "learning_rate": 4.2866991701853843e-07, "loss": 0.007735170423984528, "step": 2275 }, { "epoch": 0.5504232164449818, "grad_norm": 2.7786473006528922, "learning_rate": 4.2829008715184434e-07, "loss": 0.010687959380447865, "step": 2276 }, { "epoch": 0.550665054413543, "grad_norm": 4.031460523252346, "learning_rate": 4.279102995321071e-07, "loss": 0.24999187886714935, "step": 2277 }, { "epoch": 0.550906892382104, "grad_norm": 2.1064031124036915, "learning_rate": 4.2753055438307326e-07, "loss": 0.006696998607367277, "step": 2278 }, { "epoch": 0.551148730350665, "grad_norm": 3.300604012622449, "learning_rate": 4.27150851928465e-07, "loss": 0.018892524763941765, "step": 2279 }, { "epoch": 0.5513905683192262, "grad_norm": 3.6743126848338528, "learning_rate": 4.26771192391979e-07, "loss": 0.011490820907056332, "step": 2280 }, { "epoch": 0.5516324062877872, "grad_norm": 6.443071374757842, "learning_rate": 4.263915759972865e-07, "loss": 0.00513662351295352, "step": 2281 }, { "epoch": 0.5518742442563482, "grad_norm": 1.7749628090005842, "learning_rate": 4.260120029680339e-07, "loss": 0.004767226055264473, "step": 2282 }, { "epoch": 0.5521160822249093, "grad_norm": 4.046385349497119, "learning_rate": 4.256324735278415e-07, "loss": 0.017466895282268524, "step": 2283 }, { "epoch": 0.5523579201934704, "grad_norm": 3.273598560988554, "learning_rate": 4.2525298790030386e-07, "loss": 0.023785654455423355, "step": 2284 }, { "epoch": 0.5525997581620314, "grad_norm": 0.6381042364328031, "learning_rate": 4.248735463089904e-07, "loss": 0.0013274396769702435, "step": 2285 }, { "epoch": 0.5528415961305925, "grad_norm": 2.479271902838656, "learning_rate": 4.244941489774437e-07, "loss": 0.008940608240664005, "step": 2286 }, { "epoch": 0.5530834340991536, "grad_norm": 4.415721828540861, "learning_rate": 4.24114796129181e-07, "loss": 0.0074286810122430325, "step": 2287 }, { "epoch": 0.5533252720677146, "grad_norm": 4.5877369458461805, "learning_rate": 4.2373548798769325e-07, "loss": 0.007428416050970554, "step": 2288 }, { "epoch": 0.5535671100362757, "grad_norm": 3.74821550410919, "learning_rate": 4.233562247764445e-07, "loss": 0.026188770309090614, "step": 2289 }, { "epoch": 0.5538089480048367, "grad_norm": 0.6294528811938142, "learning_rate": 4.22977006718873e-07, "loss": 0.0022214348427951336, "step": 2290 }, { "epoch": 0.5540507859733979, "grad_norm": 0.9882849711020627, "learning_rate": 4.2259783403839e-07, "loss": 0.004844804760068655, "step": 2291 }, { "epoch": 0.5542926239419589, "grad_norm": 2.207175115513072, "learning_rate": 4.222187069583801e-07, "loss": 0.018375877290964127, "step": 2292 }, { "epoch": 0.5545344619105199, "grad_norm": 6.913265748746823, "learning_rate": 4.2183962570220094e-07, "loss": 0.031015703454613686, "step": 2293 }, { "epoch": 0.5547762998790811, "grad_norm": 2.0396109189814364, "learning_rate": 4.2146059049318365e-07, "loss": 0.011068996973335743, "step": 2294 }, { "epoch": 0.5550181378476421, "grad_norm": 0.7228415501096773, "learning_rate": 4.210816015546314e-07, "loss": 0.003998195752501488, "step": 2295 }, { "epoch": 0.5552599758162031, "grad_norm": 4.480471020171518, "learning_rate": 4.2070265910982106e-07, "loss": 0.002593080746009946, "step": 2296 }, { "epoch": 0.5555018137847642, "grad_norm": 3.9271834275885906, "learning_rate": 4.2032376338200105e-07, "loss": 0.026061033830046654, "step": 2297 }, { "epoch": 0.5557436517533253, "grad_norm": 0.520757892412136, "learning_rate": 4.1994491459439327e-07, "loss": 0.0015521958703175187, "step": 2298 }, { "epoch": 0.5559854897218863, "grad_norm": 1.1494626123184508, "learning_rate": 4.1956611297019135e-07, "loss": 0.004256068263202906, "step": 2299 }, { "epoch": 0.5562273276904474, "grad_norm": 1.4294061250367542, "learning_rate": 4.191873587325614e-07, "loss": 0.002460300689563155, "step": 2300 }, { "epoch": 0.5564691656590085, "grad_norm": 1.4462020712536976, "learning_rate": 4.188086521046413e-07, "loss": 0.0053014084696769714, "step": 2301 }, { "epoch": 0.5567110036275695, "grad_norm": 2.8926325007587397, "learning_rate": 4.184299933095414e-07, "loss": 0.014791942201554775, "step": 2302 }, { "epoch": 0.5569528415961306, "grad_norm": 3.820969973583209, "learning_rate": 4.1805138257034333e-07, "loss": 0.008467663079500198, "step": 2303 }, { "epoch": 0.5571946795646917, "grad_norm": 4.139234884819153, "learning_rate": 4.1767282011010096e-07, "loss": 0.01103928592056036, "step": 2304 }, { "epoch": 0.5574365175332527, "grad_norm": 4.579613174271688, "learning_rate": 4.1729430615183897e-07, "loss": 0.004500003065913916, "step": 2305 }, { "epoch": 0.5576783555018138, "grad_norm": 5.052304151589532, "learning_rate": 4.1691584091855424e-07, "loss": 0.01218867115676403, "step": 2306 }, { "epoch": 0.5579201934703748, "grad_norm": 1.6467031467316058, "learning_rate": 4.1653742463321454e-07, "loss": 0.007508444134145975, "step": 2307 }, { "epoch": 0.558162031438936, "grad_norm": 0.9568845761315328, "learning_rate": 4.161590575187588e-07, "loss": 0.005422201473265886, "step": 2308 }, { "epoch": 0.558403869407497, "grad_norm": 13.070252076749494, "learning_rate": 4.15780739798097e-07, "loss": 0.00688293157145381, "step": 2309 }, { "epoch": 0.558645707376058, "grad_norm": 6.1001834932603956, "learning_rate": 4.1540247169411046e-07, "loss": 0.01431330293416977, "step": 2310 }, { "epoch": 0.5588875453446192, "grad_norm": 2.11785929335284, "learning_rate": 4.150242534296503e-07, "loss": 0.011980640701949596, "step": 2311 }, { "epoch": 0.5591293833131802, "grad_norm": 13.260650966996042, "learning_rate": 4.146460852275392e-07, "loss": 0.04656738042831421, "step": 2312 }, { "epoch": 0.5593712212817412, "grad_norm": 5.853185023513176, "learning_rate": 4.142679673105702e-07, "loss": 0.007762209977954626, "step": 2313 }, { "epoch": 0.5596130592503022, "grad_norm": 0.9630149456855113, "learning_rate": 4.1388989990150607e-07, "loss": 0.0042963833548128605, "step": 2314 }, { "epoch": 0.5598548972188634, "grad_norm": 3.5800019581234857, "learning_rate": 4.135118832230806e-07, "loss": 0.008749394677579403, "step": 2315 }, { "epoch": 0.5600967351874244, "grad_norm": 1.3204125400491427, "learning_rate": 4.131339174979972e-07, "loss": 0.0031381454318761826, "step": 2316 }, { "epoch": 0.5603385731559855, "grad_norm": 0.5095020347888213, "learning_rate": 4.1275600294892937e-07, "loss": 0.002067659981548786, "step": 2317 }, { "epoch": 0.5605804111245466, "grad_norm": 29.282623077318103, "learning_rate": 4.123781397985209e-07, "loss": 0.0015540597960352898, "step": 2318 }, { "epoch": 0.5608222490931076, "grad_norm": 5.375862696528994, "learning_rate": 4.1200032826938444e-07, "loss": 0.028084000572562218, "step": 2319 }, { "epoch": 0.5610640870616687, "grad_norm": 2.3840101106692093, "learning_rate": 4.1162256858410294e-07, "loss": 0.007764254696667194, "step": 2320 }, { "epoch": 0.5613059250302297, "grad_norm": 14.712356678190567, "learning_rate": 4.112448609652287e-07, "loss": 0.00305054965429008, "step": 2321 }, { "epoch": 0.5615477629987908, "grad_norm": 6.529276086700303, "learning_rate": 4.108672056352829e-07, "loss": 0.008379476144909859, "step": 2322 }, { "epoch": 0.5617896009673519, "grad_norm": 0.5748073182095716, "learning_rate": 4.104896028167566e-07, "loss": 0.002435829257592559, "step": 2323 }, { "epoch": 0.5620314389359129, "grad_norm": 1.5099026830087732, "learning_rate": 4.1011205273210947e-07, "loss": 0.006871446967124939, "step": 2324 }, { "epoch": 0.562273276904474, "grad_norm": 0.5061929574647305, "learning_rate": 4.097345556037698e-07, "loss": 0.002355773700401187, "step": 2325 }, { "epoch": 0.5625151148730351, "grad_norm": 0.6114344363496228, "learning_rate": 4.0935711165413566e-07, "loss": 0.0041200825944542885, "step": 2326 }, { "epoch": 0.5627569528415961, "grad_norm": 2.3209701917910763, "learning_rate": 4.0897972110557275e-07, "loss": 0.005414687097072601, "step": 2327 }, { "epoch": 0.5629987908101572, "grad_norm": 9.958287986498851, "learning_rate": 4.0860238418041596e-07, "loss": 0.0196392685174942, "step": 2328 }, { "epoch": 0.5632406287787183, "grad_norm": 1.1325331619603953, "learning_rate": 4.082251011009686e-07, "loss": 0.001430350705049932, "step": 2329 }, { "epoch": 0.5634824667472793, "grad_norm": 5.08250615359713, "learning_rate": 4.0784787208950174e-07, "loss": 0.004375682678073645, "step": 2330 }, { "epoch": 0.5637243047158403, "grad_norm": 1.3871867503751483, "learning_rate": 4.074706973682551e-07, "loss": 0.007853860035538673, "step": 2331 }, { "epoch": 0.5639661426844015, "grad_norm": 4.46185340268657, "learning_rate": 4.0709357715943625e-07, "loss": 0.013951479457318783, "step": 2332 }, { "epoch": 0.5642079806529625, "grad_norm": 0.4224001850363652, "learning_rate": 4.0671651168522035e-07, "loss": 0.0015941864112392068, "step": 2333 }, { "epoch": 0.5644498186215235, "grad_norm": 1.4364181250685897, "learning_rate": 4.063395011677511e-07, "loss": 0.006009246688336134, "step": 2334 }, { "epoch": 0.5646916565900847, "grad_norm": 1.1766449232176648, "learning_rate": 4.059625458291386e-07, "loss": 0.005885516759008169, "step": 2335 }, { "epoch": 0.5649334945586457, "grad_norm": 3.941981117819702, "learning_rate": 4.0558564589146173e-07, "loss": 0.006140188779681921, "step": 2336 }, { "epoch": 0.5651753325272068, "grad_norm": 1.2437621965846146, "learning_rate": 4.0520880157676607e-07, "loss": 0.006285582669079304, "step": 2337 }, { "epoch": 0.5654171704957678, "grad_norm": 44.04175093630189, "learning_rate": 4.048320131070643e-07, "loss": 0.005524017848074436, "step": 2338 }, { "epoch": 0.5656590084643289, "grad_norm": 0.15145872449675848, "learning_rate": 4.0445528070433663e-07, "loss": 0.000454901804914698, "step": 2339 }, { "epoch": 0.56590084643289, "grad_norm": 14.572580435415274, "learning_rate": 4.0407860459053006e-07, "loss": 0.004831135272979736, "step": 2340 }, { "epoch": 0.566142684401451, "grad_norm": 0.3661841765376974, "learning_rate": 4.0370198498755803e-07, "loss": 0.0014752753777429461, "step": 2341 }, { "epoch": 0.5663845223700121, "grad_norm": 0.945873356920825, "learning_rate": 4.033254221173013e-07, "loss": 0.0027955721598118544, "step": 2342 }, { "epoch": 0.5666263603385732, "grad_norm": 1.0137888418675687, "learning_rate": 4.0294891620160716e-07, "loss": 0.00651592155918479, "step": 2343 }, { "epoch": 0.5668681983071342, "grad_norm": 0.8925025263223699, "learning_rate": 4.0257246746228876e-07, "loss": 0.0029980973340570927, "step": 2344 }, { "epoch": 0.5671100362756953, "grad_norm": 1.8144762999241775, "learning_rate": 4.021960761211263e-07, "loss": 0.006261315196752548, "step": 2345 }, { "epoch": 0.5673518742442564, "grad_norm": 1.654993542766236, "learning_rate": 4.018197423998654e-07, "loss": 0.003272443311288953, "step": 2346 }, { "epoch": 0.5675937122128174, "grad_norm": 0.18352909578920718, "learning_rate": 4.0144346652021847e-07, "loss": 0.0008368836715817451, "step": 2347 }, { "epoch": 0.5678355501813784, "grad_norm": 2.5381159736278542, "learning_rate": 4.0106724870386363e-07, "loss": 0.006584499962627888, "step": 2348 }, { "epoch": 0.5680773881499396, "grad_norm": 0.6824910133888643, "learning_rate": 4.006910891724442e-07, "loss": 0.005285496357828379, "step": 2349 }, { "epoch": 0.5683192261185006, "grad_norm": 1.6394503489146506, "learning_rate": 4.003149881475699e-07, "loss": 0.0016045544762164354, "step": 2350 }, { "epoch": 0.5685610640870616, "grad_norm": 18.762015878974072, "learning_rate": 3.9993894585081607e-07, "loss": 0.0339023694396019, "step": 2351 }, { "epoch": 0.5688029020556228, "grad_norm": 1.3095581828561822, "learning_rate": 3.995629625037225e-07, "loss": 0.0013667599996551871, "step": 2352 }, { "epoch": 0.5690447400241838, "grad_norm": 1.7991561866387245, "learning_rate": 3.991870383277952e-07, "loss": 0.014493267051875591, "step": 2353 }, { "epoch": 0.5692865779927448, "grad_norm": 1.9375383121084553, "learning_rate": 3.988111735445051e-07, "loss": 0.004657557234168053, "step": 2354 }, { "epoch": 0.5695284159613059, "grad_norm": 16.64205242666325, "learning_rate": 3.984353683752878e-07, "loss": 0.01536609511822462, "step": 2355 }, { "epoch": 0.569770253929867, "grad_norm": 14.957409688447916, "learning_rate": 3.98059623041544e-07, "loss": 0.006437452044337988, "step": 2356 }, { "epoch": 0.570012091898428, "grad_norm": 1.9195424862711057, "learning_rate": 3.9768393776463923e-07, "loss": 0.006705466192215681, "step": 2357 }, { "epoch": 0.5702539298669891, "grad_norm": 0.46951154223863484, "learning_rate": 3.9730831276590336e-07, "loss": 0.001739474362693727, "step": 2358 }, { "epoch": 0.5704957678355502, "grad_norm": 1.9114682456976064, "learning_rate": 3.969327482666313e-07, "loss": 0.004461544565856457, "step": 2359 }, { "epoch": 0.5707376058041113, "grad_norm": 3.8387137625709125, "learning_rate": 3.9655724448808157e-07, "loss": 0.00634576752781868, "step": 2360 }, { "epoch": 0.5709794437726723, "grad_norm": 2.2142436047795115, "learning_rate": 3.961818016514775e-07, "loss": 0.0037384566385298967, "step": 2361 }, { "epoch": 0.5712212817412333, "grad_norm": 2.954749425980197, "learning_rate": 3.9580641997800656e-07, "loss": 0.01653810404241085, "step": 2362 }, { "epoch": 0.5714631197097945, "grad_norm": 3.0374423072644805, "learning_rate": 3.954310996888196e-07, "loss": 0.014961219392716885, "step": 2363 }, { "epoch": 0.5717049576783555, "grad_norm": 2.1272731087229144, "learning_rate": 3.9505584100503174e-07, "loss": 0.011659598909318447, "step": 2364 }, { "epoch": 0.5719467956469165, "grad_norm": 1.5992718316006451, "learning_rate": 3.9468064414772174e-07, "loss": 0.027650324627757072, "step": 2365 }, { "epoch": 0.5721886336154777, "grad_norm": 3.769425051869815, "learning_rate": 3.9430550933793184e-07, "loss": 0.009431784972548485, "step": 2366 }, { "epoch": 0.5724304715840387, "grad_norm": 0.8453243564198654, "learning_rate": 3.939304367966681e-07, "loss": 0.002343925181776285, "step": 2367 }, { "epoch": 0.5726723095525997, "grad_norm": 9.5283299293132, "learning_rate": 3.9355542674489916e-07, "loss": 0.007653389126062393, "step": 2368 }, { "epoch": 0.5729141475211609, "grad_norm": 1.2575375781030516, "learning_rate": 3.9318047940355754e-07, "loss": 0.007723644375801086, "step": 2369 }, { "epoch": 0.5731559854897219, "grad_norm": 0.5403129183340741, "learning_rate": 3.928055949935387e-07, "loss": 0.001134726800955832, "step": 2370 }, { "epoch": 0.5733978234582829, "grad_norm": 5.442382256204033, "learning_rate": 3.9243077373570066e-07, "loss": 0.008645433001220226, "step": 2371 }, { "epoch": 0.573639661426844, "grad_norm": 2.1141120615510705, "learning_rate": 3.920560158508644e-07, "loss": 0.0019011556869372725, "step": 2372 }, { "epoch": 0.5738814993954051, "grad_norm": 0.9897380932886528, "learning_rate": 3.916813215598139e-07, "loss": 0.004554008599370718, "step": 2373 }, { "epoch": 0.5741233373639661, "grad_norm": 2.721779987284366, "learning_rate": 3.913066910832951e-07, "loss": 0.012673281133174896, "step": 2374 }, { "epoch": 0.5743651753325272, "grad_norm": 1.7729607081862875, "learning_rate": 3.90932124642017e-07, "loss": 0.012333600781857967, "step": 2375 }, { "epoch": 0.5746070133010883, "grad_norm": 4.482108126338816, "learning_rate": 3.9055762245665005e-07, "loss": 0.008919094689190388, "step": 2376 }, { "epoch": 0.5748488512696494, "grad_norm": 4.488369687844908, "learning_rate": 3.901831847478275e-07, "loss": 0.015267436392605305, "step": 2377 }, { "epoch": 0.5750906892382104, "grad_norm": 14.276666944538574, "learning_rate": 3.898088117361448e-07, "loss": 0.016993964090943336, "step": 2378 }, { "epoch": 0.5753325272067714, "grad_norm": 1.6782180143185312, "learning_rate": 3.8943450364215827e-07, "loss": 0.019171318039298058, "step": 2379 }, { "epoch": 0.5755743651753326, "grad_norm": 3.521616220372251, "learning_rate": 3.890602606863871e-07, "loss": 0.010692787356674671, "step": 2380 }, { "epoch": 0.5758162031438936, "grad_norm": 1.8033030629078588, "learning_rate": 3.886860830893117e-07, "loss": 0.006897612940520048, "step": 2381 }, { "epoch": 0.5760580411124546, "grad_norm": 1.3990970817900366, "learning_rate": 3.8831197107137334e-07, "loss": 0.005804137792438269, "step": 2382 }, { "epoch": 0.5762998790810158, "grad_norm": 2.8023535564298148, "learning_rate": 3.8793792485297584e-07, "loss": 0.013263091444969177, "step": 2383 }, { "epoch": 0.5765417170495768, "grad_norm": 2.752799365105146, "learning_rate": 3.8756394465448306e-07, "loss": 0.01564108394086361, "step": 2384 }, { "epoch": 0.5767835550181378, "grad_norm": 2.0650665621619346, "learning_rate": 3.871900306962209e-07, "loss": 0.015441874042153358, "step": 2385 }, { "epoch": 0.5770253929866989, "grad_norm": 1.272989179070698, "learning_rate": 3.86816183198476e-07, "loss": 0.006521080620586872, "step": 2386 }, { "epoch": 0.57726723095526, "grad_norm": 1.6673594244520518, "learning_rate": 3.8644240238149537e-07, "loss": 0.007566571235656738, "step": 2387 }, { "epoch": 0.577509068923821, "grad_norm": 5.4151975603054225, "learning_rate": 3.8606868846548735e-07, "loss": 0.020950553938746452, "step": 2388 }, { "epoch": 0.5777509068923821, "grad_norm": 2.1958702988042478, "learning_rate": 3.856950416706206e-07, "loss": 0.01964942365884781, "step": 2389 }, { "epoch": 0.5779927448609432, "grad_norm": 0.4991990920114097, "learning_rate": 3.853214622170239e-07, "loss": 0.0009246303816325963, "step": 2390 }, { "epoch": 0.5782345828295042, "grad_norm": 1.0851798625203077, "learning_rate": 3.849479503247869e-07, "loss": 0.004814524669200182, "step": 2391 }, { "epoch": 0.5784764207980653, "grad_norm": 6.037049557228156, "learning_rate": 3.845745062139595e-07, "loss": 0.009289574809372425, "step": 2392 }, { "epoch": 0.5787182587666264, "grad_norm": 1.4900491540148089, "learning_rate": 3.8420113010455106e-07, "loss": 0.008050248958170414, "step": 2393 }, { "epoch": 0.5789600967351874, "grad_norm": 0.80076907562879, "learning_rate": 3.8382782221653154e-07, "loss": 0.0029675140976905823, "step": 2394 }, { "epoch": 0.5792019347037485, "grad_norm": 1.126736153171307, "learning_rate": 3.8345458276983e-07, "loss": 0.008109009824693203, "step": 2395 }, { "epoch": 0.5794437726723095, "grad_norm": 2.7034364062879166, "learning_rate": 3.8308141198433587e-07, "loss": 0.015356478281319141, "step": 2396 }, { "epoch": 0.5796856106408707, "grad_norm": 1.8847260380439221, "learning_rate": 3.827083100798979e-07, "loss": 0.0051036193035542965, "step": 2397 }, { "epoch": 0.5799274486094317, "grad_norm": 6.746628211202744, "learning_rate": 3.823352772763238e-07, "loss": 0.008658845908939838, "step": 2398 }, { "epoch": 0.5801692865779927, "grad_norm": 1.5062788047553537, "learning_rate": 3.819623137933813e-07, "loss": 0.005318023264408112, "step": 2399 }, { "epoch": 0.5804111245465539, "grad_norm": 1.191366103295653, "learning_rate": 3.81589419850797e-07, "loss": 0.004930156283080578, "step": 2400 }, { "epoch": 0.5806529625151149, "grad_norm": 2.3707019550428785, "learning_rate": 3.8121659566825617e-07, "loss": 0.006244716700166464, "step": 2401 }, { "epoch": 0.5808948004836759, "grad_norm": 4.875290791085731, "learning_rate": 3.8084384146540355e-07, "loss": 0.014046966098248959, "step": 2402 }, { "epoch": 0.581136638452237, "grad_norm": 1.3704836978374488, "learning_rate": 3.8047115746184237e-07, "loss": 0.004961853381246328, "step": 2403 }, { "epoch": 0.5813784764207981, "grad_norm": 2.1771096148663847, "learning_rate": 3.800985438771345e-07, "loss": 0.022361159324645996, "step": 2404 }, { "epoch": 0.5816203143893591, "grad_norm": 21.722634223857806, "learning_rate": 3.797260009308005e-07, "loss": 0.11677928268909454, "step": 2405 }, { "epoch": 0.5818621523579202, "grad_norm": 3.7572149494863276, "learning_rate": 3.7935352884231874e-07, "loss": 0.00198494759388268, "step": 2406 }, { "epoch": 0.5821039903264813, "grad_norm": 1.6868051195430798, "learning_rate": 3.789811278311266e-07, "loss": 0.009821309708058834, "step": 2407 }, { "epoch": 0.5823458282950423, "grad_norm": 2.664747362054953, "learning_rate": 3.7860879811661945e-07, "loss": 0.01305987685918808, "step": 2408 }, { "epoch": 0.5825876662636034, "grad_norm": 0.2562860629910327, "learning_rate": 3.7823653991815e-07, "loss": 0.0006815394735895097, "step": 2409 }, { "epoch": 0.5828295042321644, "grad_norm": 3.509222214584458, "learning_rate": 3.778643534550295e-07, "loss": 0.008849330246448517, "step": 2410 }, { "epoch": 0.5830713422007255, "grad_norm": 2.848600801972832, "learning_rate": 3.7749223894652707e-07, "loss": 0.014879169873893261, "step": 2411 }, { "epoch": 0.5833131801692866, "grad_norm": 1.1408653127621156, "learning_rate": 3.771201966118687e-07, "loss": 0.004892752505838871, "step": 2412 }, { "epoch": 0.5835550181378476, "grad_norm": 2.6129302685369042, "learning_rate": 3.767482266702384e-07, "loss": 0.014892429113388062, "step": 2413 }, { "epoch": 0.5837968561064087, "grad_norm": 1.5943436557699753, "learning_rate": 3.7637632934077725e-07, "loss": 0.005617182701826096, "step": 2414 }, { "epoch": 0.5840386940749698, "grad_norm": 0.7989511227036653, "learning_rate": 3.7600450484258376e-07, "loss": 0.001057676738128066, "step": 2415 }, { "epoch": 0.5842805320435308, "grad_norm": 1.3863272458260643, "learning_rate": 3.756327533947137e-07, "loss": 0.006003437098115683, "step": 2416 }, { "epoch": 0.584522370012092, "grad_norm": 1.0564438410015702, "learning_rate": 3.7526107521617924e-07, "loss": 0.011665808968245983, "step": 2417 }, { "epoch": 0.584764207980653, "grad_norm": 7.115744712798365, "learning_rate": 3.748894705259498e-07, "loss": 0.005915092770010233, "step": 2418 }, { "epoch": 0.585006045949214, "grad_norm": 0.8558602461293505, "learning_rate": 3.745179395429517e-07, "loss": 0.0037140939384698868, "step": 2419 }, { "epoch": 0.585247883917775, "grad_norm": 9.831679853391025, "learning_rate": 3.741464824860672e-07, "loss": 0.004261501599103212, "step": 2420 }, { "epoch": 0.5854897218863362, "grad_norm": 3.188965372623083, "learning_rate": 3.7377509957413546e-07, "loss": 0.010882309637963772, "step": 2421 }, { "epoch": 0.5857315598548972, "grad_norm": 3.149181654986249, "learning_rate": 3.734037910259521e-07, "loss": 0.013726782985031605, "step": 2422 }, { "epoch": 0.5859733978234583, "grad_norm": 0.9988024165556675, "learning_rate": 3.7303255706026826e-07, "loss": 0.0031697936356067657, "step": 2423 }, { "epoch": 0.5862152357920194, "grad_norm": 1.0505257023571049, "learning_rate": 3.7266139789579196e-07, "loss": 0.004877379164099693, "step": 2424 }, { "epoch": 0.5864570737605804, "grad_norm": 4.313849399906442, "learning_rate": 3.722903137511865e-07, "loss": 0.006416696589440107, "step": 2425 }, { "epoch": 0.5866989117291415, "grad_norm": 1.876842368749468, "learning_rate": 3.719193048450713e-07, "loss": 0.009405180811882019, "step": 2426 }, { "epoch": 0.5869407496977025, "grad_norm": 2.2649174575924196, "learning_rate": 3.715483713960217e-07, "loss": 0.005118229426443577, "step": 2427 }, { "epoch": 0.5871825876662636, "grad_norm": 1.0305967924347923, "learning_rate": 3.711775136225679e-07, "loss": 0.006969881244003773, "step": 2428 }, { "epoch": 0.5874244256348247, "grad_norm": 1.9113389398062175, "learning_rate": 3.70806731743196e-07, "loss": 0.010042314417660236, "step": 2429 }, { "epoch": 0.5876662636033857, "grad_norm": 6.083342897144597, "learning_rate": 3.7043602597634746e-07, "loss": 0.015263221226632595, "step": 2430 }, { "epoch": 0.5879081015719468, "grad_norm": 0.7537539722328656, "learning_rate": 3.700653965404185e-07, "loss": 0.004378484096378088, "step": 2431 }, { "epoch": 0.5881499395405079, "grad_norm": 3.939981657593955, "learning_rate": 3.696948436537607e-07, "loss": 0.014938822947442532, "step": 2432 }, { "epoch": 0.5883917775090689, "grad_norm": 1.8133379736602766, "learning_rate": 3.6932436753468064e-07, "loss": 0.005710491444915533, "step": 2433 }, { "epoch": 0.5886336154776299, "grad_norm": 1.3805989074548008, "learning_rate": 3.689539684014391e-07, "loss": 0.004701680038124323, "step": 2434 }, { "epoch": 0.5888754534461911, "grad_norm": 4.385468261628954, "learning_rate": 3.685836464722523e-07, "loss": 0.010895385406911373, "step": 2435 }, { "epoch": 0.5891172914147521, "grad_norm": 2.6539370173974826, "learning_rate": 3.6821340196529017e-07, "loss": 0.015117189846932888, "step": 2436 }, { "epoch": 0.5893591293833131, "grad_norm": 4.877597646646495, "learning_rate": 3.6784323509867756e-07, "loss": 0.006921682506799698, "step": 2437 }, { "epoch": 0.5896009673518743, "grad_norm": 1.9266316903232732, "learning_rate": 3.6747314609049364e-07, "loss": 0.004572663921862841, "step": 2438 }, { "epoch": 0.5898428053204353, "grad_norm": 3.237109849637397, "learning_rate": 3.6710313515877107e-07, "loss": 0.009472428821027279, "step": 2439 }, { "epoch": 0.5900846432889963, "grad_norm": 2.7666187760889707, "learning_rate": 3.667332025214972e-07, "loss": 0.017767800018191338, "step": 2440 }, { "epoch": 0.5903264812575575, "grad_norm": 1.131586195822484, "learning_rate": 3.663633483966133e-07, "loss": 0.0029670705553144217, "step": 2441 }, { "epoch": 0.5905683192261185, "grad_norm": 0.8019402581320761, "learning_rate": 3.659935730020135e-07, "loss": 0.002583805937319994, "step": 2442 }, { "epoch": 0.5908101571946796, "grad_norm": 21.6307654770953, "learning_rate": 3.6562387655554667e-07, "loss": 0.010763146914541721, "step": 2443 }, { "epoch": 0.5910519951632406, "grad_norm": 8.187516994853793, "learning_rate": 3.652542592750142e-07, "loss": 0.012833543121814728, "step": 2444 }, { "epoch": 0.5912938331318017, "grad_norm": 1.3493166750897805, "learning_rate": 3.648847213781715e-07, "loss": 0.007978192530572414, "step": 2445 }, { "epoch": 0.5915356711003628, "grad_norm": 4.729451381362559, "learning_rate": 3.6451526308272707e-07, "loss": 0.016800759360194206, "step": 2446 }, { "epoch": 0.5917775090689238, "grad_norm": 2.8851619907438835, "learning_rate": 3.641458846063421e-07, "loss": 0.014793346635997295, "step": 2447 }, { "epoch": 0.5920193470374849, "grad_norm": 8.678330812259583, "learning_rate": 3.6377658616663133e-07, "loss": 0.011452125385403633, "step": 2448 }, { "epoch": 0.592261185006046, "grad_norm": 1.098565708736394, "learning_rate": 3.6340736798116223e-07, "loss": 0.0030111554078757763, "step": 2449 }, { "epoch": 0.592503022974607, "grad_norm": 2.1831244365403797, "learning_rate": 3.630382302674545e-07, "loss": 0.00604160875082016, "step": 2450 }, { "epoch": 0.592744860943168, "grad_norm": 2.684053447591563, "learning_rate": 3.626691732429811e-07, "loss": 0.0022879827301949263, "step": 2451 }, { "epoch": 0.5929866989117292, "grad_norm": 1.31373683796825, "learning_rate": 3.62300197125167e-07, "loss": 0.0033373888581991196, "step": 2452 }, { "epoch": 0.5932285368802902, "grad_norm": 1.7964576829350138, "learning_rate": 3.6193130213138976e-07, "loss": 0.003865672741085291, "step": 2453 }, { "epoch": 0.5934703748488512, "grad_norm": 1.0929352975064777, "learning_rate": 3.6156248847897907e-07, "loss": 0.007577951531857252, "step": 2454 }, { "epoch": 0.5937122128174124, "grad_norm": 11.200346720617805, "learning_rate": 3.6119375638521633e-07, "loss": 0.0655246153473854, "step": 2455 }, { "epoch": 0.5939540507859734, "grad_norm": 1.8026630525861704, "learning_rate": 3.6082510606733543e-07, "loss": 0.0057901605032384396, "step": 2456 }, { "epoch": 0.5941958887545344, "grad_norm": 1.532191584694754, "learning_rate": 3.604565377425222e-07, "loss": 0.003371618455275893, "step": 2457 }, { "epoch": 0.5944377267230955, "grad_norm": 3.0185230809153185, "learning_rate": 3.600880516279132e-07, "loss": 0.004546977113932371, "step": 2458 }, { "epoch": 0.5946795646916566, "grad_norm": 1.508822683294529, "learning_rate": 3.597196479405977e-07, "loss": 0.0051629976369440556, "step": 2459 }, { "epoch": 0.5949214026602176, "grad_norm": 4.020053733315693, "learning_rate": 3.5935132689761584e-07, "loss": 0.04057082161307335, "step": 2460 }, { "epoch": 0.5951632406287787, "grad_norm": 2.881441158902693, "learning_rate": 3.589830887159591e-07, "loss": 0.011968323960900307, "step": 2461 }, { "epoch": 0.5954050785973398, "grad_norm": 2.651031505246184, "learning_rate": 3.5861493361256995e-07, "loss": 0.007703728508204222, "step": 2462 }, { "epoch": 0.5956469165659009, "grad_norm": 11.728290680905669, "learning_rate": 3.582468618043426e-07, "loss": 0.008935852907598019, "step": 2463 }, { "epoch": 0.5958887545344619, "grad_norm": 0.5394337735420699, "learning_rate": 3.578788735081213e-07, "loss": 0.0009662317461334169, "step": 2464 }, { "epoch": 0.596130592503023, "grad_norm": 4.514612513177139, "learning_rate": 3.57510968940702e-07, "loss": 0.015548801980912685, "step": 2465 }, { "epoch": 0.5963724304715841, "grad_norm": 0.9123837572765042, "learning_rate": 3.571431483188303e-07, "loss": 0.0032512859907001257, "step": 2466 }, { "epoch": 0.5966142684401451, "grad_norm": 1.337302397503213, "learning_rate": 3.567754118592033e-07, "loss": 0.0037090093828737736, "step": 2467 }, { "epoch": 0.5968561064087061, "grad_norm": 1.0961976888953475, "learning_rate": 3.5640775977846804e-07, "loss": 0.005471766460686922, "step": 2468 }, { "epoch": 0.5970979443772673, "grad_norm": 1.890439450111642, "learning_rate": 3.560401922932219e-07, "loss": 0.007670526392757893, "step": 2469 }, { "epoch": 0.5973397823458283, "grad_norm": 3.4158758587603693, "learning_rate": 3.5567270962001244e-07, "loss": 0.014308616518974304, "step": 2470 }, { "epoch": 0.5975816203143893, "grad_norm": 1.0632794228883127, "learning_rate": 3.5530531197533743e-07, "loss": 0.001552472822368145, "step": 2471 }, { "epoch": 0.5978234582829505, "grad_norm": 1.103276597992421, "learning_rate": 3.549379995756441e-07, "loss": 0.0020167778711766005, "step": 2472 }, { "epoch": 0.5980652962515115, "grad_norm": 5.944912908860522, "learning_rate": 3.5457077263733006e-07, "loss": 0.005993384402245283, "step": 2473 }, { "epoch": 0.5983071342200725, "grad_norm": 1.7368423671023712, "learning_rate": 3.542036313767419e-07, "loss": 0.016501257196068764, "step": 2474 }, { "epoch": 0.5985489721886336, "grad_norm": 2.3452381435239023, "learning_rate": 3.5383657601017627e-07, "loss": 0.007883856073021889, "step": 2475 }, { "epoch": 0.5987908101571947, "grad_norm": 1.9775378665236318, "learning_rate": 3.534696067538793e-07, "loss": 0.018210232257843018, "step": 2476 }, { "epoch": 0.5990326481257557, "grad_norm": 1.1290740194350437, "learning_rate": 3.5310272382404573e-07, "loss": 0.0067194425500929356, "step": 2477 }, { "epoch": 0.5992744860943168, "grad_norm": 1.9854630886889555, "learning_rate": 3.527359274368198e-07, "loss": 0.013479962944984436, "step": 2478 }, { "epoch": 0.5995163240628779, "grad_norm": 2.2368614247066287, "learning_rate": 3.523692178082953e-07, "loss": 0.00867731124162674, "step": 2479 }, { "epoch": 0.599758162031439, "grad_norm": 2.468474229968884, "learning_rate": 3.520025951545138e-07, "loss": 0.011096841655671597, "step": 2480 }, { "epoch": 0.6, "grad_norm": 1.9732930466475722, "learning_rate": 3.5163605969146655e-07, "loss": 0.008386182598769665, "step": 2481 }, { "epoch": 0.600241837968561, "grad_norm": 2.440399820920091, "learning_rate": 3.512696116350933e-07, "loss": 0.007469832431524992, "step": 2482 }, { "epoch": 0.6004836759371222, "grad_norm": 0.44201926273977926, "learning_rate": 3.5090325120128175e-07, "loss": 0.000666339707095176, "step": 2483 }, { "epoch": 0.6007255139056832, "grad_norm": 1.078153759662525, "learning_rate": 3.5053697860586865e-07, "loss": 0.007786849047988653, "step": 2484 }, { "epoch": 0.6009673518742442, "grad_norm": 2.2982652372147507, "learning_rate": 3.5017079406463855e-07, "loss": 0.006247203331440687, "step": 2485 }, { "epoch": 0.6012091898428054, "grad_norm": 3.9454840291583677, "learning_rate": 3.498046977933242e-07, "loss": 0.023244241252541542, "step": 2486 }, { "epoch": 0.6014510278113664, "grad_norm": 2.657138416623253, "learning_rate": 3.494386900076067e-07, "loss": 0.007635900285094976, "step": 2487 }, { "epoch": 0.6016928657799274, "grad_norm": 2.3208125930312393, "learning_rate": 3.4907277092311443e-07, "loss": 0.007250975351780653, "step": 2488 }, { "epoch": 0.6019347037484886, "grad_norm": 6.466990058102051, "learning_rate": 3.4870694075542397e-07, "loss": 0.013623371720314026, "step": 2489 }, { "epoch": 0.6021765417170496, "grad_norm": 2.576793891610353, "learning_rate": 3.483411997200596e-07, "loss": 0.009972202591598034, "step": 2490 }, { "epoch": 0.6024183796856106, "grad_norm": 8.288046204792408, "learning_rate": 3.4797554803249247e-07, "loss": 0.03004579059779644, "step": 2491 }, { "epoch": 0.6026602176541717, "grad_norm": 0.7315606668023071, "learning_rate": 3.4760998590814174e-07, "loss": 0.00271337921731174, "step": 2492 }, { "epoch": 0.6029020556227328, "grad_norm": 1.553915057081882, "learning_rate": 3.472445135623736e-07, "loss": 0.0077723548747599125, "step": 2493 }, { "epoch": 0.6031438935912938, "grad_norm": 1.4810761031539146, "learning_rate": 3.46879131210501e-07, "loss": 0.00213535875082016, "step": 2494 }, { "epoch": 0.6033857315598549, "grad_norm": 1.410258810024435, "learning_rate": 3.4651383906778455e-07, "loss": 0.00778296310454607, "step": 2495 }, { "epoch": 0.603627569528416, "grad_norm": 0.9925503008812163, "learning_rate": 3.461486373494309e-07, "loss": 0.007136689033359289, "step": 2496 }, { "epoch": 0.603869407496977, "grad_norm": 1.2599189116931595, "learning_rate": 3.4578352627059415e-07, "loss": 0.007144670933485031, "step": 2497 }, { "epoch": 0.6041112454655381, "grad_norm": 1.6035019051954926, "learning_rate": 3.454185060463749e-07, "loss": 0.00852819811552763, "step": 2498 }, { "epoch": 0.6043530834340991, "grad_norm": 0.6389281885298237, "learning_rate": 3.4505357689181953e-07, "loss": 0.0015774023486301303, "step": 2499 }, { "epoch": 0.6045949214026602, "grad_norm": 2.15779017165521, "learning_rate": 3.446887390219217e-07, "loss": 0.008526649326086044, "step": 2500 }, { "epoch": 0.6048367593712213, "grad_norm": 9.23259877406128, "learning_rate": 3.4432399265162073e-07, "loss": 0.006393674295395613, "step": 2501 }, { "epoch": 0.6050785973397823, "grad_norm": 0.8434379673637937, "learning_rate": 3.439593379958021e-07, "loss": 0.00216473126783967, "step": 2502 }, { "epoch": 0.6053204353083435, "grad_norm": 2.42449488708228, "learning_rate": 3.435947752692975e-07, "loss": 0.008514218032360077, "step": 2503 }, { "epoch": 0.6055622732769045, "grad_norm": 1.6759800643629987, "learning_rate": 3.43230304686884e-07, "loss": 0.011742512695491314, "step": 2504 }, { "epoch": 0.6058041112454655, "grad_norm": 2.5688190436493947, "learning_rate": 3.428659264632848e-07, "loss": 0.007309482898563147, "step": 2505 }, { "epoch": 0.6060459492140265, "grad_norm": 0.6707720648877638, "learning_rate": 3.425016408131689e-07, "loss": 0.0018338278168812394, "step": 2506 }, { "epoch": 0.6062877871825877, "grad_norm": 3.410727850798613, "learning_rate": 3.4213744795114984e-07, "loss": 0.004431718494743109, "step": 2507 }, { "epoch": 0.6065296251511487, "grad_norm": 2.4518132362454708, "learning_rate": 3.417733480917876e-07, "loss": 0.011544554494321346, "step": 2508 }, { "epoch": 0.6067714631197098, "grad_norm": 1.4963430915833384, "learning_rate": 3.414093414495865e-07, "loss": 0.017569342628121376, "step": 2509 }, { "epoch": 0.6070133010882709, "grad_norm": 5.325345609153569, "learning_rate": 3.4104542823899647e-07, "loss": 0.004013149533420801, "step": 2510 }, { "epoch": 0.6072551390568319, "grad_norm": 2.045810677936244, "learning_rate": 3.4068160867441204e-07, "loss": 0.01690117083489895, "step": 2511 }, { "epoch": 0.607496977025393, "grad_norm": 1.2055634732569493, "learning_rate": 3.403178829701731e-07, "loss": 0.002908800495788455, "step": 2512 }, { "epoch": 0.6077388149939541, "grad_norm": 1.4821550613815235, "learning_rate": 3.3995425134056335e-07, "loss": 0.0036733762826770544, "step": 2513 }, { "epoch": 0.6079806529625151, "grad_norm": 3.9776661981152213, "learning_rate": 3.395907139998122e-07, "loss": 0.007113808300346136, "step": 2514 }, { "epoch": 0.6082224909310762, "grad_norm": 1.0641055244898132, "learning_rate": 3.3922727116209236e-07, "loss": 0.007237429264932871, "step": 2515 }, { "epoch": 0.6084643288996372, "grad_norm": 1.2168762860858664, "learning_rate": 3.3886392304152176e-07, "loss": 0.004056454636156559, "step": 2516 }, { "epoch": 0.6087061668681983, "grad_norm": 1.5681227815814367, "learning_rate": 3.3850066985216217e-07, "loss": 0.0104092201218009, "step": 2517 }, { "epoch": 0.6089480048367594, "grad_norm": 1.5048497355562107, "learning_rate": 3.381375118080194e-07, "loss": 0.006949558854103088, "step": 2518 }, { "epoch": 0.6091898428053204, "grad_norm": 2.548803658503309, "learning_rate": 3.377744491230432e-07, "loss": 0.002427001716569066, "step": 2519 }, { "epoch": 0.6094316807738815, "grad_norm": 3.741356389912944, "learning_rate": 3.3741148201112734e-07, "loss": 0.014977908693253994, "step": 2520 }, { "epoch": 0.6096735187424426, "grad_norm": 4.479006485165003, "learning_rate": 3.370486106861089e-07, "loss": 0.008476566523313522, "step": 2521 }, { "epoch": 0.6099153567110036, "grad_norm": 1.6658905515278284, "learning_rate": 3.36685835361769e-07, "loss": 0.011255073361098766, "step": 2522 }, { "epoch": 0.6101571946795646, "grad_norm": 2.4993511538384494, "learning_rate": 3.3632315625183205e-07, "loss": 0.003261660458520055, "step": 2523 }, { "epoch": 0.6103990326481258, "grad_norm": 29.77222399160331, "learning_rate": 3.359605735699654e-07, "loss": 0.009274502284824848, "step": 2524 }, { "epoch": 0.6106408706166868, "grad_norm": 1.3357208294432115, "learning_rate": 3.3559808752978015e-07, "loss": 0.0013926593819633126, "step": 2525 }, { "epoch": 0.6108827085852478, "grad_norm": 0.22923743678323416, "learning_rate": 3.3523569834483007e-07, "loss": 0.0007732170051895082, "step": 2526 }, { "epoch": 0.611124546553809, "grad_norm": 1.717100353399886, "learning_rate": 3.348734062286119e-07, "loss": 0.006205902900546789, "step": 2527 }, { "epoch": 0.61136638452237, "grad_norm": 0.8216637783445421, "learning_rate": 3.3451121139456573e-07, "loss": 0.0016396207502111793, "step": 2528 }, { "epoch": 0.611608222490931, "grad_norm": 1.3072523695168634, "learning_rate": 3.3414911405607334e-07, "loss": 0.006113884970545769, "step": 2529 }, { "epoch": 0.6118500604594922, "grad_norm": 25.196464525667512, "learning_rate": 3.337871144264599e-07, "loss": 0.00902878399938345, "step": 2530 }, { "epoch": 0.6120918984280532, "grad_norm": 2.172281290487571, "learning_rate": 3.3342521271899305e-07, "loss": 0.0079987533390522, "step": 2531 }, { "epoch": 0.6123337363966143, "grad_norm": 1.9256532384545424, "learning_rate": 3.330634091468819e-07, "loss": 0.006278918124735355, "step": 2532 }, { "epoch": 0.6125755743651753, "grad_norm": 2.480512456421289, "learning_rate": 3.3270170392327877e-07, "loss": 0.00990771222859621, "step": 2533 }, { "epoch": 0.6128174123337364, "grad_norm": 3.2390314597985457, "learning_rate": 3.323400972612772e-07, "loss": 0.007346365135163069, "step": 2534 }, { "epoch": 0.6130592503022975, "grad_norm": 2.1364866625656656, "learning_rate": 3.319785893739131e-07, "loss": 0.009563694708049297, "step": 2535 }, { "epoch": 0.6133010882708585, "grad_norm": 4.583043091334559, "learning_rate": 3.3161718047416443e-07, "loss": 0.004946191795170307, "step": 2536 }, { "epoch": 0.6135429262394196, "grad_norm": 1.9501740407751156, "learning_rate": 3.3125587077495e-07, "loss": 0.0014222607715055346, "step": 2537 }, { "epoch": 0.6137847642079807, "grad_norm": 0.7481171246730401, "learning_rate": 3.3089466048913096e-07, "loss": 0.003249967470765114, "step": 2538 }, { "epoch": 0.6140266021765417, "grad_norm": 2.1341689548285885, "learning_rate": 3.305335498295098e-07, "loss": 0.006419052369892597, "step": 2539 }, { "epoch": 0.6142684401451027, "grad_norm": 1.2984152430732137, "learning_rate": 3.3017253900882973e-07, "loss": 0.0043290299363434315, "step": 2540 }, { "epoch": 0.6145102781136639, "grad_norm": 1.140913501822398, "learning_rate": 3.298116282397759e-07, "loss": 0.004779377486556768, "step": 2541 }, { "epoch": 0.6147521160822249, "grad_norm": 1.1506913882157734, "learning_rate": 3.2945081773497417e-07, "loss": 0.00610545976087451, "step": 2542 }, { "epoch": 0.6149939540507859, "grad_norm": 0.8390295016363959, "learning_rate": 3.29090107706991e-07, "loss": 0.0013605091953650117, "step": 2543 }, { "epoch": 0.6152357920193471, "grad_norm": 51.936741192689524, "learning_rate": 3.2872949836833433e-07, "loss": 0.008170964196324348, "step": 2544 }, { "epoch": 0.6154776299879081, "grad_norm": 3.334066104034169, "learning_rate": 3.2836898993145207e-07, "loss": 0.030122263357043266, "step": 2545 }, { "epoch": 0.6157194679564691, "grad_norm": 1.3468716458165555, "learning_rate": 3.2800858260873333e-07, "loss": 0.003454855177551508, "step": 2546 }, { "epoch": 0.6159613059250302, "grad_norm": 2.980682189153179, "learning_rate": 3.2764827661250736e-07, "loss": 0.01142532005906105, "step": 2547 }, { "epoch": 0.6162031438935913, "grad_norm": 0.8274758731388001, "learning_rate": 3.272880721550436e-07, "loss": 0.004689921624958515, "step": 2548 }, { "epoch": 0.6164449818621524, "grad_norm": 7.117429963695155, "learning_rate": 3.269279694485519e-07, "loss": 0.017823100090026855, "step": 2549 }, { "epoch": 0.6166868198307134, "grad_norm": 2.314460201564985, "learning_rate": 3.265679687051821e-07, "loss": 0.00663434574380517, "step": 2550 }, { "epoch": 0.6169286577992745, "grad_norm": 2.942325631604732, "learning_rate": 3.2620807013702367e-07, "loss": 0.011699991300702095, "step": 2551 }, { "epoch": 0.6171704957678356, "grad_norm": 1.06151650174488, "learning_rate": 3.2584827395610623e-07, "loss": 0.01141554769128561, "step": 2552 }, { "epoch": 0.6174123337363966, "grad_norm": 3.957566413308096, "learning_rate": 3.254885803743993e-07, "loss": 0.015186774544417858, "step": 2553 }, { "epoch": 0.6176541717049577, "grad_norm": 1.8321108540613964, "learning_rate": 3.2512898960381123e-07, "loss": 0.0035095203202217817, "step": 2554 }, { "epoch": 0.6178960096735188, "grad_norm": 1.0052271431273998, "learning_rate": 3.2476950185619054e-07, "loss": 0.003867593128234148, "step": 2555 }, { "epoch": 0.6181378476420798, "grad_norm": 4.0516436910835205, "learning_rate": 3.2441011734332446e-07, "loss": 0.006470229011029005, "step": 2556 }, { "epoch": 0.6183796856106408, "grad_norm": 2.0229307425502254, "learning_rate": 3.240508362769399e-07, "loss": 0.003177950857207179, "step": 2557 }, { "epoch": 0.618621523579202, "grad_norm": 2.5376818507847743, "learning_rate": 3.2369165886870263e-07, "loss": 0.019187087193131447, "step": 2558 }, { "epoch": 0.618863361547763, "grad_norm": 1.2864061560871909, "learning_rate": 3.233325853302169e-07, "loss": 0.004783714190125465, "step": 2559 }, { "epoch": 0.619105199516324, "grad_norm": 2.415752408219998, "learning_rate": 3.229736158730265e-07, "loss": 0.0046408893540501595, "step": 2560 }, { "epoch": 0.6193470374848852, "grad_norm": 3.63402476446358, "learning_rate": 3.2261475070861387e-07, "loss": 0.012962581589818, "step": 2561 }, { "epoch": 0.6195888754534462, "grad_norm": 3.071676451016553, "learning_rate": 3.222559900483991e-07, "loss": 0.01639263890683651, "step": 2562 }, { "epoch": 0.6198307134220072, "grad_norm": 0.27412327278794907, "learning_rate": 3.218973341037419e-07, "loss": 0.0007897219038568437, "step": 2563 }, { "epoch": 0.6200725513905683, "grad_norm": 1.8910181175608733, "learning_rate": 3.215387830859393e-07, "loss": 0.007086074445396662, "step": 2564 }, { "epoch": 0.6203143893591294, "grad_norm": 2.611211063091192, "learning_rate": 3.211803372062272e-07, "loss": 0.010525285266339779, "step": 2565 }, { "epoch": 0.6205562273276904, "grad_norm": 2.4312705201180664, "learning_rate": 3.2082199667577927e-07, "loss": 0.0031648047734051943, "step": 2566 }, { "epoch": 0.6207980652962515, "grad_norm": 0.8822552348599705, "learning_rate": 3.2046376170570697e-07, "loss": 0.0015416514361277223, "step": 2567 }, { "epoch": 0.6210399032648126, "grad_norm": 2.199174245585885, "learning_rate": 3.201056325070599e-07, "loss": 0.020087994635105133, "step": 2568 }, { "epoch": 0.6212817412333737, "grad_norm": 4.078684902730297, "learning_rate": 3.197476092908253e-07, "loss": 0.005422299727797508, "step": 2569 }, { "epoch": 0.6215235792019347, "grad_norm": 0.6030670358006831, "learning_rate": 3.193896922679276e-07, "loss": 0.002104787854477763, "step": 2570 }, { "epoch": 0.6217654171704957, "grad_norm": 3.496209495541931, "learning_rate": 3.19031881649229e-07, "loss": 0.004844146780669689, "step": 2571 }, { "epoch": 0.6220072551390569, "grad_norm": 1.3534315111094402, "learning_rate": 3.1867417764552915e-07, "loss": 0.007080291397869587, "step": 2572 }, { "epoch": 0.6222490931076179, "grad_norm": 1.986186628395541, "learning_rate": 3.183165804675645e-07, "loss": 0.007890946231782436, "step": 2573 }, { "epoch": 0.6224909310761789, "grad_norm": 1.5972227160129393, "learning_rate": 3.1795909032600874e-07, "loss": 0.007101227529346943, "step": 2574 }, { "epoch": 0.6227327690447401, "grad_norm": 1.075852028698493, "learning_rate": 3.1760170743147264e-07, "loss": 0.003145630704239011, "step": 2575 }, { "epoch": 0.6229746070133011, "grad_norm": 1.5979500963272173, "learning_rate": 3.172444319945033e-07, "loss": 0.006587111856788397, "step": 2576 }, { "epoch": 0.6232164449818621, "grad_norm": 1.7729286083851976, "learning_rate": 3.168872642255854e-07, "loss": 0.009225214831531048, "step": 2577 }, { "epoch": 0.6234582829504233, "grad_norm": 2.0508252972734797, "learning_rate": 3.1653020433513927e-07, "loss": 0.0030170173849910498, "step": 2578 }, { "epoch": 0.6237001209189843, "grad_norm": 1.3198661067163482, "learning_rate": 3.161732525335221e-07, "loss": 0.0014703100314363837, "step": 2579 }, { "epoch": 0.6239419588875453, "grad_norm": 2.091803054495661, "learning_rate": 3.158164090310277e-07, "loss": 0.009460615925490856, "step": 2580 }, { "epoch": 0.6241837968561064, "grad_norm": 1.3171967321324909, "learning_rate": 3.154596740378854e-07, "loss": 0.00481977267190814, "step": 2581 }, { "epoch": 0.6244256348246675, "grad_norm": 3.0151895512277878, "learning_rate": 3.151030477642609e-07, "loss": 0.010755936615169048, "step": 2582 }, { "epoch": 0.6246674727932285, "grad_norm": 1.9260736207438414, "learning_rate": 3.1474653042025634e-07, "loss": 0.008203105069696903, "step": 2583 }, { "epoch": 0.6249093107617896, "grad_norm": 1.0515913726116362, "learning_rate": 3.1439012221590877e-07, "loss": 0.0036128521896898746, "step": 2584 }, { "epoch": 0.6251511487303507, "grad_norm": 1.5696993265784964, "learning_rate": 3.140338233611918e-07, "loss": 0.001417907769791782, "step": 2585 }, { "epoch": 0.6253929866989117, "grad_norm": 1.6381546914650287, "learning_rate": 3.136776340660138e-07, "loss": 0.002573068020865321, "step": 2586 }, { "epoch": 0.6256348246674728, "grad_norm": 0.9198749365285293, "learning_rate": 3.133215545402194e-07, "loss": 0.0016868355451151729, "step": 2587 }, { "epoch": 0.6258766626360338, "grad_norm": 0.6288259264574088, "learning_rate": 3.1296558499358823e-07, "loss": 0.0024347652215510607, "step": 2588 }, { "epoch": 0.626118500604595, "grad_norm": 0.6977741333424686, "learning_rate": 3.1260972563583486e-07, "loss": 0.0015799390384927392, "step": 2589 }, { "epoch": 0.626360338573156, "grad_norm": 1.8523240038879047, "learning_rate": 3.1225397667660915e-07, "loss": 0.007385853212326765, "step": 2590 }, { "epoch": 0.626602176541717, "grad_norm": 2.845166878661688, "learning_rate": 3.1189833832549617e-07, "loss": 0.00762223219498992, "step": 2591 }, { "epoch": 0.6268440145102782, "grad_norm": 1.345395142112575, "learning_rate": 3.115428107920154e-07, "loss": 0.003634026274085045, "step": 2592 }, { "epoch": 0.6270858524788392, "grad_norm": 1.0411311952472118, "learning_rate": 3.1118739428562146e-07, "loss": 0.005089374724775553, "step": 2593 }, { "epoch": 0.6273276904474002, "grad_norm": 1.7498714526550103, "learning_rate": 3.1083208901570303e-07, "loss": 0.007975855842232704, "step": 2594 }, { "epoch": 0.6275695284159613, "grad_norm": 0.6666515976910419, "learning_rate": 3.104768951915836e-07, "loss": 0.0024392700288444757, "step": 2595 }, { "epoch": 0.6278113663845224, "grad_norm": 0.6044037064664852, "learning_rate": 3.101218130225214e-07, "loss": 0.0016620521200820804, "step": 2596 }, { "epoch": 0.6280532043530834, "grad_norm": 1.38015038201445, "learning_rate": 3.097668427177079e-07, "loss": 0.005625721532851458, "step": 2597 }, { "epoch": 0.6282950423216445, "grad_norm": 0.7912722409006052, "learning_rate": 3.094119844862695e-07, "loss": 0.0035014692693948746, "step": 2598 }, { "epoch": 0.6285368802902056, "grad_norm": 2.402226788756233, "learning_rate": 3.0905723853726636e-07, "loss": 0.010892791673541069, "step": 2599 }, { "epoch": 0.6287787182587666, "grad_norm": 1.2869455958736549, "learning_rate": 3.08702605079692e-07, "loss": 0.004001993220299482, "step": 2600 }, { "epoch": 0.6290205562273277, "grad_norm": 1.4989719417445286, "learning_rate": 3.083480843224744e-07, "loss": 0.008163579739630222, "step": 2601 }, { "epoch": 0.6292623941958888, "grad_norm": 1.871217209109689, "learning_rate": 3.079936764744749e-07, "loss": 0.005594962742179632, "step": 2602 }, { "epoch": 0.6295042321644498, "grad_norm": 0.7005851059402555, "learning_rate": 3.0763938174448807e-07, "loss": 0.002770152408629656, "step": 2603 }, { "epoch": 0.6297460701330109, "grad_norm": 26.434671709505526, "learning_rate": 3.0728520034124215e-07, "loss": 0.016067536547780037, "step": 2604 }, { "epoch": 0.6299879081015719, "grad_norm": 4.932041369118199, "learning_rate": 3.069311324733982e-07, "loss": 0.01814362220466137, "step": 2605 }, { "epoch": 0.630229746070133, "grad_norm": 6.4147919365039066, "learning_rate": 3.06577178349551e-07, "loss": 0.0032666753977537155, "step": 2606 }, { "epoch": 0.6304715840386941, "grad_norm": 2.4882521569269387, "learning_rate": 3.0622333817822795e-07, "loss": 0.007944300770759583, "step": 2607 }, { "epoch": 0.6307134220072551, "grad_norm": 3.11728108436484, "learning_rate": 3.05869612167889e-07, "loss": 0.010254321619868279, "step": 2608 }, { "epoch": 0.6309552599758163, "grad_norm": 2.0214570528783318, "learning_rate": 3.055160005269275e-07, "loss": 0.005829895846545696, "step": 2609 }, { "epoch": 0.6311970979443773, "grad_norm": 0.8415879086454985, "learning_rate": 3.051625034636693e-07, "loss": 0.0037971590645611286, "step": 2610 }, { "epoch": 0.6314389359129383, "grad_norm": 1.6490940804505725, "learning_rate": 3.0480912118637216e-07, "loss": 0.0078891571611166, "step": 2611 }, { "epoch": 0.6316807738814993, "grad_norm": 3.3948922293142765, "learning_rate": 3.044558539032268e-07, "loss": 0.005777872167527676, "step": 2612 }, { "epoch": 0.6319226118500605, "grad_norm": 1.899385277830986, "learning_rate": 3.0410270182235616e-07, "loss": 0.0008624767069704831, "step": 2613 }, { "epoch": 0.6321644498186215, "grad_norm": 2.0721187652825592, "learning_rate": 3.03749665151815e-07, "loss": 0.004106851760298014, "step": 2614 }, { "epoch": 0.6324062877871826, "grad_norm": 1.0064843724492347, "learning_rate": 3.033967440995904e-07, "loss": 0.005883588921278715, "step": 2615 }, { "epoch": 0.6326481257557437, "grad_norm": 0.7882422031263778, "learning_rate": 3.030439388736008e-07, "loss": 0.003915857989341021, "step": 2616 }, { "epoch": 0.6328899637243047, "grad_norm": 2.2716674526847775, "learning_rate": 3.026912496816971e-07, "loss": 0.014042270369827747, "step": 2617 }, { "epoch": 0.6331318016928658, "grad_norm": 2.543786053205447, "learning_rate": 3.023386767316616e-07, "loss": 0.004652631934732199, "step": 2618 }, { "epoch": 0.6333736396614268, "grad_norm": 4.106735033408269, "learning_rate": 3.0198622023120777e-07, "loss": 0.004029994830489159, "step": 2619 }, { "epoch": 0.6336154776299879, "grad_norm": 6.541606242460406, "learning_rate": 3.01633880387981e-07, "loss": 0.011069445870816708, "step": 2620 }, { "epoch": 0.633857315598549, "grad_norm": 5.105151180616426, "learning_rate": 3.0128165740955757e-07, "loss": 0.004856977146118879, "step": 2621 }, { "epoch": 0.63409915356711, "grad_norm": 9.592043063630609, "learning_rate": 3.009295515034451e-07, "loss": 0.0058909072540700436, "step": 2622 }, { "epoch": 0.6343409915356711, "grad_norm": 3.594993109582647, "learning_rate": 3.0057756287708203e-07, "loss": 0.002565902890637517, "step": 2623 }, { "epoch": 0.6345828295042322, "grad_norm": 3.8438395153700085, "learning_rate": 3.0022569173783803e-07, "loss": 0.020278559997677803, "step": 2624 }, { "epoch": 0.6348246674727932, "grad_norm": 1.2753959963647827, "learning_rate": 2.9987393829301314e-07, "loss": 0.006504340562969446, "step": 2625 }, { "epoch": 0.6350665054413543, "grad_norm": 2.6218027264020494, "learning_rate": 2.995223027498387e-07, "loss": 0.004764688666909933, "step": 2626 }, { "epoch": 0.6353083434099154, "grad_norm": 7.722094783619137, "learning_rate": 2.9917078531547566e-07, "loss": 0.008714109659194946, "step": 2627 }, { "epoch": 0.6355501813784764, "grad_norm": 1.3513723536933606, "learning_rate": 2.988193861970163e-07, "loss": 0.0028807108756154776, "step": 2628 }, { "epoch": 0.6357920193470374, "grad_norm": 2.2060613415998898, "learning_rate": 2.984681056014829e-07, "loss": 0.00553398160263896, "step": 2629 }, { "epoch": 0.6360338573155986, "grad_norm": 4.210815700458308, "learning_rate": 2.9811694373582753e-07, "loss": 0.015251770615577698, "step": 2630 }, { "epoch": 0.6362756952841596, "grad_norm": 3.108057215340447, "learning_rate": 2.9776590080693266e-07, "loss": 0.009064191952347755, "step": 2631 }, { "epoch": 0.6365175332527206, "grad_norm": 2.4930404873152407, "learning_rate": 2.974149770216108e-07, "loss": 0.014439515769481659, "step": 2632 }, { "epoch": 0.6367593712212818, "grad_norm": 1.23082458607311, "learning_rate": 2.970641725866038e-07, "loss": 0.0027350252494215965, "step": 2633 }, { "epoch": 0.6370012091898428, "grad_norm": 7.604335220895076, "learning_rate": 2.9671348770858394e-07, "loss": 0.014598684385418892, "step": 2634 }, { "epoch": 0.6372430471584039, "grad_norm": 2.8927547549771524, "learning_rate": 2.9636292259415216e-07, "loss": 0.008856578730046749, "step": 2635 }, { "epoch": 0.6374848851269649, "grad_norm": 6.312281088915677, "learning_rate": 2.960124774498396e-07, "loss": 0.024371163919568062, "step": 2636 }, { "epoch": 0.637726723095526, "grad_norm": 1.809003935762986, "learning_rate": 2.9566215248210655e-07, "loss": 0.007513316813856363, "step": 2637 }, { "epoch": 0.6379685610640871, "grad_norm": 2.6391893939463036, "learning_rate": 2.9531194789734203e-07, "loss": 0.003712164703756571, "step": 2638 }, { "epoch": 0.6382103990326481, "grad_norm": 5.485911689527756, "learning_rate": 2.949618639018646e-07, "loss": 0.002852000994607806, "step": 2639 }, { "epoch": 0.6384522370012092, "grad_norm": 1.24451300224333, "learning_rate": 2.94611900701922e-07, "loss": 0.00755262142047286, "step": 2640 }, { "epoch": 0.6386940749697703, "grad_norm": 4.372109738960764, "learning_rate": 2.942620585036899e-07, "loss": 0.013035229407250881, "step": 2641 }, { "epoch": 0.6389359129383313, "grad_norm": 1.5886028386156943, "learning_rate": 2.939123375132735e-07, "loss": 0.0063825794495642185, "step": 2642 }, { "epoch": 0.6391777509068923, "grad_norm": 1.6554642311413164, "learning_rate": 2.9356273793670665e-07, "loss": 0.0052612414583563805, "step": 2643 }, { "epoch": 0.6394195888754535, "grad_norm": 2.1288614877615806, "learning_rate": 2.9321325997995087e-07, "loss": 0.006764989346265793, "step": 2644 }, { "epoch": 0.6396614268440145, "grad_norm": 1.9202320153986505, "learning_rate": 2.92863903848897e-07, "loss": 0.01158450823277235, "step": 2645 }, { "epoch": 0.6399032648125755, "grad_norm": 0.8972781460515677, "learning_rate": 2.9251466974936323e-07, "loss": 0.0008506160229444504, "step": 2646 }, { "epoch": 0.6401451027811367, "grad_norm": 1.286451029974971, "learning_rate": 2.921655578870964e-07, "loss": 0.005325640551745892, "step": 2647 }, { "epoch": 0.6403869407496977, "grad_norm": 2.1739800376392053, "learning_rate": 2.9181656846777126e-07, "loss": 0.011484072543680668, "step": 2648 }, { "epoch": 0.6406287787182587, "grad_norm": 1.3547957967297055, "learning_rate": 2.914677016969903e-07, "loss": 0.018036477267742157, "step": 2649 }, { "epoch": 0.6408706166868199, "grad_norm": 1.2790304189499597, "learning_rate": 2.9111895778028384e-07, "loss": 0.003605165285989642, "step": 2650 }, { "epoch": 0.6411124546553809, "grad_norm": 1.6262116949810692, "learning_rate": 2.907703369231098e-07, "loss": 0.00710055697709322, "step": 2651 }, { "epoch": 0.641354292623942, "grad_norm": 4.494339517240883, "learning_rate": 2.9042183933085353e-07, "loss": 0.03130095452070236, "step": 2652 }, { "epoch": 0.641596130592503, "grad_norm": 1.8758590028352655, "learning_rate": 2.900734652088279e-07, "loss": 0.010296112857758999, "step": 2653 }, { "epoch": 0.6418379685610641, "grad_norm": 2.1185136484862825, "learning_rate": 2.8972521476227295e-07, "loss": 0.009429010562598705, "step": 2654 }, { "epoch": 0.6420798065296252, "grad_norm": 3.137172689040267, "learning_rate": 2.8937708819635567e-07, "loss": 0.011632302775979042, "step": 2655 }, { "epoch": 0.6423216444981862, "grad_norm": 0.8269620040975147, "learning_rate": 2.890290857161708e-07, "loss": 0.0014504300197586417, "step": 2656 }, { "epoch": 0.6425634824667473, "grad_norm": 3.6300037119977264, "learning_rate": 2.8868120752673877e-07, "loss": 0.019121801480650902, "step": 2657 }, { "epoch": 0.6428053204353084, "grad_norm": 1.9586617707045262, "learning_rate": 2.883334538330079e-07, "loss": 0.006306984927505255, "step": 2658 }, { "epoch": 0.6430471584038694, "grad_norm": 4.040737304735284, "learning_rate": 2.879858248398529e-07, "loss": 0.006591360550373793, "step": 2659 }, { "epoch": 0.6432889963724304, "grad_norm": 2.1296133615494055, "learning_rate": 2.876383207520742e-07, "loss": 0.003097009612247348, "step": 2660 }, { "epoch": 0.6435308343409916, "grad_norm": 0.563885340509819, "learning_rate": 2.872909417743997e-07, "loss": 0.0006541004404425621, "step": 2661 }, { "epoch": 0.6437726723095526, "grad_norm": 1.3561279165785691, "learning_rate": 2.869436881114833e-07, "loss": 0.006958054844290018, "step": 2662 }, { "epoch": 0.6440145102781136, "grad_norm": 2.976586129081644, "learning_rate": 2.865965599679047e-07, "loss": 0.01608237810432911, "step": 2663 }, { "epoch": 0.6442563482466748, "grad_norm": 1.4212822583142666, "learning_rate": 2.8624955754817e-07, "loss": 0.0017247401410713792, "step": 2664 }, { "epoch": 0.6444981862152358, "grad_norm": 2.9360163213671555, "learning_rate": 2.859026810567111e-07, "loss": 0.004969370551407337, "step": 2665 }, { "epoch": 0.6447400241837968, "grad_norm": 0.3228458958329068, "learning_rate": 2.855559306978859e-07, "loss": 0.0007817470468580723, "step": 2666 }, { "epoch": 0.6449818621523579, "grad_norm": 1.639331330527642, "learning_rate": 2.852093066759775e-07, "loss": 0.008749676868319511, "step": 2667 }, { "epoch": 0.645223700120919, "grad_norm": 1.0855805416028548, "learning_rate": 2.8486280919519525e-07, "loss": 0.005599068012088537, "step": 2668 }, { "epoch": 0.64546553808948, "grad_norm": 10.72508043528623, "learning_rate": 2.845164384596732e-07, "loss": 0.008643381297588348, "step": 2669 }, { "epoch": 0.6457073760580411, "grad_norm": 5.264779731012563, "learning_rate": 2.841701946734718e-07, "loss": 0.006047456059604883, "step": 2670 }, { "epoch": 0.6459492140266022, "grad_norm": 1.362223308678942, "learning_rate": 2.838240780405753e-07, "loss": 0.0037459630984812975, "step": 2671 }, { "epoch": 0.6461910519951632, "grad_norm": 1.4612375182348483, "learning_rate": 2.834780887648942e-07, "loss": 0.0017640594160184264, "step": 2672 }, { "epoch": 0.6464328899637243, "grad_norm": 3.3347724645277843, "learning_rate": 2.831322270502636e-07, "loss": 0.017432330176234245, "step": 2673 }, { "epoch": 0.6466747279322854, "grad_norm": 3.797116451132859, "learning_rate": 2.827864931004433e-07, "loss": 0.004988922271877527, "step": 2674 }, { "epoch": 0.6469165659008465, "grad_norm": 8.259391224638174, "learning_rate": 2.8244088711911784e-07, "loss": 0.008717731572687626, "step": 2675 }, { "epoch": 0.6471584038694075, "grad_norm": 2.6078906349821778, "learning_rate": 2.820954093098966e-07, "loss": 0.007390404585748911, "step": 2676 }, { "epoch": 0.6474002418379685, "grad_norm": 0.2644177479681601, "learning_rate": 2.817500598763132e-07, "loss": 0.0008479997632093728, "step": 2677 }, { "epoch": 0.6476420798065297, "grad_norm": 11.819500168487403, "learning_rate": 2.814048390218258e-07, "loss": 0.014743258245289326, "step": 2678 }, { "epoch": 0.6478839177750907, "grad_norm": 1.945866819457496, "learning_rate": 2.810597469498168e-07, "loss": 0.0015622781356796622, "step": 2679 }, { "epoch": 0.6481257557436517, "grad_norm": 61.790148658516536, "learning_rate": 2.807147838635923e-07, "loss": 0.0459880456328392, "step": 2680 }, { "epoch": 0.6483675937122129, "grad_norm": 0.569941711263636, "learning_rate": 2.803699499663836e-07, "loss": 0.0013994527980685234, "step": 2681 }, { "epoch": 0.6486094316807739, "grad_norm": 1.2122286677533205, "learning_rate": 2.8002524546134434e-07, "loss": 0.0031448346562683582, "step": 2682 }, { "epoch": 0.6488512696493349, "grad_norm": 7.477363503066067, "learning_rate": 2.7968067055155294e-07, "loss": 0.012087894603610039, "step": 2683 }, { "epoch": 0.649093107617896, "grad_norm": 1.1362418892489978, "learning_rate": 2.793362254400111e-07, "loss": 0.005733349826186895, "step": 2684 }, { "epoch": 0.6493349455864571, "grad_norm": 1.4542878472462408, "learning_rate": 2.789919103296441e-07, "loss": 0.003979716915637255, "step": 2685 }, { "epoch": 0.6495767835550181, "grad_norm": 3.4763611195077315, "learning_rate": 2.786477254233011e-07, "loss": 0.005725554656237364, "step": 2686 }, { "epoch": 0.6498186215235792, "grad_norm": 5.27519152608663, "learning_rate": 2.783036709237535e-07, "loss": 0.015136517584323883, "step": 2687 }, { "epoch": 0.6500604594921403, "grad_norm": 5.176996423569923, "learning_rate": 2.7795974703369705e-07, "loss": 0.005820395890623331, "step": 2688 }, { "epoch": 0.6503022974607013, "grad_norm": 3.9688994949414615, "learning_rate": 2.776159539557498e-07, "loss": 0.004006742965430021, "step": 2689 }, { "epoch": 0.6505441354292624, "grad_norm": 2.0537880501310966, "learning_rate": 2.772722918924528e-07, "loss": 0.003268467728048563, "step": 2690 }, { "epoch": 0.6507859733978234, "grad_norm": 13.303823318720337, "learning_rate": 2.769287610462704e-07, "loss": 0.005250119604170322, "step": 2691 }, { "epoch": 0.6510278113663845, "grad_norm": 1.8930912114819263, "learning_rate": 2.76585361619589e-07, "loss": 0.007561991456896067, "step": 2692 }, { "epoch": 0.6512696493349456, "grad_norm": 1.7666507155287738, "learning_rate": 2.7624209381471796e-07, "loss": 0.005048775114119053, "step": 2693 }, { "epoch": 0.6515114873035066, "grad_norm": 1.6605970038288673, "learning_rate": 2.7589895783388906e-07, "loss": 0.003009124891832471, "step": 2694 }, { "epoch": 0.6517533252720678, "grad_norm": 0.7211195615413142, "learning_rate": 2.7555595387925635e-07, "loss": 0.0008466505678370595, "step": 2695 }, { "epoch": 0.6519951632406288, "grad_norm": 0.7301541037803502, "learning_rate": 2.7521308215289585e-07, "loss": 0.0026602065190672874, "step": 2696 }, { "epoch": 0.6522370012091898, "grad_norm": 1.0681673767206195, "learning_rate": 2.7487034285680664e-07, "loss": 0.0022833645343780518, "step": 2697 }, { "epoch": 0.652478839177751, "grad_norm": 2.225405939380153, "learning_rate": 2.745277361929085e-07, "loss": 0.008491197600960732, "step": 2698 }, { "epoch": 0.652720677146312, "grad_norm": 1.3361261764718473, "learning_rate": 2.7418526236304363e-07, "loss": 0.007752922363579273, "step": 2699 }, { "epoch": 0.652962515114873, "grad_norm": 1.2316712124679405, "learning_rate": 2.7384292156897657e-07, "loss": 0.0026090089231729507, "step": 2700 }, { "epoch": 0.653204353083434, "grad_norm": 3.4885830618708793, "learning_rate": 2.7350071401239215e-07, "loss": 0.006743570324033499, "step": 2701 }, { "epoch": 0.6534461910519952, "grad_norm": 2.7533580562077296, "learning_rate": 2.731586398948981e-07, "loss": 0.01468273252248764, "step": 2702 }, { "epoch": 0.6536880290205562, "grad_norm": 0.49456354089665105, "learning_rate": 2.728166994180227e-07, "loss": 0.0025294977240264416, "step": 2703 }, { "epoch": 0.6539298669891173, "grad_norm": 15.758779278907303, "learning_rate": 2.7247489278321565e-07, "loss": 0.006176185328513384, "step": 2704 }, { "epoch": 0.6541717049576784, "grad_norm": 0.9485926618466358, "learning_rate": 2.721332201918479e-07, "loss": 0.0035886019468307495, "step": 2705 }, { "epoch": 0.6544135429262394, "grad_norm": 0.7148847376696754, "learning_rate": 2.717916818452114e-07, "loss": 0.0016547593986615539, "step": 2706 }, { "epoch": 0.6546553808948005, "grad_norm": 2.8873845668218423, "learning_rate": 2.7145027794451904e-07, "loss": 0.014333332888782024, "step": 2707 }, { "epoch": 0.6548972188633615, "grad_norm": 1.0722355609614702, "learning_rate": 2.711090086909043e-07, "loss": 0.0026018868666142225, "step": 2708 }, { "epoch": 0.6551390568319226, "grad_norm": 1.391154753416794, "learning_rate": 2.707678742854216e-07, "loss": 0.00557868042960763, "step": 2709 }, { "epoch": 0.6553808948004837, "grad_norm": 0.6925367287610116, "learning_rate": 2.704268749290457e-07, "loss": 0.003199611324816942, "step": 2710 }, { "epoch": 0.6556227327690447, "grad_norm": 5.330914418471105, "learning_rate": 2.7008601082267237e-07, "loss": 0.0063367667607963085, "step": 2711 }, { "epoch": 0.6558645707376058, "grad_norm": 2.082727838354522, "learning_rate": 2.6974528216711645e-07, "loss": 0.007479418069124222, "step": 2712 }, { "epoch": 0.6561064087061669, "grad_norm": 3.5834037636720306, "learning_rate": 2.694046891631146e-07, "loss": 0.007590916007757187, "step": 2713 }, { "epoch": 0.6563482466747279, "grad_norm": 0.24358222061144977, "learning_rate": 2.690642320113219e-07, "loss": 0.0006695957854390144, "step": 2714 }, { "epoch": 0.656590084643289, "grad_norm": 1.6683328357610503, "learning_rate": 2.687239109123149e-07, "loss": 0.0026376370806246996, "step": 2715 }, { "epoch": 0.6568319226118501, "grad_norm": 1.8045487671498366, "learning_rate": 2.683837260665893e-07, "loss": 0.004065885674208403, "step": 2716 }, { "epoch": 0.6570737605804111, "grad_norm": 2.1770893274287855, "learning_rate": 2.6804367767455995e-07, "loss": 0.011180776171386242, "step": 2717 }, { "epoch": 0.6573155985489721, "grad_norm": 1.1173645586617782, "learning_rate": 2.677037659365624e-07, "loss": 0.0006399305420927703, "step": 2718 }, { "epoch": 0.6575574365175333, "grad_norm": 0.48070584977107234, "learning_rate": 2.673639910528511e-07, "loss": 0.0005786266992799938, "step": 2719 }, { "epoch": 0.6577992744860943, "grad_norm": 1.0594164524671823, "learning_rate": 2.6702435322360005e-07, "loss": 0.0018441755091771483, "step": 2720 }, { "epoch": 0.6580411124546554, "grad_norm": 2.3563228138028482, "learning_rate": 2.666848526489023e-07, "loss": 0.0009731936152093112, "step": 2721 }, { "epoch": 0.6582829504232165, "grad_norm": 7.486235745356023, "learning_rate": 2.663454895287702e-07, "loss": 0.013836294412612915, "step": 2722 }, { "epoch": 0.6585247883917775, "grad_norm": 3.1013461493994505, "learning_rate": 2.6600626406313507e-07, "loss": 0.007566386368125677, "step": 2723 }, { "epoch": 0.6587666263603386, "grad_norm": 3.229425131984419, "learning_rate": 2.6566717645184734e-07, "loss": 0.006686115171760321, "step": 2724 }, { "epoch": 0.6590084643288996, "grad_norm": 0.4930541778678229, "learning_rate": 2.653282268946757e-07, "loss": 0.0008180599543265998, "step": 2725 }, { "epoch": 0.6592503022974607, "grad_norm": 1.2358223043426044, "learning_rate": 2.6498941559130797e-07, "loss": 0.005711536388844252, "step": 2726 }, { "epoch": 0.6594921402660218, "grad_norm": 1.4027454624318183, "learning_rate": 2.646507427413508e-07, "loss": 0.004904373083263636, "step": 2727 }, { "epoch": 0.6597339782345828, "grad_norm": 1.2887038175012497, "learning_rate": 2.6431220854432823e-07, "loss": 0.0030922687146812677, "step": 2728 }, { "epoch": 0.6599758162031439, "grad_norm": 1.0446354820722557, "learning_rate": 2.6397381319968364e-07, "loss": 0.005142991431057453, "step": 2729 }, { "epoch": 0.660217654171705, "grad_norm": 1.392768915460629, "learning_rate": 2.636355569067782e-07, "loss": 0.008685707114636898, "step": 2730 }, { "epoch": 0.660459492140266, "grad_norm": 1.7180354253728636, "learning_rate": 2.6329743986489106e-07, "loss": 0.004550529178231955, "step": 2731 }, { "epoch": 0.660701330108827, "grad_norm": 1.9790539980702122, "learning_rate": 2.629594622732195e-07, "loss": 0.011101420037448406, "step": 2732 }, { "epoch": 0.6609431680773882, "grad_norm": 1.0203700184962528, "learning_rate": 2.6262162433087856e-07, "loss": 0.0035618997644633055, "step": 2733 }, { "epoch": 0.6611850060459492, "grad_norm": 17.9047697033252, "learning_rate": 2.6228392623690113e-07, "loss": 0.022084757685661316, "step": 2734 }, { "epoch": 0.6614268440145102, "grad_norm": 1.153792779554731, "learning_rate": 2.619463681902376e-07, "loss": 0.004272285383194685, "step": 2735 }, { "epoch": 0.6616686819830714, "grad_norm": 1.5195153543865043, "learning_rate": 2.6160895038975583e-07, "loss": 0.009851986542344093, "step": 2736 }, { "epoch": 0.6619105199516324, "grad_norm": 1.0203680521835707, "learning_rate": 2.612716730342409e-07, "loss": 0.0021454438101500273, "step": 2737 }, { "epoch": 0.6621523579201934, "grad_norm": 3.2311982532222325, "learning_rate": 2.609345363223959e-07, "loss": 0.006476884242147207, "step": 2738 }, { "epoch": 0.6623941958887546, "grad_norm": 0.8373565848065297, "learning_rate": 2.6059754045284003e-07, "loss": 0.0017374769086018205, "step": 2739 }, { "epoch": 0.6626360338573156, "grad_norm": 0.9415707471916012, "learning_rate": 2.6026068562410994e-07, "loss": 0.0021623061038553715, "step": 2740 }, { "epoch": 0.6628778718258767, "grad_norm": 1.798963114048759, "learning_rate": 2.5992397203465987e-07, "loss": 0.0043832832016050816, "step": 2741 }, { "epoch": 0.6631197097944377, "grad_norm": 0.7315526423464704, "learning_rate": 2.5958739988285937e-07, "loss": 0.0010690443450585008, "step": 2742 }, { "epoch": 0.6633615477629988, "grad_norm": 2.5224375163893002, "learning_rate": 2.592509693669964e-07, "loss": 0.02272314392030239, "step": 2743 }, { "epoch": 0.6636033857315599, "grad_norm": 1.0034982539722508, "learning_rate": 2.589146806852737e-07, "loss": 0.004861332010477781, "step": 2744 }, { "epoch": 0.6638452237001209, "grad_norm": 1.710581965272992, "learning_rate": 2.5857853403581203e-07, "loss": 0.00539977103471756, "step": 2745 }, { "epoch": 0.664087061668682, "grad_norm": 0.5781749131269242, "learning_rate": 2.5824252961664753e-07, "loss": 0.0011735414154827595, "step": 2746 }, { "epoch": 0.6643288996372431, "grad_norm": 2.1880298977581027, "learning_rate": 2.579066676257329e-07, "loss": 0.002805860713124275, "step": 2747 }, { "epoch": 0.6645707376058041, "grad_norm": 10.236634409321097, "learning_rate": 2.575709482609366e-07, "loss": 0.08884584158658981, "step": 2748 }, { "epoch": 0.6648125755743651, "grad_norm": 1.6843871915897677, "learning_rate": 2.5723537172004363e-07, "loss": 0.0075944154523313046, "step": 2749 }, { "epoch": 0.6650544135429263, "grad_norm": 2.1961578296182114, "learning_rate": 2.5689993820075424e-07, "loss": 0.012241782620549202, "step": 2750 }, { "epoch": 0.6652962515114873, "grad_norm": 1.5543290138244443, "learning_rate": 2.565646479006847e-07, "loss": 0.0043162996880710125, "step": 2751 }, { "epoch": 0.6655380894800483, "grad_norm": 3.6075427397097766, "learning_rate": 2.562295010173673e-07, "loss": 0.004520448390394449, "step": 2752 }, { "epoch": 0.6657799274486095, "grad_norm": 1.7595880447149617, "learning_rate": 2.5589449774824885e-07, "loss": 0.008374701254069805, "step": 2753 }, { "epoch": 0.6660217654171705, "grad_norm": 3.103841919471624, "learning_rate": 2.555596382906928e-07, "loss": 0.007385728415101767, "step": 2754 }, { "epoch": 0.6662636033857315, "grad_norm": 0.13442124483796417, "learning_rate": 2.552249228419767e-07, "loss": 0.00045695502194575965, "step": 2755 }, { "epoch": 0.6665054413542926, "grad_norm": 1.1180642660141815, "learning_rate": 2.548903515992937e-07, "loss": 0.0023006279952824116, "step": 2756 }, { "epoch": 0.6667472793228537, "grad_norm": 0.765595481204178, "learning_rate": 2.5455592475975267e-07, "loss": 0.001256044371984899, "step": 2757 }, { "epoch": 0.6669891172914147, "grad_norm": 2.1487564305492945, "learning_rate": 2.5422164252037616e-07, "loss": 0.0030935071408748627, "step": 2758 }, { "epoch": 0.6672309552599758, "grad_norm": 1.7270479338555216, "learning_rate": 2.5388750507810253e-07, "loss": 0.005365477409213781, "step": 2759 }, { "epoch": 0.6674727932285369, "grad_norm": 0.9185974110333208, "learning_rate": 2.535535126297845e-07, "loss": 0.0020274359267205, "step": 2760 }, { "epoch": 0.667714631197098, "grad_norm": 3.7617902174129014, "learning_rate": 2.532196653721892e-07, "loss": 0.004456727299839258, "step": 2761 }, { "epoch": 0.667956469165659, "grad_norm": 2.456126009331277, "learning_rate": 2.528859635019985e-07, "loss": 0.00804931577295065, "step": 2762 }, { "epoch": 0.6681983071342201, "grad_norm": 0.6732568751090345, "learning_rate": 2.525524072158084e-07, "loss": 0.0005763005465269089, "step": 2763 }, { "epoch": 0.6684401451027812, "grad_norm": 2.052201597033012, "learning_rate": 2.522189967101292e-07, "loss": 0.004461321514099836, "step": 2764 }, { "epoch": 0.6686819830713422, "grad_norm": 2.3464127880960364, "learning_rate": 2.518857321813854e-07, "loss": 0.01341393031179905, "step": 2765 }, { "epoch": 0.6689238210399032, "grad_norm": 1.1038926268100193, "learning_rate": 2.515526138259154e-07, "loss": 0.0023132546339184046, "step": 2766 }, { "epoch": 0.6691656590084644, "grad_norm": 10.751431738420175, "learning_rate": 2.5121964183997136e-07, "loss": 0.0059290421195328236, "step": 2767 }, { "epoch": 0.6694074969770254, "grad_norm": 4.853616071940676, "learning_rate": 2.5088681641972e-07, "loss": 0.008220545947551727, "step": 2768 }, { "epoch": 0.6696493349455864, "grad_norm": 0.6235011759848964, "learning_rate": 2.505541377612401e-07, "loss": 0.0009935864945873618, "step": 2769 }, { "epoch": 0.6698911729141476, "grad_norm": 0.7102562222380773, "learning_rate": 2.5022160606052567e-07, "loss": 0.001065183081664145, "step": 2770 }, { "epoch": 0.6701330108827086, "grad_norm": 1.9385896590328522, "learning_rate": 2.4988922151348323e-07, "loss": 0.009550128132104874, "step": 2771 }, { "epoch": 0.6703748488512696, "grad_norm": 4.368311383269451, "learning_rate": 2.495569843159327e-07, "loss": 0.019702307879924774, "step": 2772 }, { "epoch": 0.6706166868198307, "grad_norm": 1.835246460975499, "learning_rate": 2.492248946636075e-07, "loss": 0.007321327459067106, "step": 2773 }, { "epoch": 0.6708585247883918, "grad_norm": 1.0387126554007833, "learning_rate": 2.4889295275215336e-07, "loss": 0.002740883966907859, "step": 2774 }, { "epoch": 0.6711003627569528, "grad_norm": 0.8703531000087957, "learning_rate": 2.485611587771301e-07, "loss": 0.0049254982732236385, "step": 2775 }, { "epoch": 0.6713422007255139, "grad_norm": 2.087785592488959, "learning_rate": 2.4822951293400944e-07, "loss": 0.00498657813295722, "step": 2776 }, { "epoch": 0.671584038694075, "grad_norm": 3.2902164416172255, "learning_rate": 2.4789801541817645e-07, "loss": 0.00869855284690857, "step": 2777 }, { "epoch": 0.671825876662636, "grad_norm": 1.1615389638095304, "learning_rate": 2.4756666642492833e-07, "loss": 0.0010380306048318744, "step": 2778 }, { "epoch": 0.6720677146311971, "grad_norm": 0.9375962938523353, "learning_rate": 2.4723546614947516e-07, "loss": 0.004244370851665735, "step": 2779 }, { "epoch": 0.6723095525997581, "grad_norm": 0.998409998137746, "learning_rate": 2.4690441478693913e-07, "loss": 0.0026583538856357336, "step": 2780 }, { "epoch": 0.6725513905683193, "grad_norm": 0.54957532003579, "learning_rate": 2.465735125323547e-07, "loss": 0.0008711063419468701, "step": 2781 }, { "epoch": 0.6727932285368803, "grad_norm": 6.385038120552849, "learning_rate": 2.4624275958066916e-07, "loss": 0.012342958711087704, "step": 2782 }, { "epoch": 0.6730350665054413, "grad_norm": 1.2040587090339194, "learning_rate": 2.459121561267404e-07, "loss": 0.007987595163285732, "step": 2783 }, { "epoch": 0.6732769044740025, "grad_norm": 8.747632786025154, "learning_rate": 2.4558170236534017e-07, "loss": 0.01009443961083889, "step": 2784 }, { "epoch": 0.6735187424425635, "grad_norm": 2.503104674417594, "learning_rate": 2.4525139849114993e-07, "loss": 0.008375060744583607, "step": 2785 }, { "epoch": 0.6737605804111245, "grad_norm": 8.352587511411777, "learning_rate": 2.449212446987646e-07, "loss": 0.005321206524968147, "step": 2786 }, { "epoch": 0.6740024183796857, "grad_norm": 3.374349906573885, "learning_rate": 2.4459124118268976e-07, "loss": 0.014537421055138111, "step": 2787 }, { "epoch": 0.6742442563482467, "grad_norm": 4.658544190783751, "learning_rate": 2.442613881373426e-07, "loss": 0.011411244980990887, "step": 2788 }, { "epoch": 0.6744860943168077, "grad_norm": 1.091259825210954, "learning_rate": 2.439316857570518e-07, "loss": 0.0038191471248865128, "step": 2789 }, { "epoch": 0.6747279322853688, "grad_norm": 1.355663100753835, "learning_rate": 2.436021342360571e-07, "loss": 0.007724584545940161, "step": 2790 }, { "epoch": 0.6749697702539299, "grad_norm": 0.8301713866209127, "learning_rate": 2.4327273376850965e-07, "loss": 0.003709192620590329, "step": 2791 }, { "epoch": 0.6752116082224909, "grad_norm": 4.897552073883658, "learning_rate": 2.4294348454847105e-07, "loss": 0.0059927660040557384, "step": 2792 }, { "epoch": 0.675453446191052, "grad_norm": 1.924422902927769, "learning_rate": 2.4261438676991483e-07, "loss": 0.009426930919289589, "step": 2793 }, { "epoch": 0.6756952841596131, "grad_norm": 4.825391748434516, "learning_rate": 2.422854406267239e-07, "loss": 0.0025644786655902863, "step": 2794 }, { "epoch": 0.6759371221281741, "grad_norm": 0.9171633330955685, "learning_rate": 2.4195664631269326e-07, "loss": 0.002591493772342801, "step": 2795 }, { "epoch": 0.6761789600967352, "grad_norm": 1.9896880207210959, "learning_rate": 2.416280040215272e-07, "loss": 0.00715812249109149, "step": 2796 }, { "epoch": 0.6764207980652962, "grad_norm": 0.8870663793214307, "learning_rate": 2.4129951394684117e-07, "loss": 0.0023639178834855556, "step": 2797 }, { "epoch": 0.6766626360338573, "grad_norm": 1.0415481616978048, "learning_rate": 2.409711762821612e-07, "loss": 0.004629779607057571, "step": 2798 }, { "epoch": 0.6769044740024184, "grad_norm": 3.0079170093192458, "learning_rate": 2.406429912209224e-07, "loss": 0.00461773993447423, "step": 2799 }, { "epoch": 0.6771463119709794, "grad_norm": 1.942987251014085, "learning_rate": 2.4031495895647135e-07, "loss": 0.009004796855151653, "step": 2800 }, { "epoch": 0.6773881499395406, "grad_norm": 1.352342290740422, "learning_rate": 2.3998707968206376e-07, "loss": 0.002666922053322196, "step": 2801 }, { "epoch": 0.6776299879081016, "grad_norm": 1.0766646923735175, "learning_rate": 2.3965935359086535e-07, "loss": 0.004961277358233929, "step": 2802 }, { "epoch": 0.6778718258766626, "grad_norm": 2.0882773270353834, "learning_rate": 2.393317808759517e-07, "loss": 0.00930787529796362, "step": 2803 }, { "epoch": 0.6781136638452236, "grad_norm": 2.028589416101321, "learning_rate": 2.3900436173030805e-07, "loss": 0.0029456201009452343, "step": 2804 }, { "epoch": 0.6783555018137848, "grad_norm": 6.577410285542311, "learning_rate": 2.38677096346829e-07, "loss": 0.009585302323102951, "step": 2805 }, { "epoch": 0.6785973397823458, "grad_norm": 1.4637758513827814, "learning_rate": 2.3834998491831864e-07, "loss": 0.0007637901580892503, "step": 2806 }, { "epoch": 0.6788391777509069, "grad_norm": 8.236790215871462, "learning_rate": 2.380230276374905e-07, "loss": 0.011183969676494598, "step": 2807 }, { "epoch": 0.679081015719468, "grad_norm": 0.6219280900120544, "learning_rate": 2.3769622469696698e-07, "loss": 0.0006741035031154752, "step": 2808 }, { "epoch": 0.679322853688029, "grad_norm": 3.584239250879501, "learning_rate": 2.3736957628928024e-07, "loss": 0.025583593174815178, "step": 2809 }, { "epoch": 0.6795646916565901, "grad_norm": 1.320699107361452, "learning_rate": 2.3704308260687022e-07, "loss": 0.008479658514261246, "step": 2810 }, { "epoch": 0.6798065296251512, "grad_norm": 1.2827003281305769, "learning_rate": 2.3671674384208695e-07, "loss": 0.001990528544411063, "step": 2811 }, { "epoch": 0.6800483675937122, "grad_norm": 2.789851848763679, "learning_rate": 2.3639056018718868e-07, "loss": 0.005860238801687956, "step": 2812 }, { "epoch": 0.6802902055622733, "grad_norm": 2.3046273852706127, "learning_rate": 2.360645318343416e-07, "loss": 0.0022345210891216993, "step": 2813 }, { "epoch": 0.6805320435308343, "grad_norm": 8.941833326370409, "learning_rate": 2.357386589756219e-07, "loss": 0.007860195823013783, "step": 2814 }, { "epoch": 0.6807738814993954, "grad_norm": 1.7868055798103988, "learning_rate": 2.3541294180301248e-07, "loss": 0.006156229879707098, "step": 2815 }, { "epoch": 0.6810157194679565, "grad_norm": 8.494495728856297, "learning_rate": 2.3508738050840593e-07, "loss": 0.02055084891617298, "step": 2816 }, { "epoch": 0.6812575574365175, "grad_norm": 4.605105896692985, "learning_rate": 2.3476197528360226e-07, "loss": 0.003205361310392618, "step": 2817 }, { "epoch": 0.6814993954050786, "grad_norm": 3.2741122094053083, "learning_rate": 2.3443672632030965e-07, "loss": 0.013572064228355885, "step": 2818 }, { "epoch": 0.6817412333736397, "grad_norm": 3.6877159652246925, "learning_rate": 2.341116338101443e-07, "loss": 0.01390847284346819, "step": 2819 }, { "epoch": 0.6819830713422007, "grad_norm": 1.5102846995367905, "learning_rate": 2.3378669794463024e-07, "loss": 0.007390542421489954, "step": 2820 }, { "epoch": 0.6822249093107617, "grad_norm": 1.1722220608957243, "learning_rate": 2.334619189151991e-07, "loss": 0.010433605872094631, "step": 2821 }, { "epoch": 0.6824667472793229, "grad_norm": 1.1982623317886145, "learning_rate": 2.331372969131901e-07, "loss": 0.005711723119020462, "step": 2822 }, { "epoch": 0.6827085852478839, "grad_norm": 5.458460910616111, "learning_rate": 2.3281283212985058e-07, "loss": 0.015308531932532787, "step": 2823 }, { "epoch": 0.682950423216445, "grad_norm": 3.929002702616489, "learning_rate": 2.3248852475633396e-07, "loss": 0.015978189185261726, "step": 2824 }, { "epoch": 0.6831922611850061, "grad_norm": 2.175905862812713, "learning_rate": 2.3216437498370245e-07, "loss": 0.010997998528182507, "step": 2825 }, { "epoch": 0.6834340991535671, "grad_norm": 6.7500332759605435, "learning_rate": 2.3184038300292385e-07, "loss": 0.010257646441459656, "step": 2826 }, { "epoch": 0.6836759371221282, "grad_norm": 2.9586863107120904, "learning_rate": 2.3151654900487438e-07, "loss": 0.006204612087458372, "step": 2827 }, { "epoch": 0.6839177750906892, "grad_norm": 2.4826740210499176, "learning_rate": 2.3119287318033648e-07, "loss": 0.012737974524497986, "step": 2828 }, { "epoch": 0.6841596130592503, "grad_norm": 2.389477606314685, "learning_rate": 2.3086935571999938e-07, "loss": 0.0095481276512146, "step": 2829 }, { "epoch": 0.6844014510278114, "grad_norm": 6.043928190621346, "learning_rate": 2.305459968144593e-07, "loss": 0.034850820899009705, "step": 2830 }, { "epoch": 0.6846432889963724, "grad_norm": 1.9501826095055337, "learning_rate": 2.3022279665421885e-07, "loss": 0.007881985045969486, "step": 2831 }, { "epoch": 0.6848851269649335, "grad_norm": 2.733118836224541, "learning_rate": 2.298997554296872e-07, "loss": 0.010220244526863098, "step": 2832 }, { "epoch": 0.6851269649334946, "grad_norm": 7.855434533545192, "learning_rate": 2.295768733311798e-07, "loss": 0.005349303130060434, "step": 2833 }, { "epoch": 0.6853688029020556, "grad_norm": 1.4568215790249508, "learning_rate": 2.2925415054891844e-07, "loss": 0.005240305792540312, "step": 2834 }, { "epoch": 0.6856106408706167, "grad_norm": 0.48206613813014915, "learning_rate": 2.2893158727303108e-07, "loss": 0.0012729793088510633, "step": 2835 }, { "epoch": 0.6858524788391778, "grad_norm": 3.547340607760846, "learning_rate": 2.286091836935516e-07, "loss": 0.012515335343778133, "step": 2836 }, { "epoch": 0.6860943168077388, "grad_norm": 18.30367694422491, "learning_rate": 2.2828694000041982e-07, "loss": 0.0048813545145094395, "step": 2837 }, { "epoch": 0.6863361547762998, "grad_norm": 1.2077664606875633, "learning_rate": 2.2796485638348127e-07, "loss": 0.012004519812762737, "step": 2838 }, { "epoch": 0.686577992744861, "grad_norm": 1.6469700129092861, "learning_rate": 2.2764293303248783e-07, "loss": 0.011189932003617287, "step": 2839 }, { "epoch": 0.686819830713422, "grad_norm": 1.5099308384806034, "learning_rate": 2.2732117013709573e-07, "loss": 0.005693626124411821, "step": 2840 }, { "epoch": 0.687061668681983, "grad_norm": 2.9144506610501577, "learning_rate": 2.269995678868678e-07, "loss": 0.005253791343420744, "step": 2841 }, { "epoch": 0.6873035066505442, "grad_norm": 2.0625275893849895, "learning_rate": 2.2667812647127176e-07, "loss": 0.009474808350205421, "step": 2842 }, { "epoch": 0.6875453446191052, "grad_norm": 2.1675713810129684, "learning_rate": 2.2635684607968054e-07, "loss": 0.011406098492443562, "step": 2843 }, { "epoch": 0.6877871825876662, "grad_norm": 1.0009607460369057, "learning_rate": 2.2603572690137224e-07, "loss": 0.0016193079063668847, "step": 2844 }, { "epoch": 0.6880290205562273, "grad_norm": 12.781514698378494, "learning_rate": 2.257147691255301e-07, "loss": 0.0016216840595006943, "step": 2845 }, { "epoch": 0.6882708585247884, "grad_norm": 4.6571572119886655, "learning_rate": 2.253939729412422e-07, "loss": 0.017562733963131905, "step": 2846 }, { "epoch": 0.6885126964933495, "grad_norm": 0.8035363773555302, "learning_rate": 2.2507333853750133e-07, "loss": 0.002619184320792556, "step": 2847 }, { "epoch": 0.6887545344619105, "grad_norm": 4.969029404162743, "learning_rate": 2.247528661032051e-07, "loss": 0.0033597988076508045, "step": 2848 }, { "epoch": 0.6889963724304716, "grad_norm": 0.7703097018718792, "learning_rate": 2.2443255582715542e-07, "loss": 0.008173561654984951, "step": 2849 }, { "epoch": 0.6892382103990327, "grad_norm": 4.131227319430264, "learning_rate": 2.2411240789805957e-07, "loss": 0.008064956404268742, "step": 2850 }, { "epoch": 0.6894800483675937, "grad_norm": 2.571620697161643, "learning_rate": 2.2379242250452783e-07, "loss": 0.008322933688759804, "step": 2851 }, { "epoch": 0.6897218863361547, "grad_norm": 0.8143214479438436, "learning_rate": 2.234725998350755e-07, "loss": 0.003129705786705017, "step": 2852 }, { "epoch": 0.6899637243047159, "grad_norm": 0.7390277753890868, "learning_rate": 2.231529400781224e-07, "loss": 0.004504543729126453, "step": 2853 }, { "epoch": 0.6902055622732769, "grad_norm": 1.8538881588805796, "learning_rate": 2.2283344342199122e-07, "loss": 0.007169377990067005, "step": 2854 }, { "epoch": 0.6904474002418379, "grad_norm": 2.4921530086874006, "learning_rate": 2.2251411005490995e-07, "loss": 0.006942337844520807, "step": 2855 }, { "epoch": 0.6906892382103991, "grad_norm": 1.876861785002152, "learning_rate": 2.2219494016500883e-07, "loss": 0.0011460533132776618, "step": 2856 }, { "epoch": 0.6909310761789601, "grad_norm": 6.784946147346181, "learning_rate": 2.2187593394032335e-07, "loss": 0.00801840703934431, "step": 2857 }, { "epoch": 0.6911729141475211, "grad_norm": 1.713960569341791, "learning_rate": 2.2155709156879153e-07, "loss": 0.011062158271670341, "step": 2858 }, { "epoch": 0.6914147521160823, "grad_norm": 2.018779308828437, "learning_rate": 2.2123841323825526e-07, "loss": 0.0020900992676615715, "step": 2859 }, { "epoch": 0.6916565900846433, "grad_norm": 2.178116314367444, "learning_rate": 2.209198991364597e-07, "loss": 0.015269639901816845, "step": 2860 }, { "epoch": 0.6918984280532043, "grad_norm": 2.0305770862248287, "learning_rate": 2.2060154945105318e-07, "loss": 0.0018207358662039042, "step": 2861 }, { "epoch": 0.6921402660217654, "grad_norm": 1.2612367412816694, "learning_rate": 2.2028336436958738e-07, "loss": 0.014513780362904072, "step": 2862 }, { "epoch": 0.6923821039903265, "grad_norm": 0.26882716439488397, "learning_rate": 2.1996534407951678e-07, "loss": 0.0007530161528848112, "step": 2863 }, { "epoch": 0.6926239419588875, "grad_norm": 6.067617314723303, "learning_rate": 2.1964748876819893e-07, "loss": 0.010183251462876797, "step": 2864 }, { "epoch": 0.6928657799274486, "grad_norm": 0.7509147370593526, "learning_rate": 2.1932979862289392e-07, "loss": 0.006368754897266626, "step": 2865 }, { "epoch": 0.6931076178960097, "grad_norm": 2.505213897201599, "learning_rate": 2.1901227383076527e-07, "loss": 0.02530396357178688, "step": 2866 }, { "epoch": 0.6933494558645708, "grad_norm": 1.243869938800033, "learning_rate": 2.1869491457887785e-07, "loss": 0.014989040791988373, "step": 2867 }, { "epoch": 0.6935912938331318, "grad_norm": 1.746012576158221, "learning_rate": 2.1837772105420023e-07, "loss": 0.008811959065496922, "step": 2868 }, { "epoch": 0.6938331318016928, "grad_norm": 0.8809795845718951, "learning_rate": 2.1806069344360284e-07, "loss": 0.0028307156171649694, "step": 2869 }, { "epoch": 0.694074969770254, "grad_norm": 0.4023871785535531, "learning_rate": 2.177438319338578e-07, "loss": 0.0009282859391532838, "step": 2870 }, { "epoch": 0.694316807738815, "grad_norm": 5.441689934091539, "learning_rate": 2.1742713671164052e-07, "loss": 0.029218723997473717, "step": 2871 }, { "epoch": 0.694558645707376, "grad_norm": 112.2000778711474, "learning_rate": 2.1711060796352758e-07, "loss": 0.005062326323240995, "step": 2872 }, { "epoch": 0.6948004836759372, "grad_norm": 0.5405481044529704, "learning_rate": 2.167942458759978e-07, "loss": 0.0009251214796677232, "step": 2873 }, { "epoch": 0.6950423216444982, "grad_norm": 1.4944881916850912, "learning_rate": 2.1647805063543174e-07, "loss": 0.002677741227671504, "step": 2874 }, { "epoch": 0.6952841596130592, "grad_norm": 1.2639329963646646, "learning_rate": 2.161620224281117e-07, "loss": 0.0030957560520619154, "step": 2875 }, { "epoch": 0.6955259975816203, "grad_norm": 3.5917719028940347, "learning_rate": 2.1584616144022154e-07, "loss": 0.008846386335790157, "step": 2876 }, { "epoch": 0.6957678355501814, "grad_norm": 4.9513850635136345, "learning_rate": 2.1553046785784672e-07, "loss": 0.012378274463117123, "step": 2877 }, { "epoch": 0.6960096735187424, "grad_norm": 1.7158382781524926, "learning_rate": 2.1521494186697386e-07, "loss": 0.008682112209498882, "step": 2878 }, { "epoch": 0.6962515114873035, "grad_norm": 6.823392898608623, "learning_rate": 2.1489958365349086e-07, "loss": 0.023362746462225914, "step": 2879 }, { "epoch": 0.6964933494558646, "grad_norm": 0.6554612495732383, "learning_rate": 2.1458439340318735e-07, "loss": 0.003246322274208069, "step": 2880 }, { "epoch": 0.6967351874244256, "grad_norm": 2.60878579639326, "learning_rate": 2.1426937130175294e-07, "loss": 0.0031787026673555374, "step": 2881 }, { "epoch": 0.6969770253929867, "grad_norm": 0.8623125852811871, "learning_rate": 2.1395451753477923e-07, "loss": 0.003260981058701873, "step": 2882 }, { "epoch": 0.6972188633615478, "grad_norm": 1.0888856653701118, "learning_rate": 2.13639832287758e-07, "loss": 0.0034103028010576963, "step": 2883 }, { "epoch": 0.6974607013301088, "grad_norm": 0.8024667608430346, "learning_rate": 2.1332531574608204e-07, "loss": 0.001842526369728148, "step": 2884 }, { "epoch": 0.6977025392986699, "grad_norm": 0.24200355282773542, "learning_rate": 2.130109680950446e-07, "loss": 0.00035970742464996874, "step": 2885 }, { "epoch": 0.6979443772672309, "grad_norm": 3.2811524243733943, "learning_rate": 2.1269678951983944e-07, "loss": 0.0036573782563209534, "step": 2886 }, { "epoch": 0.698186215235792, "grad_norm": 1.3980691634048825, "learning_rate": 2.1238278020556084e-07, "loss": 0.0067705982364714146, "step": 2887 }, { "epoch": 0.6984280532043531, "grad_norm": 3.3621996233816382, "learning_rate": 2.120689403372032e-07, "loss": 0.006207783240824938, "step": 2888 }, { "epoch": 0.6986698911729141, "grad_norm": 2.7638088672316923, "learning_rate": 2.1175527009966121e-07, "loss": 0.010348324663937092, "step": 2889 }, { "epoch": 0.6989117291414753, "grad_norm": 3.1683950019864087, "learning_rate": 2.114417696777294e-07, "loss": 0.002838532207533717, "step": 2890 }, { "epoch": 0.6991535671100363, "grad_norm": 2.6280335719548558, "learning_rate": 2.1112843925610302e-07, "loss": 0.013487217016518116, "step": 2891 }, { "epoch": 0.6993954050785973, "grad_norm": 0.9059243898529762, "learning_rate": 2.1081527901937602e-07, "loss": 0.004504096694290638, "step": 2892 }, { "epoch": 0.6996372430471584, "grad_norm": 2.4327626948422276, "learning_rate": 2.1050228915204276e-07, "loss": 0.002894960343837738, "step": 2893 }, { "epoch": 0.6998790810157195, "grad_norm": 0.5587695160350323, "learning_rate": 2.1018946983849727e-07, "loss": 0.0033794858027249575, "step": 2894 }, { "epoch": 0.7001209189842805, "grad_norm": 0.819272558919208, "learning_rate": 2.0987682126303264e-07, "loss": 0.0015221197390928864, "step": 2895 }, { "epoch": 0.7003627569528416, "grad_norm": 0.7286336513303441, "learning_rate": 2.0956434360984232e-07, "loss": 0.0019208707381039858, "step": 2896 }, { "epoch": 0.7006045949214027, "grad_norm": 1.572428602568774, "learning_rate": 2.0925203706301764e-07, "loss": 0.007013513240963221, "step": 2897 }, { "epoch": 0.7008464328899637, "grad_norm": 1.638995605630324, "learning_rate": 2.0893990180655052e-07, "loss": 0.005638156086206436, "step": 2898 }, { "epoch": 0.7010882708585248, "grad_norm": 3.661953936720139, "learning_rate": 2.0862793802433115e-07, "loss": 0.01686142198741436, "step": 2899 }, { "epoch": 0.7013301088270859, "grad_norm": 1.5891781962416935, "learning_rate": 2.0831614590014884e-07, "loss": 0.0009401511633768678, "step": 2900 }, { "epoch": 0.7015719467956469, "grad_norm": 3.3374607893879698, "learning_rate": 2.08004525617692e-07, "loss": 0.016349436715245247, "step": 2901 }, { "epoch": 0.701813784764208, "grad_norm": 0.4292157098241925, "learning_rate": 2.0769307736054748e-07, "loss": 0.0006176980095915496, "step": 2902 }, { "epoch": 0.702055622732769, "grad_norm": 2.3582951130859806, "learning_rate": 2.0738180131220106e-07, "loss": 0.005341570358723402, "step": 2903 }, { "epoch": 0.7022974607013301, "grad_norm": 2.6156752824428864, "learning_rate": 2.0707069765603692e-07, "loss": 0.001753138960339129, "step": 2904 }, { "epoch": 0.7025392986698912, "grad_norm": 4.416260970521467, "learning_rate": 2.067597665753376e-07, "loss": 0.0023146166931837797, "step": 2905 }, { "epoch": 0.7027811366384522, "grad_norm": 3.620831148036007, "learning_rate": 2.064490082532841e-07, "loss": 0.007429126184433699, "step": 2906 }, { "epoch": 0.7030229746070134, "grad_norm": 0.5962641481468941, "learning_rate": 2.0613842287295595e-07, "loss": 0.0022598435170948505, "step": 2907 }, { "epoch": 0.7032648125755744, "grad_norm": 3.5150031023771517, "learning_rate": 2.0582801061733007e-07, "loss": 0.011691557243466377, "step": 2908 }, { "epoch": 0.7035066505441354, "grad_norm": 2.9819682522729285, "learning_rate": 2.0551777166928175e-07, "loss": 0.004917718004435301, "step": 2909 }, { "epoch": 0.7037484885126964, "grad_norm": 0.625528626732962, "learning_rate": 2.052077062115847e-07, "loss": 0.0026556418742984533, "step": 2910 }, { "epoch": 0.7039903264812576, "grad_norm": 0.7017349077983712, "learning_rate": 2.0489781442690924e-07, "loss": 0.0045957923866808414, "step": 2911 }, { "epoch": 0.7042321644498186, "grad_norm": 16.131760811492924, "learning_rate": 2.0458809649782454e-07, "loss": 0.002312274882569909, "step": 2912 }, { "epoch": 0.7044740024183797, "grad_norm": 2.8966469153419583, "learning_rate": 2.0427855260679676e-07, "loss": 0.006459602154791355, "step": 2913 }, { "epoch": 0.7047158403869408, "grad_norm": 1.6127223583661727, "learning_rate": 2.0396918293618964e-07, "loss": 0.004795900080353022, "step": 2914 }, { "epoch": 0.7049576783555018, "grad_norm": 1.2548373740593572, "learning_rate": 2.0365998766826425e-07, "loss": 0.004857036750763655, "step": 2915 }, { "epoch": 0.7051995163240629, "grad_norm": 6.328641953474544, "learning_rate": 2.033509669851789e-07, "loss": 0.008083926513791084, "step": 2916 }, { "epoch": 0.7054413542926239, "grad_norm": 1.8473886940035678, "learning_rate": 2.030421210689891e-07, "loss": 0.00771896680817008, "step": 2917 }, { "epoch": 0.705683192261185, "grad_norm": 0.7386773853888087, "learning_rate": 2.0273345010164738e-07, "loss": 0.0025230792816728354, "step": 2918 }, { "epoch": 0.7059250302297461, "grad_norm": 1.7806327016597483, "learning_rate": 2.024249542650032e-07, "loss": 0.005031404551118612, "step": 2919 }, { "epoch": 0.7061668681983071, "grad_norm": 2.9119490281721268, "learning_rate": 2.0211663374080268e-07, "loss": 0.009897586889564991, "step": 2920 }, { "epoch": 0.7064087061668682, "grad_norm": 3.0282816675798623, "learning_rate": 2.018084887106894e-07, "loss": 0.0065341517329216, "step": 2921 }, { "epoch": 0.7066505441354293, "grad_norm": 3.3847822842235415, "learning_rate": 2.0150051935620216e-07, "loss": 0.005243866704404354, "step": 2922 }, { "epoch": 0.7068923821039903, "grad_norm": 4.154130642123363, "learning_rate": 2.0119272585877783e-07, "loss": 0.00788305513560772, "step": 2923 }, { "epoch": 0.7071342200725514, "grad_norm": 0.7862909907944647, "learning_rate": 2.0088510839974826e-07, "loss": 0.0023842023219913244, "step": 2924 }, { "epoch": 0.7073760580411125, "grad_norm": 2.0799959845301177, "learning_rate": 2.005776671603427e-07, "loss": 0.004804296884685755, "step": 2925 }, { "epoch": 0.7076178960096735, "grad_norm": 1.9026505016478616, "learning_rate": 2.0027040232168618e-07, "loss": 0.010488402098417282, "step": 2926 }, { "epoch": 0.7078597339782345, "grad_norm": 2.6116957421525235, "learning_rate": 1.9996331406479921e-07, "loss": 0.0038419216871261597, "step": 2927 }, { "epoch": 0.7081015719467957, "grad_norm": 5.790664523622821, "learning_rate": 1.996564025705994e-07, "loss": 0.00999812688678503, "step": 2928 }, { "epoch": 0.7083434099153567, "grad_norm": 0.37448271478741735, "learning_rate": 1.9934966801989938e-07, "loss": 0.0009465268813073635, "step": 2929 }, { "epoch": 0.7085852478839177, "grad_norm": 1.3393697219948921, "learning_rate": 1.9904311059340785e-07, "loss": 0.0012657403713092208, "step": 2930 }, { "epoch": 0.7088270858524789, "grad_norm": 1.2237152619890446, "learning_rate": 1.9873673047172907e-07, "loss": 0.005799652077257633, "step": 2931 }, { "epoch": 0.7090689238210399, "grad_norm": 2.055694224124096, "learning_rate": 1.9843052783536297e-07, "loss": 0.0026193202938884497, "step": 2932 }, { "epoch": 0.709310761789601, "grad_norm": 1.3793738896119054, "learning_rate": 1.9812450286470468e-07, "loss": 0.0023100466933101416, "step": 2933 }, { "epoch": 0.709552599758162, "grad_norm": 0.6571731759965455, "learning_rate": 1.978186557400449e-07, "loss": 0.00580270541831851, "step": 2934 }, { "epoch": 0.7097944377267231, "grad_norm": 0.6093465693161604, "learning_rate": 1.9751298664156947e-07, "loss": 0.0010951152071356773, "step": 2935 }, { "epoch": 0.7100362756952842, "grad_norm": 2.1725278964344374, "learning_rate": 1.9720749574935914e-07, "loss": 0.010758242569863796, "step": 2936 }, { "epoch": 0.7102781136638452, "grad_norm": 1.1787552644369592, "learning_rate": 1.9690218324339037e-07, "loss": 0.004256709478795528, "step": 2937 }, { "epoch": 0.7105199516324063, "grad_norm": 1.4597720283122466, "learning_rate": 1.9659704930353337e-07, "loss": 0.0022411991376429796, "step": 2938 }, { "epoch": 0.7107617896009674, "grad_norm": 3.551395688666978, "learning_rate": 1.9629209410955433e-07, "loss": 0.007504924666136503, "step": 2939 }, { "epoch": 0.7110036275695284, "grad_norm": 5.422607455361543, "learning_rate": 1.9598731784111338e-07, "loss": 0.008736873045563698, "step": 2940 }, { "epoch": 0.7112454655380894, "grad_norm": 1.584617434559813, "learning_rate": 1.956827206777656e-07, "loss": 0.011143113486468792, "step": 2941 }, { "epoch": 0.7114873035066506, "grad_norm": 1.3909046690346742, "learning_rate": 1.9537830279896023e-07, "loss": 0.0062304167076945305, "step": 2942 }, { "epoch": 0.7117291414752116, "grad_norm": 1.586923276730878, "learning_rate": 1.9507406438404123e-07, "loss": 0.010357355698943138, "step": 2943 }, { "epoch": 0.7119709794437726, "grad_norm": 3.3282958569412004, "learning_rate": 1.9477000561224665e-07, "loss": 0.00822143629193306, "step": 2944 }, { "epoch": 0.7122128174123338, "grad_norm": 0.5552237188236318, "learning_rate": 1.9446612666270867e-07, "loss": 0.001951386104337871, "step": 2945 }, { "epoch": 0.7124546553808948, "grad_norm": 4.2840200124196395, "learning_rate": 1.9416242771445369e-07, "loss": 0.015477637760341167, "step": 2946 }, { "epoch": 0.7126964933494558, "grad_norm": 9.89827674412773, "learning_rate": 1.9385890894640172e-07, "loss": 0.0026106780860573053, "step": 2947 }, { "epoch": 0.712938331318017, "grad_norm": 0.5328799214635379, "learning_rate": 1.935555705373675e-07, "loss": 0.001171432319097221, "step": 2948 }, { "epoch": 0.713180169286578, "grad_norm": 9.608565027545298, "learning_rate": 1.9325241266605824e-07, "loss": 0.0060205962508916855, "step": 2949 }, { "epoch": 0.713422007255139, "grad_norm": 0.9708661880198931, "learning_rate": 1.9294943551107547e-07, "loss": 0.00660103652626276, "step": 2950 }, { "epoch": 0.7136638452237001, "grad_norm": 1.5546752730723699, "learning_rate": 1.9264663925091472e-07, "loss": 0.010428732261061668, "step": 2951 }, { "epoch": 0.7139056831922612, "grad_norm": 0.9435352589003321, "learning_rate": 1.9234402406396382e-07, "loss": 0.0012864366872236133, "step": 2952 }, { "epoch": 0.7141475211608223, "grad_norm": 0.790619836297087, "learning_rate": 1.9204159012850518e-07, "loss": 0.003902724012732506, "step": 2953 }, { "epoch": 0.7143893591293833, "grad_norm": 1.204685012958432, "learning_rate": 1.9173933762271316e-07, "loss": 0.0020207955967634916, "step": 2954 }, { "epoch": 0.7146311970979444, "grad_norm": 1.7713594631038618, "learning_rate": 1.914372667246563e-07, "loss": 0.01372991781681776, "step": 2955 }, { "epoch": 0.7148730350665055, "grad_norm": 0.9496485623295029, "learning_rate": 1.911353776122956e-07, "loss": 0.0035515979398041964, "step": 2956 }, { "epoch": 0.7151148730350665, "grad_norm": 0.8646522065158433, "learning_rate": 1.908336704634851e-07, "loss": 0.00429603923112154, "step": 2957 }, { "epoch": 0.7153567110036275, "grad_norm": 1.5375881904218291, "learning_rate": 1.9053214545597157e-07, "loss": 0.003511036979034543, "step": 2958 }, { "epoch": 0.7155985489721887, "grad_norm": 0.8612126615360925, "learning_rate": 1.9023080276739451e-07, "loss": 0.005835298448801041, "step": 2959 }, { "epoch": 0.7158403869407497, "grad_norm": 0.6669352704977691, "learning_rate": 1.899296425752861e-07, "loss": 0.0015650615096092224, "step": 2960 }, { "epoch": 0.7160822249093107, "grad_norm": 5.865621385323008, "learning_rate": 1.8962866505707066e-07, "loss": 0.006136657670140266, "step": 2961 }, { "epoch": 0.7163240628778719, "grad_norm": 1.7254733105067943, "learning_rate": 1.8932787039006576e-07, "loss": 0.008385561406612396, "step": 2962 }, { "epoch": 0.7165659008464329, "grad_norm": 1.8722233015820018, "learning_rate": 1.890272587514799e-07, "loss": 0.003367191180586815, "step": 2963 }, { "epoch": 0.7168077388149939, "grad_norm": 4.1199308830841765, "learning_rate": 1.8872683031841518e-07, "loss": 0.01075268816202879, "step": 2964 }, { "epoch": 0.717049576783555, "grad_norm": 1.4468487856223538, "learning_rate": 1.884265852678646e-07, "loss": 0.0042852722108364105, "step": 2965 }, { "epoch": 0.7172914147521161, "grad_norm": 1.6328291120306717, "learning_rate": 1.8812652377671356e-07, "loss": 0.002721386030316353, "step": 2966 }, { "epoch": 0.7175332527206771, "grad_norm": 1.9103003490507409, "learning_rate": 1.8782664602173986e-07, "loss": 0.003996000625193119, "step": 2967 }, { "epoch": 0.7177750906892382, "grad_norm": 0.5853196859233012, "learning_rate": 1.8752695217961195e-07, "loss": 0.0008084429427981377, "step": 2968 }, { "epoch": 0.7180169286577993, "grad_norm": 2.531572489495071, "learning_rate": 1.8722744242689098e-07, "loss": 0.0090676574036479, "step": 2969 }, { "epoch": 0.7182587666263603, "grad_norm": 2.9554498354179395, "learning_rate": 1.8692811694002908e-07, "loss": 0.009599782526493073, "step": 2970 }, { "epoch": 0.7185006045949214, "grad_norm": 1.0749855643306918, "learning_rate": 1.866289758953699e-07, "loss": 0.008773313835263252, "step": 2971 }, { "epoch": 0.7187424425634825, "grad_norm": 2.8886358325759756, "learning_rate": 1.863300194691485e-07, "loss": 0.007182104978710413, "step": 2972 }, { "epoch": 0.7189842805320436, "grad_norm": 0.6880663753374436, "learning_rate": 1.8603124783749107e-07, "loss": 0.0022650689352303743, "step": 2973 }, { "epoch": 0.7192261185006046, "grad_norm": 3.4468257165718614, "learning_rate": 1.8573266117641513e-07, "loss": 0.008484505116939545, "step": 2974 }, { "epoch": 0.7194679564691656, "grad_norm": 4.780250408316387, "learning_rate": 1.85434259661829e-07, "loss": 0.008836175315082073, "step": 2975 }, { "epoch": 0.7197097944377268, "grad_norm": 0.5471188909587034, "learning_rate": 1.8513604346953214e-07, "loss": 0.002766565652564168, "step": 2976 }, { "epoch": 0.7199516324062878, "grad_norm": 8.337279901953448, "learning_rate": 1.8483801277521445e-07, "loss": 0.00851306039839983, "step": 2977 }, { "epoch": 0.7201934703748488, "grad_norm": 2.6827334051901466, "learning_rate": 1.8454016775445736e-07, "loss": 0.01278416346758604, "step": 2978 }, { "epoch": 0.72043530834341, "grad_norm": 1.0657778179873691, "learning_rate": 1.842425085827316e-07, "loss": 0.002916876692324877, "step": 2979 }, { "epoch": 0.720677146311971, "grad_norm": 1.0510974608903956, "learning_rate": 1.8394503543539986e-07, "loss": 0.0035172675270587206, "step": 2980 }, { "epoch": 0.720918984280532, "grad_norm": 1.5155773840321594, "learning_rate": 1.8364774848771426e-07, "loss": 0.006720913108438253, "step": 2981 }, { "epoch": 0.7211608222490931, "grad_norm": 2.2122450143713404, "learning_rate": 1.8335064791481748e-07, "loss": 0.007621284574270248, "step": 2982 }, { "epoch": 0.7214026602176542, "grad_norm": 1.4430690136867177, "learning_rate": 1.830537338917427e-07, "loss": 0.004020869731903076, "step": 2983 }, { "epoch": 0.7216444981862152, "grad_norm": 3.900185927971474, "learning_rate": 1.8275700659341238e-07, "loss": 0.0057753510773181915, "step": 2984 }, { "epoch": 0.7218863361547763, "grad_norm": 1.498125261395078, "learning_rate": 1.8246046619464005e-07, "loss": 0.005796687677502632, "step": 2985 }, { "epoch": 0.7221281741233374, "grad_norm": 0.9650033198875332, "learning_rate": 1.8216411287012844e-07, "loss": 0.0024072048254311085, "step": 2986 }, { "epoch": 0.7223700120918984, "grad_norm": 0.8785123174520604, "learning_rate": 1.818679467944702e-07, "loss": 0.0009275806369259953, "step": 2987 }, { "epoch": 0.7226118500604595, "grad_norm": 1.1671570826748483, "learning_rate": 1.8157196814214772e-07, "loss": 0.01051642931997776, "step": 2988 }, { "epoch": 0.7228536880290205, "grad_norm": 0.739588800603491, "learning_rate": 1.812761770875329e-07, "loss": 0.0023275548592209816, "step": 2989 }, { "epoch": 0.7230955259975816, "grad_norm": 9.964515895310356, "learning_rate": 1.8098057380488725e-07, "loss": 0.00871946569532156, "step": 2990 }, { "epoch": 0.7233373639661427, "grad_norm": 1.3390185446151748, "learning_rate": 1.8068515846836136e-07, "loss": 0.001924254815094173, "step": 2991 }, { "epoch": 0.7235792019347037, "grad_norm": 3.2647039911935654, "learning_rate": 1.803899312519958e-07, "loss": 0.009600557386875153, "step": 2992 }, { "epoch": 0.7238210399032649, "grad_norm": 6.896614678562661, "learning_rate": 1.8009489232971925e-07, "loss": 0.01508875098079443, "step": 2993 }, { "epoch": 0.7240628778718259, "grad_norm": 1.9250696959968248, "learning_rate": 1.7980004187535058e-07, "loss": 0.012600074522197247, "step": 2994 }, { "epoch": 0.7243047158403869, "grad_norm": 1.9098892837963715, "learning_rate": 1.7950538006259642e-07, "loss": 0.005264537874609232, "step": 2995 }, { "epoch": 0.7245465538089481, "grad_norm": 1.6278350421661167, "learning_rate": 1.792109070650535e-07, "loss": 0.0060701086185872555, "step": 2996 }, { "epoch": 0.7247883917775091, "grad_norm": 2.649322049900936, "learning_rate": 1.7891662305620648e-07, "loss": 0.004009123891592026, "step": 2997 }, { "epoch": 0.7250302297460701, "grad_norm": 1.9640278764662533, "learning_rate": 1.7862252820942898e-07, "loss": 0.0017727194353938103, "step": 2998 }, { "epoch": 0.7252720677146312, "grad_norm": 3.373757420997822, "learning_rate": 1.783286226979831e-07, "loss": 0.004852070473134518, "step": 2999 }, { "epoch": 0.7255139056831923, "grad_norm": 5.7479558398272586, "learning_rate": 1.7803490669501946e-07, "loss": 0.003250274108722806, "step": 3000 }, { "epoch": 0.7257557436517533, "grad_norm": 2.5870804553806988, "learning_rate": 1.77741380373577e-07, "loss": 0.015738999471068382, "step": 3001 }, { "epoch": 0.7259975816203144, "grad_norm": 0.6345378599046019, "learning_rate": 1.7744804390658292e-07, "loss": 0.0013502984074875712, "step": 3002 }, { "epoch": 0.7262394195888755, "grad_norm": 0.6988141710356165, "learning_rate": 1.7715489746685253e-07, "loss": 0.0015798722160980105, "step": 3003 }, { "epoch": 0.7264812575574365, "grad_norm": 0.7115478469149061, "learning_rate": 1.768619412270892e-07, "loss": 0.00285231857560575, "step": 3004 }, { "epoch": 0.7267230955259976, "grad_norm": 2.3109652945929504, "learning_rate": 1.765691753598843e-07, "loss": 0.0034062133636325598, "step": 3005 }, { "epoch": 0.7269649334945586, "grad_norm": 2.827793705910036, "learning_rate": 1.76276600037717e-07, "loss": 0.00848688930273056, "step": 3006 }, { "epoch": 0.7272067714631197, "grad_norm": 3.657179524028247, "learning_rate": 1.75984215432954e-07, "loss": 0.023267794400453568, "step": 3007 }, { "epoch": 0.7274486094316808, "grad_norm": 14.23119661835388, "learning_rate": 1.7569202171785048e-07, "loss": 0.005167878698557615, "step": 3008 }, { "epoch": 0.7276904474002418, "grad_norm": 0.8699478754886072, "learning_rate": 1.7540001906454781e-07, "loss": 0.002010509604588151, "step": 3009 }, { "epoch": 0.727932285368803, "grad_norm": 1.8267785631807316, "learning_rate": 1.7510820764507606e-07, "loss": 0.0038976885844022036, "step": 3010 }, { "epoch": 0.728174123337364, "grad_norm": 8.393518742043936, "learning_rate": 1.7481658763135187e-07, "loss": 0.0009184445370920002, "step": 3011 }, { "epoch": 0.728415961305925, "grad_norm": 8.908051506946467, "learning_rate": 1.745251591951794e-07, "loss": 0.006707082036882639, "step": 3012 }, { "epoch": 0.728657799274486, "grad_norm": 3.473952923726401, "learning_rate": 1.7423392250824998e-07, "loss": 0.00521819619461894, "step": 3013 }, { "epoch": 0.7288996372430472, "grad_norm": 1.3411568880395295, "learning_rate": 1.7394287774214172e-07, "loss": 0.007516409736126661, "step": 3014 }, { "epoch": 0.7291414752116082, "grad_norm": 0.5191484871048249, "learning_rate": 1.7365202506832e-07, "loss": 0.0016563745448365808, "step": 3015 }, { "epoch": 0.7293833131801692, "grad_norm": 0.7602437679518905, "learning_rate": 1.7336136465813677e-07, "loss": 0.0015701580559834838, "step": 3016 }, { "epoch": 0.7296251511487304, "grad_norm": 0.7695170092444842, "learning_rate": 1.7307089668283081e-07, "loss": 0.0021653599105775356, "step": 3017 }, { "epoch": 0.7298669891172914, "grad_norm": 2.594797492016948, "learning_rate": 1.7278062131352744e-07, "loss": 0.0055209072306752205, "step": 3018 }, { "epoch": 0.7301088270858525, "grad_norm": 2.7772679284442177, "learning_rate": 1.7249053872123897e-07, "loss": 0.014589724130928516, "step": 3019 }, { "epoch": 0.7303506650544136, "grad_norm": 0.19444700739720416, "learning_rate": 1.722006490768632e-07, "loss": 0.0005947572062723339, "step": 3020 }, { "epoch": 0.7305925030229746, "grad_norm": 1.195569972622971, "learning_rate": 1.7191095255118532e-07, "loss": 0.0013208225136622787, "step": 3021 }, { "epoch": 0.7308343409915357, "grad_norm": 1.6861132391916953, "learning_rate": 1.7162144931487615e-07, "loss": 0.0032257146667689085, "step": 3022 }, { "epoch": 0.7310761789600967, "grad_norm": 0.4332161728123832, "learning_rate": 1.7133213953849235e-07, "loss": 0.0013786298222839832, "step": 3023 }, { "epoch": 0.7313180169286578, "grad_norm": 1.104272594908216, "learning_rate": 1.7104302339247757e-07, "loss": 0.005728436168283224, "step": 3024 }, { "epoch": 0.7315598548972189, "grad_norm": 1.2160380166721287, "learning_rate": 1.7075410104716014e-07, "loss": 0.006006521638482809, "step": 3025 }, { "epoch": 0.7318016928657799, "grad_norm": 0.5751451407912606, "learning_rate": 1.7046537267275545e-07, "loss": 0.0015381206758320332, "step": 3026 }, { "epoch": 0.732043530834341, "grad_norm": 9.807708369679796, "learning_rate": 1.7017683843936387e-07, "loss": 0.009809562005102634, "step": 3027 }, { "epoch": 0.7322853688029021, "grad_norm": 1.8450632230199555, "learning_rate": 1.6988849851697144e-07, "loss": 0.00794878602027893, "step": 3028 }, { "epoch": 0.7325272067714631, "grad_norm": 0.9357963357157568, "learning_rate": 1.6960035307545e-07, "loss": 0.0018447584006935358, "step": 3029 }, { "epoch": 0.7327690447400241, "grad_norm": 2.9457318376387627, "learning_rate": 1.6931240228455657e-07, "loss": 0.00876964908093214, "step": 3030 }, { "epoch": 0.7330108827085853, "grad_norm": 2.9885955928426515, "learning_rate": 1.6902464631393366e-07, "loss": 0.009093730710446835, "step": 3031 }, { "epoch": 0.7332527206771463, "grad_norm": 1.6563247992898191, "learning_rate": 1.6873708533310886e-07, "loss": 0.003294584108516574, "step": 3032 }, { "epoch": 0.7334945586457073, "grad_norm": 2.045800843154056, "learning_rate": 1.6844971951149505e-07, "loss": 0.004635537508875132, "step": 3033 }, { "epoch": 0.7337363966142685, "grad_norm": 13.698701190958982, "learning_rate": 1.6816254901838983e-07, "loss": 0.014577703550457954, "step": 3034 }, { "epoch": 0.7339782345828295, "grad_norm": 1.6190925390287785, "learning_rate": 1.678755740229764e-07, "loss": 0.004085676744580269, "step": 3035 }, { "epoch": 0.7342200725513905, "grad_norm": 20.123124603324452, "learning_rate": 1.675887946943217e-07, "loss": 0.0033027667086571455, "step": 3036 }, { "epoch": 0.7344619105199516, "grad_norm": 1.6150587713494777, "learning_rate": 1.673022112013785e-07, "loss": 0.009104202501475811, "step": 3037 }, { "epoch": 0.7347037484885127, "grad_norm": 0.5823741397806579, "learning_rate": 1.6701582371298356e-07, "loss": 0.002273252699524164, "step": 3038 }, { "epoch": 0.7349455864570738, "grad_norm": 2.477005250535629, "learning_rate": 1.6672963239785836e-07, "loss": 0.0022972363512963057, "step": 3039 }, { "epoch": 0.7351874244256348, "grad_norm": 1.6604902354850966, "learning_rate": 1.6644363742460865e-07, "loss": 0.018211230635643005, "step": 3040 }, { "epoch": 0.7354292623941959, "grad_norm": 0.961471479978288, "learning_rate": 1.6615783896172475e-07, "loss": 0.0036418759264051914, "step": 3041 }, { "epoch": 0.735671100362757, "grad_norm": 1.5083147802540409, "learning_rate": 1.6587223717758104e-07, "loss": 0.00520687410607934, "step": 3042 }, { "epoch": 0.735912938331318, "grad_norm": 2.406485520666719, "learning_rate": 1.6558683224043606e-07, "loss": 0.007687436882406473, "step": 3043 }, { "epoch": 0.7361547762998791, "grad_norm": 7.457406637925948, "learning_rate": 1.6530162431843242e-07, "loss": 0.0042798263020813465, "step": 3044 }, { "epoch": 0.7363966142684402, "grad_norm": 1.8573810238225683, "learning_rate": 1.6501661357959656e-07, "loss": 0.00980936549603939, "step": 3045 }, { "epoch": 0.7366384522370012, "grad_norm": 1.7585501447484297, "learning_rate": 1.6473180019183897e-07, "loss": 0.0041270130313932896, "step": 3046 }, { "epoch": 0.7368802902055622, "grad_norm": 2.5334304219384034, "learning_rate": 1.6444718432295357e-07, "loss": 0.010285424068570137, "step": 3047 }, { "epoch": 0.7371221281741234, "grad_norm": 1.2746147491942172, "learning_rate": 1.6416276614061801e-07, "loss": 0.007204240653663874, "step": 3048 }, { "epoch": 0.7373639661426844, "grad_norm": 1.9634473280332878, "learning_rate": 1.63878545812394e-07, "loss": 0.009119913913309574, "step": 3049 }, { "epoch": 0.7376058041112454, "grad_norm": 2.8068013193114902, "learning_rate": 1.6359452350572557e-07, "loss": 0.01615770533680916, "step": 3050 }, { "epoch": 0.7378476420798066, "grad_norm": 2.7535449621274366, "learning_rate": 1.633106993879413e-07, "loss": 0.004681741818785667, "step": 3051 }, { "epoch": 0.7380894800483676, "grad_norm": 1.4163581173131317, "learning_rate": 1.6302707362625216e-07, "loss": 0.003788616508245468, "step": 3052 }, { "epoch": 0.7383313180169286, "grad_norm": 6.221239517451693, "learning_rate": 1.6274364638775268e-07, "loss": 0.005621916148811579, "step": 3053 }, { "epoch": 0.7385731559854897, "grad_norm": 0.998958284107972, "learning_rate": 1.624604178394202e-07, "loss": 0.0036337680649012327, "step": 3054 }, { "epoch": 0.7388149939540508, "grad_norm": 5.161693658697669, "learning_rate": 1.6217738814811527e-07, "loss": 0.008911259472370148, "step": 3055 }, { "epoch": 0.7390568319226118, "grad_norm": 1.5176608648440835, "learning_rate": 1.6189455748058094e-07, "loss": 0.003438475076109171, "step": 3056 }, { "epoch": 0.7392986698911729, "grad_norm": 12.152543766812204, "learning_rate": 1.6161192600344332e-07, "loss": 0.011300322599709034, "step": 3057 }, { "epoch": 0.739540507859734, "grad_norm": 1.8321403339847122, "learning_rate": 1.6132949388321104e-07, "loss": 0.0031509504187852144, "step": 3058 }, { "epoch": 0.739782345828295, "grad_norm": 0.7842890919411999, "learning_rate": 1.6104726128627505e-07, "loss": 0.002088912995532155, "step": 3059 }, { "epoch": 0.7400241837968561, "grad_norm": 1.7881460200547459, "learning_rate": 1.6076522837890955e-07, "loss": 0.020383303984999657, "step": 3060 }, { "epoch": 0.7402660217654172, "grad_norm": 3.4668252307826037, "learning_rate": 1.6048339532727e-07, "loss": 0.00543619180098176, "step": 3061 }, { "epoch": 0.7405078597339783, "grad_norm": 24.37630160335757, "learning_rate": 1.6020176229739491e-07, "loss": 0.00479874899610877, "step": 3062 }, { "epoch": 0.7407496977025393, "grad_norm": 1.0525898789499344, "learning_rate": 1.5992032945520455e-07, "loss": 0.003311476204544306, "step": 3063 }, { "epoch": 0.7409915356711003, "grad_norm": 2.484766946143461, "learning_rate": 1.5963909696650135e-07, "loss": 0.01069724839180708, "step": 3064 }, { "epoch": 0.7412333736396615, "grad_norm": 1.1053691585001508, "learning_rate": 1.5935806499697018e-07, "loss": 0.0030253571458160877, "step": 3065 }, { "epoch": 0.7414752116082225, "grad_norm": 1.7192139099925088, "learning_rate": 1.5907723371217674e-07, "loss": 0.0070189437828958035, "step": 3066 }, { "epoch": 0.7417170495767835, "grad_norm": 0.4901677839587056, "learning_rate": 1.5879660327756955e-07, "loss": 0.0010879618348553777, "step": 3067 }, { "epoch": 0.7419588875453447, "grad_norm": 1.3249897164358353, "learning_rate": 1.585161738584782e-07, "loss": 0.007020737510174513, "step": 3068 }, { "epoch": 0.7422007255139057, "grad_norm": 1.0001755562839096, "learning_rate": 1.5823594562011399e-07, "loss": 0.004730248358100653, "step": 3069 }, { "epoch": 0.7424425634824667, "grad_norm": 1.564945930446366, "learning_rate": 1.579559187275698e-07, "loss": 0.005497082602232695, "step": 3070 }, { "epoch": 0.7426844014510278, "grad_norm": 11.070527314691315, "learning_rate": 1.5767609334581976e-07, "loss": 0.00679359957575798, "step": 3071 }, { "epoch": 0.7429262394195889, "grad_norm": 1.473836044671399, "learning_rate": 1.5739646963971935e-07, "loss": 0.005922455340623856, "step": 3072 }, { "epoch": 0.7431680773881499, "grad_norm": 0.4438445107806686, "learning_rate": 1.5711704777400518e-07, "loss": 0.0011968864127993584, "step": 3073 }, { "epoch": 0.743409915356711, "grad_norm": 0.566094863654644, "learning_rate": 1.5683782791329504e-07, "loss": 0.0016762128798291087, "step": 3074 }, { "epoch": 0.7436517533252721, "grad_norm": 5.462579036526086, "learning_rate": 1.565588102220874e-07, "loss": 0.021716883406043053, "step": 3075 }, { "epoch": 0.7438935912938331, "grad_norm": 0.7024409017062676, "learning_rate": 1.5627999486476246e-07, "loss": 0.0015776113141328096, "step": 3076 }, { "epoch": 0.7441354292623942, "grad_norm": 0.9769521184789061, "learning_rate": 1.5600138200558e-07, "loss": 0.005997603293508291, "step": 3077 }, { "epoch": 0.7443772672309552, "grad_norm": 24.322961410026252, "learning_rate": 1.5572297180868154e-07, "loss": 0.004503983072936535, "step": 3078 }, { "epoch": 0.7446191051995164, "grad_norm": 0.6450242937134756, "learning_rate": 1.5544476443808885e-07, "loss": 0.0032937414944171906, "step": 3079 }, { "epoch": 0.7448609431680774, "grad_norm": 0.9891937515192251, "learning_rate": 1.5516676005770367e-07, "loss": 0.005268284119665623, "step": 3080 }, { "epoch": 0.7451027811366384, "grad_norm": 4.624930715475359, "learning_rate": 1.5488895883130914e-07, "loss": 0.0050992765463888645, "step": 3081 }, { "epoch": 0.7453446191051996, "grad_norm": 3.5046914179276007, "learning_rate": 1.5461136092256805e-07, "loss": 0.015407130122184753, "step": 3082 }, { "epoch": 0.7455864570737606, "grad_norm": 9.528361066200578, "learning_rate": 1.5433396649502362e-07, "loss": 0.007036268711090088, "step": 3083 }, { "epoch": 0.7458282950423216, "grad_norm": 0.5999569941830454, "learning_rate": 1.5405677571209903e-07, "loss": 0.0011618536664173007, "step": 3084 }, { "epoch": 0.7460701330108828, "grad_norm": 1.2337902131414633, "learning_rate": 1.5377978873709775e-07, "loss": 0.0027356890495866537, "step": 3085 }, { "epoch": 0.7463119709794438, "grad_norm": 1.6037507924371717, "learning_rate": 1.5350300573320285e-07, "loss": 0.010098114609718323, "step": 3086 }, { "epoch": 0.7465538089480048, "grad_norm": 0.5836940756656102, "learning_rate": 1.532264268634776e-07, "loss": 0.0011749084806069732, "step": 3087 }, { "epoch": 0.7467956469165659, "grad_norm": 1.3951749136187888, "learning_rate": 1.5295005229086471e-07, "loss": 0.005140152759850025, "step": 3088 }, { "epoch": 0.747037484885127, "grad_norm": 2.5468550770365916, "learning_rate": 1.5267388217818649e-07, "loss": 0.0031984366942197084, "step": 3089 }, { "epoch": 0.747279322853688, "grad_norm": 1.9501868704363121, "learning_rate": 1.523979166881455e-07, "loss": 0.0094267213717103, "step": 3090 }, { "epoch": 0.7475211608222491, "grad_norm": 1.734796684156703, "learning_rate": 1.521221559833225e-07, "loss": 0.007947845384478569, "step": 3091 }, { "epoch": 0.7477629987908102, "grad_norm": 2.906510745937398, "learning_rate": 1.518466002261789e-07, "loss": 0.0024081729352474213, "step": 3092 }, { "epoch": 0.7480048367593712, "grad_norm": 0.5102282943017565, "learning_rate": 1.515712495790542e-07, "loss": 0.0015269347932189703, "step": 3093 }, { "epoch": 0.7482466747279323, "grad_norm": 1.142905923221665, "learning_rate": 1.512961042041681e-07, "loss": 0.012555211782455444, "step": 3094 }, { "epoch": 0.7484885126964933, "grad_norm": 2.2100182281690794, "learning_rate": 1.5102116426361889e-07, "loss": 0.005344451870769262, "step": 3095 }, { "epoch": 0.7487303506650544, "grad_norm": 0.8067489503061931, "learning_rate": 1.5074642991938336e-07, "loss": 0.002274835016578436, "step": 3096 }, { "epoch": 0.7489721886336155, "grad_norm": 5.624163292077561, "learning_rate": 1.5047190133331806e-07, "loss": 0.009286229498684406, "step": 3097 }, { "epoch": 0.7492140266021765, "grad_norm": 0.7090347614723933, "learning_rate": 1.501975786671579e-07, "loss": 0.0013991787564009428, "step": 3098 }, { "epoch": 0.7494558645707377, "grad_norm": 5.111518385145867, "learning_rate": 1.4992346208251633e-07, "loss": 0.0040916078723967075, "step": 3099 }, { "epoch": 0.7496977025392987, "grad_norm": 3.0152534567890235, "learning_rate": 1.496495517408855e-07, "loss": 0.011888734064996243, "step": 3100 }, { "epoch": 0.7499395405078597, "grad_norm": 1.582311149088029, "learning_rate": 1.4937584780363654e-07, "loss": 0.002076405566185713, "step": 3101 }, { "epoch": 0.7501813784764207, "grad_norm": 7.058356035934043, "learning_rate": 1.4910235043201807e-07, "loss": 0.00515540549531579, "step": 3102 }, { "epoch": 0.7504232164449819, "grad_norm": 0.9393923410808529, "learning_rate": 1.4882905978715766e-07, "loss": 0.0030989048536866903, "step": 3103 }, { "epoch": 0.7506650544135429, "grad_norm": 1.7147492486870384, "learning_rate": 1.4855597603006092e-07, "loss": 0.011460659094154835, "step": 3104 }, { "epoch": 0.750906892382104, "grad_norm": 2.064762731902972, "learning_rate": 1.4828309932161136e-07, "loss": 0.004099302925169468, "step": 3105 }, { "epoch": 0.7511487303506651, "grad_norm": 2.3393247562898054, "learning_rate": 1.4801042982257128e-07, "loss": 0.004853765945881605, "step": 3106 }, { "epoch": 0.7513905683192261, "grad_norm": 1.5228965640521157, "learning_rate": 1.477379676935796e-07, "loss": 0.00494037102907896, "step": 3107 }, { "epoch": 0.7516324062877872, "grad_norm": 1.8240962768213345, "learning_rate": 1.4746571309515443e-07, "loss": 0.005992748774588108, "step": 3108 }, { "epoch": 0.7518742442563483, "grad_norm": 16.483161780232056, "learning_rate": 1.4719366618769074e-07, "loss": 0.01892511360347271, "step": 3109 }, { "epoch": 0.7521160822249093, "grad_norm": 1.3465945014541911, "learning_rate": 1.4692182713146145e-07, "loss": 0.010809908621013165, "step": 3110 }, { "epoch": 0.7523579201934704, "grad_norm": 4.168368614298227, "learning_rate": 1.4665019608661693e-07, "loss": 0.010371272452175617, "step": 3111 }, { "epoch": 0.7525997581620314, "grad_norm": 1.3255527091707475, "learning_rate": 1.4637877321318516e-07, "loss": 0.00407804362475872, "step": 3112 }, { "epoch": 0.7528415961305925, "grad_norm": 2.890490646993031, "learning_rate": 1.4610755867107134e-07, "loss": 0.012696260586380959, "step": 3113 }, { "epoch": 0.7530834340991536, "grad_norm": 1.9791455546617305, "learning_rate": 1.4583655262005784e-07, "loss": 0.00464760884642601, "step": 3114 }, { "epoch": 0.7533252720677146, "grad_norm": 2.6279127316607185, "learning_rate": 1.4556575521980453e-07, "loss": 0.006800898350775242, "step": 3115 }, { "epoch": 0.7535671100362757, "grad_norm": 0.8873820430415307, "learning_rate": 1.4529516662984782e-07, "loss": 0.003781203180551529, "step": 3116 }, { "epoch": 0.7538089480048368, "grad_norm": 19.21562690268658, "learning_rate": 1.4502478700960203e-07, "loss": 0.29025980830192566, "step": 3117 }, { "epoch": 0.7540507859733978, "grad_norm": 0.9410853981066839, "learning_rate": 1.4475461651835723e-07, "loss": 0.0032223686575889587, "step": 3118 }, { "epoch": 0.7542926239419588, "grad_norm": 7.115342156252904, "learning_rate": 1.4448465531528086e-07, "loss": 0.016242390498518944, "step": 3119 }, { "epoch": 0.75453446191052, "grad_norm": 6.236298386414263, "learning_rate": 1.442149035594175e-07, "loss": 0.011759469285607338, "step": 3120 }, { "epoch": 0.754776299879081, "grad_norm": 1.5101230497765257, "learning_rate": 1.4394536140968728e-07, "loss": 0.005757116246968508, "step": 3121 }, { "epoch": 0.755018137847642, "grad_norm": 3.3157863316139733, "learning_rate": 1.4367602902488797e-07, "loss": 0.003939529415220022, "step": 3122 }, { "epoch": 0.7552599758162032, "grad_norm": 2.44780633394839, "learning_rate": 1.4340690656369263e-07, "loss": 0.003338068723678589, "step": 3123 }, { "epoch": 0.7555018137847642, "grad_norm": 0.7007014279855279, "learning_rate": 1.4313799418465173e-07, "loss": 0.0012140729231759906, "step": 3124 }, { "epoch": 0.7557436517533253, "grad_norm": 1.9789874017521536, "learning_rate": 1.428692920461913e-07, "loss": 0.007746960502117872, "step": 3125 }, { "epoch": 0.7559854897218863, "grad_norm": 2.1672136416893806, "learning_rate": 1.426008003066137e-07, "loss": 0.007152661215513945, "step": 3126 }, { "epoch": 0.7562273276904474, "grad_norm": 2.0876238231598885, "learning_rate": 1.4233251912409732e-07, "loss": 0.006923177279531956, "step": 3127 }, { "epoch": 0.7564691656590085, "grad_norm": 3.462795548713231, "learning_rate": 1.4206444865669647e-07, "loss": 0.004149565007537603, "step": 3128 }, { "epoch": 0.7567110036275695, "grad_norm": 3.05018720610648, "learning_rate": 1.417965890623415e-07, "loss": 0.007325805723667145, "step": 3129 }, { "epoch": 0.7569528415961306, "grad_norm": 3.446899035301594, "learning_rate": 1.4152894049883802e-07, "loss": 0.009830021299421787, "step": 3130 }, { "epoch": 0.7571946795646917, "grad_norm": 1.7493438366570138, "learning_rate": 1.4126150312386825e-07, "loss": 0.011381921358406544, "step": 3131 }, { "epoch": 0.7574365175332527, "grad_norm": 4.656000612098763, "learning_rate": 1.4099427709498884e-07, "loss": 0.01763243041932583, "step": 3132 }, { "epoch": 0.7576783555018138, "grad_norm": 2.9689723737928255, "learning_rate": 1.4072726256963308e-07, "loss": 0.012096861377358437, "step": 3133 }, { "epoch": 0.7579201934703749, "grad_norm": 1.6964894881577874, "learning_rate": 1.4046045970510845e-07, "loss": 0.00416781147941947, "step": 3134 }, { "epoch": 0.7581620314389359, "grad_norm": 2.2956932681039692, "learning_rate": 1.4019386865859884e-07, "loss": 0.006207955069839954, "step": 3135 }, { "epoch": 0.7584038694074969, "grad_norm": 3.6906141026445054, "learning_rate": 1.3992748958716288e-07, "loss": 0.007866688072681427, "step": 3136 }, { "epoch": 0.7586457073760581, "grad_norm": 4.708662679774112, "learning_rate": 1.396613226477339e-07, "loss": 0.007038299925625324, "step": 3137 }, { "epoch": 0.7588875453446191, "grad_norm": 2.4904268375318, "learning_rate": 1.3939536799712104e-07, "loss": 0.015531608834862709, "step": 3138 }, { "epoch": 0.7591293833131801, "grad_norm": 1.2343295854425065, "learning_rate": 1.3912962579200793e-07, "loss": 0.004823850933462381, "step": 3139 }, { "epoch": 0.7593712212817413, "grad_norm": 6.507042090938083, "learning_rate": 1.3886409618895306e-07, "loss": 0.009561549872159958, "step": 3140 }, { "epoch": 0.7596130592503023, "grad_norm": 5.046487248499158, "learning_rate": 1.3859877934438973e-07, "loss": 0.005957392044365406, "step": 3141 }, { "epoch": 0.7598548972188633, "grad_norm": 2.5695372998367905, "learning_rate": 1.3833367541462587e-07, "loss": 0.0037917494773864746, "step": 3142 }, { "epoch": 0.7600967351874244, "grad_norm": 16.391530719371872, "learning_rate": 1.3806878455584404e-07, "loss": 0.05082155391573906, "step": 3143 }, { "epoch": 0.7603385731559855, "grad_norm": 3.9200175346837045, "learning_rate": 1.3780410692410121e-07, "loss": 0.025098178535699844, "step": 3144 }, { "epoch": 0.7605804111245466, "grad_norm": 0.9347135594494492, "learning_rate": 1.3753964267532875e-07, "loss": 0.0032977834343910217, "step": 3145 }, { "epoch": 0.7608222490931076, "grad_norm": 6.713245253538171, "learning_rate": 1.3727539196533212e-07, "loss": 0.003458814462646842, "step": 3146 }, { "epoch": 0.7610640870616687, "grad_norm": 1.7623998335931195, "learning_rate": 1.370113549497916e-07, "loss": 0.008906674571335316, "step": 3147 }, { "epoch": 0.7613059250302298, "grad_norm": 0.5109124550097703, "learning_rate": 1.3674753178426063e-07, "loss": 0.0010352468816563487, "step": 3148 }, { "epoch": 0.7615477629987908, "grad_norm": 4.33683263057801, "learning_rate": 1.3648392262416752e-07, "loss": 0.020756440237164497, "step": 3149 }, { "epoch": 0.7617896009673518, "grad_norm": 0.5251593925457141, "learning_rate": 1.362205276248141e-07, "loss": 0.0015753628686070442, "step": 3150 }, { "epoch": 0.762031438935913, "grad_norm": 1.4771724509586364, "learning_rate": 1.3595734694137605e-07, "loss": 0.0026542849373072386, "step": 3151 }, { "epoch": 0.762273276904474, "grad_norm": 2.395710808287426, "learning_rate": 1.3569438072890298e-07, "loss": 0.007945043034851551, "step": 3152 }, { "epoch": 0.762515114873035, "grad_norm": 1.7979475569703232, "learning_rate": 1.3543162914231754e-07, "loss": 0.005184263922274113, "step": 3153 }, { "epoch": 0.7627569528415962, "grad_norm": 1.3717432803981695, "learning_rate": 1.3516909233641687e-07, "loss": 0.0015630129491910338, "step": 3154 }, { "epoch": 0.7629987908101572, "grad_norm": 0.46118655896935723, "learning_rate": 1.3490677046587084e-07, "loss": 0.0020388278644531965, "step": 3155 }, { "epoch": 0.7632406287787182, "grad_norm": 1.2745509423230728, "learning_rate": 1.346446636852231e-07, "loss": 0.0033768494613468647, "step": 3156 }, { "epoch": 0.7634824667472794, "grad_norm": 0.9922165554753808, "learning_rate": 1.3438277214889027e-07, "loss": 0.0049502053298056126, "step": 3157 }, { "epoch": 0.7637243047158404, "grad_norm": 1.3029600738727045, "learning_rate": 1.3412109601116246e-07, "loss": 0.0018058496061712503, "step": 3158 }, { "epoch": 0.7639661426844014, "grad_norm": 1.6261261440508743, "learning_rate": 1.338596354262026e-07, "loss": 0.004221064504235983, "step": 3159 }, { "epoch": 0.7642079806529625, "grad_norm": 1.2773330592221392, "learning_rate": 1.3359839054804677e-07, "loss": 0.004504418466240168, "step": 3160 }, { "epoch": 0.7644498186215236, "grad_norm": 1.974057527041026, "learning_rate": 1.3333736153060432e-07, "loss": 0.005629079416394234, "step": 3161 }, { "epoch": 0.7646916565900846, "grad_norm": 1.230240264496493, "learning_rate": 1.3307654852765655e-07, "loss": 0.007293128874152899, "step": 3162 }, { "epoch": 0.7649334945586457, "grad_norm": 1.7820225740221967, "learning_rate": 1.3281595169285852e-07, "loss": 0.005243875086307526, "step": 3163 }, { "epoch": 0.7651753325272068, "grad_norm": 4.314307697257109, "learning_rate": 1.3255557117973697e-07, "loss": 0.003005063394084573, "step": 3164 }, { "epoch": 0.7654171704957679, "grad_norm": 2.9982616807559337, "learning_rate": 1.3229540714169207e-07, "loss": 0.010851840488612652, "step": 3165 }, { "epoch": 0.7656590084643289, "grad_norm": 19.283763597038188, "learning_rate": 1.3203545973199587e-07, "loss": 0.0043342746794223785, "step": 3166 }, { "epoch": 0.7659008464328899, "grad_norm": 1.3453150970361416, "learning_rate": 1.3177572910379309e-07, "loss": 0.006138605065643787, "step": 3167 }, { "epoch": 0.7661426844014511, "grad_norm": 3.218983260769898, "learning_rate": 1.315162154101005e-07, "loss": 0.011176795698702335, "step": 3168 }, { "epoch": 0.7663845223700121, "grad_norm": 0.8306950968823449, "learning_rate": 1.3125691880380723e-07, "loss": 0.0025633645709604025, "step": 3169 }, { "epoch": 0.7666263603385731, "grad_norm": 0.8551843128806691, "learning_rate": 1.3099783943767457e-07, "loss": 0.004182841163128614, "step": 3170 }, { "epoch": 0.7668681983071343, "grad_norm": 3.210790669532923, "learning_rate": 1.3073897746433548e-07, "loss": 0.0041560460813343525, "step": 3171 }, { "epoch": 0.7671100362756953, "grad_norm": 1.3324983199389588, "learning_rate": 1.3048033303629563e-07, "loss": 0.005217993166297674, "step": 3172 }, { "epoch": 0.7673518742442563, "grad_norm": 2.7203754658083814, "learning_rate": 1.3022190630593138e-07, "loss": 0.007699624635279179, "step": 3173 }, { "epoch": 0.7675937122128174, "grad_norm": 1.5922723039399234, "learning_rate": 1.2996369742549202e-07, "loss": 0.003109197597950697, "step": 3174 }, { "epoch": 0.7678355501813785, "grad_norm": 2.118603909884224, "learning_rate": 1.297057065470975e-07, "loss": 0.009035130962729454, "step": 3175 }, { "epoch": 0.7680773881499395, "grad_norm": 2.3909786380541327, "learning_rate": 1.2944793382273983e-07, "loss": 0.006708769593387842, "step": 3176 }, { "epoch": 0.7683192261185006, "grad_norm": 2.0621709253096814, "learning_rate": 1.291903794042828e-07, "loss": 0.006542855408042669, "step": 3177 }, { "epoch": 0.7685610640870617, "grad_norm": 1.4028828286605415, "learning_rate": 1.289330434434607e-07, "loss": 0.00582387950271368, "step": 3178 }, { "epoch": 0.7688029020556227, "grad_norm": 2.0932106514641085, "learning_rate": 1.2867592609188e-07, "loss": 0.003909481689333916, "step": 3179 }, { "epoch": 0.7690447400241838, "grad_norm": 1.860354669641399, "learning_rate": 1.284190275010179e-07, "loss": 0.009587649255990982, "step": 3180 }, { "epoch": 0.7692865779927449, "grad_norm": 0.893015587959938, "learning_rate": 1.2816234782222295e-07, "loss": 0.005494568031281233, "step": 3181 }, { "epoch": 0.769528415961306, "grad_norm": 5.89788619095338, "learning_rate": 1.2790588720671447e-07, "loss": 0.003702716901898384, "step": 3182 }, { "epoch": 0.769770253929867, "grad_norm": 0.8114760417403539, "learning_rate": 1.27649645805583e-07, "loss": 0.004191444255411625, "step": 3183 }, { "epoch": 0.770012091898428, "grad_norm": 2.426219876165398, "learning_rate": 1.2739362376978973e-07, "loss": 0.006800346076488495, "step": 3184 }, { "epoch": 0.7702539298669892, "grad_norm": 3.877050531450103, "learning_rate": 1.2713782125016676e-07, "loss": 0.007745085749775171, "step": 3185 }, { "epoch": 0.7704957678355502, "grad_norm": 3.1130802198513803, "learning_rate": 1.2688223839741674e-07, "loss": 0.011432578787207603, "step": 3186 }, { "epoch": 0.7707376058041112, "grad_norm": 3.376416747977908, "learning_rate": 1.2662687536211281e-07, "loss": 0.012130322866141796, "step": 3187 }, { "epoch": 0.7709794437726724, "grad_norm": 1.6181163641951215, "learning_rate": 1.2637173229469922e-07, "loss": 0.007773288991302252, "step": 3188 }, { "epoch": 0.7712212817412334, "grad_norm": 2.464862653396542, "learning_rate": 1.2611680934548964e-07, "loss": 0.01058485358953476, "step": 3189 }, { "epoch": 0.7714631197097944, "grad_norm": 1.644376198139842, "learning_rate": 1.258621066646689e-07, "loss": 0.004693292547017336, "step": 3190 }, { "epoch": 0.7717049576783555, "grad_norm": 0.9824147742378782, "learning_rate": 1.2560762440229172e-07, "loss": 0.0020433084573596716, "step": 3191 }, { "epoch": 0.7719467956469166, "grad_norm": 0.5769770177665626, "learning_rate": 1.2535336270828296e-07, "loss": 0.0011470125755295157, "step": 3192 }, { "epoch": 0.7721886336154776, "grad_norm": 5.025687684926691, "learning_rate": 1.2509932173243765e-07, "loss": 0.011071751825511456, "step": 3193 }, { "epoch": 0.7724304715840387, "grad_norm": 2.624629854007317, "learning_rate": 1.2484550162442038e-07, "loss": 0.009483064524829388, "step": 3194 }, { "epoch": 0.7726723095525998, "grad_norm": 6.927529766944572, "learning_rate": 1.2459190253376633e-07, "loss": 0.004799526650458574, "step": 3195 }, { "epoch": 0.7729141475211608, "grad_norm": 0.6727310517789796, "learning_rate": 1.2433852460987993e-07, "loss": 0.002985595492646098, "step": 3196 }, { "epoch": 0.7731559854897219, "grad_norm": 0.7225147251491396, "learning_rate": 1.2408536800203545e-07, "loss": 0.0021348032169044018, "step": 3197 }, { "epoch": 0.7733978234582829, "grad_norm": 0.9839727588658728, "learning_rate": 1.2383243285937684e-07, "loss": 0.0035888098645955324, "step": 3198 }, { "epoch": 0.773639661426844, "grad_norm": 8.647555862694803, "learning_rate": 1.2357971933091755e-07, "loss": 0.011245787143707275, "step": 3199 }, { "epoch": 0.7738814993954051, "grad_norm": 4.969896955878614, "learning_rate": 1.2332722756554037e-07, "loss": 0.022319931536912918, "step": 3200 }, { "epoch": 0.7741233373639661, "grad_norm": 1.5658770889135272, "learning_rate": 1.2307495771199738e-07, "loss": 0.006393443327397108, "step": 3201 }, { "epoch": 0.7743651753325272, "grad_norm": 38.77629644592216, "learning_rate": 1.2282290991891058e-07, "loss": 0.006926730275154114, "step": 3202 }, { "epoch": 0.7746070133010883, "grad_norm": 0.7920464626943293, "learning_rate": 1.2257108433476998e-07, "loss": 0.0013269450282678008, "step": 3203 }, { "epoch": 0.7748488512696493, "grad_norm": 3.7117922688484217, "learning_rate": 1.223194811079359e-07, "loss": 0.004661704879254103, "step": 3204 }, { "epoch": 0.7750906892382105, "grad_norm": 1.7243898172246916, "learning_rate": 1.2206810038663655e-07, "loss": 0.004491386003792286, "step": 3205 }, { "epoch": 0.7753325272067715, "grad_norm": 1.3562464151627234, "learning_rate": 1.2181694231897e-07, "loss": 0.005011648405343294, "step": 3206 }, { "epoch": 0.7755743651753325, "grad_norm": 4.61380577311871, "learning_rate": 1.215660070529026e-07, "loss": 0.008751890622079372, "step": 3207 }, { "epoch": 0.7758162031438935, "grad_norm": 5.320346233659693, "learning_rate": 1.213152947362696e-07, "loss": 0.00450844457373023, "step": 3208 }, { "epoch": 0.7760580411124547, "grad_norm": 38.66042897796737, "learning_rate": 1.210648055167749e-07, "loss": 0.03266347944736481, "step": 3209 }, { "epoch": 0.7762998790810157, "grad_norm": 2.6628952737631137, "learning_rate": 1.2081453954199082e-07, "loss": 0.006470152642577887, "step": 3210 }, { "epoch": 0.7765417170495768, "grad_norm": 2.7572959717204886, "learning_rate": 1.2056449695935843e-07, "loss": 0.01694295182824135, "step": 3211 }, { "epoch": 0.7767835550181379, "grad_norm": 3.322027544903852, "learning_rate": 1.20314677916187e-07, "loss": 0.007293786853551865, "step": 3212 }, { "epoch": 0.7770253929866989, "grad_norm": 1.4425244687599024, "learning_rate": 1.200650825596541e-07, "loss": 0.004464572761207819, "step": 3213 }, { "epoch": 0.77726723095526, "grad_norm": 1.3684892127164408, "learning_rate": 1.198157110368056e-07, "loss": 0.0032728954683989286, "step": 3214 }, { "epoch": 0.777509068923821, "grad_norm": 4.272888802221474, "learning_rate": 1.1956656349455552e-07, "loss": 0.013372696004807949, "step": 3215 }, { "epoch": 0.7777509068923821, "grad_norm": 1.249801049217389, "learning_rate": 1.1931764007968576e-07, "loss": 0.004818582907319069, "step": 3216 }, { "epoch": 0.7779927448609432, "grad_norm": 3.136773432169378, "learning_rate": 1.1906894093884629e-07, "loss": 0.003595285816118121, "step": 3217 }, { "epoch": 0.7782345828295042, "grad_norm": 0.9356532951017587, "learning_rate": 1.1882046621855524e-07, "loss": 0.0014080863911658525, "step": 3218 }, { "epoch": 0.7784764207980653, "grad_norm": 1.8112161421288753, "learning_rate": 1.185722160651978e-07, "loss": 0.005270479712635279, "step": 3219 }, { "epoch": 0.7787182587666264, "grad_norm": 3.167453674449949, "learning_rate": 1.1832419062502763e-07, "loss": 0.0050644720904529095, "step": 3220 }, { "epoch": 0.7789600967351874, "grad_norm": 2.7942168250488613, "learning_rate": 1.1807639004416553e-07, "loss": 0.012163348495960236, "step": 3221 }, { "epoch": 0.7792019347037484, "grad_norm": 1.264359956731985, "learning_rate": 1.1782881446860004e-07, "loss": 0.0013634859351441264, "step": 3222 }, { "epoch": 0.7794437726723096, "grad_norm": 1.178370772163607, "learning_rate": 1.1758146404418706e-07, "loss": 0.005230753216892481, "step": 3223 }, { "epoch": 0.7796856106408706, "grad_norm": 2.484211388441153, "learning_rate": 1.1733433891664978e-07, "loss": 0.0029535170178860426, "step": 3224 }, { "epoch": 0.7799274486094316, "grad_norm": 4.266146877056484, "learning_rate": 1.1708743923157882e-07, "loss": 0.005659075453877449, "step": 3225 }, { "epoch": 0.7801692865779928, "grad_norm": 1.138767204567377, "learning_rate": 1.1684076513443186e-07, "loss": 0.007152247708290815, "step": 3226 }, { "epoch": 0.7804111245465538, "grad_norm": 10.732150048330068, "learning_rate": 1.1659431677053372e-07, "loss": 0.010464879684150219, "step": 3227 }, { "epoch": 0.7806529625151148, "grad_norm": 6.108690487721513, "learning_rate": 1.1634809428507609e-07, "loss": 0.0045618037693202496, "step": 3228 }, { "epoch": 0.780894800483676, "grad_norm": 2.0935656948130537, "learning_rate": 1.1610209782311808e-07, "loss": 0.006816452834755182, "step": 3229 }, { "epoch": 0.781136638452237, "grad_norm": 1.655429768934874, "learning_rate": 1.1585632752958485e-07, "loss": 0.004959524143487215, "step": 3230 }, { "epoch": 0.781378476420798, "grad_norm": 0.7734174684661328, "learning_rate": 1.1561078354926912e-07, "loss": 0.0017750256229192019, "step": 3231 }, { "epoch": 0.7816203143893591, "grad_norm": 0.4744910888832745, "learning_rate": 1.153654660268299e-07, "loss": 0.00039109564386308193, "step": 3232 }, { "epoch": 0.7818621523579202, "grad_norm": 4.2792594964993, "learning_rate": 1.1512037510679235e-07, "loss": 0.0012013826053589582, "step": 3233 }, { "epoch": 0.7821039903264813, "grad_norm": 1.945520700863265, "learning_rate": 1.148755109335492e-07, "loss": 0.00582101522013545, "step": 3234 }, { "epoch": 0.7823458282950423, "grad_norm": 0.7594178449166159, "learning_rate": 1.1463087365135832e-07, "loss": 0.0020188174676150084, "step": 3235 }, { "epoch": 0.7825876662636034, "grad_norm": 0.8305271615556572, "learning_rate": 1.143864634043451e-07, "loss": 0.003940328489989042, "step": 3236 }, { "epoch": 0.7828295042321645, "grad_norm": 1.6312697085588748, "learning_rate": 1.1414228033650042e-07, "loss": 0.004584223031997681, "step": 3237 }, { "epoch": 0.7830713422007255, "grad_norm": 2.3013334362314652, "learning_rate": 1.1389832459168158e-07, "loss": 0.005626000463962555, "step": 3238 }, { "epoch": 0.7833131801692865, "grad_norm": 4.520479479438216, "learning_rate": 1.1365459631361191e-07, "loss": 0.015125936828553677, "step": 3239 }, { "epoch": 0.7835550181378477, "grad_norm": 2.4723806718526458, "learning_rate": 1.134110956458807e-07, "loss": 0.004936720710247755, "step": 3240 }, { "epoch": 0.7837968561064087, "grad_norm": 1.8635076241359214, "learning_rate": 1.1316782273194319e-07, "loss": 0.00560983270406723, "step": 3241 }, { "epoch": 0.7840386940749697, "grad_norm": 1.63918899183142, "learning_rate": 1.1292477771512043e-07, "loss": 0.007370102684944868, "step": 3242 }, { "epoch": 0.7842805320435309, "grad_norm": 1.8065576827111616, "learning_rate": 1.1268196073859931e-07, "loss": 0.0049287304282188416, "step": 3243 }, { "epoch": 0.7845223700120919, "grad_norm": 4.774141440669755, "learning_rate": 1.1243937194543196e-07, "loss": 0.007205156143754721, "step": 3244 }, { "epoch": 0.7847642079806529, "grad_norm": 3.619686248013944, "learning_rate": 1.1219701147853699e-07, "loss": 0.0030340314842760563, "step": 3245 }, { "epoch": 0.7850060459492141, "grad_norm": 3.526946112983912, "learning_rate": 1.119548794806972e-07, "loss": 0.011931808665394783, "step": 3246 }, { "epoch": 0.7852478839177751, "grad_norm": 1.0539158563859852, "learning_rate": 1.1171297609456199e-07, "loss": 0.0019191562896594405, "step": 3247 }, { "epoch": 0.7854897218863361, "grad_norm": 1.3714055358325943, "learning_rate": 1.1147130146264549e-07, "loss": 0.004397721029818058, "step": 3248 }, { "epoch": 0.7857315598548972, "grad_norm": 4.4787002179949384, "learning_rate": 1.1122985572732685e-07, "loss": 0.007344340439885855, "step": 3249 }, { "epoch": 0.7859733978234583, "grad_norm": 2.1518572041866264, "learning_rate": 1.1098863903085098e-07, "loss": 0.0012576996814459562, "step": 3250 }, { "epoch": 0.7862152357920194, "grad_norm": 3.3397003097424713, "learning_rate": 1.1074765151532739e-07, "loss": 0.006291106343269348, "step": 3251 }, { "epoch": 0.7864570737605804, "grad_norm": 1.183467059152966, "learning_rate": 1.1050689332273084e-07, "loss": 0.005396442953497171, "step": 3252 }, { "epoch": 0.7866989117291415, "grad_norm": 1.712800342573199, "learning_rate": 1.1026636459490079e-07, "loss": 0.005181462503969669, "step": 3253 }, { "epoch": 0.7869407496977026, "grad_norm": 3.674132863097532, "learning_rate": 1.1002606547354154e-07, "loss": 0.002609398914501071, "step": 3254 }, { "epoch": 0.7871825876662636, "grad_norm": 2.0816698892112524, "learning_rate": 1.0978599610022226e-07, "loss": 0.014989267103374004, "step": 3255 }, { "epoch": 0.7874244256348246, "grad_norm": 6.027160849671359, "learning_rate": 1.0954615661637667e-07, "loss": 0.002387524349614978, "step": 3256 }, { "epoch": 0.7876662636033858, "grad_norm": 1.705361260458056, "learning_rate": 1.0930654716330307e-07, "loss": 0.014658558182418346, "step": 3257 }, { "epoch": 0.7879081015719468, "grad_norm": 1.1897126430952856, "learning_rate": 1.0906716788216397e-07, "loss": 0.007232125848531723, "step": 3258 }, { "epoch": 0.7881499395405078, "grad_norm": 1.22840991929079, "learning_rate": 1.0882801891398713e-07, "loss": 0.0022253566421568394, "step": 3259 }, { "epoch": 0.788391777509069, "grad_norm": 0.9342861848205153, "learning_rate": 1.085891003996634e-07, "loss": 0.003351859748363495, "step": 3260 }, { "epoch": 0.78863361547763, "grad_norm": 1.186902969574281, "learning_rate": 1.0835041247994892e-07, "loss": 0.002082196297124028, "step": 3261 }, { "epoch": 0.788875453446191, "grad_norm": 0.705274294218337, "learning_rate": 1.0811195529546335e-07, "loss": 0.0035021149087697268, "step": 3262 }, { "epoch": 0.7891172914147521, "grad_norm": 1.5311197696653411, "learning_rate": 1.0787372898669073e-07, "loss": 0.007753640413284302, "step": 3263 }, { "epoch": 0.7893591293833132, "grad_norm": 2.1804045692029184, "learning_rate": 1.0763573369397893e-07, "loss": 0.0041105193085968494, "step": 3264 }, { "epoch": 0.7896009673518742, "grad_norm": 4.748226748649589, "learning_rate": 1.0739796955753977e-07, "loss": 0.01870628632605076, "step": 3265 }, { "epoch": 0.7898428053204353, "grad_norm": 1.52267967749632, "learning_rate": 1.0716043671744879e-07, "loss": 0.0073014977388083935, "step": 3266 }, { "epoch": 0.7900846432889964, "grad_norm": 3.838467861570845, "learning_rate": 1.069231353136455e-07, "loss": 0.011439739726483822, "step": 3267 }, { "epoch": 0.7903264812575574, "grad_norm": 1.8707554434345774, "learning_rate": 1.0668606548593284e-07, "loss": 0.0016600212547928095, "step": 3268 }, { "epoch": 0.7905683192261185, "grad_norm": 1.5652298942796454, "learning_rate": 1.0644922737397721e-07, "loss": 0.0054398709908127785, "step": 3269 }, { "epoch": 0.7908101571946796, "grad_norm": 0.9122380762799116, "learning_rate": 1.0621262111730922e-07, "loss": 0.004370154347270727, "step": 3270 }, { "epoch": 0.7910519951632407, "grad_norm": 1.2782912642668776, "learning_rate": 1.059762468553218e-07, "loss": 0.0020821518264710903, "step": 3271 }, { "epoch": 0.7912938331318017, "grad_norm": 4.442814109428151, "learning_rate": 1.0574010472727202e-07, "loss": 0.009475156664848328, "step": 3272 }, { "epoch": 0.7915356711003627, "grad_norm": 2.1758152673386144, "learning_rate": 1.0550419487227985e-07, "loss": 0.009278838522732258, "step": 3273 }, { "epoch": 0.7917775090689239, "grad_norm": 1.6526347668136991, "learning_rate": 1.0526851742932835e-07, "loss": 0.005154969170689583, "step": 3274 }, { "epoch": 0.7920193470374849, "grad_norm": 1.3929693354795052, "learning_rate": 1.0503307253726423e-07, "loss": 0.0029114687349647284, "step": 3275 }, { "epoch": 0.7922611850060459, "grad_norm": 1.8556435850082391, "learning_rate": 1.0479786033479626e-07, "loss": 0.013080179691314697, "step": 3276 }, { "epoch": 0.7925030229746071, "grad_norm": 1.1604153362313696, "learning_rate": 1.0456288096049698e-07, "loss": 0.0031806014012545347, "step": 3277 }, { "epoch": 0.7927448609431681, "grad_norm": 1.8840723901736596, "learning_rate": 1.0432813455280126e-07, "loss": 0.0031408637296408415, "step": 3278 }, { "epoch": 0.7929866989117291, "grad_norm": 4.085937493953329, "learning_rate": 1.0409362125000698e-07, "loss": 0.0071047828532755375, "step": 3279 }, { "epoch": 0.7932285368802902, "grad_norm": 3.5989811682755164, "learning_rate": 1.0385934119027451e-07, "loss": 0.009666692465543747, "step": 3280 }, { "epoch": 0.7934703748488513, "grad_norm": 1.161724989277615, "learning_rate": 1.0362529451162682e-07, "loss": 0.005957550834864378, "step": 3281 }, { "epoch": 0.7937122128174123, "grad_norm": 4.1280820600394454, "learning_rate": 1.0339148135194959e-07, "loss": 0.005394525360316038, "step": 3282 }, { "epoch": 0.7939540507859734, "grad_norm": 1.1420544369352092, "learning_rate": 1.0315790184899065e-07, "loss": 0.005020855460315943, "step": 3283 }, { "epoch": 0.7941958887545345, "grad_norm": 2.4027696511199697, "learning_rate": 1.0292455614036038e-07, "loss": 0.0028669226448982954, "step": 3284 }, { "epoch": 0.7944377267230955, "grad_norm": 1.923408310875244, "learning_rate": 1.0269144436353123e-07, "loss": 0.0024547765497118235, "step": 3285 }, { "epoch": 0.7946795646916566, "grad_norm": 0.9451460603377816, "learning_rate": 1.0245856665583829e-07, "loss": 0.004073964431881905, "step": 3286 }, { "epoch": 0.7949214026602176, "grad_norm": 1.4907847176684805, "learning_rate": 1.0222592315447798e-07, "loss": 0.0038211941719055176, "step": 3287 }, { "epoch": 0.7951632406287787, "grad_norm": 1.0603339579360935, "learning_rate": 1.0199351399650946e-07, "loss": 0.004337636288255453, "step": 3288 }, { "epoch": 0.7954050785973398, "grad_norm": 0.8923704336471875, "learning_rate": 1.0176133931885361e-07, "loss": 0.0016992072341963649, "step": 3289 }, { "epoch": 0.7956469165659008, "grad_norm": 3.318509138347926, "learning_rate": 1.0152939925829268e-07, "loss": 0.009625153616070747, "step": 3290 }, { "epoch": 0.795888754534462, "grad_norm": 3.987547763471806, "learning_rate": 1.0129769395147152e-07, "loss": 0.010895474813878536, "step": 3291 }, { "epoch": 0.796130592503023, "grad_norm": 1.1449479151573096, "learning_rate": 1.0106622353489613e-07, "loss": 0.0036306746769696474, "step": 3292 }, { "epoch": 0.796372430471584, "grad_norm": 0.9373514127844256, "learning_rate": 1.0083498814493435e-07, "loss": 0.0014285850338637829, "step": 3293 }, { "epoch": 0.7966142684401452, "grad_norm": 0.9452132822186808, "learning_rate": 1.0060398791781544e-07, "loss": 0.007321496494114399, "step": 3294 }, { "epoch": 0.7968561064087062, "grad_norm": 1.1756409423353922, "learning_rate": 1.0037322298963008e-07, "loss": 0.007077913731336594, "step": 3295 }, { "epoch": 0.7970979443772672, "grad_norm": 0.6358625711585237, "learning_rate": 1.0014269349633053e-07, "loss": 0.0018492619274184108, "step": 3296 }, { "epoch": 0.7973397823458283, "grad_norm": 6.0423708816175274, "learning_rate": 9.991239957373022e-08, "loss": 0.0036009177565574646, "step": 3297 }, { "epoch": 0.7975816203143894, "grad_norm": 2.0035533822731244, "learning_rate": 9.96823413575037e-08, "loss": 0.007527368608862162, "step": 3298 }, { "epoch": 0.7978234582829504, "grad_norm": 0.8822382398974525, "learning_rate": 9.945251898318669e-08, "loss": 0.0021283996757119894, "step": 3299 }, { "epoch": 0.7980652962515115, "grad_norm": 1.6754472900570592, "learning_rate": 9.922293258617643e-08, "loss": 0.0014110247138887644, "step": 3300 }, { "epoch": 0.7983071342200726, "grad_norm": 2.144066153351062, "learning_rate": 9.899358230173022e-08, "loss": 0.012706778012216091, "step": 3301 }, { "epoch": 0.7985489721886336, "grad_norm": 2.7177481517298174, "learning_rate": 9.876446826496726e-08, "loss": 0.007280122023075819, "step": 3302 }, { "epoch": 0.7987908101571947, "grad_norm": 3.3266256884630967, "learning_rate": 9.853559061086663e-08, "loss": 0.00716506689786911, "step": 3303 }, { "epoch": 0.7990326481257557, "grad_norm": 1.398134046694374, "learning_rate": 9.830694947426894e-08, "loss": 0.0010446681408211589, "step": 3304 }, { "epoch": 0.7992744860943168, "grad_norm": 2.9353341900251486, "learning_rate": 9.807854498987517e-08, "loss": 0.00916812103241682, "step": 3305 }, { "epoch": 0.7995163240628779, "grad_norm": 2.0818970093345364, "learning_rate": 9.785037729224638e-08, "loss": 0.00583615293726325, "step": 3306 }, { "epoch": 0.7997581620314389, "grad_norm": 2.8140314375050144, "learning_rate": 9.762244651580503e-08, "loss": 0.0057523781433701515, "step": 3307 }, { "epoch": 0.8, "grad_norm": 1.9786616805523491, "learning_rate": 9.739475279483338e-08, "loss": 0.0129789924249053, "step": 3308 }, { "epoch": 0.8002418379685611, "grad_norm": 0.7831981754266965, "learning_rate": 9.716729626347426e-08, "loss": 0.0034626349806785583, "step": 3309 }, { "epoch": 0.8004836759371221, "grad_norm": 1.60835212687033, "learning_rate": 9.694007705573065e-08, "loss": 0.006215012166649103, "step": 3310 }, { "epoch": 0.8007255139056831, "grad_norm": 2.0841483923983186, "learning_rate": 9.671309530546578e-08, "loss": 0.006381898652762175, "step": 3311 }, { "epoch": 0.8009673518742443, "grad_norm": 2.565214447067411, "learning_rate": 9.648635114640308e-08, "loss": 0.0033126838970929384, "step": 3312 }, { "epoch": 0.8012091898428053, "grad_norm": 3.562931909654285, "learning_rate": 9.625984471212572e-08, "loss": 0.005042660050094128, "step": 3313 }, { "epoch": 0.8014510278113663, "grad_norm": 3.046910674160443, "learning_rate": 9.603357613607715e-08, "loss": 0.004579735454171896, "step": 3314 }, { "epoch": 0.8016928657799275, "grad_norm": 2.633333734522702, "learning_rate": 9.58075455515604e-08, "loss": 0.007052164524793625, "step": 3315 }, { "epoch": 0.8019347037484885, "grad_norm": 3.371993352605668, "learning_rate": 9.558175309173877e-08, "loss": 0.011342777870595455, "step": 3316 }, { "epoch": 0.8021765417170496, "grad_norm": 0.6055364621305053, "learning_rate": 9.535619888963436e-08, "loss": 0.0011873162584379315, "step": 3317 }, { "epoch": 0.8024183796856107, "grad_norm": 0.9348217462245217, "learning_rate": 9.513088307813e-08, "loss": 0.003980913665145636, "step": 3318 }, { "epoch": 0.8026602176541717, "grad_norm": 1.6522275593383873, "learning_rate": 9.490580578996732e-08, "loss": 0.005094876978546381, "step": 3319 }, { "epoch": 0.8029020556227328, "grad_norm": 0.8917558524871566, "learning_rate": 9.468096715774771e-08, "loss": 0.0022186576388776302, "step": 3320 }, { "epoch": 0.8031438935912938, "grad_norm": 1.4159693551064834, "learning_rate": 9.445636731393192e-08, "loss": 0.0057140192948281765, "step": 3321 }, { "epoch": 0.8033857315598549, "grad_norm": 1.4550913818339146, "learning_rate": 9.423200639083994e-08, "loss": 0.006312618497759104, "step": 3322 }, { "epoch": 0.803627569528416, "grad_norm": 3.1571297835709964, "learning_rate": 9.400788452065112e-08, "loss": 0.007289841305464506, "step": 3323 }, { "epoch": 0.803869407496977, "grad_norm": 6.833357108088774, "learning_rate": 9.378400183540392e-08, "loss": 0.0053929355926811695, "step": 3324 }, { "epoch": 0.8041112454655381, "grad_norm": 15.605154306115997, "learning_rate": 9.356035846699589e-08, "loss": 0.017594948410987854, "step": 3325 }, { "epoch": 0.8043530834340992, "grad_norm": 1.1503853429221325, "learning_rate": 9.333695454718348e-08, "loss": 0.0030502579174935818, "step": 3326 }, { "epoch": 0.8045949214026602, "grad_norm": 0.5959160700620312, "learning_rate": 9.311379020758254e-08, "loss": 0.001963474554941058, "step": 3327 }, { "epoch": 0.8048367593712212, "grad_norm": 2.3210251183982047, "learning_rate": 9.289086557966707e-08, "loss": 0.0025327017065137625, "step": 3328 }, { "epoch": 0.8050785973397824, "grad_norm": 2.9417758424038674, "learning_rate": 9.266818079477018e-08, "loss": 0.001989807467907667, "step": 3329 }, { "epoch": 0.8053204353083434, "grad_norm": 2.756497756478511, "learning_rate": 9.244573598408407e-08, "loss": 0.002651232061907649, "step": 3330 }, { "epoch": 0.8055622732769044, "grad_norm": 1.7485768193997175, "learning_rate": 9.222353127865872e-08, "loss": 0.009448964148759842, "step": 3331 }, { "epoch": 0.8058041112454656, "grad_norm": 5.314707376345515, "learning_rate": 9.200156680940358e-08, "loss": 0.00603413674980402, "step": 3332 }, { "epoch": 0.8060459492140266, "grad_norm": 1.2380777568419925, "learning_rate": 9.177984270708561e-08, "loss": 0.003885772777721286, "step": 3333 }, { "epoch": 0.8062877871825876, "grad_norm": 1.5001022307618377, "learning_rate": 9.155835910233112e-08, "loss": 0.0041296291165053844, "step": 3334 }, { "epoch": 0.8065296251511487, "grad_norm": 3.625763045969456, "learning_rate": 9.1337116125624e-08, "loss": 0.004508004989475012, "step": 3335 }, { "epoch": 0.8067714631197098, "grad_norm": 1.4482463424320422, "learning_rate": 9.111611390730684e-08, "loss": 0.004372032824903727, "step": 3336 }, { "epoch": 0.8070133010882709, "grad_norm": 6.818052191977055, "learning_rate": 9.089535257758008e-08, "loss": 0.009966900572180748, "step": 3337 }, { "epoch": 0.8072551390568319, "grad_norm": 1.1915058064471586, "learning_rate": 9.067483226650236e-08, "loss": 0.004299890715628862, "step": 3338 }, { "epoch": 0.807496977025393, "grad_norm": 0.5377530626726031, "learning_rate": 9.045455310399041e-08, "loss": 0.0011969533516094089, "step": 3339 }, { "epoch": 0.8077388149939541, "grad_norm": 3.9185301901311624, "learning_rate": 9.023451521981862e-08, "loss": 0.012194926850497723, "step": 3340 }, { "epoch": 0.8079806529625151, "grad_norm": 3.156672996243921, "learning_rate": 9.001471874361987e-08, "loss": 0.004761547781527042, "step": 3341 }, { "epoch": 0.8082224909310762, "grad_norm": 2.575787013553736, "learning_rate": 8.979516380488383e-08, "loss": 0.004890075419098139, "step": 3342 }, { "epoch": 0.8084643288996373, "grad_norm": 1.0586064206408858, "learning_rate": 8.957585053295896e-08, "loss": 0.005572821944952011, "step": 3343 }, { "epoch": 0.8087061668681983, "grad_norm": 1.8678254699898291, "learning_rate": 8.93567790570503e-08, "loss": 0.0027854209765791893, "step": 3344 }, { "epoch": 0.8089480048367593, "grad_norm": 1.064394784806183, "learning_rate": 8.913794950622127e-08, "loss": 0.001585857360623777, "step": 3345 }, { "epoch": 0.8091898428053205, "grad_norm": 6.326761393417282, "learning_rate": 8.891936200939249e-08, "loss": 0.009344274178147316, "step": 3346 }, { "epoch": 0.8094316807738815, "grad_norm": 0.6230626675939824, "learning_rate": 8.870101669534147e-08, "loss": 0.0017710573738440871, "step": 3347 }, { "epoch": 0.8096735187424425, "grad_norm": 2.007825176376542, "learning_rate": 8.848291369270394e-08, "loss": 0.005306968465447426, "step": 3348 }, { "epoch": 0.8099153567110037, "grad_norm": 5.262148078160125, "learning_rate": 8.826505312997229e-08, "loss": 0.008663167245686054, "step": 3349 }, { "epoch": 0.8101571946795647, "grad_norm": 5.4287637188296705, "learning_rate": 8.804743513549623e-08, "loss": 0.026325920596718788, "step": 3350 }, { "epoch": 0.8103990326481257, "grad_norm": 1.651707908874274, "learning_rate": 8.783005983748253e-08, "loss": 0.006537438370287418, "step": 3351 }, { "epoch": 0.8106408706166868, "grad_norm": 10.742375775319347, "learning_rate": 8.761292736399501e-08, "loss": 0.004381813108921051, "step": 3352 }, { "epoch": 0.8108827085852479, "grad_norm": 2.9248100680697946, "learning_rate": 8.739603784295446e-08, "loss": 0.004087235312908888, "step": 3353 }, { "epoch": 0.811124546553809, "grad_norm": 0.23143753952869822, "learning_rate": 8.717939140213843e-08, "loss": 0.00016587619029451162, "step": 3354 }, { "epoch": 0.81136638452237, "grad_norm": 4.589585027842685, "learning_rate": 8.696298816918135e-08, "loss": 0.005172295030206442, "step": 3355 }, { "epoch": 0.8116082224909311, "grad_norm": 1.857043920865516, "learning_rate": 8.674682827157431e-08, "loss": 0.0041004568338394165, "step": 3356 }, { "epoch": 0.8118500604594922, "grad_norm": 3.9880480121848, "learning_rate": 8.653091183666539e-08, "loss": 0.007333552930504084, "step": 3357 }, { "epoch": 0.8120918984280532, "grad_norm": 2.9029609852181064, "learning_rate": 8.631523899165843e-08, "loss": 0.0026697232387959957, "step": 3358 }, { "epoch": 0.8123337363966142, "grad_norm": 9.232270834606458, "learning_rate": 8.609980986361464e-08, "loss": 0.010387022979557514, "step": 3359 }, { "epoch": 0.8125755743651754, "grad_norm": 1.6507340572474045, "learning_rate": 8.588462457945117e-08, "loss": 0.006220788229256868, "step": 3360 }, { "epoch": 0.8128174123337364, "grad_norm": 1.2504718538058215, "learning_rate": 8.56696832659416e-08, "loss": 0.004788861609995365, "step": 3361 }, { "epoch": 0.8130592503022974, "grad_norm": 1.3959135092229904, "learning_rate": 8.545498604971596e-08, "loss": 0.0022326894104480743, "step": 3362 }, { "epoch": 0.8133010882708586, "grad_norm": 1.7284152617216013, "learning_rate": 8.524053305725987e-08, "loss": 0.019171075895428658, "step": 3363 }, { "epoch": 0.8135429262394196, "grad_norm": 1.0110880835792175, "learning_rate": 8.502632441491587e-08, "loss": 0.002355422591790557, "step": 3364 }, { "epoch": 0.8137847642079806, "grad_norm": 2.948939131775485, "learning_rate": 8.48123602488821e-08, "loss": 0.00368007505312562, "step": 3365 }, { "epoch": 0.8140266021765418, "grad_norm": 4.389322050087717, "learning_rate": 8.459864068521272e-08, "loss": 0.0053887320682406425, "step": 3366 }, { "epoch": 0.8142684401451028, "grad_norm": 2.18523686241243, "learning_rate": 8.438516584981786e-08, "loss": 0.006163894198834896, "step": 3367 }, { "epoch": 0.8145102781136638, "grad_norm": 0.8436172485737616, "learning_rate": 8.417193586846338e-08, "loss": 0.0030014317017048597, "step": 3368 }, { "epoch": 0.8147521160822249, "grad_norm": 3.365159984354133, "learning_rate": 8.39589508667709e-08, "loss": 0.005912042688578367, "step": 3369 }, { "epoch": 0.814993954050786, "grad_norm": 22.27789107020441, "learning_rate": 8.374621097021772e-08, "loss": 0.006293158978223801, "step": 3370 }, { "epoch": 0.815235792019347, "grad_norm": 2.184894841416251, "learning_rate": 8.353371630413708e-08, "loss": 0.0044548693113029, "step": 3371 }, { "epoch": 0.8154776299879081, "grad_norm": 3.7011292101661035, "learning_rate": 8.332146699371694e-08, "loss": 0.004998584743589163, "step": 3372 }, { "epoch": 0.8157194679564692, "grad_norm": 0.4912368508943098, "learning_rate": 8.310946316400169e-08, "loss": 0.0016783978790044785, "step": 3373 }, { "epoch": 0.8159613059250302, "grad_norm": 1.1374898257283503, "learning_rate": 8.289770493989013e-08, "loss": 0.005658071022480726, "step": 3374 }, { "epoch": 0.8162031438935913, "grad_norm": 1.1338417099989493, "learning_rate": 8.268619244613718e-08, "loss": 0.0044265324249863625, "step": 3375 }, { "epoch": 0.8164449818621523, "grad_norm": 0.8689169789950544, "learning_rate": 8.24749258073526e-08, "loss": 0.00438555795699358, "step": 3376 }, { "epoch": 0.8166868198307135, "grad_norm": 1.0005418952347676, "learning_rate": 8.22639051480013e-08, "loss": 0.002752903616055846, "step": 3377 }, { "epoch": 0.8169286577992745, "grad_norm": 1.8542113306160641, "learning_rate": 8.205313059240343e-08, "loss": 0.0022277403622865677, "step": 3378 }, { "epoch": 0.8171704957678355, "grad_norm": 1.3775476400442113, "learning_rate": 8.184260226473394e-08, "loss": 0.004876778926700354, "step": 3379 }, { "epoch": 0.8174123337363967, "grad_norm": 0.32657468498005854, "learning_rate": 8.163232028902295e-08, "loss": 0.0014493477065116167, "step": 3380 }, { "epoch": 0.8176541717049577, "grad_norm": 1.5844548765245108, "learning_rate": 8.142228478915514e-08, "loss": 0.005748353898525238, "step": 3381 }, { "epoch": 0.8178960096735187, "grad_norm": 0.4774166267376699, "learning_rate": 8.121249588887059e-08, "loss": 0.000904927437659353, "step": 3382 }, { "epoch": 0.8181378476420798, "grad_norm": 3.21993015083839, "learning_rate": 8.100295371176314e-08, "loss": 0.008255761116743088, "step": 3383 }, { "epoch": 0.8183796856106409, "grad_norm": 1.6736322233787269, "learning_rate": 8.079365838128238e-08, "loss": 0.004009288735687733, "step": 3384 }, { "epoch": 0.8186215235792019, "grad_norm": 4.824617573809391, "learning_rate": 8.058461002073147e-08, "loss": 0.007973724976181984, "step": 3385 }, { "epoch": 0.818863361547763, "grad_norm": 1.102314068156632, "learning_rate": 8.03758087532686e-08, "loss": 0.004842724651098251, "step": 3386 }, { "epoch": 0.8191051995163241, "grad_norm": 1.5522433905793032, "learning_rate": 8.016725470190661e-08, "loss": 0.008304496295750141, "step": 3387 }, { "epoch": 0.8193470374848851, "grad_norm": 8.244896103808099, "learning_rate": 7.995894798951191e-08, "loss": 0.006350801791995764, "step": 3388 }, { "epoch": 0.8195888754534462, "grad_norm": 1.035862229741521, "learning_rate": 7.975088873880609e-08, "loss": 0.0008913517813198268, "step": 3389 }, { "epoch": 0.8198307134220073, "grad_norm": 0.8624998337127265, "learning_rate": 7.954307707236436e-08, "loss": 0.002575691556558013, "step": 3390 }, { "epoch": 0.8200725513905683, "grad_norm": 4.31758622961246, "learning_rate": 7.933551311261622e-08, "loss": 0.011757180094718933, "step": 3391 }, { "epoch": 0.8203143893591294, "grad_norm": 0.8968323362663901, "learning_rate": 7.912819698184531e-08, "loss": 0.0041533587500452995, "step": 3392 }, { "epoch": 0.8205562273276904, "grad_norm": 0.9145393491979582, "learning_rate": 7.892112880218921e-08, "loss": 0.0020119682885706425, "step": 3393 }, { "epoch": 0.8207980652962515, "grad_norm": 4.537062115584033, "learning_rate": 7.871430869563939e-08, "loss": 0.003901821793988347, "step": 3394 }, { "epoch": 0.8210399032648126, "grad_norm": 2.5415586609595016, "learning_rate": 7.850773678404126e-08, "loss": 0.0028512224089354277, "step": 3395 }, { "epoch": 0.8212817412333736, "grad_norm": 1.2489296004632866, "learning_rate": 7.830141318909395e-08, "loss": 0.0015339379897341132, "step": 3396 }, { "epoch": 0.8215235792019348, "grad_norm": 0.7491707941526043, "learning_rate": 7.80953380323502e-08, "loss": 0.0015599816106259823, "step": 3397 }, { "epoch": 0.8217654171704958, "grad_norm": 2.643732000922881, "learning_rate": 7.788951143521677e-08, "loss": 0.0011856300989165902, "step": 3398 }, { "epoch": 0.8220072551390568, "grad_norm": 1.7399868588080099, "learning_rate": 7.768393351895331e-08, "loss": 0.004264607559889555, "step": 3399 }, { "epoch": 0.8222490931076178, "grad_norm": 1.0573970240107557, "learning_rate": 7.747860440467369e-08, "loss": 0.0045042517594993114, "step": 3400 }, { "epoch": 0.822490931076179, "grad_norm": 1.6546870921157597, "learning_rate": 7.727352421334482e-08, "loss": 0.004386874381452799, "step": 3401 }, { "epoch": 0.82273276904474, "grad_norm": 3.8117813991240665, "learning_rate": 7.706869306578678e-08, "loss": 0.0013769681099802256, "step": 3402 }, { "epoch": 0.822974607013301, "grad_norm": 2.5901552866651065, "learning_rate": 7.686411108267348e-08, "loss": 0.012398652732372284, "step": 3403 }, { "epoch": 0.8232164449818622, "grad_norm": 0.6452400406684118, "learning_rate": 7.665977838453136e-08, "loss": 0.0007838644087314606, "step": 3404 }, { "epoch": 0.8234582829504232, "grad_norm": 1.2718872626355697, "learning_rate": 7.645569509174071e-08, "loss": 0.00964571163058281, "step": 3405 }, { "epoch": 0.8237001209189843, "grad_norm": 0.5922773022296722, "learning_rate": 7.625186132453432e-08, "loss": 0.0020748786628246307, "step": 3406 }, { "epoch": 0.8239419588875453, "grad_norm": 3.042492603987205, "learning_rate": 7.604827720299828e-08, "loss": 0.009546580724418163, "step": 3407 }, { "epoch": 0.8241837968561064, "grad_norm": 3.167670222967101, "learning_rate": 7.584494284707144e-08, "loss": 0.006633003242313862, "step": 3408 }, { "epoch": 0.8244256348246675, "grad_norm": 1.1560682547570706, "learning_rate": 7.564185837654564e-08, "loss": 0.0020658273715525866, "step": 3409 }, { "epoch": 0.8246674727932285, "grad_norm": 0.6011182009388382, "learning_rate": 7.543902391106538e-08, "loss": 0.0012194099836051464, "step": 3410 }, { "epoch": 0.8249093107617896, "grad_norm": 3.6718918455116882, "learning_rate": 7.523643957012782e-08, "loss": 0.034912560135126114, "step": 3411 }, { "epoch": 0.8251511487303507, "grad_norm": 5.159189816972107, "learning_rate": 7.503410547308331e-08, "loss": 0.020904526114463806, "step": 3412 }, { "epoch": 0.8253929866989117, "grad_norm": 1.2316353676732403, "learning_rate": 7.483202173913372e-08, "loss": 0.0044317361898720264, "step": 3413 }, { "epoch": 0.8256348246674728, "grad_norm": 0.8171146801093034, "learning_rate": 7.463018848733466e-08, "loss": 0.0023582156281918287, "step": 3414 }, { "epoch": 0.8258766626360339, "grad_norm": 0.9439959659127944, "learning_rate": 7.442860583659294e-08, "loss": 0.002765112789347768, "step": 3415 }, { "epoch": 0.8261185006045949, "grad_norm": 1.4856554049032127, "learning_rate": 7.42272739056688e-08, "loss": 0.01263632532209158, "step": 3416 }, { "epoch": 0.8263603385731559, "grad_norm": 1.57246775247476, "learning_rate": 7.402619281317418e-08, "loss": 0.0015216537285596132, "step": 3417 }, { "epoch": 0.8266021765417171, "grad_norm": 1.7497089600317044, "learning_rate": 7.382536267757328e-08, "loss": 0.0054147676564753056, "step": 3418 }, { "epoch": 0.8268440145102781, "grad_norm": 2.37179528956562, "learning_rate": 7.362478361718266e-08, "loss": 0.012505779042840004, "step": 3419 }, { "epoch": 0.8270858524788391, "grad_norm": 1.5981761441132138, "learning_rate": 7.34244557501708e-08, "loss": 0.004710372071713209, "step": 3420 }, { "epoch": 0.8273276904474003, "grad_norm": 1.0511297107740671, "learning_rate": 7.32243791945582e-08, "loss": 0.003948930185288191, "step": 3421 }, { "epoch": 0.8275695284159613, "grad_norm": 2.186990729069855, "learning_rate": 7.302455406821744e-08, "loss": 0.005890361499041319, "step": 3422 }, { "epoch": 0.8278113663845224, "grad_norm": 6.150999680288934, "learning_rate": 7.282498048887281e-08, "loss": 0.013382308185100555, "step": 3423 }, { "epoch": 0.8280532043530834, "grad_norm": 3.626823232491113, "learning_rate": 7.262565857410046e-08, "loss": 0.010266737081110477, "step": 3424 }, { "epoch": 0.8282950423216445, "grad_norm": 0.90381281014922, "learning_rate": 7.242658844132838e-08, "loss": 0.0026502362452447414, "step": 3425 }, { "epoch": 0.8285368802902056, "grad_norm": 2.1483547022991365, "learning_rate": 7.222777020783599e-08, "loss": 0.005114787723869085, "step": 3426 }, { "epoch": 0.8287787182587666, "grad_norm": 1.0882381121504634, "learning_rate": 7.202920399075446e-08, "loss": 0.003521303180605173, "step": 3427 }, { "epoch": 0.8290205562273277, "grad_norm": 0.625754769037868, "learning_rate": 7.183088990706676e-08, "loss": 0.0009331193869002163, "step": 3428 }, { "epoch": 0.8292623941958888, "grad_norm": 1.0011582204601335, "learning_rate": 7.163282807360665e-08, "loss": 0.0023101074621081352, "step": 3429 }, { "epoch": 0.8295042321644498, "grad_norm": 1.3466439157073793, "learning_rate": 7.143501860705991e-08, "loss": 0.004702598322182894, "step": 3430 }, { "epoch": 0.8297460701330109, "grad_norm": 4.953236771620598, "learning_rate": 7.123746162396332e-08, "loss": 0.319743275642395, "step": 3431 }, { "epoch": 0.829987908101572, "grad_norm": 2.1043273486782383, "learning_rate": 7.1040157240705e-08, "loss": 0.00337082683108747, "step": 3432 }, { "epoch": 0.830229746070133, "grad_norm": 4.756987847070155, "learning_rate": 7.084310557352429e-08, "loss": 0.004156157374382019, "step": 3433 }, { "epoch": 0.830471584038694, "grad_norm": 2.5483709326302053, "learning_rate": 7.064630673851152e-08, "loss": 0.009947150945663452, "step": 3434 }, { "epoch": 0.8307134220072552, "grad_norm": 0.7888052269147346, "learning_rate": 7.04497608516082e-08, "loss": 0.004604704212397337, "step": 3435 }, { "epoch": 0.8309552599758162, "grad_norm": 0.5391393417022854, "learning_rate": 7.025346802860677e-08, "loss": 0.0021785981953144073, "step": 3436 }, { "epoch": 0.8311970979443772, "grad_norm": 1.4054012959717153, "learning_rate": 7.00574283851505e-08, "loss": 0.004651609342545271, "step": 3437 }, { "epoch": 0.8314389359129384, "grad_norm": 6.910066703196127, "learning_rate": 6.986164203673361e-08, "loss": 0.009133515879511833, "step": 3438 }, { "epoch": 0.8316807738814994, "grad_norm": 1.2677590253762314, "learning_rate": 6.966610909870124e-08, "loss": 0.002121255500242114, "step": 3439 }, { "epoch": 0.8319226118500604, "grad_norm": 0.7370132479862229, "learning_rate": 6.947082968624873e-08, "loss": 0.0019660741090774536, "step": 3440 }, { "epoch": 0.8321644498186215, "grad_norm": 1.0497433287352425, "learning_rate": 6.927580391442272e-08, "loss": 0.00614796532317996, "step": 3441 }, { "epoch": 0.8324062877871826, "grad_norm": 1.663352233024504, "learning_rate": 6.908103189812003e-08, "loss": 0.016016338020563126, "step": 3442 }, { "epoch": 0.8326481257557437, "grad_norm": 4.636994190705985, "learning_rate": 6.888651375208776e-08, "loss": 0.01407467108219862, "step": 3443 }, { "epoch": 0.8328899637243047, "grad_norm": 1.2930997051353263, "learning_rate": 6.869224959092413e-08, "loss": 0.002414766000583768, "step": 3444 }, { "epoch": 0.8331318016928658, "grad_norm": 2.3275696006095465, "learning_rate": 6.849823952907691e-08, "loss": 0.004673442803323269, "step": 3445 }, { "epoch": 0.8333736396614269, "grad_norm": 7.514080725189442, "learning_rate": 6.830448368084496e-08, "loss": 0.012425624765455723, "step": 3446 }, { "epoch": 0.8336154776299879, "grad_norm": 1.1538138689258004, "learning_rate": 6.81109821603768e-08, "loss": 0.0021694451570510864, "step": 3447 }, { "epoch": 0.8338573155985489, "grad_norm": 15.66843669954357, "learning_rate": 6.791773508167142e-08, "loss": 0.007468470372259617, "step": 3448 }, { "epoch": 0.8340991535671101, "grad_norm": 1.9774538317862753, "learning_rate": 6.772474255857774e-08, "loss": 0.008060276508331299, "step": 3449 }, { "epoch": 0.8343409915356711, "grad_norm": 1.2978544345163487, "learning_rate": 6.753200470479481e-08, "loss": 0.0035675582475960255, "step": 3450 }, { "epoch": 0.8345828295042321, "grad_norm": 1.0338856540154733, "learning_rate": 6.733952163387158e-08, "loss": 0.0039798724465072155, "step": 3451 }, { "epoch": 0.8348246674727933, "grad_norm": 1.8783617137960258, "learning_rate": 6.714729345920694e-08, "loss": 0.004394105169922113, "step": 3452 }, { "epoch": 0.8350665054413543, "grad_norm": 0.38365878471734294, "learning_rate": 6.695532029404966e-08, "loss": 0.0011734688887372613, "step": 3453 }, { "epoch": 0.8353083434099153, "grad_norm": 1.0451914156543636, "learning_rate": 6.676360225149802e-08, "loss": 0.002678993623703718, "step": 3454 }, { "epoch": 0.8355501813784765, "grad_norm": 1.9696218068253313, "learning_rate": 6.657213944450063e-08, "loss": 0.0062178331427276134, "step": 3455 }, { "epoch": 0.8357920193470375, "grad_norm": 3.5003201914467152, "learning_rate": 6.638093198585476e-08, "loss": 0.0024611351545900106, "step": 3456 }, { "epoch": 0.8360338573155985, "grad_norm": 1.1347489751747641, "learning_rate": 6.618997998820813e-08, "loss": 0.00746208056807518, "step": 3457 }, { "epoch": 0.8362756952841596, "grad_norm": 3.9679604692058654, "learning_rate": 6.599928356405765e-08, "loss": 0.010253343731164932, "step": 3458 }, { "epoch": 0.8365175332527207, "grad_norm": 2.389940946508896, "learning_rate": 6.580884282574927e-08, "loss": 0.007395806256681681, "step": 3459 }, { "epoch": 0.8367593712212817, "grad_norm": 0.8545578384469075, "learning_rate": 6.561865788547904e-08, "loss": 0.0036355566699057817, "step": 3460 }, { "epoch": 0.8370012091898428, "grad_norm": 0.9240178153482003, "learning_rate": 6.542872885529172e-08, "loss": 0.0015369805041700602, "step": 3461 }, { "epoch": 0.8372430471584039, "grad_norm": 1.6536464591452893, "learning_rate": 6.523905584708162e-08, "loss": 0.004147637635469437, "step": 3462 }, { "epoch": 0.837484885126965, "grad_norm": 3.042791798612875, "learning_rate": 6.504963897259196e-08, "loss": 0.03486691042780876, "step": 3463 }, { "epoch": 0.837726723095526, "grad_norm": 2.3890060947369576, "learning_rate": 6.48604783434154e-08, "loss": 0.006738465279340744, "step": 3464 }, { "epoch": 0.837968561064087, "grad_norm": 1.647041801697724, "learning_rate": 6.467157407099327e-08, "loss": 0.005195583216845989, "step": 3465 }, { "epoch": 0.8382103990326482, "grad_norm": 1.7175801397933295, "learning_rate": 6.448292626661623e-08, "loss": 0.0026570549234747887, "step": 3466 }, { "epoch": 0.8384522370012092, "grad_norm": 0.2826787995871437, "learning_rate": 6.429453504142351e-08, "loss": 0.000737200491130352, "step": 3467 }, { "epoch": 0.8386940749697702, "grad_norm": 3.0970718133135637, "learning_rate": 6.410640050640332e-08, "loss": 0.0018063735915347934, "step": 3468 }, { "epoch": 0.8389359129383314, "grad_norm": 2.0375026489588377, "learning_rate": 6.39185227723929e-08, "loss": 0.0044689239002764225, "step": 3469 }, { "epoch": 0.8391777509068924, "grad_norm": 0.7658442112866956, "learning_rate": 6.373090195007763e-08, "loss": 0.0022849254310131073, "step": 3470 }, { "epoch": 0.8394195888754534, "grad_norm": 0.8200568038053608, "learning_rate": 6.354353814999209e-08, "loss": 0.0015347100561484694, "step": 3471 }, { "epoch": 0.8396614268440145, "grad_norm": 1.7163877122205558, "learning_rate": 6.335643148251918e-08, "loss": 0.0013329396024346352, "step": 3472 }, { "epoch": 0.8399032648125756, "grad_norm": 2.069839670960452, "learning_rate": 6.31695820578903e-08, "loss": 0.004627825226634741, "step": 3473 }, { "epoch": 0.8401451027811366, "grad_norm": 1.693005148848107, "learning_rate": 6.298298998618535e-08, "loss": 0.011209100484848022, "step": 3474 }, { "epoch": 0.8403869407496977, "grad_norm": 3.349678669362048, "learning_rate": 6.279665537733258e-08, "loss": 0.011472049169242382, "step": 3475 }, { "epoch": 0.8406287787182588, "grad_norm": 0.7278497210855779, "learning_rate": 6.261057834110866e-08, "loss": 0.001335187698714435, "step": 3476 }, { "epoch": 0.8408706166868198, "grad_norm": 1.701036244605031, "learning_rate": 6.242475898713839e-08, "loss": 0.0096155209466815, "step": 3477 }, { "epoch": 0.8411124546553809, "grad_norm": 2.707097164568978, "learning_rate": 6.223919742489481e-08, "loss": 0.009922534227371216, "step": 3478 }, { "epoch": 0.841354292623942, "grad_norm": 2.030351188645393, "learning_rate": 6.205389376369902e-08, "loss": 0.017244977876544, "step": 3479 }, { "epoch": 0.841596130592503, "grad_norm": 2.6194570374546955, "learning_rate": 6.186884811272052e-08, "loss": 0.007136376108974218, "step": 3480 }, { "epoch": 0.8418379685610641, "grad_norm": 29.34606810415397, "learning_rate": 6.16840605809762e-08, "loss": 0.008149835281074047, "step": 3481 }, { "epoch": 0.8420798065296251, "grad_norm": 2.0393515582394834, "learning_rate": 6.149953127733138e-08, "loss": 0.0031446039211004972, "step": 3482 }, { "epoch": 0.8423216444981863, "grad_norm": 1.1604292214697591, "learning_rate": 6.131526031049904e-08, "loss": 0.0042668357491493225, "step": 3483 }, { "epoch": 0.8425634824667473, "grad_norm": 1.4560962960380546, "learning_rate": 6.113124778903994e-08, "loss": 0.005266172345727682, "step": 3484 }, { "epoch": 0.8428053204353083, "grad_norm": 0.9171052538954344, "learning_rate": 6.094749382136288e-08, "loss": 0.0033062181901186705, "step": 3485 }, { "epoch": 0.8430471584038695, "grad_norm": 1.0041330614964212, "learning_rate": 6.076399851572384e-08, "loss": 0.0014736721059307456, "step": 3486 }, { "epoch": 0.8432889963724305, "grad_norm": 3.750326265650318, "learning_rate": 6.058076198022683e-08, "loss": 0.009918310679495335, "step": 3487 }, { "epoch": 0.8435308343409915, "grad_norm": 1.455083269314668, "learning_rate": 6.039778432282322e-08, "loss": 0.0015682462835684419, "step": 3488 }, { "epoch": 0.8437726723095526, "grad_norm": 17.561387912179296, "learning_rate": 6.021506565131195e-08, "loss": 0.019969122484326363, "step": 3489 }, { "epoch": 0.8440145102781137, "grad_norm": 0.8663220386039089, "learning_rate": 6.003260607333927e-08, "loss": 0.0013390384847298265, "step": 3490 }, { "epoch": 0.8442563482466747, "grad_norm": 2.3416631647486, "learning_rate": 5.985040569639888e-08, "loss": 0.007106289267539978, "step": 3491 }, { "epoch": 0.8444981862152358, "grad_norm": 14.30522712880801, "learning_rate": 5.96684646278317e-08, "loss": 0.017641711980104446, "step": 3492 }, { "epoch": 0.8447400241837969, "grad_norm": 2.534448170570337, "learning_rate": 5.948678297482601e-08, "loss": 0.004417681600898504, "step": 3493 }, { "epoch": 0.8449818621523579, "grad_norm": 1.1512996476168786, "learning_rate": 5.930536084441712e-08, "loss": 0.004144465085119009, "step": 3494 }, { "epoch": 0.845223700120919, "grad_norm": 7.536477102356488, "learning_rate": 5.912419834348742e-08, "loss": 0.018935158848762512, "step": 3495 }, { "epoch": 0.84546553808948, "grad_norm": 9.557685911310484, "learning_rate": 5.894329557876676e-08, "loss": 0.022585945203900337, "step": 3496 }, { "epoch": 0.8457073760580411, "grad_norm": 0.7473476600863297, "learning_rate": 5.8762652656831166e-08, "loss": 0.0019864223431795835, "step": 3497 }, { "epoch": 0.8459492140266022, "grad_norm": 0.5334035363610605, "learning_rate": 5.8582269684104404e-08, "loss": 0.0003371775965206325, "step": 3498 }, { "epoch": 0.8461910519951632, "grad_norm": 1.6971497709559502, "learning_rate": 5.840214676685673e-08, "loss": 0.008889680728316307, "step": 3499 }, { "epoch": 0.8464328899637243, "grad_norm": 1.3739753326953585, "learning_rate": 5.8222284011204884e-08, "loss": 0.002294188132509589, "step": 3500 }, { "epoch": 0.8466747279322854, "grad_norm": 0.3056956447662762, "learning_rate": 5.8042681523112977e-08, "loss": 0.00022315485693980008, "step": 3501 }, { "epoch": 0.8469165659008464, "grad_norm": 0.7528002338103099, "learning_rate": 5.786333940839139e-08, "loss": 0.0014751952840015292, "step": 3502 }, { "epoch": 0.8471584038694076, "grad_norm": 1.1492763425951031, "learning_rate": 5.7684257772697086e-08, "loss": 0.003938992973417044, "step": 3503 }, { "epoch": 0.8474002418379686, "grad_norm": 2.4443280650783374, "learning_rate": 5.75054367215338e-08, "loss": 0.011020964942872524, "step": 3504 }, { "epoch": 0.8476420798065296, "grad_norm": 0.9323766674094256, "learning_rate": 5.732687636025152e-08, "loss": 0.002423001453280449, "step": 3505 }, { "epoch": 0.8478839177750906, "grad_norm": 1.3144930919848732, "learning_rate": 5.7148576794046776e-08, "loss": 0.00878867320716381, "step": 3506 }, { "epoch": 0.8481257557436518, "grad_norm": 1.0645431149723306, "learning_rate": 5.697053812796238e-08, "loss": 0.005284017883241177, "step": 3507 }, { "epoch": 0.8483675937122128, "grad_norm": 1.8346714444101577, "learning_rate": 5.679276046688758e-08, "loss": 0.014367400668561459, "step": 3508 }, { "epoch": 0.8486094316807739, "grad_norm": 3.1130688306031082, "learning_rate": 5.661524391555755e-08, "loss": 0.011998472735285759, "step": 3509 }, { "epoch": 0.848851269649335, "grad_norm": 0.6116025132766986, "learning_rate": 5.6437988578554196e-08, "loss": 0.0018447096226736903, "step": 3510 }, { "epoch": 0.849093107617896, "grad_norm": 2.9157512093375098, "learning_rate": 5.626099456030481e-08, "loss": 0.012553135864436626, "step": 3511 }, { "epoch": 0.8493349455864571, "grad_norm": 3.1418690039847696, "learning_rate": 5.608426196508348e-08, "loss": 0.0018685170216485858, "step": 3512 }, { "epoch": 0.8495767835550181, "grad_norm": 2.281073494570457, "learning_rate": 5.590779089700948e-08, "loss": 0.005490950308740139, "step": 3513 }, { "epoch": 0.8498186215235792, "grad_norm": 0.6758163763293595, "learning_rate": 5.5731581460048695e-08, "loss": 0.001896618865430355, "step": 3514 }, { "epoch": 0.8500604594921403, "grad_norm": 1.649792108523988, "learning_rate": 5.55556337580127e-08, "loss": 0.004511830396950245, "step": 3515 }, { "epoch": 0.8503022974607013, "grad_norm": 1.0185474933142213, "learning_rate": 5.537994789455841e-08, "loss": 0.0010727354092523456, "step": 3516 }, { "epoch": 0.8505441354292624, "grad_norm": 1.2993331223864948, "learning_rate": 5.52045239731892e-08, "loss": 0.005192307289689779, "step": 3517 }, { "epoch": 0.8507859733978235, "grad_norm": 0.8681987706161888, "learning_rate": 5.5029362097253675e-08, "loss": 0.003444023197516799, "step": 3518 }, { "epoch": 0.8510278113663845, "grad_norm": 1.29772009788277, "learning_rate": 5.485446236994612e-08, "loss": 0.005178158637136221, "step": 3519 }, { "epoch": 0.8512696493349455, "grad_norm": 10.690275995966472, "learning_rate": 5.4679824894306446e-08, "loss": 0.00498309126123786, "step": 3520 }, { "epoch": 0.8515114873035067, "grad_norm": 0.9096777766633878, "learning_rate": 5.4505449773220026e-08, "loss": 0.0028105524834245443, "step": 3521 }, { "epoch": 0.8517533252720677, "grad_norm": 0.747397645719296, "learning_rate": 5.4331337109417675e-08, "loss": 0.0054114568047225475, "step": 3522 }, { "epoch": 0.8519951632406287, "grad_norm": 1.1429378942548292, "learning_rate": 5.4157487005475624e-08, "loss": 0.007895159535109997, "step": 3523 }, { "epoch": 0.8522370012091899, "grad_norm": 0.984518316494729, "learning_rate": 5.3983899563815384e-08, "loss": 0.0015053204260766506, "step": 3524 }, { "epoch": 0.8524788391777509, "grad_norm": 4.7209089472903285, "learning_rate": 5.3810574886703545e-08, "loss": 0.01402282901108265, "step": 3525 }, { "epoch": 0.852720677146312, "grad_norm": 1.9797617591931032, "learning_rate": 5.3637513076252474e-08, "loss": 0.004574387799948454, "step": 3526 }, { "epoch": 0.8529625151148731, "grad_norm": 1.9486929323682203, "learning_rate": 5.346471423441884e-08, "loss": 0.0029895720072090626, "step": 3527 }, { "epoch": 0.8532043530834341, "grad_norm": 2.3407536576576025, "learning_rate": 5.32921784630051e-08, "loss": 0.017080267891287804, "step": 3528 }, { "epoch": 0.8534461910519952, "grad_norm": 0.5758980491431542, "learning_rate": 5.311990586365833e-08, "loss": 0.0006707744323648512, "step": 3529 }, { "epoch": 0.8536880290205562, "grad_norm": 8.837396159200592, "learning_rate": 5.294789653787074e-08, "loss": 0.0075991288758814335, "step": 3530 }, { "epoch": 0.8539298669891173, "grad_norm": 0.910334556868649, "learning_rate": 5.277615058697932e-08, "loss": 0.0014926427975296974, "step": 3531 }, { "epoch": 0.8541717049576784, "grad_norm": 4.158899180795729, "learning_rate": 5.260466811216596e-08, "loss": 0.006037124898284674, "step": 3532 }, { "epoch": 0.8544135429262394, "grad_norm": 0.8168111927615201, "learning_rate": 5.243344921445736e-08, "loss": 0.001537717878818512, "step": 3533 }, { "epoch": 0.8546553808948005, "grad_norm": 2.2199202123698107, "learning_rate": 5.226249399472482e-08, "loss": 0.01137508638203144, "step": 3534 }, { "epoch": 0.8548972188633616, "grad_norm": 5.54166221258196, "learning_rate": 5.209180255368445e-08, "loss": 0.009545047767460346, "step": 3535 }, { "epoch": 0.8551390568319226, "grad_norm": 0.11841056749524344, "learning_rate": 5.1921374991896696e-08, "loss": 0.00033180235186591744, "step": 3536 }, { "epoch": 0.8553808948004836, "grad_norm": 0.3299713295411575, "learning_rate": 5.1751211409767014e-08, "loss": 0.0009334799833595753, "step": 3537 }, { "epoch": 0.8556227327690448, "grad_norm": 4.445589427301817, "learning_rate": 5.158131190754489e-08, "loss": 0.003709539072588086, "step": 3538 }, { "epoch": 0.8558645707376058, "grad_norm": 2.073024071824322, "learning_rate": 5.141167658532425e-08, "loss": 0.010780873708426952, "step": 3539 }, { "epoch": 0.8561064087061668, "grad_norm": 2.0735127481719773, "learning_rate": 5.124230554304387e-08, "loss": 0.006590571254491806, "step": 3540 }, { "epoch": 0.856348246674728, "grad_norm": 3.877437985559055, "learning_rate": 5.107319888048611e-08, "loss": 0.020260239019989967, "step": 3541 }, { "epoch": 0.856590084643289, "grad_norm": 2.6828691682484522, "learning_rate": 5.090435669727838e-08, "loss": 0.006210951134562492, "step": 3542 }, { "epoch": 0.85683192261185, "grad_norm": 1.3756725263245388, "learning_rate": 5.073577909289134e-08, "loss": 0.0029025874100625515, "step": 3543 }, { "epoch": 0.8570737605804111, "grad_norm": 0.451916281793452, "learning_rate": 5.056746616664059e-08, "loss": 0.0008934051729738712, "step": 3544 }, { "epoch": 0.8573155985489722, "grad_norm": 1.0663546429427095, "learning_rate": 5.039941801768549e-08, "loss": 0.0022537303157150745, "step": 3545 }, { "epoch": 0.8575574365175332, "grad_norm": 1.0409248334309942, "learning_rate": 5.023163474502934e-08, "loss": 0.004422168713063002, "step": 3546 }, { "epoch": 0.8577992744860943, "grad_norm": 0.7736211033980916, "learning_rate": 5.00641164475194e-08, "loss": 0.004788893274962902, "step": 3547 }, { "epoch": 0.8580411124546554, "grad_norm": 0.6024940733661571, "learning_rate": 4.989686322384701e-08, "loss": 0.0010775902774184942, "step": 3548 }, { "epoch": 0.8582829504232165, "grad_norm": 2.303243753858158, "learning_rate": 4.9729875172547175e-08, "loss": 0.005075374152511358, "step": 3549 }, { "epoch": 0.8585247883917775, "grad_norm": 1.068045637396513, "learning_rate": 4.9563152391998544e-08, "loss": 0.003643615869805217, "step": 3550 }, { "epoch": 0.8587666263603386, "grad_norm": 2.3685204622744434, "learning_rate": 4.939669498042404e-08, "loss": 0.004211864899843931, "step": 3551 }, { "epoch": 0.8590084643288997, "grad_norm": 6.728666581694284, "learning_rate": 4.923050303588944e-08, "loss": 0.005526585038751364, "step": 3552 }, { "epoch": 0.8592503022974607, "grad_norm": 1.0466738734662473, "learning_rate": 4.906457665630498e-08, "loss": 0.002255786908790469, "step": 3553 }, { "epoch": 0.8594921402660217, "grad_norm": 20.284752956904118, "learning_rate": 4.889891593942364e-08, "loss": 0.012766304425895214, "step": 3554 }, { "epoch": 0.8597339782345829, "grad_norm": 1.1626231936806402, "learning_rate": 4.873352098284239e-08, "loss": 0.0028260538820177317, "step": 3555 }, { "epoch": 0.8599758162031439, "grad_norm": 1.142863613628627, "learning_rate": 4.856839188400169e-08, "loss": 0.004123754799365997, "step": 3556 }, { "epoch": 0.8602176541717049, "grad_norm": 3.3242569333077165, "learning_rate": 4.8403528740184826e-08, "loss": 0.0019356204429641366, "step": 3557 }, { "epoch": 0.8604594921402661, "grad_norm": 1.9647250321682161, "learning_rate": 4.823893164851906e-08, "loss": 0.013725042343139648, "step": 3558 }, { "epoch": 0.8607013301088271, "grad_norm": 3.6614767519708273, "learning_rate": 4.807460070597452e-08, "loss": 0.006096639204770327, "step": 3559 }, { "epoch": 0.8609431680773881, "grad_norm": 2.270528585612833, "learning_rate": 4.791053600936468e-08, "loss": 0.008784787729382515, "step": 3560 }, { "epoch": 0.8611850060459492, "grad_norm": 7.081404963853497, "learning_rate": 4.774673765534598e-08, "loss": 0.004095918033272028, "step": 3561 }, { "epoch": 0.8614268440145103, "grad_norm": 1.5621002389426497, "learning_rate": 4.7583205740418187e-08, "loss": 0.0039303540252149105, "step": 3562 }, { "epoch": 0.8616686819830713, "grad_norm": 0.4081890189919607, "learning_rate": 4.7419940360924006e-08, "loss": 0.0015250190626829863, "step": 3563 }, { "epoch": 0.8619105199516324, "grad_norm": 1.4399487267920805, "learning_rate": 4.7256941613049e-08, "loss": 0.0034025602508336306, "step": 3564 }, { "epoch": 0.8621523579201935, "grad_norm": 1.4224461717826915, "learning_rate": 4.7094209592821785e-08, "loss": 0.0027643213979899883, "step": 3565 }, { "epoch": 0.8623941958887545, "grad_norm": 4.027789488337893, "learning_rate": 4.693174439611375e-08, "loss": 0.0035682194866240025, "step": 3566 }, { "epoch": 0.8626360338573156, "grad_norm": 1.7122874209174015, "learning_rate": 4.676954611863937e-08, "loss": 0.00667928671464324, "step": 3567 }, { "epoch": 0.8628778718258766, "grad_norm": 0.5356583301643563, "learning_rate": 4.660761485595527e-08, "loss": 0.0012392217759042978, "step": 3568 }, { "epoch": 0.8631197097944378, "grad_norm": 4.7184099921239975, "learning_rate": 4.6445950703461467e-08, "loss": 0.029704628512263298, "step": 3569 }, { "epoch": 0.8633615477629988, "grad_norm": 1.432224090176904, "learning_rate": 4.628455375640011e-08, "loss": 0.007165870163589716, "step": 3570 }, { "epoch": 0.8636033857315598, "grad_norm": 1.4344654031701929, "learning_rate": 4.61234241098562e-08, "loss": 0.0021474757231771946, "step": 3571 }, { "epoch": 0.863845223700121, "grad_norm": 0.9608683659584689, "learning_rate": 4.596256185875713e-08, "loss": 0.0020677640568464994, "step": 3572 }, { "epoch": 0.864087061668682, "grad_norm": 1.6647118117775932, "learning_rate": 4.5801967097872594e-08, "loss": 0.0026085737626999617, "step": 3573 }, { "epoch": 0.864328899637243, "grad_norm": 10.559448032260335, "learning_rate": 4.564163992181514e-08, "loss": 0.015766197815537453, "step": 3574 }, { "epoch": 0.8645707376058042, "grad_norm": 2.2379283410950492, "learning_rate": 4.5481580425039355e-08, "loss": 0.004139306489378214, "step": 3575 }, { "epoch": 0.8648125755743652, "grad_norm": 2.7171632287281318, "learning_rate": 4.532178870184217e-08, "loss": 0.009308266453444958, "step": 3576 }, { "epoch": 0.8650544135429262, "grad_norm": 2.0946198306998434, "learning_rate": 4.516226484636276e-08, "loss": 0.0031430330127477646, "step": 3577 }, { "epoch": 0.8652962515114873, "grad_norm": 1.5347793825804654, "learning_rate": 4.5003008952582635e-08, "loss": 0.004636893514543772, "step": 3578 }, { "epoch": 0.8655380894800484, "grad_norm": 0.5117465051555475, "learning_rate": 4.484402111432517e-08, "loss": 0.0005452482728287578, "step": 3579 }, { "epoch": 0.8657799274486094, "grad_norm": 1.843025485244156, "learning_rate": 4.468530142525601e-08, "loss": 0.006071415729820728, "step": 3580 }, { "epoch": 0.8660217654171705, "grad_norm": 1.6747073851539125, "learning_rate": 4.452684997888295e-08, "loss": 0.004556114319711924, "step": 3581 }, { "epoch": 0.8662636033857316, "grad_norm": 1.494722839149605, "learning_rate": 4.4368666868555285e-08, "loss": 0.001870407722890377, "step": 3582 }, { "epoch": 0.8665054413542926, "grad_norm": 1.19577410991399, "learning_rate": 4.4210752187464893e-08, "loss": 0.003811885369941592, "step": 3583 }, { "epoch": 0.8667472793228537, "grad_norm": 1.0301100328534596, "learning_rate": 4.405310602864476e-08, "loss": 0.007169878575950861, "step": 3584 }, { "epoch": 0.8669891172914147, "grad_norm": 2.3586463490494207, "learning_rate": 4.3895728484970306e-08, "loss": 0.008551151491701603, "step": 3585 }, { "epoch": 0.8672309552599758, "grad_norm": 9.599838505878225, "learning_rate": 4.373861964915837e-08, "loss": 0.006576798856258392, "step": 3586 }, { "epoch": 0.8674727932285369, "grad_norm": 6.605072684229336, "learning_rate": 4.3581779613767636e-08, "loss": 0.00534038944169879, "step": 3587 }, { "epoch": 0.8677146311970979, "grad_norm": 0.9771161432385268, "learning_rate": 4.3425208471198235e-08, "loss": 0.0020104777067899704, "step": 3588 }, { "epoch": 0.867956469165659, "grad_norm": 4.3068013116852235, "learning_rate": 4.3268906313692066e-08, "loss": 0.0034589499700814486, "step": 3589 }, { "epoch": 0.8681983071342201, "grad_norm": 0.6666926074910233, "learning_rate": 4.311287323333257e-08, "loss": 0.002435029251500964, "step": 3590 }, { "epoch": 0.8684401451027811, "grad_norm": 3.110015888765515, "learning_rate": 4.295710932204449e-08, "loss": 0.004068340640515089, "step": 3591 }, { "epoch": 0.8686819830713421, "grad_norm": 3.1341811055019115, "learning_rate": 4.280161467159416e-08, "loss": 0.008299610577523708, "step": 3592 }, { "epoch": 0.8689238210399033, "grad_norm": 1.9519223780292407, "learning_rate": 4.2646389373589055e-08, "loss": 0.003674398409202695, "step": 3593 }, { "epoch": 0.8691656590084643, "grad_norm": 1.9298646731727682, "learning_rate": 4.249143351947848e-08, "loss": 0.004936783108860254, "step": 3594 }, { "epoch": 0.8694074969770254, "grad_norm": 1.8431803933584394, "learning_rate": 4.233674720055236e-08, "loss": 0.00576659245416522, "step": 3595 }, { "epoch": 0.8696493349455865, "grad_norm": 7.308752343253236, "learning_rate": 4.218233050794207e-08, "loss": 0.013590879738330841, "step": 3596 }, { "epoch": 0.8698911729141475, "grad_norm": 1.707830904514482, "learning_rate": 4.20281835326205e-08, "loss": 0.006071132142096758, "step": 3597 }, { "epoch": 0.8701330108827086, "grad_norm": 1.900136510158858, "learning_rate": 4.187430636540096e-08, "loss": 0.006115212570875883, "step": 3598 }, { "epoch": 0.8703748488512697, "grad_norm": 3.710294351960281, "learning_rate": 4.1720699096938375e-08, "loss": 0.010767591185867786, "step": 3599 }, { "epoch": 0.8706166868198307, "grad_norm": 1.5432249090279093, "learning_rate": 4.156736181772846e-08, "loss": 0.00837660301476717, "step": 3600 }, { "epoch": 0.8708585247883918, "grad_norm": 1.2436581004098168, "learning_rate": 4.1414294618107805e-08, "loss": 0.004734659101814032, "step": 3601 }, { "epoch": 0.8711003627569528, "grad_norm": 1.8377816339861988, "learning_rate": 4.126149758825398e-08, "loss": 0.01603754237294197, "step": 3602 }, { "epoch": 0.8713422007255139, "grad_norm": 1.0586199475110187, "learning_rate": 4.110897081818537e-08, "loss": 0.004091182257980108, "step": 3603 }, { "epoch": 0.871584038694075, "grad_norm": 1.8274074789388841, "learning_rate": 4.09567143977611e-08, "loss": 0.0033416280057281256, "step": 3604 }, { "epoch": 0.871825876662636, "grad_norm": 0.7119741462701437, "learning_rate": 4.080472841668098e-08, "loss": 0.0034901753533631563, "step": 3605 }, { "epoch": 0.8720677146311971, "grad_norm": 3.620009270660809, "learning_rate": 4.06530129644857e-08, "loss": 0.0023750041145831347, "step": 3606 }, { "epoch": 0.8723095525997582, "grad_norm": 0.8406760297927908, "learning_rate": 4.050156813055622e-08, "loss": 0.0014975573867559433, "step": 3607 }, { "epoch": 0.8725513905683192, "grad_norm": 2.5003794376502495, "learning_rate": 4.035039400411461e-08, "loss": 0.006535371299833059, "step": 3608 }, { "epoch": 0.8727932285368802, "grad_norm": 2.1639296205460696, "learning_rate": 4.019949067422279e-08, "loss": 0.009702418930828571, "step": 3609 }, { "epoch": 0.8730350665054414, "grad_norm": 2.2862409739853295, "learning_rate": 4.004885822978371e-08, "loss": 0.00907179992645979, "step": 3610 }, { "epoch": 0.8732769044740024, "grad_norm": 22.92641960796856, "learning_rate": 3.989849675954049e-08, "loss": 0.00724990526214242, "step": 3611 }, { "epoch": 0.8735187424425634, "grad_norm": 1.431769339499029, "learning_rate": 3.9748406352076434e-08, "loss": 0.004238739609718323, "step": 3612 }, { "epoch": 0.8737605804111246, "grad_norm": 1.5411585807092274, "learning_rate": 3.959858709581559e-08, "loss": 0.0038895278703421354, "step": 3613 }, { "epoch": 0.8740024183796856, "grad_norm": 3.759423863931875, "learning_rate": 3.944903907902175e-08, "loss": 0.004422156605869532, "step": 3614 }, { "epoch": 0.8742442563482467, "grad_norm": 1.6732905167293948, "learning_rate": 3.929976238979937e-08, "loss": 0.01213995460420847, "step": 3615 }, { "epoch": 0.8744860943168078, "grad_norm": 0.8665978853903157, "learning_rate": 3.9150757116092837e-08, "loss": 0.0008523043361492455, "step": 3616 }, { "epoch": 0.8747279322853688, "grad_norm": 0.4723187294351157, "learning_rate": 3.9002023345686585e-08, "loss": 0.0008637085556983948, "step": 3617 }, { "epoch": 0.8749697702539299, "grad_norm": 1.2844107829994236, "learning_rate": 3.8853561166205196e-08, "loss": 0.0027584563940763474, "step": 3618 }, { "epoch": 0.8752116082224909, "grad_norm": 2.8005163195353644, "learning_rate": 3.870537066511326e-08, "loss": 0.003283878555521369, "step": 3619 }, { "epoch": 0.875453446191052, "grad_norm": 0.5766014721920202, "learning_rate": 3.8557451929715215e-08, "loss": 0.0012170152040198445, "step": 3620 }, { "epoch": 0.8756952841596131, "grad_norm": 1.2080107190868525, "learning_rate": 3.840980504715546e-08, "loss": 0.0031407177448272705, "step": 3621 }, { "epoch": 0.8759371221281741, "grad_norm": 3.1912846756096007, "learning_rate": 3.826243010441821e-08, "loss": 0.0025516576133668423, "step": 3622 }, { "epoch": 0.8761789600967352, "grad_norm": 1.9958156988340987, "learning_rate": 3.811532718832744e-08, "loss": 0.0058127534575760365, "step": 3623 }, { "epoch": 0.8764207980652963, "grad_norm": 7.03710763827271, "learning_rate": 3.796849638554717e-08, "loss": 0.004195037763565779, "step": 3624 }, { "epoch": 0.8766626360338573, "grad_norm": 1.0229175962580546, "learning_rate": 3.782193778258047e-08, "loss": 0.004163217265158892, "step": 3625 }, { "epoch": 0.8769044740024183, "grad_norm": 2.832396143987676, "learning_rate": 3.767565146577073e-08, "loss": 0.005085655953735113, "step": 3626 }, { "epoch": 0.8771463119709795, "grad_norm": 0.5946513467635579, "learning_rate": 3.752963752130045e-08, "loss": 0.0018585249781608582, "step": 3627 }, { "epoch": 0.8773881499395405, "grad_norm": 1.1469778225079612, "learning_rate": 3.7383896035191875e-08, "loss": 0.003916834946721792, "step": 3628 }, { "epoch": 0.8776299879081015, "grad_norm": 1.3741581801859546, "learning_rate": 3.7238427093306714e-08, "loss": 0.003200025064870715, "step": 3629 }, { "epoch": 0.8778718258766627, "grad_norm": 2.4774328184508336, "learning_rate": 3.709323078134602e-08, "loss": 0.0062826997600495815, "step": 3630 }, { "epoch": 0.8781136638452237, "grad_norm": 8.321379087593138, "learning_rate": 3.6948307184850425e-08, "loss": 0.0072011942975223064, "step": 3631 }, { "epoch": 0.8783555018137847, "grad_norm": 0.9843423708142751, "learning_rate": 3.6803656389199635e-08, "loss": 0.002692483365535736, "step": 3632 }, { "epoch": 0.8785973397823458, "grad_norm": 0.24218442477508453, "learning_rate": 3.6659278479612865e-08, "loss": 0.0003878904681187123, "step": 3633 }, { "epoch": 0.8788391777509069, "grad_norm": 2.414154027413758, "learning_rate": 3.651517354114847e-08, "loss": 0.007506116759032011, "step": 3634 }, { "epoch": 0.879081015719468, "grad_norm": 3.1909737622790377, "learning_rate": 3.637134165870393e-08, "loss": 0.006319374777376652, "step": 3635 }, { "epoch": 0.879322853688029, "grad_norm": 0.6271129337772996, "learning_rate": 3.622778291701595e-08, "loss": 0.002232188358902931, "step": 3636 }, { "epoch": 0.8795646916565901, "grad_norm": 6.7056785330655435, "learning_rate": 3.608449740066022e-08, "loss": 0.004857690539211035, "step": 3637 }, { "epoch": 0.8798065296251512, "grad_norm": 3.920204874355687, "learning_rate": 3.594148519405171e-08, "loss": 0.006340763997286558, "step": 3638 }, { "epoch": 0.8800483675937122, "grad_norm": 1.5472033943694394, "learning_rate": 3.5798746381443924e-08, "loss": 0.009776336140930653, "step": 3639 }, { "epoch": 0.8802902055622733, "grad_norm": 0.8832614134989618, "learning_rate": 3.5656281046929806e-08, "loss": 0.006719126831740141, "step": 3640 }, { "epoch": 0.8805320435308344, "grad_norm": 1.419735763739836, "learning_rate": 3.551408927444083e-08, "loss": 0.0016687574097886682, "step": 3641 }, { "epoch": 0.8807738814993954, "grad_norm": 3.323880370758382, "learning_rate": 3.537217114774743e-08, "loss": 0.008359917439520359, "step": 3642 }, { "epoch": 0.8810157194679564, "grad_norm": 3.8885341170977625, "learning_rate": 3.5230526750458844e-08, "loss": 0.005473324563354254, "step": 3643 }, { "epoch": 0.8812575574365176, "grad_norm": 3.8552124598110518, "learning_rate": 3.5089156166022925e-08, "loss": 0.010990462265908718, "step": 3644 }, { "epoch": 0.8814993954050786, "grad_norm": 0.37096419395881186, "learning_rate": 3.494805947772644e-08, "loss": 0.0011643258621916175, "step": 3645 }, { "epoch": 0.8817412333736396, "grad_norm": 1.3401199025731745, "learning_rate": 3.4807236768694514e-08, "loss": 0.009761511348187923, "step": 3646 }, { "epoch": 0.8819830713422008, "grad_norm": 1.1391233244860177, "learning_rate": 3.466668812189111e-08, "loss": 0.003947661258280277, "step": 3647 }, { "epoch": 0.8822249093107618, "grad_norm": 4.547022349451236, "learning_rate": 3.452641362011854e-08, "loss": 0.006821535527706146, "step": 3648 }, { "epoch": 0.8824667472793228, "grad_norm": 1.2408444029478665, "learning_rate": 3.4386413346017826e-08, "loss": 0.0048465365543961525, "step": 3649 }, { "epoch": 0.8827085852478839, "grad_norm": 1.2777360077364452, "learning_rate": 3.424668738206809e-08, "loss": 0.0044565219432115555, "step": 3650 }, { "epoch": 0.882950423216445, "grad_norm": 1.6666970630912001, "learning_rate": 3.410723581058733e-08, "loss": 0.0029495488852262497, "step": 3651 }, { "epoch": 0.883192261185006, "grad_norm": 0.6262789725378468, "learning_rate": 3.396805871373137e-08, "loss": 0.002178227761760354, "step": 3652 }, { "epoch": 0.8834340991535671, "grad_norm": 3.010223669607704, "learning_rate": 3.382915617349458e-08, "loss": 0.013346008956432343, "step": 3653 }, { "epoch": 0.8836759371221282, "grad_norm": 2.6966092749265895, "learning_rate": 3.3690528271709815e-08, "loss": 0.005963952746242285, "step": 3654 }, { "epoch": 0.8839177750906893, "grad_norm": 2.835686047412683, "learning_rate": 3.355217509004754e-08, "loss": 0.004204933997243643, "step": 3655 }, { "epoch": 0.8841596130592503, "grad_norm": 1.1659885615510848, "learning_rate": 3.3414096710017045e-08, "loss": 0.0038185585290193558, "step": 3656 }, { "epoch": 0.8844014510278113, "grad_norm": 1.898995888344544, "learning_rate": 3.327629321296527e-08, "loss": 0.0028227779548615217, "step": 3657 }, { "epoch": 0.8846432889963725, "grad_norm": 0.7064733015634779, "learning_rate": 3.3138764680077255e-08, "loss": 0.0018077876884490252, "step": 3658 }, { "epoch": 0.8848851269649335, "grad_norm": 2.6903803712094243, "learning_rate": 3.3001511192376316e-08, "loss": 0.0026317588053643703, "step": 3659 }, { "epoch": 0.8851269649334945, "grad_norm": 0.6412656279079174, "learning_rate": 3.286453283072338e-08, "loss": 0.0016439231112599373, "step": 3660 }, { "epoch": 0.8853688029020557, "grad_norm": 0.2326846331824159, "learning_rate": 3.2727829675817554e-08, "loss": 0.0004943847889080644, "step": 3661 }, { "epoch": 0.8856106408706167, "grad_norm": 1.277515469244691, "learning_rate": 3.2591401808195695e-08, "loss": 0.004582925233989954, "step": 3662 }, { "epoch": 0.8858524788391777, "grad_norm": 4.909839944862248, "learning_rate": 3.2455249308232515e-08, "loss": 0.004827278200536966, "step": 3663 }, { "epoch": 0.8860943168077389, "grad_norm": 3.0606622460482384, "learning_rate": 3.231937225614034e-08, "loss": 0.002446669153869152, "step": 3664 }, { "epoch": 0.8863361547762999, "grad_norm": 2.127614801983762, "learning_rate": 3.2183770731969595e-08, "loss": 0.002295661484822631, "step": 3665 }, { "epoch": 0.8865779927448609, "grad_norm": 1.4187329412857108, "learning_rate": 3.2048444815607925e-08, "loss": 0.004672410432249308, "step": 3666 }, { "epoch": 0.886819830713422, "grad_norm": 2.699928654194755, "learning_rate": 3.191339458678094e-08, "loss": 0.0023057498037815094, "step": 3667 }, { "epoch": 0.8870616686819831, "grad_norm": 0.5560420093531134, "learning_rate": 3.177862012505178e-08, "loss": 0.001355123589746654, "step": 3668 }, { "epoch": 0.8873035066505441, "grad_norm": 1.7080081043442978, "learning_rate": 3.1644121509820824e-08, "loss": 0.0013345369370654225, "step": 3669 }, { "epoch": 0.8875453446191052, "grad_norm": 1.0877084074288712, "learning_rate": 3.150989882032634e-08, "loss": 0.0033182071056216955, "step": 3670 }, { "epoch": 0.8877871825876663, "grad_norm": 1.4762068915530233, "learning_rate": 3.137595213564387e-08, "loss": 0.0072340876795351505, "step": 3671 }, { "epoch": 0.8880290205562273, "grad_norm": 5.719386706501387, "learning_rate": 3.1242281534686335e-08, "loss": 0.004732445813715458, "step": 3672 }, { "epoch": 0.8882708585247884, "grad_norm": 1.429167759417358, "learning_rate": 3.110888709620396e-08, "loss": 0.004482378717511892, "step": 3673 }, { "epoch": 0.8885126964933494, "grad_norm": 3.506385084755186, "learning_rate": 3.097576889878445e-08, "loss": 0.005569502245634794, "step": 3674 }, { "epoch": 0.8887545344619106, "grad_norm": 2.5348106517806435, "learning_rate": 3.08429270208525e-08, "loss": 0.00712291756644845, "step": 3675 }, { "epoch": 0.8889963724304716, "grad_norm": 1.7888476170602974, "learning_rate": 3.07103615406703e-08, "loss": 0.0037771817296743393, "step": 3676 }, { "epoch": 0.8892382103990326, "grad_norm": 1.1345755470907672, "learning_rate": 3.057807253633699e-08, "loss": 0.0029956279322504997, "step": 3677 }, { "epoch": 0.8894800483675938, "grad_norm": 6.779712479254913, "learning_rate": 3.044606008578881e-08, "loss": 0.0020295202266424894, "step": 3678 }, { "epoch": 0.8897218863361548, "grad_norm": 3.833610105018988, "learning_rate": 3.031432426679948e-08, "loss": 0.0018435746897011995, "step": 3679 }, { "epoch": 0.8899637243047158, "grad_norm": 1.9919426864050698, "learning_rate": 3.018286515697904e-08, "loss": 0.005326411686837673, "step": 3680 }, { "epoch": 0.8902055622732769, "grad_norm": 1.1078451115422474, "learning_rate": 3.0051682833775295e-08, "loss": 0.002253942657262087, "step": 3681 }, { "epoch": 0.890447400241838, "grad_norm": 0.7722442170540107, "learning_rate": 2.992077737447224e-08, "loss": 0.0013333642855286598, "step": 3682 }, { "epoch": 0.890689238210399, "grad_norm": 5.83704186956708, "learning_rate": 2.97901488561913e-08, "loss": 0.002728348132222891, "step": 3683 }, { "epoch": 0.8909310761789601, "grad_norm": 2.206193110177119, "learning_rate": 2.965979735589058e-08, "loss": 0.005031376611441374, "step": 3684 }, { "epoch": 0.8911729141475212, "grad_norm": 1.2597839179675405, "learning_rate": 2.9529722950364877e-08, "loss": 0.0050876447930932045, "step": 3685 }, { "epoch": 0.8914147521160822, "grad_norm": 4.138227512408389, "learning_rate": 2.9399925716245886e-08, "loss": 0.0056036715395748615, "step": 3686 }, { "epoch": 0.8916565900846433, "grad_norm": 3.9493136767377637, "learning_rate": 2.927040573000189e-08, "loss": 0.004486731719225645, "step": 3687 }, { "epoch": 0.8918984280532044, "grad_norm": 1.8771095201227648, "learning_rate": 2.9141163067937957e-08, "loss": 0.005554699804633856, "step": 3688 }, { "epoch": 0.8921402660217654, "grad_norm": 1.7538894757818442, "learning_rate": 2.9012197806195572e-08, "loss": 0.0017172545194625854, "step": 3689 }, { "epoch": 0.8923821039903265, "grad_norm": 1.2665956733821195, "learning_rate": 2.8883510020753176e-08, "loss": 0.0021177325397729874, "step": 3690 }, { "epoch": 0.8926239419588875, "grad_norm": 2.5984283364523817, "learning_rate": 2.8755099787425297e-08, "loss": 0.006779917981475592, "step": 3691 }, { "epoch": 0.8928657799274486, "grad_norm": 7.060422512161483, "learning_rate": 2.862696718186325e-08, "loss": 0.007241897284984589, "step": 3692 }, { "epoch": 0.8931076178960097, "grad_norm": 0.9532960045362777, "learning_rate": 2.8499112279554537e-08, "loss": 0.003307495266199112, "step": 3693 }, { "epoch": 0.8933494558645707, "grad_norm": 2.910316147387063, "learning_rate": 2.83715351558233e-08, "loss": 0.012991673313081264, "step": 3694 }, { "epoch": 0.8935912938331319, "grad_norm": 1.7705804537976966, "learning_rate": 2.8244235885830024e-08, "loss": 0.005192557815462351, "step": 3695 }, { "epoch": 0.8938331318016929, "grad_norm": 2.848097502063381, "learning_rate": 2.8117214544571165e-08, "loss": 0.050192058086395264, "step": 3696 }, { "epoch": 0.8940749697702539, "grad_norm": 0.7361595530948917, "learning_rate": 2.799047120687992e-08, "loss": 0.0018561301985755563, "step": 3697 }, { "epoch": 0.894316807738815, "grad_norm": 6.364233249436265, "learning_rate": 2.7864005947425385e-08, "loss": 0.002602220047265291, "step": 3698 }, { "epoch": 0.8945586457073761, "grad_norm": 0.8657473711207884, "learning_rate": 2.7737818840712912e-08, "loss": 0.0010033276630565524, "step": 3699 }, { "epoch": 0.8948004836759371, "grad_norm": 0.8568662927492786, "learning_rate": 2.7611909961083978e-08, "loss": 0.0027563113253563643, "step": 3700 }, { "epoch": 0.8950423216444982, "grad_norm": 1.869289413607644, "learning_rate": 2.7486279382716083e-08, "loss": 0.006278755608946085, "step": 3701 }, { "epoch": 0.8952841596130593, "grad_norm": 2.923801771460587, "learning_rate": 2.736092717962296e-08, "loss": 0.011759032495319843, "step": 3702 }, { "epoch": 0.8955259975816203, "grad_norm": 1.4721431818410244, "learning_rate": 2.723585342565421e-08, "loss": 0.004257688764482737, "step": 3703 }, { "epoch": 0.8957678355501814, "grad_norm": 0.8706977416154275, "learning_rate": 2.7111058194495274e-08, "loss": 0.0014869810547679663, "step": 3704 }, { "epoch": 0.8960096735187424, "grad_norm": 9.030164170191101, "learning_rate": 2.6986541559667674e-08, "loss": 0.005295220762491226, "step": 3705 }, { "epoch": 0.8962515114873035, "grad_norm": 1.0960616927690272, "learning_rate": 2.6862303594528902e-08, "loss": 0.00114295759703964, "step": 3706 }, { "epoch": 0.8964933494558646, "grad_norm": 1.2419146549528417, "learning_rate": 2.6738344372272014e-08, "loss": 0.002812751801684499, "step": 3707 }, { "epoch": 0.8967351874244256, "grad_norm": 1.6835639339356774, "learning_rate": 2.6614663965925822e-08, "loss": 0.008503670804202557, "step": 3708 }, { "epoch": 0.8969770253929867, "grad_norm": 5.51837285516785, "learning_rate": 2.6491262448355367e-08, "loss": 0.020854493603110313, "step": 3709 }, { "epoch": 0.8972188633615478, "grad_norm": 1.3619104530960675, "learning_rate": 2.6368139892260665e-08, "loss": 0.0032013810705393553, "step": 3710 }, { "epoch": 0.8974607013301088, "grad_norm": 8.980002795632293, "learning_rate": 2.6245296370178028e-08, "loss": 0.002665985142812133, "step": 3711 }, { "epoch": 0.8977025392986699, "grad_norm": 15.745906135015593, "learning_rate": 2.612273195447895e-08, "loss": 0.004508616868406534, "step": 3712 }, { "epoch": 0.897944377267231, "grad_norm": 1.8938263505811124, "learning_rate": 2.6000446717370726e-08, "loss": 0.009115359745919704, "step": 3713 }, { "epoch": 0.898186215235792, "grad_norm": 1.5459739464480897, "learning_rate": 2.5878440730896124e-08, "loss": 0.0061209737323224545, "step": 3714 }, { "epoch": 0.898428053204353, "grad_norm": 0.7506452507753736, "learning_rate": 2.5756714066933306e-08, "loss": 0.0011378057533875108, "step": 3715 }, { "epoch": 0.8986698911729142, "grad_norm": 5.073372724793613, "learning_rate": 2.5635266797195964e-08, "loss": 0.004191612359136343, "step": 3716 }, { "epoch": 0.8989117291414752, "grad_norm": 3.1558372448154017, "learning_rate": 2.55140989932332e-08, "loss": 0.0019009357783943415, "step": 3717 }, { "epoch": 0.8991535671100362, "grad_norm": 0.8917207435495885, "learning_rate": 2.539321072642947e-08, "loss": 0.0004693027585744858, "step": 3718 }, { "epoch": 0.8993954050785974, "grad_norm": 2.2993387904171, "learning_rate": 2.5272602068004357e-08, "loss": 0.006509733851999044, "step": 3719 }, { "epoch": 0.8996372430471584, "grad_norm": 1.9615557516768718, "learning_rate": 2.5152273089013078e-08, "loss": 0.006033369805663824, "step": 3720 }, { "epoch": 0.8998790810157195, "grad_norm": 2.3769700451864693, "learning_rate": 2.50322238603457e-08, "loss": 0.007159331347793341, "step": 3721 }, { "epoch": 0.9001209189842805, "grad_norm": 2.2189711921374493, "learning_rate": 2.491245445272783e-08, "loss": 0.00416315533220768, "step": 3722 }, { "epoch": 0.9003627569528416, "grad_norm": 1.5518142931203989, "learning_rate": 2.479296493671984e-08, "loss": 0.002708464628085494, "step": 3723 }, { "epoch": 0.9006045949214027, "grad_norm": 1.0519081838766102, "learning_rate": 2.4673755382717598e-08, "loss": 0.0032877230551093817, "step": 3724 }, { "epoch": 0.9008464328899637, "grad_norm": 3.606777041480777, "learning_rate": 2.4554825860951867e-08, "loss": 0.001991628436371684, "step": 3725 }, { "epoch": 0.9010882708585248, "grad_norm": 0.5597856634513534, "learning_rate": 2.4436176441488155e-08, "loss": 0.0015643419465050101, "step": 3726 }, { "epoch": 0.9013301088270859, "grad_norm": 1.3990056220296623, "learning_rate": 2.431780719422749e-08, "loss": 0.013276767916977406, "step": 3727 }, { "epoch": 0.9015719467956469, "grad_norm": 2.5341894940858944, "learning_rate": 2.4199718188905428e-08, "loss": 0.008672608993947506, "step": 3728 }, { "epoch": 0.9018137847642079, "grad_norm": 2.115422622296632, "learning_rate": 2.4081909495092645e-08, "loss": 0.0037451819516718388, "step": 3729 }, { "epoch": 0.9020556227327691, "grad_norm": 0.7934379598494633, "learning_rate": 2.3964381182194515e-08, "loss": 0.0021377832163125277, "step": 3730 }, { "epoch": 0.9022974607013301, "grad_norm": 3.040584840144187, "learning_rate": 2.3847133319451317e-08, "loss": 0.005350103136152029, "step": 3731 }, { "epoch": 0.9025392986698911, "grad_norm": 4.435354433115158, "learning_rate": 2.3730165975938188e-08, "loss": 0.01255799550563097, "step": 3732 }, { "epoch": 0.9027811366384523, "grad_norm": 1.5111643815234619, "learning_rate": 2.361347922056478e-08, "loss": 0.0017144177109003067, "step": 3733 }, { "epoch": 0.9030229746070133, "grad_norm": 1.5798874663829987, "learning_rate": 2.3497073122075605e-08, "loss": 0.007205869536846876, "step": 3734 }, { "epoch": 0.9032648125755743, "grad_norm": 2.3310443827992264, "learning_rate": 2.338094774904975e-08, "loss": 0.006794875953346491, "step": 3735 }, { "epoch": 0.9035066505441355, "grad_norm": 1.9332308287573112, "learning_rate": 2.3265103169901214e-08, "loss": 0.008235138840973377, "step": 3736 }, { "epoch": 0.9037484885126965, "grad_norm": 0.7000026352103021, "learning_rate": 2.3149539452877952e-08, "loss": 0.0007708969642408192, "step": 3737 }, { "epoch": 0.9039903264812575, "grad_norm": 2.6372338226035072, "learning_rate": 2.3034256666063013e-08, "loss": 0.016821226105093956, "step": 3738 }, { "epoch": 0.9042321644498186, "grad_norm": 1.2265181314735294, "learning_rate": 2.2919254877373727e-08, "loss": 0.0023431447334587574, "step": 3739 }, { "epoch": 0.9044740024183797, "grad_norm": 5.0129157052136275, "learning_rate": 2.2804534154561905e-08, "loss": 0.00821778830140829, "step": 3740 }, { "epoch": 0.9047158403869408, "grad_norm": 0.9526931441416534, "learning_rate": 2.26900945652137e-08, "loss": 0.0011860718950629234, "step": 3741 }, { "epoch": 0.9049576783555018, "grad_norm": 1.1736549608800941, "learning_rate": 2.2575936176749742e-08, "loss": 0.006179090589284897, "step": 3742 }, { "epoch": 0.9051995163240629, "grad_norm": 1.9422013578032271, "learning_rate": 2.2462059056424953e-08, "loss": 0.004499378148466349, "step": 3743 }, { "epoch": 0.905441354292624, "grad_norm": 3.5879734898482845, "learning_rate": 2.2348463271328553e-08, "loss": 0.007311891298741102, "step": 3744 }, { "epoch": 0.905683192261185, "grad_norm": 0.7206837220151548, "learning_rate": 2.223514888838396e-08, "loss": 0.0011645384365692735, "step": 3745 }, { "epoch": 0.905925030229746, "grad_norm": 2.2349162546032546, "learning_rate": 2.212211597434882e-08, "loss": 0.005035401787608862, "step": 3746 }, { "epoch": 0.9061668681983072, "grad_norm": 0.2902563606470838, "learning_rate": 2.2009364595815206e-08, "loss": 0.0006071267416700721, "step": 3747 }, { "epoch": 0.9064087061668682, "grad_norm": 20.555316601031365, "learning_rate": 2.189689481920892e-08, "loss": 0.00516679510474205, "step": 3748 }, { "epoch": 0.9066505441354292, "grad_norm": 0.6124547260787672, "learning_rate": 2.178470671079008e-08, "loss": 0.0010834354907274246, "step": 3749 }, { "epoch": 0.9068923821039904, "grad_norm": 2.9056367856794565, "learning_rate": 2.1672800336652974e-08, "loss": 0.0202814731746912, "step": 3750 }, { "epoch": 0.9071342200725514, "grad_norm": 0.6083022129325375, "learning_rate": 2.156117576272559e-08, "loss": 0.0007808235241100192, "step": 3751 }, { "epoch": 0.9073760580411124, "grad_norm": 1.5157409706024334, "learning_rate": 2.1449833054770272e-08, "loss": 0.004827722441405058, "step": 3752 }, { "epoch": 0.9076178960096735, "grad_norm": 0.529974564341966, "learning_rate": 2.1338772278382934e-08, "loss": 0.0015600277110934258, "step": 3753 }, { "epoch": 0.9078597339782346, "grad_norm": 1.0115846569608986, "learning_rate": 2.1227993498993734e-08, "loss": 0.0026696890126913786, "step": 3754 }, { "epoch": 0.9081015719467956, "grad_norm": 0.5183818285200473, "learning_rate": 2.1117496781866417e-08, "loss": 0.0009217720362357795, "step": 3755 }, { "epoch": 0.9083434099153567, "grad_norm": 0.9726626478299764, "learning_rate": 2.1007282192098795e-08, "loss": 0.002264497336000204, "step": 3756 }, { "epoch": 0.9085852478839178, "grad_norm": 13.118053064075786, "learning_rate": 2.089734979462232e-08, "loss": 0.005559659097343683, "step": 3757 }, { "epoch": 0.9088270858524788, "grad_norm": 8.430257252094355, "learning_rate": 2.078769965420213e-08, "loss": 0.006769171450287104, "step": 3758 }, { "epoch": 0.9090689238210399, "grad_norm": 0.85161969700807, "learning_rate": 2.067833183543727e-08, "loss": 0.0033037408720701933, "step": 3759 }, { "epoch": 0.909310761789601, "grad_norm": 2.9853889875047583, "learning_rate": 2.056924640276031e-08, "loss": 0.012910894118249416, "step": 3760 }, { "epoch": 0.909552599758162, "grad_norm": 1.0212388559325103, "learning_rate": 2.0460443420437567e-08, "loss": 0.001665147254243493, "step": 3761 }, { "epoch": 0.9097944377267231, "grad_norm": 1.3372260957755133, "learning_rate": 2.0351922952568767e-08, "loss": 0.0021179865580052137, "step": 3762 }, { "epoch": 0.9100362756952841, "grad_norm": 5.400466111807313, "learning_rate": 2.0243685063087545e-08, "loss": 0.007279071491211653, "step": 3763 }, { "epoch": 0.9102781136638453, "grad_norm": 1.4465034897058437, "learning_rate": 2.013572981576067e-08, "loss": 0.003153736237436533, "step": 3764 }, { "epoch": 0.9105199516324063, "grad_norm": 5.050327627243268, "learning_rate": 2.0028057274188493e-08, "loss": 0.011430121958255768, "step": 3765 }, { "epoch": 0.9107617896009673, "grad_norm": 1.4316160264693756, "learning_rate": 1.99206675018051e-08, "loss": 0.005521564278751612, "step": 3766 }, { "epoch": 0.9110036275695285, "grad_norm": 1.7252928535135237, "learning_rate": 1.9813560561877496e-08, "loss": 0.008765274658799171, "step": 3767 }, { "epoch": 0.9112454655380895, "grad_norm": 2.2215621964297414, "learning_rate": 1.9706736517506595e-08, "loss": 0.004467828199267387, "step": 3768 }, { "epoch": 0.9114873035066505, "grad_norm": 2.8689232275474215, "learning_rate": 1.9600195431626278e-08, "loss": 0.010042625479400158, "step": 3769 }, { "epoch": 0.9117291414752116, "grad_norm": 1.1991589470584911, "learning_rate": 1.949393736700383e-08, "loss": 0.00951042678207159, "step": 3770 }, { "epoch": 0.9119709794437727, "grad_norm": 1.9955731681952562, "learning_rate": 1.9387962386239853e-08, "loss": 0.008379269391298294, "step": 3771 }, { "epoch": 0.9122128174123337, "grad_norm": 1.7261625711965167, "learning_rate": 1.928227055176801e-08, "loss": 0.004079241305589676, "step": 3772 }, { "epoch": 0.9124546553808948, "grad_norm": 0.6397430045732141, "learning_rate": 1.9176861925855324e-08, "loss": 0.002250737277790904, "step": 3773 }, { "epoch": 0.9126964933494559, "grad_norm": 0.647358575323524, "learning_rate": 1.9071736570601904e-08, "loss": 0.0033205547370016575, "step": 3774 }, { "epoch": 0.9129383313180169, "grad_norm": 2.2693000440231472, "learning_rate": 1.896689454794098e-08, "loss": 0.0032395333983004093, "step": 3775 }, { "epoch": 0.913180169286578, "grad_norm": 1.8245852613411233, "learning_rate": 1.8862335919638816e-08, "loss": 0.0017546724993735552, "step": 3776 }, { "epoch": 0.9134220072551391, "grad_norm": 0.5943140979359874, "learning_rate": 1.875806074729491e-08, "loss": 0.0004087490087840706, "step": 3777 }, { "epoch": 0.9136638452237001, "grad_norm": 2.830283909631291, "learning_rate": 1.8654069092341406e-08, "loss": 0.006081076338887215, "step": 3778 }, { "epoch": 0.9139056831922612, "grad_norm": 3.2422629677703956, "learning_rate": 1.8550361016043737e-08, "loss": 0.0024147634394466877, "step": 3779 }, { "epoch": 0.9141475211608222, "grad_norm": 1.7418236066713513, "learning_rate": 1.8446936579500194e-08, "loss": 0.004022825043648481, "step": 3780 }, { "epoch": 0.9143893591293834, "grad_norm": 2.639050703950433, "learning_rate": 1.8343795843641817e-08, "loss": 0.002130505396053195, "step": 3781 }, { "epoch": 0.9146311970979444, "grad_norm": 0.6430684562392612, "learning_rate": 1.8240938869232835e-08, "loss": 0.0020824705716222525, "step": 3782 }, { "epoch": 0.9148730350665054, "grad_norm": 2.6865085293630773, "learning_rate": 1.8138365716869775e-08, "loss": 0.0018967647338286042, "step": 3783 }, { "epoch": 0.9151148730350666, "grad_norm": 2.182453652378884, "learning_rate": 1.8036076446982464e-08, "loss": 0.011539442464709282, "step": 3784 }, { "epoch": 0.9153567110036276, "grad_norm": 1.426129812224336, "learning_rate": 1.7934071119833304e-08, "loss": 0.0073976414278149605, "step": 3785 }, { "epoch": 0.9155985489721886, "grad_norm": 1.147192593847067, "learning_rate": 1.7832349795517342e-08, "loss": 0.0016108079580590129, "step": 3786 }, { "epoch": 0.9158403869407497, "grad_norm": 0.3228307742008919, "learning_rate": 1.773091253396236e-08, "loss": 0.0005713819991797209, "step": 3787 }, { "epoch": 0.9160822249093108, "grad_norm": 2.2731794472012656, "learning_rate": 1.762975939492878e-08, "loss": 0.008433566428720951, "step": 3788 }, { "epoch": 0.9163240628778718, "grad_norm": 0.8214281017394575, "learning_rate": 1.7528890438009703e-08, "loss": 0.0008546659955754876, "step": 3789 }, { "epoch": 0.9165659008464329, "grad_norm": 1.3735304895428637, "learning_rate": 1.742830572263071e-08, "loss": 0.0055414424277842045, "step": 3790 }, { "epoch": 0.916807738814994, "grad_norm": 10.649630642161167, "learning_rate": 1.7328005308050064e-08, "loss": 0.006628807168453932, "step": 3791 }, { "epoch": 0.917049576783555, "grad_norm": 2.679561625043006, "learning_rate": 1.7227989253358222e-08, "loss": 0.014857644215226173, "step": 3792 }, { "epoch": 0.9172914147521161, "grad_norm": 2.4674109520258285, "learning_rate": 1.712825761747866e-08, "loss": 0.008773892186582088, "step": 3793 }, { "epoch": 0.9175332527206771, "grad_norm": 1.9250904803498676, "learning_rate": 1.7028810459166597e-08, "loss": 0.002637469442561269, "step": 3794 }, { "epoch": 0.9177750906892382, "grad_norm": 1.8529434745069548, "learning_rate": 1.692964783701034e-08, "loss": 0.0104669826105237, "step": 3795 }, { "epoch": 0.9180169286577993, "grad_norm": 1.6820754997050655, "learning_rate": 1.6830769809430044e-08, "loss": 0.006142137106508017, "step": 3796 }, { "epoch": 0.9182587666263603, "grad_norm": 0.15893400208858355, "learning_rate": 1.673217643467856e-08, "loss": 0.0005964852753095329, "step": 3797 }, { "epoch": 0.9185006045949214, "grad_norm": 2.9433837987503906, "learning_rate": 1.6633867770840748e-08, "loss": 0.007729153614491224, "step": 3798 }, { "epoch": 0.9187424425634825, "grad_norm": 2.771337123840745, "learning_rate": 1.6535843875833956e-08, "loss": 0.0023429172579199076, "step": 3799 }, { "epoch": 0.9189842805320435, "grad_norm": 2.596281176489421, "learning_rate": 1.643810480740765e-08, "loss": 0.002264570677652955, "step": 3800 }, { "epoch": 0.9192261185006046, "grad_norm": 1.4205031897980198, "learning_rate": 1.634065062314355e-08, "loss": 0.0025686423759907484, "step": 3801 }, { "epoch": 0.9194679564691657, "grad_norm": 2.5090421615556457, "learning_rate": 1.624348138045545e-08, "loss": 0.008935944177210331, "step": 3802 }, { "epoch": 0.9197097944377267, "grad_norm": 4.636682923289855, "learning_rate": 1.6146597136589336e-08, "loss": 0.009455984458327293, "step": 3803 }, { "epoch": 0.9199516324062877, "grad_norm": 2.003088765262638, "learning_rate": 1.604999794862344e-08, "loss": 0.006033084820955992, "step": 3804 }, { "epoch": 0.9201934703748489, "grad_norm": 1.0087374879528548, "learning_rate": 1.5953683873467737e-08, "loss": 0.0029671199154108763, "step": 3805 }, { "epoch": 0.9204353083434099, "grad_norm": 3.51051174152246, "learning_rate": 1.58576549678644e-08, "loss": 0.010009052231907845, "step": 3806 }, { "epoch": 0.920677146311971, "grad_norm": 7.0195451349937965, "learning_rate": 1.576191128838783e-08, "loss": 0.007248611189424992, "step": 3807 }, { "epoch": 0.9209189842805321, "grad_norm": 4.987847683610389, "learning_rate": 1.566645289144386e-08, "loss": 0.00937191117554903, "step": 3808 }, { "epoch": 0.9211608222490931, "grad_norm": 1.1716004785085041, "learning_rate": 1.557127983327078e-08, "loss": 0.004683173261582851, "step": 3809 }, { "epoch": 0.9214026602176542, "grad_norm": 1.025602374929206, "learning_rate": 1.5476392169938458e-08, "loss": 0.002023537177592516, "step": 3810 }, { "epoch": 0.9216444981862152, "grad_norm": 3.160160480105587, "learning_rate": 1.5381789957348736e-08, "loss": 0.004361228551715612, "step": 3811 }, { "epoch": 0.9218863361547763, "grad_norm": 1.02194911989906, "learning_rate": 1.528747325123525e-08, "loss": 0.01007531862705946, "step": 3812 }, { "epoch": 0.9221281741233374, "grad_norm": 9.780931780096322, "learning_rate": 1.5193442107163446e-08, "loss": 0.008300090208649635, "step": 3813 }, { "epoch": 0.9223700120918984, "grad_norm": 4.4525733637756195, "learning_rate": 1.509969658053062e-08, "loss": 0.005434613674879074, "step": 3814 }, { "epoch": 0.9226118500604595, "grad_norm": 2.2219369254421113, "learning_rate": 1.5006236726565646e-08, "loss": 0.004278344567865133, "step": 3815 }, { "epoch": 0.9228536880290206, "grad_norm": 3.785400361388476, "learning_rate": 1.491306260032915e-08, "loss": 0.005015197675675154, "step": 3816 }, { "epoch": 0.9230955259975816, "grad_norm": 4.487815219367355, "learning_rate": 1.4820174256713502e-08, "loss": 0.01068867277354002, "step": 3817 }, { "epoch": 0.9233373639661426, "grad_norm": 4.281996868077146, "learning_rate": 1.4727571750442702e-08, "loss": 0.005575291812419891, "step": 3818 }, { "epoch": 0.9235792019347038, "grad_norm": 0.5162637189784859, "learning_rate": 1.4635255136072167e-08, "loss": 0.00087272486416623, "step": 3819 }, { "epoch": 0.9238210399032648, "grad_norm": 1.5502754652437276, "learning_rate": 1.4543224467989168e-08, "loss": 0.0006996590527705848, "step": 3820 }, { "epoch": 0.9240628778718258, "grad_norm": 1.5641451159342021, "learning_rate": 1.4451479800412337e-08, "loss": 0.00312894769012928, "step": 3821 }, { "epoch": 0.924304715840387, "grad_norm": 0.756368104719667, "learning_rate": 1.4360021187391714e-08, "loss": 0.0005420835805125535, "step": 3822 }, { "epoch": 0.924546553808948, "grad_norm": 1.0378802688443123, "learning_rate": 1.4268848682809142e-08, "loss": 0.003292979672551155, "step": 3823 }, { "epoch": 0.924788391777509, "grad_norm": 1.5020172620795662, "learning_rate": 1.4177962340377547e-08, "loss": 0.005064605735242367, "step": 3824 }, { "epoch": 0.9250302297460702, "grad_norm": 3.1272943126538966, "learning_rate": 1.4087362213641484e-08, "loss": 0.011983713135123253, "step": 3825 }, { "epoch": 0.9252720677146312, "grad_norm": 1.4438139403499288, "learning_rate": 1.399704835597687e-08, "loss": 0.004592143930494785, "step": 3826 }, { "epoch": 0.9255139056831923, "grad_norm": 1.0736842767535184, "learning_rate": 1.3907020820590865e-08, "loss": 0.00304154003970325, "step": 3827 }, { "epoch": 0.9257557436517533, "grad_norm": 1.0005999944157142, "learning_rate": 1.3817279660522096e-08, "loss": 0.0034907471854239702, "step": 3828 }, { "epoch": 0.9259975816203144, "grad_norm": 1.7676287563645163, "learning_rate": 1.372782492864033e-08, "loss": 0.0031779108103364706, "step": 3829 }, { "epoch": 0.9262394195888755, "grad_norm": 10.872251192860043, "learning_rate": 1.3638656677646632e-08, "loss": 0.006387253291904926, "step": 3830 }, { "epoch": 0.9264812575574365, "grad_norm": 1.7167178826251335, "learning_rate": 1.3549774960073368e-08, "loss": 0.001392035628668964, "step": 3831 }, { "epoch": 0.9267230955259976, "grad_norm": 0.793195165082152, "learning_rate": 1.3461179828284041e-08, "loss": 0.0025387557689100504, "step": 3832 }, { "epoch": 0.9269649334945587, "grad_norm": 1.089839261134261, "learning_rate": 1.337287133447318e-08, "loss": 0.003210230264812708, "step": 3833 }, { "epoch": 0.9272067714631197, "grad_norm": 0.4477064069226522, "learning_rate": 1.328484953066683e-08, "loss": 0.002902827225625515, "step": 3834 }, { "epoch": 0.9274486094316807, "grad_norm": 1.424852546942371, "learning_rate": 1.3197114468721627e-08, "loss": 0.0075434232130646706, "step": 3835 }, { "epoch": 0.9276904474002419, "grad_norm": 1.1937491531886069, "learning_rate": 1.310966620032572e-08, "loss": 0.0027816896326839924, "step": 3836 }, { "epoch": 0.9279322853688029, "grad_norm": 2.4321954317969894, "learning_rate": 1.3022504776998011e-08, "loss": 0.006068504881113768, "step": 3837 }, { "epoch": 0.9281741233373639, "grad_norm": 2.7128891992493274, "learning_rate": 1.2935630250088536e-08, "loss": 0.0028501199558377266, "step": 3838 }, { "epoch": 0.9284159613059251, "grad_norm": 1.6957711012263066, "learning_rate": 1.2849042670778298e-08, "loss": 0.004267033189535141, "step": 3839 }, { "epoch": 0.9286577992744861, "grad_norm": 0.594170049269257, "learning_rate": 1.2762742090079271e-08, "loss": 0.0013163165422156453, "step": 3840 }, { "epoch": 0.9288996372430471, "grad_norm": 2.8133622982624313, "learning_rate": 1.2676728558834226e-08, "loss": 0.003262484446167946, "step": 3841 }, { "epoch": 0.9291414752116082, "grad_norm": 1.3517571996185442, "learning_rate": 1.2591002127716965e-08, "loss": 0.0005082046845927835, "step": 3842 }, { "epoch": 0.9293833131801693, "grad_norm": 6.693685635718772, "learning_rate": 1.2505562847232087e-08, "loss": 0.004657914396375418, "step": 3843 }, { "epoch": 0.9296251511487303, "grad_norm": 1.3433565726735395, "learning_rate": 1.2420410767714995e-08, "loss": 0.004306325223296881, "step": 3844 }, { "epoch": 0.9298669891172914, "grad_norm": 1.001014124727935, "learning_rate": 1.2335545939332004e-08, "loss": 0.003817955730482936, "step": 3845 }, { "epoch": 0.9301088270858525, "grad_norm": 3.7940901933023117, "learning_rate": 1.225096841208001e-08, "loss": 0.013810812495648861, "step": 3846 }, { "epoch": 0.9303506650544136, "grad_norm": 6.698833532353289, "learning_rate": 1.2166678235786766e-08, "loss": 0.004627529066056013, "step": 3847 }, { "epoch": 0.9305925030229746, "grad_norm": 2.2528830575808563, "learning_rate": 1.208267546011088e-08, "loss": 0.014704626984894276, "step": 3848 }, { "epoch": 0.9308343409915357, "grad_norm": 2.626829395135924, "learning_rate": 1.199896013454127e-08, "loss": 0.003557575633749366, "step": 3849 }, { "epoch": 0.9310761789600968, "grad_norm": 7.7454616152967874, "learning_rate": 1.1915532308397923e-08, "loss": 0.027650615200400352, "step": 3850 }, { "epoch": 0.9313180169286578, "grad_norm": 0.7701854291051147, "learning_rate": 1.1832392030831085e-08, "loss": 0.004545525182038546, "step": 3851 }, { "epoch": 0.9315598548972188, "grad_norm": 0.5129942608459702, "learning_rate": 1.1749539350821902e-08, "loss": 0.001838687458075583, "step": 3852 }, { "epoch": 0.93180169286578, "grad_norm": 0.8472487840482501, "learning_rate": 1.1666974317181833e-08, "loss": 0.001877911388874054, "step": 3853 }, { "epoch": 0.932043530834341, "grad_norm": 4.007082206666407, "learning_rate": 1.1584696978553022e-08, "loss": 0.0071611590683460236, "step": 3854 }, { "epoch": 0.932285368802902, "grad_norm": 1.0799966382602242, "learning_rate": 1.1502707383408138e-08, "loss": 0.0021024993620812893, "step": 3855 }, { "epoch": 0.9325272067714632, "grad_norm": 1.8653488962573956, "learning_rate": 1.1421005580050214e-08, "loss": 0.007289628963917494, "step": 3856 }, { "epoch": 0.9327690447400242, "grad_norm": 0.9662883204671745, "learning_rate": 1.1339591616612854e-08, "loss": 0.004501603543758392, "step": 3857 }, { "epoch": 0.9330108827085852, "grad_norm": 0.6176032165697874, "learning_rate": 1.1258465541059914e-08, "loss": 0.004251427482813597, "step": 3858 }, { "epoch": 0.9332527206771463, "grad_norm": 0.26801811677494053, "learning_rate": 1.1177627401185996e-08, "loss": 0.000803825561888516, "step": 3859 }, { "epoch": 0.9334945586457074, "grad_norm": 50.77273726018945, "learning_rate": 1.109707724461556e-08, "loss": 0.004197230096906424, "step": 3860 }, { "epoch": 0.9337363966142684, "grad_norm": 0.6349346615990606, "learning_rate": 1.1016815118803813e-08, "loss": 0.0015752386534586549, "step": 3861 }, { "epoch": 0.9339782345828295, "grad_norm": 1.5205638909325132, "learning_rate": 1.0936841071036151e-08, "loss": 0.0028145727701485157, "step": 3862 }, { "epoch": 0.9342200725513906, "grad_norm": 1.2215429122176926, "learning_rate": 1.0857155148428165e-08, "loss": 0.006803154945373535, "step": 3863 }, { "epoch": 0.9344619105199516, "grad_norm": 1.69836510113347, "learning_rate": 1.0777757397925857e-08, "loss": 0.0009367872844450176, "step": 3864 }, { "epoch": 0.9347037484885127, "grad_norm": 1.8943037437185473, "learning_rate": 1.0698647866305255e-08, "loss": 0.006708188448101282, "step": 3865 }, { "epoch": 0.9349455864570737, "grad_norm": 2.8508286363476283, "learning_rate": 1.0619826600172854e-08, "loss": 0.007034103851765394, "step": 3866 }, { "epoch": 0.9351874244256349, "grad_norm": 0.6251687306420981, "learning_rate": 1.0541293645965066e-08, "loss": 0.001595395035110414, "step": 3867 }, { "epoch": 0.9354292623941959, "grad_norm": 1.564805821939411, "learning_rate": 1.0463049049948603e-08, "loss": 0.0025212427135556936, "step": 3868 }, { "epoch": 0.9356711003627569, "grad_norm": 1.1822177363330504, "learning_rate": 1.03850928582202e-08, "loss": 0.001295384718105197, "step": 3869 }, { "epoch": 0.9359129383313181, "grad_norm": 1.267321929283827, "learning_rate": 1.0307425116706791e-08, "loss": 0.0027618766762316227, "step": 3870 }, { "epoch": 0.9361547762998791, "grad_norm": 2.2547890435628553, "learning_rate": 1.0230045871165216e-08, "loss": 0.01062134187668562, "step": 3871 }, { "epoch": 0.9363966142684401, "grad_norm": 7.160370491012464, "learning_rate": 1.0152955167182564e-08, "loss": 0.002426310908049345, "step": 3872 }, { "epoch": 0.9366384522370013, "grad_norm": 0.8658813660622434, "learning_rate": 1.0076153050175674e-08, "loss": 0.002209628466516733, "step": 3873 }, { "epoch": 0.9368802902055623, "grad_norm": 0.5520130410284317, "learning_rate": 9.999639565391626e-09, "loss": 0.0007047557737678289, "step": 3874 }, { "epoch": 0.9371221281741233, "grad_norm": 2.173167402295718, "learning_rate": 9.923414757907312e-09, "loss": 0.006532622966915369, "step": 3875 }, { "epoch": 0.9373639661426844, "grad_norm": 0.6926437425544298, "learning_rate": 9.847478672629527e-09, "loss": 0.0011457906803116202, "step": 3876 }, { "epoch": 0.9376058041112455, "grad_norm": 3.410410595625032, "learning_rate": 9.771831354295102e-09, "loss": 0.006175599992275238, "step": 3877 }, { "epoch": 0.9378476420798065, "grad_norm": 1.4197846244797063, "learning_rate": 9.696472847470605e-09, "loss": 0.0031774744857102633, "step": 3878 }, { "epoch": 0.9380894800483676, "grad_norm": 2.768171670168272, "learning_rate": 9.621403196552525e-09, "loss": 0.008118963800370693, "step": 3879 }, { "epoch": 0.9383313180169287, "grad_norm": 7.85881360342234, "learning_rate": 9.54662244576715e-09, "loss": 0.004856719169765711, "step": 3880 }, { "epoch": 0.9385731559854897, "grad_norm": 4.812261636537103, "learning_rate": 9.472130639170627e-09, "loss": 0.003669029800221324, "step": 3881 }, { "epoch": 0.9388149939540508, "grad_norm": 0.7926199810120256, "learning_rate": 9.397927820648798e-09, "loss": 0.0015754327178001404, "step": 3882 }, { "epoch": 0.9390568319226118, "grad_norm": 1.0941337858540225, "learning_rate": 9.324014033917304e-09, "loss": 0.0054713967256248, "step": 3883 }, { "epoch": 0.9392986698911729, "grad_norm": 0.2928321314772878, "learning_rate": 9.250389322521423e-09, "loss": 0.0005927306483499706, "step": 3884 }, { "epoch": 0.939540507859734, "grad_norm": 1.359679711816403, "learning_rate": 9.177053729836293e-09, "loss": 0.003120873589068651, "step": 3885 }, { "epoch": 0.939782345828295, "grad_norm": 5.220819992349736, "learning_rate": 9.104007299066519e-09, "loss": 0.003955098334699869, "step": 3886 }, { "epoch": 0.9400241837968561, "grad_norm": 4.965958657537931, "learning_rate": 9.031250073246511e-09, "loss": 0.015122637152671814, "step": 3887 }, { "epoch": 0.9402660217654172, "grad_norm": 1.2725480504664128, "learning_rate": 8.958782095240202e-09, "loss": 0.00649148179218173, "step": 3888 }, { "epoch": 0.9405078597339782, "grad_norm": 14.688477995162668, "learning_rate": 8.886603407741222e-09, "loss": 0.00707029877230525, "step": 3889 }, { "epoch": 0.9407496977025392, "grad_norm": 1.6020786285336919, "learning_rate": 8.814714053272554e-09, "loss": 0.0035131254699081182, "step": 3890 }, { "epoch": 0.9409915356711004, "grad_norm": 0.7492664678318183, "learning_rate": 8.74311407418704e-09, "loss": 0.0016025974182412028, "step": 3891 }, { "epoch": 0.9412333736396614, "grad_norm": 2.022158540113688, "learning_rate": 8.67180351266672e-09, "loss": 0.0037653453182429075, "step": 3892 }, { "epoch": 0.9414752116082225, "grad_norm": 2.1235588458445, "learning_rate": 8.600782410723428e-09, "loss": 0.003861027304083109, "step": 3893 }, { "epoch": 0.9417170495767836, "grad_norm": 1.3724789024850155, "learning_rate": 8.530050810198198e-09, "loss": 0.007929439656436443, "step": 3894 }, { "epoch": 0.9419588875453446, "grad_norm": 2.0768394782314124, "learning_rate": 8.459608752761693e-09, "loss": 0.005657705944031477, "step": 3895 }, { "epoch": 0.9422007255139057, "grad_norm": 0.4111723953641757, "learning_rate": 8.389456279913887e-09, "loss": 0.0006146594532765448, "step": 3896 }, { "epoch": 0.9424425634824668, "grad_norm": 4.333191957577456, "learning_rate": 8.319593432984218e-09, "loss": 0.006860348861664534, "step": 3897 }, { "epoch": 0.9426844014510278, "grad_norm": 1.4282124187992589, "learning_rate": 8.250020253131484e-09, "loss": 0.003252558410167694, "step": 3898 }, { "epoch": 0.9429262394195889, "grad_norm": 1.3416904053279228, "learning_rate": 8.180736781343733e-09, "loss": 0.0047919489443302155, "step": 3899 }, { "epoch": 0.9431680773881499, "grad_norm": 1.053566031801608, "learning_rate": 8.111743058438536e-09, "loss": 0.0020106183364987373, "step": 3900 }, { "epoch": 0.943409915356711, "grad_norm": 2.075988040287652, "learning_rate": 8.043039125062546e-09, "loss": 0.002859117230400443, "step": 3901 }, { "epoch": 0.9436517533252721, "grad_norm": 2.275332004949709, "learning_rate": 7.974625021691827e-09, "loss": 0.002152075758203864, "step": 3902 }, { "epoch": 0.9438935912938331, "grad_norm": 1.3259336588547506, "learning_rate": 7.906500788631642e-09, "loss": 0.0020052827894687653, "step": 3903 }, { "epoch": 0.9441354292623942, "grad_norm": 0.38245164528433667, "learning_rate": 7.838666466016442e-09, "loss": 0.0011495491489768028, "step": 3904 }, { "epoch": 0.9443772672309553, "grad_norm": 0.5540201223517294, "learning_rate": 7.771122093810034e-09, "loss": 0.0013563395477831364, "step": 3905 }, { "epoch": 0.9446191051995163, "grad_norm": 1.0876928766257592, "learning_rate": 7.7038677118052e-09, "loss": 0.0021854822989553213, "step": 3906 }, { "epoch": 0.9448609431680773, "grad_norm": 5.808166441678226, "learning_rate": 7.63690335962408e-09, "loss": 0.04499223455786705, "step": 3907 }, { "epoch": 0.9451027811366385, "grad_norm": 1.3249529382620449, "learning_rate": 7.570229076717782e-09, "loss": 0.0033403930719941854, "step": 3908 }, { "epoch": 0.9453446191051995, "grad_norm": 0.6973351530751293, "learning_rate": 7.503844902366607e-09, "loss": 0.0027927157934755087, "step": 3909 }, { "epoch": 0.9455864570737605, "grad_norm": 0.7271480689674671, "learning_rate": 7.437750875679883e-09, "loss": 0.001009796978905797, "step": 3910 }, { "epoch": 0.9458282950423217, "grad_norm": 1.7076087457243958, "learning_rate": 7.371947035596126e-09, "loss": 0.005376729182898998, "step": 3911 }, { "epoch": 0.9460701330108827, "grad_norm": 1.5808529385738463, "learning_rate": 7.306433420882718e-09, "loss": 0.008879403583705425, "step": 3912 }, { "epoch": 0.9463119709794438, "grad_norm": 0.8404893396811967, "learning_rate": 7.241210070136172e-09, "loss": 0.0030454688239842653, "step": 3913 }, { "epoch": 0.9465538089480048, "grad_norm": 1.1019976829849147, "learning_rate": 7.176277021781973e-09, "loss": 0.003552692010998726, "step": 3914 }, { "epoch": 0.9467956469165659, "grad_norm": 1.7437279487839459, "learning_rate": 7.111634314074577e-09, "loss": 0.004548930563032627, "step": 3915 }, { "epoch": 0.947037484885127, "grad_norm": 0.9838994279988011, "learning_rate": 7.047281985097464e-09, "loss": 0.0022507337853312492, "step": 3916 }, { "epoch": 0.947279322853688, "grad_norm": 1.777398075666907, "learning_rate": 6.983220072762863e-09, "loss": 0.01230649184435606, "step": 3917 }, { "epoch": 0.9475211608222491, "grad_norm": 1.49877086692751, "learning_rate": 6.919448614812029e-09, "loss": 0.005756217520684004, "step": 3918 }, { "epoch": 0.9477629987908102, "grad_norm": 1.3727017884128794, "learning_rate": 6.855967648815186e-09, "loss": 0.004628180991858244, "step": 3919 }, { "epoch": 0.9480048367593712, "grad_norm": 1.9535055742812177, "learning_rate": 6.7927772121711415e-09, "loss": 0.004995924886316061, "step": 3920 }, { "epoch": 0.9482466747279323, "grad_norm": 3.9007264700363815, "learning_rate": 6.729877342107837e-09, "loss": 0.01001033652573824, "step": 3921 }, { "epoch": 0.9484885126964934, "grad_norm": 1.6553904962268462, "learning_rate": 6.667268075681853e-09, "loss": 0.003969516139477491, "step": 3922 }, { "epoch": 0.9487303506650544, "grad_norm": 1.6050099524650185, "learning_rate": 6.60494944977863e-09, "loss": 0.004077553283423185, "step": 3923 }, { "epoch": 0.9489721886336154, "grad_norm": 1.0303592360851146, "learning_rate": 6.542921501112408e-09, "loss": 0.0016978922067210078, "step": 3924 }, { "epoch": 0.9492140266021766, "grad_norm": 1.2008494496294, "learning_rate": 6.481184266226125e-09, "loss": 0.005570605397224426, "step": 3925 }, { "epoch": 0.9494558645707376, "grad_norm": 0.5978038424410229, "learning_rate": 6.4197377814914075e-09, "loss": 0.001912464271299541, "step": 3926 }, { "epoch": 0.9496977025392986, "grad_norm": 3.815367380204405, "learning_rate": 6.358582083108744e-09, "loss": 0.01460131537169218, "step": 3927 }, { "epoch": 0.9499395405078598, "grad_norm": 2.6514223334012548, "learning_rate": 6.297717207107145e-09, "loss": 0.013416248373687267, "step": 3928 }, { "epoch": 0.9501813784764208, "grad_norm": 2.0419171141609618, "learning_rate": 6.237143189344318e-09, "loss": 0.004993713926523924, "step": 3929 }, { "epoch": 0.9504232164449818, "grad_norm": 5.362206263182422, "learning_rate": 6.176860065506828e-09, "loss": 0.005107214208692312, "step": 3930 }, { "epoch": 0.9506650544135429, "grad_norm": 0.6501859599069869, "learning_rate": 6.116867871109488e-09, "loss": 0.001843734527938068, "step": 3931 }, { "epoch": 0.950906892382104, "grad_norm": 3.6812893241703604, "learning_rate": 6.057166641496025e-09, "loss": 0.0015825954033061862, "step": 3932 }, { "epoch": 0.951148730350665, "grad_norm": 3.8862149423256445, "learning_rate": 5.9977564118385836e-09, "loss": 0.007470272481441498, "step": 3933 }, { "epoch": 0.9513905683192261, "grad_norm": 3.1362245700344897, "learning_rate": 5.938637217137943e-09, "loss": 0.007637396454811096, "step": 3934 }, { "epoch": 0.9516324062877872, "grad_norm": 10.596126577770569, "learning_rate": 5.879809092223464e-09, "loss": 0.008219579234719276, "step": 3935 }, { "epoch": 0.9518742442563483, "grad_norm": 2.0840742089458724, "learning_rate": 5.821272071752869e-09, "loss": 0.010750836692750454, "step": 3936 }, { "epoch": 0.9521160822249093, "grad_norm": 1.1296584228137334, "learning_rate": 5.763026190212516e-09, "loss": 0.003500505117699504, "step": 3937 }, { "epoch": 0.9523579201934703, "grad_norm": 0.9131871301501004, "learning_rate": 5.705071481917234e-09, "loss": 0.003963535185903311, "step": 3938 }, { "epoch": 0.9525997581620315, "grad_norm": 3.086359593093254, "learning_rate": 5.647407981010211e-09, "loss": 0.0048793829046189785, "step": 3939 }, { "epoch": 0.9528415961305925, "grad_norm": 1.9674730342058508, "learning_rate": 5.590035721463216e-09, "loss": 0.005661778151988983, "step": 3940 }, { "epoch": 0.9530834340991535, "grad_norm": 1.1987826588477317, "learning_rate": 5.532954737076323e-09, "loss": 0.0007098497590050101, "step": 3941 }, { "epoch": 0.9533252720677147, "grad_norm": 2.571790932026526, "learning_rate": 5.4761650614780775e-09, "loss": 0.00855888333171606, "step": 3942 }, { "epoch": 0.9535671100362757, "grad_norm": 2.64598952124611, "learning_rate": 5.419666728125383e-09, "loss": 0.1677737683057785, "step": 3943 }, { "epoch": 0.9538089480048367, "grad_norm": 0.44313281168392626, "learning_rate": 5.363459770303391e-09, "loss": 0.0011350329732522368, "step": 3944 }, { "epoch": 0.9540507859733979, "grad_norm": 0.21421376314981175, "learning_rate": 5.3075442211257815e-09, "loss": 0.0004617057857103646, "step": 3945 }, { "epoch": 0.9542926239419589, "grad_norm": 2.026511143047783, "learning_rate": 5.251920113534536e-09, "loss": 0.007461567409336567, "step": 3946 }, { "epoch": 0.9545344619105199, "grad_norm": 0.23704003635065143, "learning_rate": 5.196587480299719e-09, "loss": 0.0006678180652670562, "step": 3947 }, { "epoch": 0.954776299879081, "grad_norm": 1.084705398355186, "learning_rate": 5.141546354019921e-09, "loss": 0.0030519168358296156, "step": 3948 }, { "epoch": 0.9550181378476421, "grad_norm": 0.7199312685787693, "learning_rate": 5.08679676712187e-09, "loss": 0.002490381244570017, "step": 3949 }, { "epoch": 0.9552599758162031, "grad_norm": 0.525999267910221, "learning_rate": 5.0323387518605985e-09, "loss": 0.003934287466108799, "step": 3950 }, { "epoch": 0.9555018137847642, "grad_norm": 1.842152868256301, "learning_rate": 4.978172340319331e-09, "loss": 0.0037655585911124945, "step": 3951 }, { "epoch": 0.9557436517533253, "grad_norm": 3.926517494664241, "learning_rate": 4.92429756440943e-09, "loss": 0.00913938507437706, "step": 3952 }, { "epoch": 0.9559854897218864, "grad_norm": 2.2297547699725744, "learning_rate": 4.870714455870561e-09, "loss": 0.005378450267016888, "step": 3953 }, { "epoch": 0.9562273276904474, "grad_norm": 1.5946597806256788, "learning_rate": 4.817423046270585e-09, "loss": 0.004285781644284725, "step": 3954 }, { "epoch": 0.9564691656590084, "grad_norm": 2.1735476533918905, "learning_rate": 4.764423367005333e-09, "loss": 0.029396653175354004, "step": 3955 }, { "epoch": 0.9567110036275696, "grad_norm": 1.6301393033815443, "learning_rate": 4.711715449298881e-09, "loss": 0.0026821638457477093, "step": 3956 }, { "epoch": 0.9569528415961306, "grad_norm": 1.8289281313391486, "learning_rate": 4.6592993242035024e-09, "loss": 0.0033677832689136267, "step": 3957 }, { "epoch": 0.9571946795646916, "grad_norm": 0.6646441630955081, "learning_rate": 4.60717502259933e-09, "loss": 0.001604648889042437, "step": 3958 }, { "epoch": 0.9574365175332528, "grad_norm": 0.6841477919119413, "learning_rate": 4.555342575194798e-09, "loss": 0.001451479154638946, "step": 3959 }, { "epoch": 0.9576783555018138, "grad_norm": 0.6326604073038906, "learning_rate": 4.50380201252637e-09, "loss": 0.0010911149438470602, "step": 3960 }, { "epoch": 0.9579201934703748, "grad_norm": 0.9238363435796076, "learning_rate": 4.452553364958367e-09, "loss": 0.009261935018002987, "step": 3961 }, { "epoch": 0.958162031438936, "grad_norm": 1.946013081346845, "learning_rate": 4.401596662683416e-09, "loss": 0.004129176028072834, "step": 3962 }, { "epoch": 0.958403869407497, "grad_norm": 0.6509381276264692, "learning_rate": 4.35093193572178e-09, "loss": 0.0021910700015723705, "step": 3963 }, { "epoch": 0.958645707376058, "grad_norm": 2.4714058636048755, "learning_rate": 4.300559213922083e-09, "loss": 0.005715600214898586, "step": 3964 }, { "epoch": 0.9588875453446191, "grad_norm": 1.0824611770190864, "learning_rate": 4.250478526960755e-09, "loss": 0.0029371026903390884, "step": 3965 }, { "epoch": 0.9591293833131802, "grad_norm": 1.5622594755049144, "learning_rate": 4.200689904342081e-09, "loss": 0.0037111651618033648, "step": 3966 }, { "epoch": 0.9593712212817412, "grad_norm": 0.6154899915539209, "learning_rate": 4.151193375398432e-09, "loss": 0.0014259000308811665, "step": 3967 }, { "epoch": 0.9596130592503023, "grad_norm": 0.2192659257078046, "learning_rate": 4.101988969290038e-09, "loss": 0.00040996927418746054, "step": 3968 }, { "epoch": 0.9598548972188634, "grad_norm": 1.6305231512624498, "learning_rate": 4.0530767150050435e-09, "loss": 0.0025868581142276525, "step": 3969 }, { "epoch": 0.9600967351874244, "grad_norm": 1.045552113691063, "learning_rate": 4.004456641359455e-09, "loss": 0.006426618900150061, "step": 3970 }, { "epoch": 0.9603385731559855, "grad_norm": 1.6577264481493574, "learning_rate": 3.956128776997137e-09, "loss": 0.004374031908810139, "step": 3971 }, { "epoch": 0.9605804111245465, "grad_norm": 3.3574643467030345, "learning_rate": 3.908093150389813e-09, "loss": 0.010448026470839977, "step": 3972 }, { "epoch": 0.9608222490931076, "grad_norm": 1.0454365050882872, "learning_rate": 3.86034978983707e-09, "loss": 0.003954155370593071, "step": 3973 }, { "epoch": 0.9610640870616687, "grad_norm": 1.7589609250271714, "learning_rate": 3.812898723466296e-09, "loss": 0.006731429602950811, "step": 3974 }, { "epoch": 0.9613059250302297, "grad_norm": 1.3720196929897286, "learning_rate": 3.765739979232574e-09, "loss": 0.0013895605225116014, "step": 3975 }, { "epoch": 0.9615477629987909, "grad_norm": 2.086503710154059, "learning_rate": 3.7188735849190134e-09, "loss": 0.009752320125699043, "step": 3976 }, { "epoch": 0.9617896009673519, "grad_norm": 0.5312160182722787, "learning_rate": 3.6722995681361948e-09, "loss": 0.0021590394899249077, "step": 3977 }, { "epoch": 0.9620314389359129, "grad_norm": 3.5425069231780557, "learning_rate": 3.6260179563226688e-09, "loss": 0.007009327411651611, "step": 3978 }, { "epoch": 0.962273276904474, "grad_norm": 3.5687814708327936, "learning_rate": 3.5800287767446258e-09, "loss": 0.00420847162604332, "step": 3979 }, { "epoch": 0.9625151148730351, "grad_norm": 1.2733851130381153, "learning_rate": 3.5343320564959478e-09, "loss": 0.0013132179155945778, "step": 3980 }, { "epoch": 0.9627569528415961, "grad_norm": 0.9328275399224931, "learning_rate": 3.488927822498322e-09, "loss": 0.0025414281990379095, "step": 3981 }, { "epoch": 0.9629987908101572, "grad_norm": 15.50439966664826, "learning_rate": 3.4438161015010183e-09, "loss": 0.015823116526007652, "step": 3982 }, { "epoch": 0.9632406287787183, "grad_norm": 2.3544279890185575, "learning_rate": 3.398996920081054e-09, "loss": 0.003324490739032626, "step": 3983 }, { "epoch": 0.9634824667472793, "grad_norm": 17.376263650010745, "learning_rate": 3.3544703046430314e-09, "loss": 0.008549312129616737, "step": 3984 }, { "epoch": 0.9637243047158404, "grad_norm": 10.346451581565907, "learning_rate": 3.310236281419243e-09, "loss": 0.0038264442700892687, "step": 3985 }, { "epoch": 0.9639661426844015, "grad_norm": 0.9085585965898342, "learning_rate": 3.2662948764695664e-09, "loss": 0.002513353945687413, "step": 3986 }, { "epoch": 0.9642079806529625, "grad_norm": 0.36439773169647105, "learning_rate": 3.2226461156815153e-09, "loss": 0.0010285836178809404, "step": 3987 }, { "epoch": 0.9644498186215236, "grad_norm": 1.4766337977905857, "learning_rate": 3.1792900247701316e-09, "loss": 0.004256855696439743, "step": 3988 }, { "epoch": 0.9646916565900846, "grad_norm": 2.503467265801696, "learning_rate": 3.136226629278205e-09, "loss": 0.01048127468675375, "step": 3989 }, { "epoch": 0.9649334945586457, "grad_norm": 6.048864926166647, "learning_rate": 3.0934559545758855e-09, "loss": 0.07489524781703949, "step": 3990 }, { "epoch": 0.9651753325272068, "grad_norm": 2.0822550579747157, "learning_rate": 3.0509780258609618e-09, "loss": 0.00399010768160224, "step": 3991 }, { "epoch": 0.9654171704957678, "grad_norm": 4.863499465196045, "learning_rate": 3.0087928681588602e-09, "loss": 0.017993804067373276, "step": 3992 }, { "epoch": 0.965659008464329, "grad_norm": 2.5786465857278027, "learning_rate": 2.966900506322201e-09, "loss": 0.0051785907708108425, "step": 3993 }, { "epoch": 0.96590084643289, "grad_norm": 3.993827143444953, "learning_rate": 2.925300965031463e-09, "loss": 0.004670093767344952, "step": 3994 }, { "epoch": 0.966142684401451, "grad_norm": 2.4428439978728878, "learning_rate": 2.8839942687944875e-09, "loss": 0.0033400498796254396, "step": 3995 }, { "epoch": 0.966384522370012, "grad_norm": 0.7392560053101217, "learning_rate": 2.8429804419465296e-09, "loss": 0.0012738642981275916, "step": 3996 }, { "epoch": 0.9666263603385732, "grad_norm": 0.7311631542082037, "learning_rate": 2.8022595086503174e-09, "loss": 0.0025424016639590263, "step": 3997 }, { "epoch": 0.9668681983071342, "grad_norm": 2.2419786892888096, "learning_rate": 2.761831492896105e-09, "loss": 0.010661709122359753, "step": 3998 }, { "epoch": 0.9671100362756953, "grad_norm": 1.3376710909908085, "learning_rate": 2.7216964185015623e-09, "loss": 0.0013973484747111797, "step": 3999 }, { "epoch": 0.9673518742442564, "grad_norm": 1.5708194994765199, "learning_rate": 2.6818543091116086e-09, "loss": 0.005627765320241451, "step": 4000 }, { "epoch": 0.9675937122128174, "grad_norm": 1.0975691203191598, "learning_rate": 2.642305188198857e-09, "loss": 0.0035541229881346226, "step": 4001 }, { "epoch": 0.9678355501813785, "grad_norm": 1.0473460886873616, "learning_rate": 2.6030490790630575e-09, "loss": 0.004675349220633507, "step": 4002 }, { "epoch": 0.9680773881499395, "grad_norm": 1.4618852316009, "learning_rate": 2.5640860048314895e-09, "loss": 0.0005493045900948346, "step": 4003 }, { "epoch": 0.9683192261185006, "grad_norm": 1.5814577911961392, "learning_rate": 2.525415988458679e-09, "loss": 0.009219684638082981, "step": 4004 }, { "epoch": 0.9685610640870617, "grad_norm": 4.607413491720176, "learning_rate": 2.487039052726625e-09, "loss": 0.009276372380554676, "step": 4005 }, { "epoch": 0.9688029020556227, "grad_norm": 1.0697930786661796, "learning_rate": 2.4489552202445195e-09, "loss": 0.0034629262518137693, "step": 4006 }, { "epoch": 0.9690447400241838, "grad_norm": 15.74380709981451, "learning_rate": 2.4111645134490266e-09, "loss": 0.007384005468338728, "step": 4007 }, { "epoch": 0.9692865779927449, "grad_norm": 3.360579849041275, "learning_rate": 2.3736669546040587e-09, "loss": 0.002935973461717367, "step": 4008 }, { "epoch": 0.9695284159613059, "grad_norm": 1.4716599898382832, "learning_rate": 2.336462565800723e-09, "loss": 0.001661611720919609, "step": 4009 }, { "epoch": 0.969770253929867, "grad_norm": 1.2139527556136784, "learning_rate": 2.299551368957542e-09, "loss": 0.004833772778511047, "step": 4010 }, { "epoch": 0.9700120918984281, "grad_norm": 2.4736733585344073, "learning_rate": 2.262933385820287e-09, "loss": 0.010198699310421944, "step": 4011 }, { "epoch": 0.9702539298669891, "grad_norm": 5.8293658501428265, "learning_rate": 2.2266086379619798e-09, "loss": 0.016899680718779564, "step": 4012 }, { "epoch": 0.9704957678355501, "grad_norm": 4.461617396711331, "learning_rate": 2.1905771467827795e-09, "loss": 0.020511046051979065, "step": 4013 }, { "epoch": 0.9707376058041113, "grad_norm": 19.211786325159412, "learning_rate": 2.1548389335102612e-09, "loss": 0.012472531758248806, "step": 4014 }, { "epoch": 0.9709794437726723, "grad_norm": 6.646342529009479, "learning_rate": 2.119394019199028e-09, "loss": 0.014378082938492298, "step": 4015 }, { "epoch": 0.9712212817412333, "grad_norm": 0.9587535054996338, "learning_rate": 2.0842424247310974e-09, "loss": 0.0018150199903175235, "step": 4016 }, { "epoch": 0.9714631197097945, "grad_norm": 4.414827846981701, "learning_rate": 2.0493841708155157e-09, "loss": 0.013188275508582592, "step": 4017 }, { "epoch": 0.9717049576783555, "grad_norm": 2.401607107324014, "learning_rate": 2.0148192779885776e-09, "loss": 0.00914072897285223, "step": 4018 }, { "epoch": 0.9719467956469166, "grad_norm": 2.1321824064512964, "learning_rate": 1.980547766613716e-09, "loss": 0.006997639779001474, "step": 4019 }, { "epoch": 0.9721886336154776, "grad_norm": 1.0063817958931423, "learning_rate": 1.946569656881614e-09, "loss": 0.00486306706443429, "step": 4020 }, { "epoch": 0.9724304715840387, "grad_norm": 6.621727232576459, "learning_rate": 1.9128849688099246e-09, "loss": 0.013141922652721405, "step": 4021 }, { "epoch": 0.9726723095525998, "grad_norm": 1.4378550834016455, "learning_rate": 1.879493722243608e-09, "loss": 0.004657211247831583, "step": 4022 }, { "epoch": 0.9729141475211608, "grad_norm": 4.489131264533774, "learning_rate": 1.84639593685465e-09, "loss": 0.004171473439782858, "step": 4023 }, { "epoch": 0.9731559854897219, "grad_norm": 1.1052519008341624, "learning_rate": 1.8135916321422307e-09, "loss": 0.004196403082460165, "step": 4024 }, { "epoch": 0.973397823458283, "grad_norm": 1.2289310478229585, "learning_rate": 1.7810808274325017e-09, "loss": 0.009801844134926796, "step": 4025 }, { "epoch": 0.973639661426844, "grad_norm": 2.151436800155413, "learning_rate": 1.7488635418788644e-09, "loss": 0.001692567951977253, "step": 4026 }, { "epoch": 0.973881499395405, "grad_norm": 1.745582201286285, "learning_rate": 1.7169397944615804e-09, "loss": 0.008524197153747082, "step": 4027 }, { "epoch": 0.9741233373639662, "grad_norm": 1.8739897418868898, "learning_rate": 1.685309603988272e-09, "loss": 0.012268083170056343, "step": 4028 }, { "epoch": 0.9743651753325272, "grad_norm": 40.83148554333433, "learning_rate": 1.6539729890933107e-09, "loss": 0.06018717214465141, "step": 4029 }, { "epoch": 0.9746070133010882, "grad_norm": 1.700811665432372, "learning_rate": 1.622929968238318e-09, "loss": 0.00823731254786253, "step": 4030 }, { "epoch": 0.9748488512696494, "grad_norm": 2.50974167516121, "learning_rate": 1.592180559711942e-09, "loss": 0.0032746109645813704, "step": 4031 }, { "epoch": 0.9750906892382104, "grad_norm": 3.761557098655799, "learning_rate": 1.5617247816296364e-09, "loss": 0.009679260663688183, "step": 4032 }, { "epoch": 0.9753325272067714, "grad_norm": 0.6097850659541653, "learning_rate": 1.5315626519341597e-09, "loss": 0.0006810352788306773, "step": 4033 }, { "epoch": 0.9755743651753326, "grad_norm": 1.2235040841955818, "learning_rate": 1.5016941883951307e-09, "loss": 0.0032686498016119003, "step": 4034 }, { "epoch": 0.9758162031438936, "grad_norm": 3.395095857781634, "learning_rate": 1.472119408609085e-09, "loss": 0.002813472179695964, "step": 4035 }, { "epoch": 0.9760580411124546, "grad_norm": 2.170533315453469, "learning_rate": 1.4428383299996405e-09, "loss": 0.010410628281533718, "step": 4036 }, { "epoch": 0.9762998790810157, "grad_norm": 4.244711268877774, "learning_rate": 1.4138509698173873e-09, "loss": 0.01153011154383421, "step": 4037 }, { "epoch": 0.9765417170495768, "grad_norm": 3.251295156288405, "learning_rate": 1.3851573451398869e-09, "loss": 0.0017689465312287211, "step": 4038 }, { "epoch": 0.9767835550181379, "grad_norm": 8.989882812315761, "learning_rate": 1.3567574728715613e-09, "loss": 0.014493589289486408, "step": 4039 }, { "epoch": 0.9770253929866989, "grad_norm": 0.8498771117149776, "learning_rate": 1.328651369743805e-09, "loss": 0.002059716498479247, "step": 4040 }, { "epoch": 0.97726723095526, "grad_norm": 7.378224048756613, "learning_rate": 1.3008390523149838e-09, "loss": 0.0032581507693976164, "step": 4041 }, { "epoch": 0.9775090689238211, "grad_norm": 2.4811588611721764, "learning_rate": 1.2733205369703237e-09, "loss": 0.007961070165038109, "step": 4042 }, { "epoch": 0.9777509068923821, "grad_norm": 0.8081597463715767, "learning_rate": 1.246095839922079e-09, "loss": 0.0013444471405819058, "step": 4043 }, { "epoch": 0.9779927448609431, "grad_norm": 0.6786069459474614, "learning_rate": 1.2191649772093083e-09, "loss": 0.003920670598745346, "step": 4044 }, { "epoch": 0.9782345828295043, "grad_norm": 0.8503175901191806, "learning_rate": 1.1925279646979314e-09, "loss": 0.001933367340825498, "step": 4045 }, { "epoch": 0.9784764207980653, "grad_norm": 0.13846088579300758, "learning_rate": 1.1661848180807842e-09, "loss": 0.0004158194351475686, "step": 4046 }, { "epoch": 0.9787182587666263, "grad_norm": 0.40745818511153764, "learning_rate": 1.140135552877619e-09, "loss": 0.0008731186389923096, "step": 4047 }, { "epoch": 0.9789600967351875, "grad_norm": 0.9680491980692979, "learning_rate": 1.1143801844350486e-09, "loss": 0.0023195066023617983, "step": 4048 }, { "epoch": 0.9792019347037485, "grad_norm": 3.1399284478099, "learning_rate": 1.0889187279264912e-09, "loss": 0.006741201970726252, "step": 4049 }, { "epoch": 0.9794437726723095, "grad_norm": 1.7866021821325355, "learning_rate": 1.0637511983521697e-09, "loss": 0.02129696123301983, "step": 4050 }, { "epoch": 0.9796856106408706, "grad_norm": 2.212137155567709, "learning_rate": 1.0388776105392793e-09, "loss": 0.009169545955955982, "step": 4051 }, { "epoch": 0.9799274486094317, "grad_norm": 2.0915819612910354, "learning_rate": 1.014297979141765e-09, "loss": 0.006063432898372412, "step": 4052 }, { "epoch": 0.9801692865779927, "grad_norm": 1.6475986119004071, "learning_rate": 9.900123186403208e-10, "loss": 0.005845473147928715, "step": 4053 }, { "epoch": 0.9804111245465538, "grad_norm": 0.9184502589564097, "learning_rate": 9.660206433426132e-10, "loss": 0.005760711617767811, "step": 4054 }, { "epoch": 0.9806529625151149, "grad_norm": 2.8308264904345224, "learning_rate": 9.423229673830025e-10, "loss": 0.0022438177838921547, "step": 4055 }, { "epoch": 0.9808948004836759, "grad_norm": 3.801868806940685, "learning_rate": 9.18919304722654e-10, "loss": 0.0036957343108952045, "step": 4056 }, { "epoch": 0.981136638452237, "grad_norm": 0.7336303184942726, "learning_rate": 8.958096691494832e-10, "loss": 0.003921166528016329, "step": 4057 }, { "epoch": 0.9813784764207981, "grad_norm": 2.1283068961956153, "learning_rate": 8.729940742783215e-10, "loss": 0.002765670185908675, "step": 4058 }, { "epoch": 0.9816203143893591, "grad_norm": 1.8603312768653695, "learning_rate": 8.504725335505836e-10, "loss": 0.0031525862868875265, "step": 4059 }, { "epoch": 0.9818621523579202, "grad_norm": 2.794296222858456, "learning_rate": 8.282450602346002e-10, "loss": 0.007000108249485493, "step": 4060 }, { "epoch": 0.9821039903264812, "grad_norm": 2.573199533482301, "learning_rate": 8.063116674253967e-10, "loss": 0.0053541213274002075, "step": 4061 }, { "epoch": 0.9823458282950424, "grad_norm": 4.146326429457162, "learning_rate": 7.846723680447475e-10, "loss": 0.006555384490638971, "step": 4062 }, { "epoch": 0.9825876662636034, "grad_norm": 0.6112792493574541, "learning_rate": 7.633271748411219e-10, "loss": 0.000661155441775918, "step": 4063 }, { "epoch": 0.9828295042321644, "grad_norm": 1.5956605739159149, "learning_rate": 7.422761003897382e-10, "loss": 0.002235561376437545, "step": 4064 }, { "epoch": 0.9830713422007256, "grad_norm": 1.4722626407406252, "learning_rate": 7.215191570926759e-10, "loss": 0.0031631384044885635, "step": 4065 }, { "epoch": 0.9833131801692866, "grad_norm": 6.077342005061576, "learning_rate": 7.010563571785421e-10, "loss": 0.01941276155412197, "step": 4066 }, { "epoch": 0.9835550181378476, "grad_norm": 1.2566349074618195, "learning_rate": 6.808877127026935e-10, "loss": 0.004696803167462349, "step": 4067 }, { "epoch": 0.9837968561064087, "grad_norm": 2.400001850808467, "learning_rate": 6.61013235547292e-10, "loss": 0.0015292386524379253, "step": 4068 }, { "epoch": 0.9840386940749698, "grad_norm": 2.7865930923272573, "learning_rate": 6.414329374210826e-10, "loss": 0.0067425980232656, "step": 4069 }, { "epoch": 0.9842805320435308, "grad_norm": 1.0949255308482084, "learning_rate": 6.2214682985956e-10, "loss": 0.005724875256419182, "step": 4070 }, { "epoch": 0.9845223700120919, "grad_norm": 2.087786978308694, "learning_rate": 6.031549242248579e-10, "loss": 0.0042801410891115665, "step": 4071 }, { "epoch": 0.984764207980653, "grad_norm": 1.5303713488809518, "learning_rate": 5.844572317058594e-10, "loss": 0.0036540774162858725, "step": 4072 }, { "epoch": 0.985006045949214, "grad_norm": 1.0106502396092532, "learning_rate": 5.660537633180306e-10, "loss": 0.004210175480693579, "step": 4073 }, { "epoch": 0.9852478839177751, "grad_norm": 1.6800384132987154, "learning_rate": 5.479445299035323e-10, "loss": 0.007287794258445501, "step": 4074 }, { "epoch": 0.9854897218863361, "grad_norm": 0.804150982499498, "learning_rate": 5.301295421312191e-10, "loss": 0.0044294483959674835, "step": 4075 }, { "epoch": 0.9857315598548972, "grad_norm": 1.636058667668705, "learning_rate": 5.126088104964732e-10, "loss": 0.016477510333061218, "step": 4076 }, { "epoch": 0.9859733978234583, "grad_norm": 1.8794589463853266, "learning_rate": 4.953823453214823e-10, "loss": 0.008509422652423382, "step": 4077 }, { "epoch": 0.9862152357920193, "grad_norm": 1.7083915435612156, "learning_rate": 4.784501567549615e-10, "loss": 0.005276221316307783, "step": 4078 }, { "epoch": 0.9864570737605804, "grad_norm": 1.0896621685527086, "learning_rate": 4.6181225477232024e-10, "loss": 0.004241366870701313, "step": 4079 }, { "epoch": 0.9866989117291415, "grad_norm": 2.911931893990051, "learning_rate": 4.4546864917555104e-10, "loss": 0.024085450917482376, "step": 4080 }, { "epoch": 0.9869407496977025, "grad_norm": 1.465625040933273, "learning_rate": 4.294193495932297e-10, "loss": 0.003986096475273371, "step": 4081 }, { "epoch": 0.9871825876662637, "grad_norm": 2.281584783554106, "learning_rate": 4.13664365480626e-10, "loss": 0.008324920199811459, "step": 4082 }, { "epoch": 0.9874244256348247, "grad_norm": 0.9337961921630422, "learning_rate": 3.9820370611953756e-10, "loss": 0.0016614452470093966, "step": 4083 }, { "epoch": 0.9876662636033857, "grad_norm": 2.1874858411967972, "learning_rate": 3.8303738061851165e-10, "loss": 0.003916155081242323, "step": 4084 }, { "epoch": 0.9879081015719468, "grad_norm": 3.3045526975357777, "learning_rate": 3.6816539791251213e-10, "loss": 0.009404443204402924, "step": 4085 }, { "epoch": 0.9881499395405079, "grad_norm": 0.5207142893841764, "learning_rate": 3.535877667631415e-10, "loss": 0.0011002979008480906, "step": 4086 }, { "epoch": 0.9883917775090689, "grad_norm": 0.4659898906309945, "learning_rate": 3.393044957586966e-10, "loss": 0.000990746426396072, "step": 4087 }, { "epoch": 0.98863361547763, "grad_norm": 3.404239831560153, "learning_rate": 3.253155933140017e-10, "loss": 0.006113268435001373, "step": 4088 }, { "epoch": 0.9888754534461911, "grad_norm": 1.3260462633004804, "learning_rate": 3.1162106767035346e-10, "loss": 0.0019183382391929626, "step": 4089 }, { "epoch": 0.9891172914147521, "grad_norm": 2.515604444656128, "learning_rate": 2.982209268957425e-10, "loss": 0.0008024621638469398, "step": 4090 }, { "epoch": 0.9893591293833132, "grad_norm": 1.0387587981367592, "learning_rate": 2.8511517888463176e-10, "loss": 0.0011360892094671726, "step": 4091 }, { "epoch": 0.9896009673518742, "grad_norm": 3.098463606815211, "learning_rate": 2.7230383135817823e-10, "loss": 0.008875925093889236, "step": 4092 }, { "epoch": 0.9898428053204353, "grad_norm": 0.7897233939860696, "learning_rate": 2.597868918640111e-10, "loss": 0.0012112902477383614, "step": 4093 }, { "epoch": 0.9900846432889964, "grad_norm": 2.0400397891935707, "learning_rate": 2.475643677762873e-10, "loss": 0.00945673231035471, "step": 4094 }, { "epoch": 0.9903264812575574, "grad_norm": 1.5628767722896637, "learning_rate": 2.356362662957467e-10, "loss": 0.011352494359016418, "step": 4095 }, { "epoch": 0.9905683192261185, "grad_norm": 0.6136126755546337, "learning_rate": 2.240025944497126e-10, "loss": 0.0010921111097559333, "step": 4096 }, { "epoch": 0.9908101571946796, "grad_norm": 1.6616055792824218, "learning_rate": 2.1266335909192468e-10, "loss": 0.00405642855912447, "step": 4097 }, { "epoch": 0.9910519951632406, "grad_norm": 4.737261122320851, "learning_rate": 2.0161856690287248e-10, "loss": 0.005287782289087772, "step": 4098 }, { "epoch": 0.9912938331318016, "grad_norm": 0.4029002636874246, "learning_rate": 1.9086822438935114e-10, "loss": 0.0009320862591266632, "step": 4099 }, { "epoch": 0.9915356711003628, "grad_norm": 1.5253991665700322, "learning_rate": 1.8041233788484989e-10, "loss": 0.005230294074863195, "step": 4100 }, { "epoch": 0.9917775090689238, "grad_norm": 0.9824280637333382, "learning_rate": 1.7025091354921917e-10, "loss": 0.003479953156784177, "step": 4101 }, { "epoch": 0.9920193470374848, "grad_norm": 1.3490480101539686, "learning_rate": 1.6038395736905909e-10, "loss": 0.001769967027939856, "step": 4102 }, { "epoch": 0.992261185006046, "grad_norm": 0.8863370343047505, "learning_rate": 1.508114751572753e-10, "loss": 0.005738499108701944, "step": 4103 }, { "epoch": 0.992503022974607, "grad_norm": 3.665234059352769, "learning_rate": 1.4153347255335678e-10, "loss": 0.006820791866630316, "step": 4104 }, { "epoch": 0.992744860943168, "grad_norm": 0.6814780729005808, "learning_rate": 1.325499550233755e-10, "loss": 0.001052793231792748, "step": 4105 }, { "epoch": 0.9929866989117292, "grad_norm": 2.8738942547557387, "learning_rate": 1.2386092785982018e-10, "loss": 0.0013720308197662234, "step": 4106 }, { "epoch": 0.9932285368802902, "grad_norm": 3.4149659571675164, "learning_rate": 1.1546639618170707e-10, "loss": 0.013543883338570595, "step": 4107 }, { "epoch": 0.9934703748488513, "grad_norm": 2.1609800414249642, "learning_rate": 1.0736636493463569e-10, "loss": 0.006712470203638077, "step": 4108 }, { "epoch": 0.9937122128174123, "grad_norm": 4.708199314877262, "learning_rate": 9.956083889051115e-11, "loss": 0.0031089491676539183, "step": 4109 }, { "epoch": 0.9939540507859734, "grad_norm": 1.4084597110901182, "learning_rate": 9.204982264798821e-11, "loss": 0.002392339054495096, "step": 4110 }, { "epoch": 0.9941958887545345, "grad_norm": 1.5232645121216757, "learning_rate": 8.483332063197179e-11, "loss": 0.007248077541589737, "step": 4111 }, { "epoch": 0.9944377267230955, "grad_norm": 0.9376906572422569, "learning_rate": 7.791133709406095e-11, "loss": 0.0020131911151111126, "step": 4112 }, { "epoch": 0.9946795646916566, "grad_norm": 3.035997459951042, "learning_rate": 7.128387611216035e-11, "loss": 0.009225918911397457, "step": 4113 }, { "epoch": 0.9949214026602177, "grad_norm": 1.1707695675692102, "learning_rate": 6.495094159081338e-11, "loss": 0.00754587072879076, "step": 4114 }, { "epoch": 0.9951632406287787, "grad_norm": 1.7830729944465527, "learning_rate": 5.891253726098e-11, "loss": 0.0021671156864613295, "step": 4115 }, { "epoch": 0.9954050785973397, "grad_norm": 9.890529051931276, "learning_rate": 5.3168666680092346e-11, "loss": 0.008056608960032463, "step": 4116 }, { "epoch": 0.9956469165659009, "grad_norm": 1.3544146358627274, "learning_rate": 4.7719333232110196e-11, "loss": 0.005007066763937473, "step": 4117 }, { "epoch": 0.9958887545344619, "grad_norm": 1.1563665546064426, "learning_rate": 4.2564540127409954e-11, "loss": 0.004758245311677456, "step": 4118 }, { "epoch": 0.9961305925030229, "grad_norm": 1.1501731062470655, "learning_rate": 3.7704290402840174e-11, "loss": 0.001651348196901381, "step": 4119 }, { "epoch": 0.9963724304715841, "grad_norm": 0.7533137060705013, "learning_rate": 3.3138586921777064e-11, "loss": 0.0014969356125220656, "step": 4120 }, { "epoch": 0.9966142684401451, "grad_norm": 0.33379263724247643, "learning_rate": 2.8867432374068967e-11, "loss": 0.0007475664024241269, "step": 4121 }, { "epoch": 0.9968561064087061, "grad_norm": 2.0698443000733424, "learning_rate": 2.4890829276036362e-11, "loss": 0.005427918862551451, "step": 4122 }, { "epoch": 0.9970979443772672, "grad_norm": 4.5237144602736015, "learning_rate": 2.1208779970360856e-11, "loss": 0.004531654994934797, "step": 4123 }, { "epoch": 0.9973397823458283, "grad_norm": 5.176320453064066, "learning_rate": 1.7821286626307218e-11, "loss": 0.018148725852370262, "step": 4124 }, { "epoch": 0.9975816203143894, "grad_norm": 0.780377034367234, "learning_rate": 1.4728351239556845e-11, "loss": 0.00158101471606642, "step": 4125 }, { "epoch": 0.9978234582829504, "grad_norm": 18.67318666673792, "learning_rate": 1.192997563231879e-11, "loss": 0.004529970698058605, "step": 4126 }, { "epoch": 0.9980652962515115, "grad_norm": 2.3329689316461644, "learning_rate": 9.426161453218729e-12, "loss": 0.007268534507602453, "step": 4127 }, { "epoch": 0.9983071342200726, "grad_norm": 3.5483491733432238, "learning_rate": 7.2169101772989735e-12, "loss": 0.006322264671325684, "step": 4128 }, { "epoch": 0.9985489721886336, "grad_norm": 22.972445604128335, "learning_rate": 5.3022231061294885e-12, "loss": 0.00874937605112791, "step": 4129 }, { "epoch": 0.9987908101571947, "grad_norm": 1.9957923520140348, "learning_rate": 3.682101367752377e-12, "loss": 0.0061253453604876995, "step": 4130 }, { "epoch": 0.9990326481257558, "grad_norm": 2.3719564466594445, "learning_rate": 2.3565459166263737e-12, "loss": 0.005828424822539091, "step": 4131 }, { "epoch": 0.9992744860943168, "grad_norm": 1.2630679513061194, "learning_rate": 1.3255575336268421e-12, "loss": 0.002282144268974662, "step": 4132 }, { "epoch": 0.9995163240628778, "grad_norm": 2.8152422822331924, "learning_rate": 5.891368262123109e-13, "loss": 0.015120125375688076, "step": 4133 }, { "epoch": 0.999758162031439, "grad_norm": 0.6194237027041638, "learning_rate": 1.4728422825793785e-13, "loss": 0.0020859544165432453, "step": 4134 }, { "epoch": 1.0, "grad_norm": 1.069686594538242, "learning_rate": 0.0, "loss": 0.0016096113249659538, "step": 4135 } ], "logging_steps": 1, "max_steps": 4135, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.57036929822779e+18, "train_batch_size": 6, "trial_name": null, "trial_params": null }