{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 250, "global_step": 1272, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015723270440251573, "grad_norm": 6.609381042259111, "learning_rate": 1.794871794871795e-08, "loss": 2.2121, "step": 1 }, { "epoch": 0.0031446540880503146, "grad_norm": 6.5863108918091955, "learning_rate": 3.58974358974359e-08, "loss": 2.1816, "step": 2 }, { "epoch": 0.0047169811320754715, "grad_norm": 6.617655516365913, "learning_rate": 5.384615384615385e-08, "loss": 2.0071, "step": 3 }, { "epoch": 0.006289308176100629, "grad_norm": 6.602018175612389, "learning_rate": 7.17948717948718e-08, "loss": 2.1686, "step": 4 }, { "epoch": 0.007861635220125786, "grad_norm": 6.591379792761674, "learning_rate": 8.974358974358973e-08, "loss": 2.1805, "step": 5 }, { "epoch": 0.009433962264150943, "grad_norm": 6.7248109628555, "learning_rate": 1.076923076923077e-07, "loss": 2.2243, "step": 6 }, { "epoch": 0.0110062893081761, "grad_norm": 6.3995094275133715, "learning_rate": 1.2564102564102563e-07, "loss": 2.3763, "step": 7 }, { "epoch": 0.012578616352201259, "grad_norm": 6.296067719776998, "learning_rate": 1.435897435897436e-07, "loss": 2.298, "step": 8 }, { "epoch": 0.014150943396226415, "grad_norm": 6.778527459516128, "learning_rate": 1.6153846153846155e-07, "loss": 2.1494, "step": 9 }, { "epoch": 0.015723270440251572, "grad_norm": 6.594906718693546, "learning_rate": 1.7948717948717946e-07, "loss": 2.0825, "step": 10 }, { "epoch": 0.01729559748427673, "grad_norm": 6.7082225379425, "learning_rate": 1.9743589743589741e-07, "loss": 2.3567, "step": 11 }, { "epoch": 0.018867924528301886, "grad_norm": 6.907609205305907, "learning_rate": 2.153846153846154e-07, "loss": 1.9809, "step": 12 }, { "epoch": 0.020440251572327043, "grad_norm": 5.9359307337140335, "learning_rate": 2.333333333333333e-07, "loss": 2.226, "step": 13 }, { "epoch": 0.0220125786163522, "grad_norm": 6.466867096117202, "learning_rate": 2.5128205128205126e-07, "loss": 2.1425, "step": 14 }, { "epoch": 0.02358490566037736, "grad_norm": 6.755674157390508, "learning_rate": 2.692307692307692e-07, "loss": 2.2433, "step": 15 }, { "epoch": 0.025157232704402517, "grad_norm": 6.002945550670174, "learning_rate": 2.871794871794872e-07, "loss": 2.3029, "step": 16 }, { "epoch": 0.026729559748427674, "grad_norm": 6.733820715319282, "learning_rate": 3.0512820512820514e-07, "loss": 2.6192, "step": 17 }, { "epoch": 0.02830188679245283, "grad_norm": 6.814207366203309, "learning_rate": 3.230769230769231e-07, "loss": 2.1716, "step": 18 }, { "epoch": 0.029874213836477988, "grad_norm": 6.649348126638431, "learning_rate": 3.41025641025641e-07, "loss": 2.0726, "step": 19 }, { "epoch": 0.031446540880503145, "grad_norm": 6.419164036165898, "learning_rate": 3.589743589743589e-07, "loss": 2.2484, "step": 20 }, { "epoch": 0.0330188679245283, "grad_norm": 6.381550340344472, "learning_rate": 3.7692307692307687e-07, "loss": 2.0979, "step": 21 }, { "epoch": 0.03459119496855346, "grad_norm": 6.470517353012837, "learning_rate": 3.9487179487179483e-07, "loss": 2.0564, "step": 22 }, { "epoch": 0.036163522012578615, "grad_norm": 6.817629340287558, "learning_rate": 4.128205128205128e-07, "loss": 2.2869, "step": 23 }, { "epoch": 0.03773584905660377, "grad_norm": 6.415896720614442, "learning_rate": 4.307692307692308e-07, "loss": 2.1393, "step": 24 }, { "epoch": 0.03930817610062893, "grad_norm": 6.758474027008109, "learning_rate": 4.4871794871794876e-07, "loss": 1.9729, "step": 25 }, { "epoch": 0.040880503144654086, "grad_norm": 5.796971726063335, "learning_rate": 4.666666666666666e-07, "loss": 2.1908, "step": 26 }, { "epoch": 0.04245283018867924, "grad_norm": 5.790656105640873, "learning_rate": 4.846153846153846e-07, "loss": 2.0891, "step": 27 }, { "epoch": 0.0440251572327044, "grad_norm": 6.190761522380566, "learning_rate": 5.025641025641025e-07, "loss": 1.962, "step": 28 }, { "epoch": 0.04559748427672956, "grad_norm": 6.653844953656282, "learning_rate": 5.205128205128205e-07, "loss": 2.1133, "step": 29 }, { "epoch": 0.04716981132075472, "grad_norm": 6.3047026890345395, "learning_rate": 5.384615384615384e-07, "loss": 2.2912, "step": 30 }, { "epoch": 0.04874213836477988, "grad_norm": 6.8087836989796875, "learning_rate": 5.564102564102564e-07, "loss": 2.2732, "step": 31 }, { "epoch": 0.050314465408805034, "grad_norm": 6.756218951330409, "learning_rate": 5.743589743589744e-07, "loss": 2.1865, "step": 32 }, { "epoch": 0.05188679245283019, "grad_norm": 5.892752864062046, "learning_rate": 5.923076923076923e-07, "loss": 1.9986, "step": 33 }, { "epoch": 0.05345911949685535, "grad_norm": 5.9596785550097495, "learning_rate": 6.102564102564103e-07, "loss": 2.3515, "step": 34 }, { "epoch": 0.055031446540880505, "grad_norm": 5.817501243045476, "learning_rate": 6.282051282051282e-07, "loss": 2.1328, "step": 35 }, { "epoch": 0.05660377358490566, "grad_norm": 5.555025157362233, "learning_rate": 6.461538461538462e-07, "loss": 2.0956, "step": 36 }, { "epoch": 0.05817610062893082, "grad_norm": 4.66564680385535, "learning_rate": 6.64102564102564e-07, "loss": 2.1224, "step": 37 }, { "epoch": 0.059748427672955975, "grad_norm": 4.682038309064788, "learning_rate": 6.82051282051282e-07, "loss": 1.8465, "step": 38 }, { "epoch": 0.06132075471698113, "grad_norm": 4.520420535983458, "learning_rate": 7e-07, "loss": 2.2189, "step": 39 }, { "epoch": 0.06289308176100629, "grad_norm": 4.7915678520719105, "learning_rate": 6.999988639134823e-07, "loss": 2.1286, "step": 40 }, { "epoch": 0.06446540880503145, "grad_norm": 4.612604930012734, "learning_rate": 6.999954556613048e-07, "loss": 2.0893, "step": 41 }, { "epoch": 0.0660377358490566, "grad_norm": 4.247600840809702, "learning_rate": 6.999897752655936e-07, "loss": 2.1122, "step": 42 }, { "epoch": 0.06761006289308176, "grad_norm": 3.9712862450759245, "learning_rate": 6.999818227632253e-07, "loss": 2.1013, "step": 43 }, { "epoch": 0.06918238993710692, "grad_norm": 4.081791414690391, "learning_rate": 6.99971598205827e-07, "loss": 2.0214, "step": 44 }, { "epoch": 0.07075471698113207, "grad_norm": 3.7509350102589827, "learning_rate": 6.999591016597756e-07, "loss": 2.1367, "step": 45 }, { "epoch": 0.07232704402515723, "grad_norm": 3.2808902294971616, "learning_rate": 6.999443332061978e-07, "loss": 2.2945, "step": 46 }, { "epoch": 0.07389937106918239, "grad_norm": 3.3036022925167607, "learning_rate": 6.999272929409694e-07, "loss": 2.0151, "step": 47 }, { "epoch": 0.07547169811320754, "grad_norm": 3.3210132249336812, "learning_rate": 6.999079809747144e-07, "loss": 2.0188, "step": 48 }, { "epoch": 0.0770440251572327, "grad_norm": 3.0229418180275007, "learning_rate": 6.998863974328045e-07, "loss": 2.0217, "step": 49 }, { "epoch": 0.07861635220125786, "grad_norm": 2.9951351846113115, "learning_rate": 6.998625424553584e-07, "loss": 2.0772, "step": 50 }, { "epoch": 0.08018867924528301, "grad_norm": 2.991842161373637, "learning_rate": 6.99836416197241e-07, "loss": 1.9351, "step": 51 }, { "epoch": 0.08176100628930817, "grad_norm": 3.330478724889984, "learning_rate": 6.998080188280617e-07, "loss": 2.2126, "step": 52 }, { "epoch": 0.08333333333333333, "grad_norm": 2.9629299494694283, "learning_rate": 6.99777350532174e-07, "loss": 2.3491, "step": 53 }, { "epoch": 0.08490566037735849, "grad_norm": 2.9627317429014983, "learning_rate": 6.997444115086743e-07, "loss": 1.9852, "step": 54 }, { "epoch": 0.08647798742138364, "grad_norm": 2.840879727132188, "learning_rate": 6.997092019714002e-07, "loss": 1.939, "step": 55 }, { "epoch": 0.0880503144654088, "grad_norm": 2.9520960895336614, "learning_rate": 6.996717221489292e-07, "loss": 2.1034, "step": 56 }, { "epoch": 0.08962264150943396, "grad_norm": 3.015493625809345, "learning_rate": 6.996319722845775e-07, "loss": 1.9913, "step": 57 }, { "epoch": 0.09119496855345911, "grad_norm": 2.694895109102356, "learning_rate": 6.995899526363981e-07, "loss": 1.9909, "step": 58 }, { "epoch": 0.09276729559748427, "grad_norm": 2.746013591310517, "learning_rate": 6.995456634771794e-07, "loss": 2.0852, "step": 59 }, { "epoch": 0.09433962264150944, "grad_norm": 2.5054959395703302, "learning_rate": 6.994991050944431e-07, "loss": 2.1756, "step": 60 }, { "epoch": 0.0959119496855346, "grad_norm": 2.7415517103140172, "learning_rate": 6.994502777904428e-07, "loss": 2.0021, "step": 61 }, { "epoch": 0.09748427672955975, "grad_norm": 2.591902596902185, "learning_rate": 6.993991818821612e-07, "loss": 1.8463, "step": 62 }, { "epoch": 0.09905660377358491, "grad_norm": 2.5850508327560617, "learning_rate": 6.993458177013095e-07, "loss": 1.7361, "step": 63 }, { "epoch": 0.10062893081761007, "grad_norm": 2.74240553360653, "learning_rate": 6.992901855943236e-07, "loss": 1.9652, "step": 64 }, { "epoch": 0.10220125786163523, "grad_norm": 2.7901671418215286, "learning_rate": 6.992322859223628e-07, "loss": 1.9407, "step": 65 }, { "epoch": 0.10377358490566038, "grad_norm": 2.592826985818942, "learning_rate": 6.991721190613075e-07, "loss": 1.8256, "step": 66 }, { "epoch": 0.10534591194968554, "grad_norm": 2.4593316329973978, "learning_rate": 6.991096854017562e-07, "loss": 1.9612, "step": 67 }, { "epoch": 0.1069182389937107, "grad_norm": 2.731581032864078, "learning_rate": 6.990449853490233e-07, "loss": 1.8444, "step": 68 }, { "epoch": 0.10849056603773585, "grad_norm": 2.8559336852628707, "learning_rate": 6.989780193231367e-07, "loss": 1.8695, "step": 69 }, { "epoch": 0.11006289308176101, "grad_norm": 2.7165119219767035, "learning_rate": 6.989087877588348e-07, "loss": 1.9658, "step": 70 }, { "epoch": 0.11163522012578617, "grad_norm": 2.829079631550892, "learning_rate": 6.988372911055634e-07, "loss": 2.1087, "step": 71 }, { "epoch": 0.11320754716981132, "grad_norm": 2.507099458574326, "learning_rate": 6.987635298274733e-07, "loss": 1.8418, "step": 72 }, { "epoch": 0.11477987421383648, "grad_norm": 2.4685464129673726, "learning_rate": 6.986875044034171e-07, "loss": 1.8306, "step": 73 }, { "epoch": 0.11635220125786164, "grad_norm": 2.5785937060649187, "learning_rate": 6.986092153269459e-07, "loss": 1.9845, "step": 74 }, { "epoch": 0.1179245283018868, "grad_norm": 2.4185902663784278, "learning_rate": 6.985286631063063e-07, "loss": 2.1262, "step": 75 }, { "epoch": 0.11949685534591195, "grad_norm": 2.6484961851922395, "learning_rate": 6.984458482644373e-07, "loss": 1.9226, "step": 76 }, { "epoch": 0.12106918238993711, "grad_norm": 2.624404494631164, "learning_rate": 6.983607713389663e-07, "loss": 1.9014, "step": 77 }, { "epoch": 0.12264150943396226, "grad_norm": 2.5143123431618717, "learning_rate": 6.982734328822063e-07, "loss": 1.7563, "step": 78 }, { "epoch": 0.12421383647798742, "grad_norm": 2.288866534494279, "learning_rate": 6.981838334611518e-07, "loss": 2.1576, "step": 79 }, { "epoch": 0.12578616352201258, "grad_norm": 2.4924549190125225, "learning_rate": 6.980919736574753e-07, "loss": 1.9265, "step": 80 }, { "epoch": 0.12735849056603774, "grad_norm": 2.6450039556661724, "learning_rate": 6.979978540675234e-07, "loss": 2.2224, "step": 81 }, { "epoch": 0.1289308176100629, "grad_norm": 2.3901087100576786, "learning_rate": 6.979014753023135e-07, "loss": 1.7917, "step": 82 }, { "epoch": 0.13050314465408805, "grad_norm": 2.2742892989609835, "learning_rate": 6.978028379875291e-07, "loss": 1.7802, "step": 83 }, { "epoch": 0.1320754716981132, "grad_norm": 2.314703480458537, "learning_rate": 6.977019427635158e-07, "loss": 2.0916, "step": 84 }, { "epoch": 0.13364779874213836, "grad_norm": 2.304575288629416, "learning_rate": 6.975987902852778e-07, "loss": 2.0544, "step": 85 }, { "epoch": 0.13522012578616352, "grad_norm": 2.2373974568043353, "learning_rate": 6.974933812224731e-07, "loss": 1.8365, "step": 86 }, { "epoch": 0.13679245283018868, "grad_norm": 2.3841316516671265, "learning_rate": 6.973857162594091e-07, "loss": 1.9519, "step": 87 }, { "epoch": 0.13836477987421383, "grad_norm": 2.268505399665297, "learning_rate": 6.972757960950384e-07, "loss": 2.0843, "step": 88 }, { "epoch": 0.139937106918239, "grad_norm": 2.389940820234018, "learning_rate": 6.971636214429544e-07, "loss": 2.1255, "step": 89 }, { "epoch": 0.14150943396226415, "grad_norm": 2.2702701401800294, "learning_rate": 6.970491930313862e-07, "loss": 1.7951, "step": 90 }, { "epoch": 0.1430817610062893, "grad_norm": 2.34707263205848, "learning_rate": 6.969325116031943e-07, "loss": 2.0553, "step": 91 }, { "epoch": 0.14465408805031446, "grad_norm": 2.138066100619543, "learning_rate": 6.968135779158653e-07, "loss": 1.8837, "step": 92 }, { "epoch": 0.14622641509433962, "grad_norm": 2.148707619444591, "learning_rate": 6.96692392741508e-07, "loss": 1.8783, "step": 93 }, { "epoch": 0.14779874213836477, "grad_norm": 2.2157095129853266, "learning_rate": 6.965689568668468e-07, "loss": 1.9493, "step": 94 }, { "epoch": 0.14937106918238993, "grad_norm": 2.6174396924182117, "learning_rate": 6.964432710932181e-07, "loss": 1.9476, "step": 95 }, { "epoch": 0.1509433962264151, "grad_norm": 2.423080479151295, "learning_rate": 6.963153362365641e-07, "loss": 1.9149, "step": 96 }, { "epoch": 0.15251572327044025, "grad_norm": 2.305911644784875, "learning_rate": 6.961851531274282e-07, "loss": 1.8686, "step": 97 }, { "epoch": 0.1540880503144654, "grad_norm": 2.5427101543409116, "learning_rate": 6.960527226109489e-07, "loss": 1.8722, "step": 98 }, { "epoch": 0.15566037735849056, "grad_norm": 2.2044082229047075, "learning_rate": 6.959180455468553e-07, "loss": 1.9485, "step": 99 }, { "epoch": 0.15723270440251572, "grad_norm": 2.337924598657549, "learning_rate": 6.9578112280946e-07, "loss": 1.8701, "step": 100 }, { "epoch": 0.15880503144654087, "grad_norm": 2.231091232316695, "learning_rate": 6.956419552876552e-07, "loss": 2.1111, "step": 101 }, { "epoch": 0.16037735849056603, "grad_norm": 2.1902798087554647, "learning_rate": 6.955005438849058e-07, "loss": 1.9696, "step": 102 }, { "epoch": 0.1619496855345912, "grad_norm": 2.391741538055505, "learning_rate": 6.953568895192436e-07, "loss": 1.9091, "step": 103 }, { "epoch": 0.16352201257861634, "grad_norm": 2.440486302600468, "learning_rate": 6.952109931232616e-07, "loss": 1.8899, "step": 104 }, { "epoch": 0.1650943396226415, "grad_norm": 3.1036603066171664, "learning_rate": 6.95062855644108e-07, "loss": 1.9706, "step": 105 }, { "epoch": 0.16666666666666666, "grad_norm": 2.176053941318332, "learning_rate": 6.9491247804348e-07, "loss": 2.0294, "step": 106 }, { "epoch": 0.16823899371069181, "grad_norm": 2.2955252073229415, "learning_rate": 6.947598612976173e-07, "loss": 1.8521, "step": 107 }, { "epoch": 0.16981132075471697, "grad_norm": 2.3179610819374856, "learning_rate": 6.946050063972961e-07, "loss": 2.0428, "step": 108 }, { "epoch": 0.17138364779874213, "grad_norm": 2.3353177807842442, "learning_rate": 6.944479143478225e-07, "loss": 1.6969, "step": 109 }, { "epoch": 0.17295597484276728, "grad_norm": 2.1751580486972903, "learning_rate": 6.942885861690258e-07, "loss": 2.1661, "step": 110 }, { "epoch": 0.17452830188679244, "grad_norm": 2.3502010826835873, "learning_rate": 6.941270228952526e-07, "loss": 1.8967, "step": 111 }, { "epoch": 0.1761006289308176, "grad_norm": 2.080568954254506, "learning_rate": 6.939632255753589e-07, "loss": 2.0775, "step": 112 }, { "epoch": 0.17767295597484276, "grad_norm": 2.0874560399766278, "learning_rate": 6.937971952727045e-07, "loss": 1.8397, "step": 113 }, { "epoch": 0.1792452830188679, "grad_norm": 2.235119037617934, "learning_rate": 6.936289330651452e-07, "loss": 2.0157, "step": 114 }, { "epoch": 0.18081761006289307, "grad_norm": 2.21645540026438, "learning_rate": 6.934584400450265e-07, "loss": 1.7553, "step": 115 }, { "epoch": 0.18238993710691823, "grad_norm": 2.278410487078488, "learning_rate": 6.932857173191757e-07, "loss": 1.8963, "step": 116 }, { "epoch": 0.18396226415094338, "grad_norm": 2.288449048592226, "learning_rate": 6.931107660088955e-07, "loss": 2.0707, "step": 117 }, { "epoch": 0.18553459119496854, "grad_norm": 2.540412196668586, "learning_rate": 6.929335872499565e-07, "loss": 1.9994, "step": 118 }, { "epoch": 0.1871069182389937, "grad_norm": 2.295470373319651, "learning_rate": 6.927541821925892e-07, "loss": 1.9994, "step": 119 }, { "epoch": 0.18867924528301888, "grad_norm": 2.4658706344060457, "learning_rate": 6.925725520014778e-07, "loss": 2.1002, "step": 120 }, { "epoch": 0.19025157232704404, "grad_norm": 1.9874120823661252, "learning_rate": 6.923886978557511e-07, "loss": 1.891, "step": 121 }, { "epoch": 0.1918238993710692, "grad_norm": 2.1896911235137266, "learning_rate": 6.922026209489765e-07, "loss": 2.0324, "step": 122 }, { "epoch": 0.19339622641509435, "grad_norm": 2.3459367974500687, "learning_rate": 6.920143224891506e-07, "loss": 1.9346, "step": 123 }, { "epoch": 0.1949685534591195, "grad_norm": 2.231755148378703, "learning_rate": 6.918238036986926e-07, "loss": 1.8345, "step": 124 }, { "epoch": 0.19654088050314467, "grad_norm": 2.279350328924178, "learning_rate": 6.91631065814436e-07, "loss": 1.8852, "step": 125 }, { "epoch": 0.19811320754716982, "grad_norm": 2.411938466651685, "learning_rate": 6.914361100876199e-07, "loss": 1.8085, "step": 126 }, { "epoch": 0.19968553459119498, "grad_norm": 2.2484290713562403, "learning_rate": 6.912389377838822e-07, "loss": 1.7151, "step": 127 }, { "epoch": 0.20125786163522014, "grad_norm": 2.27114158109643, "learning_rate": 6.910395501832502e-07, "loss": 1.9463, "step": 128 }, { "epoch": 0.2028301886792453, "grad_norm": 2.1360495919260765, "learning_rate": 6.908379485801327e-07, "loss": 2.0075, "step": 129 }, { "epoch": 0.20440251572327045, "grad_norm": 2.234634251477468, "learning_rate": 6.906341342833119e-07, "loss": 1.8222, "step": 130 }, { "epoch": 0.2059748427672956, "grad_norm": 2.354620761146764, "learning_rate": 6.904281086159346e-07, "loss": 1.8145, "step": 131 }, { "epoch": 0.20754716981132076, "grad_norm": 2.009458317182769, "learning_rate": 6.902198729155034e-07, "loss": 1.8962, "step": 132 }, { "epoch": 0.20911949685534592, "grad_norm": 2.2143437574616938, "learning_rate": 6.900094285338686e-07, "loss": 1.9396, "step": 133 }, { "epoch": 0.21069182389937108, "grad_norm": 2.2176657548464904, "learning_rate": 6.897967768372188e-07, "loss": 1.8453, "step": 134 }, { "epoch": 0.21226415094339623, "grad_norm": 2.30003499713903, "learning_rate": 6.895819192060725e-07, "loss": 1.899, "step": 135 }, { "epoch": 0.2138364779874214, "grad_norm": 2.090050155082603, "learning_rate": 6.893648570352687e-07, "loss": 1.899, "step": 136 }, { "epoch": 0.21540880503144655, "grad_norm": 2.2138416672788406, "learning_rate": 6.891455917339585e-07, "loss": 1.6803, "step": 137 }, { "epoch": 0.2169811320754717, "grad_norm": 2.275731103613008, "learning_rate": 6.889241247255951e-07, "loss": 1.8866, "step": 138 }, { "epoch": 0.21855345911949686, "grad_norm": 2.2018144326894764, "learning_rate": 6.887004574479256e-07, "loss": 1.7657, "step": 139 }, { "epoch": 0.22012578616352202, "grad_norm": 2.2574275757278586, "learning_rate": 6.884745913529804e-07, "loss": 1.8978, "step": 140 }, { "epoch": 0.22169811320754718, "grad_norm": 2.4947452295545323, "learning_rate": 6.882465279070651e-07, "loss": 2.1121, "step": 141 }, { "epoch": 0.22327044025157233, "grad_norm": 2.321836578962341, "learning_rate": 6.880162685907497e-07, "loss": 1.8622, "step": 142 }, { "epoch": 0.2248427672955975, "grad_norm": 2.234624773604752, "learning_rate": 6.877838148988602e-07, "loss": 1.8192, "step": 143 }, { "epoch": 0.22641509433962265, "grad_norm": 2.1827223385186834, "learning_rate": 6.87549168340468e-07, "loss": 1.9317, "step": 144 }, { "epoch": 0.2279874213836478, "grad_norm": 2.2058319167479987, "learning_rate": 6.873123304388804e-07, "loss": 2.1388, "step": 145 }, { "epoch": 0.22955974842767296, "grad_norm": 2.336708828184526, "learning_rate": 6.870733027316308e-07, "loss": 2.0724, "step": 146 }, { "epoch": 0.23113207547169812, "grad_norm": 2.352880258314341, "learning_rate": 6.868320867704689e-07, "loss": 1.7697, "step": 147 }, { "epoch": 0.23270440251572327, "grad_norm": 2.062384155984072, "learning_rate": 6.865886841213497e-07, "loss": 2.1101, "step": 148 }, { "epoch": 0.23427672955974843, "grad_norm": 2.1530391868129626, "learning_rate": 6.863430963644248e-07, "loss": 1.9232, "step": 149 }, { "epoch": 0.2358490566037736, "grad_norm": 2.1758733285873113, "learning_rate": 6.860953250940309e-07, "loss": 1.8156, "step": 150 }, { "epoch": 0.23742138364779874, "grad_norm": 2.3117896294416593, "learning_rate": 6.8584537191868e-07, "loss": 1.8781, "step": 151 }, { "epoch": 0.2389937106918239, "grad_norm": 2.00639335375149, "learning_rate": 6.855932384610488e-07, "loss": 1.8219, "step": 152 }, { "epoch": 0.24056603773584906, "grad_norm": 2.10561171383, "learning_rate": 6.853389263579684e-07, "loss": 1.7392, "step": 153 }, { "epoch": 0.24213836477987422, "grad_norm": 2.4444601099856795, "learning_rate": 6.850824372604132e-07, "loss": 1.9414, "step": 154 }, { "epoch": 0.24371069182389937, "grad_norm": 2.4242659961108495, "learning_rate": 6.848237728334909e-07, "loss": 1.839, "step": 155 }, { "epoch": 0.24528301886792453, "grad_norm": 2.2032265431848597, "learning_rate": 6.845629347564309e-07, "loss": 1.8687, "step": 156 }, { "epoch": 0.2468553459119497, "grad_norm": 2.2129417804509024, "learning_rate": 6.842999247225737e-07, "loss": 1.8612, "step": 157 }, { "epoch": 0.24842767295597484, "grad_norm": 2.305447975246766, "learning_rate": 6.840347444393605e-07, "loss": 2.0208, "step": 158 }, { "epoch": 0.25, "grad_norm": 2.122283205654406, "learning_rate": 6.837673956283212e-07, "loss": 1.8356, "step": 159 }, { "epoch": 0.25157232704402516, "grad_norm": 2.332633354247406, "learning_rate": 6.834978800250636e-07, "loss": 1.902, "step": 160 }, { "epoch": 0.2531446540880503, "grad_norm": 2.376667609853809, "learning_rate": 6.832261993792623e-07, "loss": 2.062, "step": 161 }, { "epoch": 0.25471698113207547, "grad_norm": 2.220933142644897, "learning_rate": 6.829523554546471e-07, "loss": 2.0558, "step": 162 }, { "epoch": 0.2562893081761006, "grad_norm": 2.033225497961393, "learning_rate": 6.826763500289916e-07, "loss": 1.8403, "step": 163 }, { "epoch": 0.2578616352201258, "grad_norm": 2.1916641547727576, "learning_rate": 6.823981848941018e-07, "loss": 2.0203, "step": 164 }, { "epoch": 0.25943396226415094, "grad_norm": 2.247501037485472, "learning_rate": 6.821178618558043e-07, "loss": 1.775, "step": 165 }, { "epoch": 0.2610062893081761, "grad_norm": 2.1659369555105723, "learning_rate": 6.818353827339348e-07, "loss": 2.1447, "step": 166 }, { "epoch": 0.26257861635220126, "grad_norm": 1.8752494947917147, "learning_rate": 6.815507493623258e-07, "loss": 1.8795, "step": 167 }, { "epoch": 0.2641509433962264, "grad_norm": 2.1874497821008663, "learning_rate": 6.812639635887953e-07, "loss": 1.9524, "step": 168 }, { "epoch": 0.26572327044025157, "grad_norm": 2.1976673497867862, "learning_rate": 6.809750272751346e-07, "loss": 1.8337, "step": 169 }, { "epoch": 0.2672955974842767, "grad_norm": 2.2148302103458386, "learning_rate": 6.806839422970957e-07, "loss": 1.8816, "step": 170 }, { "epoch": 0.2688679245283019, "grad_norm": 2.1811128155566775, "learning_rate": 6.803907105443801e-07, "loss": 1.8994, "step": 171 }, { "epoch": 0.27044025157232704, "grad_norm": 2.309399739378513, "learning_rate": 6.800953339206256e-07, "loss": 1.8565, "step": 172 }, { "epoch": 0.2720125786163522, "grad_norm": 2.2522360613496737, "learning_rate": 6.797978143433946e-07, "loss": 1.827, "step": 173 }, { "epoch": 0.27358490566037735, "grad_norm": 2.1425200823912856, "learning_rate": 6.794981537441612e-07, "loss": 1.8159, "step": 174 }, { "epoch": 0.2751572327044025, "grad_norm": 2.2841020729940946, "learning_rate": 6.791963540682988e-07, "loss": 1.8514, "step": 175 }, { "epoch": 0.27672955974842767, "grad_norm": 2.225194894385099, "learning_rate": 6.788924172750679e-07, "loss": 1.9328, "step": 176 }, { "epoch": 0.2783018867924528, "grad_norm": 2.09421585410522, "learning_rate": 6.785863453376026e-07, "loss": 1.7486, "step": 177 }, { "epoch": 0.279874213836478, "grad_norm": 2.333617044046103, "learning_rate": 6.782781402428983e-07, "loss": 1.772, "step": 178 }, { "epoch": 0.28144654088050314, "grad_norm": 2.178844778690441, "learning_rate": 6.779678039917989e-07, "loss": 1.7756, "step": 179 }, { "epoch": 0.2830188679245283, "grad_norm": 2.1517711064133644, "learning_rate": 6.776553385989832e-07, "loss": 1.9176, "step": 180 }, { "epoch": 0.28459119496855345, "grad_norm": 2.2680430076464133, "learning_rate": 6.773407460929527e-07, "loss": 1.893, "step": 181 }, { "epoch": 0.2861635220125786, "grad_norm": 2.3746361775333686, "learning_rate": 6.770240285160175e-07, "loss": 1.943, "step": 182 }, { "epoch": 0.28773584905660377, "grad_norm": 2.1349785564403216, "learning_rate": 6.76705187924284e-07, "loss": 1.7189, "step": 183 }, { "epoch": 0.2893081761006289, "grad_norm": 2.1525208745450097, "learning_rate": 6.763842263876403e-07, "loss": 1.9378, "step": 184 }, { "epoch": 0.2908805031446541, "grad_norm": 2.1312473298021097, "learning_rate": 6.760611459897444e-07, "loss": 1.6941, "step": 185 }, { "epoch": 0.29245283018867924, "grad_norm": 2.1276458698264302, "learning_rate": 6.757359488280091e-07, "loss": 1.8998, "step": 186 }, { "epoch": 0.2940251572327044, "grad_norm": 2.304634105409158, "learning_rate": 6.754086370135895e-07, "loss": 1.9326, "step": 187 }, { "epoch": 0.29559748427672955, "grad_norm": 1.9930028532322692, "learning_rate": 6.750792126713684e-07, "loss": 1.8537, "step": 188 }, { "epoch": 0.2971698113207547, "grad_norm": 2.1595911760439668, "learning_rate": 6.747476779399436e-07, "loss": 1.8215, "step": 189 }, { "epoch": 0.29874213836477986, "grad_norm": 2.218806657186074, "learning_rate": 6.744140349716127e-07, "loss": 1.7037, "step": 190 }, { "epoch": 0.300314465408805, "grad_norm": 2.216745170195917, "learning_rate": 6.740782859323604e-07, "loss": 2.0752, "step": 191 }, { "epoch": 0.3018867924528302, "grad_norm": 2.077203113815276, "learning_rate": 6.737404330018436e-07, "loss": 1.9836, "step": 192 }, { "epoch": 0.30345911949685533, "grad_norm": 2.192942695759785, "learning_rate": 6.734004783733772e-07, "loss": 1.6992, "step": 193 }, { "epoch": 0.3050314465408805, "grad_norm": 2.5819835895165806, "learning_rate": 6.730584242539209e-07, "loss": 2.4884, "step": 194 }, { "epoch": 0.30660377358490565, "grad_norm": 2.127248431265692, "learning_rate": 6.727142728640633e-07, "loss": 1.9178, "step": 195 }, { "epoch": 0.3081761006289308, "grad_norm": 1.9502319695954797, "learning_rate": 6.72368026438009e-07, "loss": 1.871, "step": 196 }, { "epoch": 0.30974842767295596, "grad_norm": 2.2313690170600045, "learning_rate": 6.720196872235629e-07, "loss": 1.6974, "step": 197 }, { "epoch": 0.3113207547169811, "grad_norm": 2.1737546254948574, "learning_rate": 6.716692574821164e-07, "loss": 1.9516, "step": 198 }, { "epoch": 0.3128930817610063, "grad_norm": 2.089055152215892, "learning_rate": 6.713167394886324e-07, "loss": 1.7015, "step": 199 }, { "epoch": 0.31446540880503143, "grad_norm": 2.1241406637075047, "learning_rate": 6.709621355316306e-07, "loss": 1.913, "step": 200 }, { "epoch": 0.3160377358490566, "grad_norm": 2.1582457787965343, "learning_rate": 6.706054479131726e-07, "loss": 1.9263, "step": 201 }, { "epoch": 0.31761006289308175, "grad_norm": 2.3223231854168134, "learning_rate": 6.702466789488468e-07, "loss": 1.7648, "step": 202 }, { "epoch": 0.3191823899371069, "grad_norm": 2.0439680852310778, "learning_rate": 6.698858309677537e-07, "loss": 1.8036, "step": 203 }, { "epoch": 0.32075471698113206, "grad_norm": 2.134474063516094, "learning_rate": 6.695229063124907e-07, "loss": 1.8974, "step": 204 }, { "epoch": 0.3223270440251572, "grad_norm": 2.1079010765204083, "learning_rate": 6.691579073391366e-07, "loss": 1.8955, "step": 205 }, { "epoch": 0.3238993710691824, "grad_norm": 2.027683856704508, "learning_rate": 6.687908364172367e-07, "loss": 1.9092, "step": 206 }, { "epoch": 0.32547169811320753, "grad_norm": 2.246810336956572, "learning_rate": 6.684216959297871e-07, "loss": 1.679, "step": 207 }, { "epoch": 0.3270440251572327, "grad_norm": 2.413641384514312, "learning_rate": 6.680504882732195e-07, "loss": 1.7324, "step": 208 }, { "epoch": 0.32861635220125784, "grad_norm": 2.183679584470248, "learning_rate": 6.676772158573852e-07, "loss": 1.7184, "step": 209 }, { "epoch": 0.330188679245283, "grad_norm": 1.953756725302565, "learning_rate": 6.673018811055401e-07, "loss": 1.8537, "step": 210 }, { "epoch": 0.33176100628930816, "grad_norm": 2.7587554774622616, "learning_rate": 6.669244864543286e-07, "loss": 1.7957, "step": 211 }, { "epoch": 0.3333333333333333, "grad_norm": 2.0690029827560874, "learning_rate": 6.665450343537673e-07, "loss": 1.5945, "step": 212 }, { "epoch": 0.33490566037735847, "grad_norm": 1.9370210759038746, "learning_rate": 6.661635272672305e-07, "loss": 1.8212, "step": 213 }, { "epoch": 0.33647798742138363, "grad_norm": 2.3157909721510634, "learning_rate": 6.657799676714325e-07, "loss": 1.8253, "step": 214 }, { "epoch": 0.3380503144654088, "grad_norm": 2.408718498079195, "learning_rate": 6.653943580564128e-07, "loss": 1.9183, "step": 215 }, { "epoch": 0.33962264150943394, "grad_norm": 2.2428481931189115, "learning_rate": 6.650067009255193e-07, "loss": 1.9667, "step": 216 }, { "epoch": 0.3411949685534591, "grad_norm": 2.3060581158229754, "learning_rate": 6.646169987953921e-07, "loss": 1.7651, "step": 217 }, { "epoch": 0.34276729559748426, "grad_norm": 2.272099574457665, "learning_rate": 6.642252541959475e-07, "loss": 1.9895, "step": 218 }, { "epoch": 0.3443396226415094, "grad_norm": 2.2813406057736016, "learning_rate": 6.638314696703613e-07, "loss": 2.1897, "step": 219 }, { "epoch": 0.34591194968553457, "grad_norm": 2.18496346075307, "learning_rate": 6.634356477750522e-07, "loss": 1.8511, "step": 220 }, { "epoch": 0.3474842767295597, "grad_norm": 1.9232426664702658, "learning_rate": 6.630377910796655e-07, "loss": 1.8494, "step": 221 }, { "epoch": 0.3490566037735849, "grad_norm": 1.9818215355449529, "learning_rate": 6.626379021670561e-07, "loss": 1.9395, "step": 222 }, { "epoch": 0.35062893081761004, "grad_norm": 2.244779710254365, "learning_rate": 6.622359836332723e-07, "loss": 1.9374, "step": 223 }, { "epoch": 0.3522012578616352, "grad_norm": 2.1262049775384333, "learning_rate": 6.618320380875379e-07, "loss": 1.879, "step": 224 }, { "epoch": 0.35377358490566035, "grad_norm": 1.9601702919734405, "learning_rate": 6.614260681522365e-07, "loss": 1.7618, "step": 225 }, { "epoch": 0.3553459119496855, "grad_norm": 2.177142901528055, "learning_rate": 6.610180764628937e-07, "loss": 1.7007, "step": 226 }, { "epoch": 0.35691823899371067, "grad_norm": 2.207301297000178, "learning_rate": 6.606080656681599e-07, "loss": 2.0777, "step": 227 }, { "epoch": 0.3584905660377358, "grad_norm": 2.325333907285728, "learning_rate": 6.601960384297937e-07, "loss": 1.8572, "step": 228 }, { "epoch": 0.360062893081761, "grad_norm": 2.176590475529821, "learning_rate": 6.597819974226442e-07, "loss": 1.911, "step": 229 }, { "epoch": 0.36163522012578614, "grad_norm": 2.231029983251665, "learning_rate": 6.593659453346336e-07, "loss": 1.8546, "step": 230 }, { "epoch": 0.3632075471698113, "grad_norm": 2.09941453798109, "learning_rate": 6.589478848667402e-07, "loss": 1.7259, "step": 231 }, { "epoch": 0.36477987421383645, "grad_norm": 2.207729891460013, "learning_rate": 6.585278187329803e-07, "loss": 2.0615, "step": 232 }, { "epoch": 0.3663522012578616, "grad_norm": 2.160194675811721, "learning_rate": 6.581057496603907e-07, "loss": 1.5577, "step": 233 }, { "epoch": 0.36792452830188677, "grad_norm": 2.1933688787719117, "learning_rate": 6.576816803890115e-07, "loss": 1.8231, "step": 234 }, { "epoch": 0.3694968553459119, "grad_norm": 2.0304475960191275, "learning_rate": 6.572556136718678e-07, "loss": 1.7399, "step": 235 }, { "epoch": 0.3710691823899371, "grad_norm": 2.0372413629068973, "learning_rate": 6.568275522749514e-07, "loss": 1.7089, "step": 236 }, { "epoch": 0.37264150943396224, "grad_norm": 2.2572731149937297, "learning_rate": 6.563974989772047e-07, "loss": 2.0235, "step": 237 }, { "epoch": 0.3742138364779874, "grad_norm": 2.0112586745579155, "learning_rate": 6.559654565704999e-07, "loss": 1.6913, "step": 238 }, { "epoch": 0.3757861635220126, "grad_norm": 2.020887524725217, "learning_rate": 6.555314278596232e-07, "loss": 1.9021, "step": 239 }, { "epoch": 0.37735849056603776, "grad_norm": 2.062052722629033, "learning_rate": 6.550954156622559e-07, "loss": 1.8555, "step": 240 }, { "epoch": 0.3789308176100629, "grad_norm": 2.146665256772467, "learning_rate": 6.546574228089551e-07, "loss": 1.9002, "step": 241 }, { "epoch": 0.3805031446540881, "grad_norm": 2.238196549782288, "learning_rate": 6.542174521431369e-07, "loss": 1.9807, "step": 242 }, { "epoch": 0.38207547169811323, "grad_norm": 2.111526491095184, "learning_rate": 6.537755065210571e-07, "loss": 1.9268, "step": 243 }, { "epoch": 0.3836477987421384, "grad_norm": 1.9805520877642875, "learning_rate": 6.533315888117923e-07, "loss": 1.8807, "step": 244 }, { "epoch": 0.38522012578616355, "grad_norm": 2.1505066292288313, "learning_rate": 6.528857018972223e-07, "loss": 1.8132, "step": 245 }, { "epoch": 0.3867924528301887, "grad_norm": 2.1682916632827913, "learning_rate": 6.524378486720107e-07, "loss": 1.8423, "step": 246 }, { "epoch": 0.38836477987421386, "grad_norm": 2.131056377037645, "learning_rate": 6.519880320435858e-07, "loss": 1.9015, "step": 247 }, { "epoch": 0.389937106918239, "grad_norm": 2.3204970045437094, "learning_rate": 6.515362549321227e-07, "loss": 1.9385, "step": 248 }, { "epoch": 0.3915094339622642, "grad_norm": 2.051436724730336, "learning_rate": 6.510825202705237e-07, "loss": 1.7837, "step": 249 }, { "epoch": 0.39308176100628933, "grad_norm": 2.1183882128382785, "learning_rate": 6.506268310043991e-07, "loss": 1.9752, "step": 250 }, { "epoch": 0.39308176100628933, "eval_sat2_MCTS_chains_SFT_val_loss": 1.7381622791290283, "eval_sat2_MCTS_chains_SFT_val_runtime": 91.8844, "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.188, "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.404, "step": 250 }, { "epoch": 0.3946540880503145, "grad_norm": 2.162981802199106, "learning_rate": 6.501691900920484e-07, "loss": 1.9521, "step": 251 }, { "epoch": 0.39622641509433965, "grad_norm": 1.9032699491782559, "learning_rate": 6.497096005044415e-07, "loss": 1.6293, "step": 252 }, { "epoch": 0.3977987421383648, "grad_norm": 2.160690442124648, "learning_rate": 6.492480652251983e-07, "loss": 1.6815, "step": 253 }, { "epoch": 0.39937106918238996, "grad_norm": 2.200251276319585, "learning_rate": 6.487845872505703e-07, "loss": 1.6191, "step": 254 }, { "epoch": 0.4009433962264151, "grad_norm": 2.1153136111562834, "learning_rate": 6.483191695894209e-07, "loss": 1.7317, "step": 255 }, { "epoch": 0.4025157232704403, "grad_norm": 2.136580109576303, "learning_rate": 6.478518152632057e-07, "loss": 1.6782, "step": 256 }, { "epoch": 0.40408805031446543, "grad_norm": 1.993099001469681, "learning_rate": 6.473825273059529e-07, "loss": 1.9167, "step": 257 }, { "epoch": 0.4056603773584906, "grad_norm": 2.348061223274077, "learning_rate": 6.469113087642439e-07, "loss": 1.8473, "step": 258 }, { "epoch": 0.40723270440251574, "grad_norm": 1.9562289450639117, "learning_rate": 6.46438162697193e-07, "loss": 1.8061, "step": 259 }, { "epoch": 0.4088050314465409, "grad_norm": 2.340728555526634, "learning_rate": 6.459630921764282e-07, "loss": 1.5497, "step": 260 }, { "epoch": 0.41037735849056606, "grad_norm": 2.2351691072279096, "learning_rate": 6.454861002860705e-07, "loss": 2.1132, "step": 261 }, { "epoch": 0.4119496855345912, "grad_norm": 2.1290065996236613, "learning_rate": 6.450071901227147e-07, "loss": 1.7072, "step": 262 }, { "epoch": 0.41352201257861637, "grad_norm": 2.3103776756030054, "learning_rate": 6.445263647954086e-07, "loss": 2.0191, "step": 263 }, { "epoch": 0.41509433962264153, "grad_norm": 2.1783834129569137, "learning_rate": 6.440436274256333e-07, "loss": 1.6563, "step": 264 }, { "epoch": 0.4166666666666667, "grad_norm": 2.3188145114913747, "learning_rate": 6.435589811472823e-07, "loss": 2.1497, "step": 265 }, { "epoch": 0.41823899371069184, "grad_norm": 2.2490188310531303, "learning_rate": 6.430724291066422e-07, "loss": 1.6207, "step": 266 }, { "epoch": 0.419811320754717, "grad_norm": 2.0324238162321877, "learning_rate": 6.425839744623711e-07, "loss": 1.8477, "step": 267 }, { "epoch": 0.42138364779874216, "grad_norm": 2.1650874047734505, "learning_rate": 6.420936203854793e-07, "loss": 1.7936, "step": 268 }, { "epoch": 0.4229559748427673, "grad_norm": 2.157646176152135, "learning_rate": 6.416013700593074e-07, "loss": 2.1013, "step": 269 }, { "epoch": 0.42452830188679247, "grad_norm": 2.1369610613705863, "learning_rate": 6.411072266795066e-07, "loss": 1.6905, "step": 270 }, { "epoch": 0.4261006289308176, "grad_norm": 2.108532284547873, "learning_rate": 6.406111934540178e-07, "loss": 1.78, "step": 271 }, { "epoch": 0.4276729559748428, "grad_norm": 2.142315122232749, "learning_rate": 6.401132736030504e-07, "loss": 1.738, "step": 272 }, { "epoch": 0.42924528301886794, "grad_norm": 2.2549541806802296, "learning_rate": 6.396134703590617e-07, "loss": 1.7392, "step": 273 }, { "epoch": 0.4308176100628931, "grad_norm": 2.0286792453580573, "learning_rate": 6.391117869667358e-07, "loss": 1.8195, "step": 274 }, { "epoch": 0.43238993710691825, "grad_norm": 2.0337872779297017, "learning_rate": 6.386082266829629e-07, "loss": 1.8084, "step": 275 }, { "epoch": 0.4339622641509434, "grad_norm": 2.095428278939121, "learning_rate": 6.381027927768171e-07, "loss": 1.943, "step": 276 }, { "epoch": 0.43553459119496857, "grad_norm": 2.013844627037566, "learning_rate": 6.375954885295369e-07, "loss": 1.6934, "step": 277 }, { "epoch": 0.4371069182389937, "grad_norm": 2.2579768491043497, "learning_rate": 6.37086317234502e-07, "loss": 1.8158, "step": 278 }, { "epoch": 0.4386792452830189, "grad_norm": 2.3262366087078354, "learning_rate": 6.365752821972134e-07, "loss": 1.83, "step": 279 }, { "epoch": 0.44025157232704404, "grad_norm": 2.0303047010962376, "learning_rate": 6.36062386735271e-07, "loss": 1.7752, "step": 280 }, { "epoch": 0.4418238993710692, "grad_norm": 2.1138830916579425, "learning_rate": 6.355476341783529e-07, "loss": 1.9532, "step": 281 }, { "epoch": 0.44339622641509435, "grad_norm": 2.100950656642803, "learning_rate": 6.35031027868193e-07, "loss": 1.7841, "step": 282 }, { "epoch": 0.4449685534591195, "grad_norm": 2.175666972234393, "learning_rate": 6.345125711585594e-07, "loss": 1.7535, "step": 283 }, { "epoch": 0.44654088050314467, "grad_norm": 2.090692135911591, "learning_rate": 6.339922674152333e-07, "loss": 1.9545, "step": 284 }, { "epoch": 0.4481132075471698, "grad_norm": 2.2150355818462875, "learning_rate": 6.334701200159867e-07, "loss": 1.7806, "step": 285 }, { "epoch": 0.449685534591195, "grad_norm": 2.1792703239922218, "learning_rate": 6.3294613235056e-07, "loss": 1.8228, "step": 286 }, { "epoch": 0.45125786163522014, "grad_norm": 2.3237367502125794, "learning_rate": 6.324203078206408e-07, "loss": 1.7555, "step": 287 }, { "epoch": 0.4528301886792453, "grad_norm": 2.119235492747239, "learning_rate": 6.318926498398415e-07, "loss": 2.1297, "step": 288 }, { "epoch": 0.45440251572327045, "grad_norm": 2.066839274608312, "learning_rate": 6.31363161833677e-07, "loss": 1.802, "step": 289 }, { "epoch": 0.4559748427672956, "grad_norm": 2.1311539498302716, "learning_rate": 6.308318472395429e-07, "loss": 1.8139, "step": 290 }, { "epoch": 0.45754716981132076, "grad_norm": 2.191273523825869, "learning_rate": 6.302987095066923e-07, "loss": 1.8633, "step": 291 }, { "epoch": 0.4591194968553459, "grad_norm": 2.2656122854422462, "learning_rate": 6.297637520962143e-07, "loss": 1.7441, "step": 292 }, { "epoch": 0.4606918238993711, "grad_norm": 1.93728883044019, "learning_rate": 6.292269784810113e-07, "loss": 1.9895, "step": 293 }, { "epoch": 0.46226415094339623, "grad_norm": 2.3187477308368343, "learning_rate": 6.286883921457763e-07, "loss": 1.83, "step": 294 }, { "epoch": 0.4638364779874214, "grad_norm": 2.202481261207392, "learning_rate": 6.281479965869702e-07, "loss": 1.8797, "step": 295 }, { "epoch": 0.46540880503144655, "grad_norm": 2.0842105854347657, "learning_rate": 6.276057953127994e-07, "loss": 1.7532, "step": 296 }, { "epoch": 0.4669811320754717, "grad_norm": 2.207130976094581, "learning_rate": 6.270617918431928e-07, "loss": 1.7917, "step": 297 }, { "epoch": 0.46855345911949686, "grad_norm": 2.032959806862764, "learning_rate": 6.26515989709779e-07, "loss": 1.8665, "step": 298 }, { "epoch": 0.470125786163522, "grad_norm": 1.9531386309076453, "learning_rate": 6.259683924558633e-07, "loss": 1.8778, "step": 299 }, { "epoch": 0.4716981132075472, "grad_norm": 2.0714051206159896, "learning_rate": 6.254190036364051e-07, "loss": 1.7126, "step": 300 }, { "epoch": 0.47327044025157233, "grad_norm": 2.2881658756108907, "learning_rate": 6.24867826817994e-07, "loss": 1.8592, "step": 301 }, { "epoch": 0.4748427672955975, "grad_norm": 2.0634196311190345, "learning_rate": 6.243148655788276e-07, "loss": 2.3116, "step": 302 }, { "epoch": 0.47641509433962265, "grad_norm": 2.1436128024329024, "learning_rate": 6.237601235086879e-07, "loss": 1.7473, "step": 303 }, { "epoch": 0.4779874213836478, "grad_norm": 2.2670611315983793, "learning_rate": 6.232036042089171e-07, "loss": 1.9504, "step": 304 }, { "epoch": 0.47955974842767296, "grad_norm": 2.138063229224171, "learning_rate": 6.226453112923962e-07, "loss": 1.8807, "step": 305 }, { "epoch": 0.4811320754716981, "grad_norm": 2.169257324016606, "learning_rate": 6.220852483835196e-07, "loss": 1.9444, "step": 306 }, { "epoch": 0.4827044025157233, "grad_norm": 2.230348996182296, "learning_rate": 6.215234191181725e-07, "loss": 1.8807, "step": 307 }, { "epoch": 0.48427672955974843, "grad_norm": 2.2607928616037967, "learning_rate": 6.20959827143707e-07, "loss": 1.9083, "step": 308 }, { "epoch": 0.4858490566037736, "grad_norm": 2.3264562054360756, "learning_rate": 6.203944761189192e-07, "loss": 1.7524, "step": 309 }, { "epoch": 0.48742138364779874, "grad_norm": 1.9976309357346456, "learning_rate": 6.198273697140243e-07, "loss": 1.9204, "step": 310 }, { "epoch": 0.4889937106918239, "grad_norm": 2.24371014902543, "learning_rate": 6.19258511610633e-07, "loss": 1.7535, "step": 311 }, { "epoch": 0.49056603773584906, "grad_norm": 2.2058395033512554, "learning_rate": 6.186879055017288e-07, "loss": 1.7674, "step": 312 }, { "epoch": 0.4921383647798742, "grad_norm": 2.024400868779697, "learning_rate": 6.181155550916422e-07, "loss": 1.6954, "step": 313 }, { "epoch": 0.4937106918238994, "grad_norm": 2.269535233545749, "learning_rate": 6.175414640960283e-07, "loss": 1.7337, "step": 314 }, { "epoch": 0.49528301886792453, "grad_norm": 1.9745297525246102, "learning_rate": 6.169656362418414e-07, "loss": 1.8684, "step": 315 }, { "epoch": 0.4968553459119497, "grad_norm": 1.976330025219871, "learning_rate": 6.163880752673117e-07, "loss": 2.1739, "step": 316 }, { "epoch": 0.49842767295597484, "grad_norm": 2.1760376472956025, "learning_rate": 6.158087849219204e-07, "loss": 1.9162, "step": 317 }, { "epoch": 0.5, "grad_norm": 2.143628134207709, "learning_rate": 6.152277689663759e-07, "loss": 1.7411, "step": 318 }, { "epoch": 0.5015723270440252, "grad_norm": 1.9712614846347265, "learning_rate": 6.146450311725888e-07, "loss": 1.7378, "step": 319 }, { "epoch": 0.5031446540880503, "grad_norm": 2.1616472374659357, "learning_rate": 6.140605753236483e-07, "loss": 1.741, "step": 320 }, { "epoch": 0.5047169811320755, "grad_norm": 2.2014372084510074, "learning_rate": 6.134744052137967e-07, "loss": 1.7833, "step": 321 }, { "epoch": 0.5062893081761006, "grad_norm": 1.982079271247313, "learning_rate": 6.128865246484048e-07, "loss": 1.8626, "step": 322 }, { "epoch": 0.5078616352201258, "grad_norm": 2.254872366939666, "learning_rate": 6.122969374439483e-07, "loss": 1.8039, "step": 323 }, { "epoch": 0.5094339622641509, "grad_norm": 2.016915503448893, "learning_rate": 6.11705647427982e-07, "loss": 1.5489, "step": 324 }, { "epoch": 0.5110062893081762, "grad_norm": 2.1164070023469788, "learning_rate": 6.111126584391148e-07, "loss": 1.7177, "step": 325 }, { "epoch": 0.5125786163522013, "grad_norm": 2.1940717974335846, "learning_rate": 6.105179743269858e-07, "loss": 1.844, "step": 326 }, { "epoch": 0.5141509433962265, "grad_norm": 2.2525569223241186, "learning_rate": 6.099215989522382e-07, "loss": 2.4854, "step": 327 }, { "epoch": 0.5157232704402516, "grad_norm": 2.0259464742758784, "learning_rate": 6.093235361864952e-07, "loss": 2.1479, "step": 328 }, { "epoch": 0.5172955974842768, "grad_norm": 2.3113158826769347, "learning_rate": 6.087237899123342e-07, "loss": 1.6807, "step": 329 }, { "epoch": 0.5188679245283019, "grad_norm": 2.032892966587123, "learning_rate": 6.081223640232616e-07, "loss": 2.0318, "step": 330 }, { "epoch": 0.5204402515723271, "grad_norm": 2.3650596675106472, "learning_rate": 6.075192624236881e-07, "loss": 1.9068, "step": 331 }, { "epoch": 0.5220125786163522, "grad_norm": 2.2695747251563225, "learning_rate": 6.069144890289028e-07, "loss": 1.6508, "step": 332 }, { "epoch": 0.5235849056603774, "grad_norm": 2.13682292483943, "learning_rate": 6.063080477650479e-07, "loss": 1.9008, "step": 333 }, { "epoch": 0.5251572327044025, "grad_norm": 2.108983919386453, "learning_rate": 6.056999425690935e-07, "loss": 1.7042, "step": 334 }, { "epoch": 0.5267295597484277, "grad_norm": 2.1512302698424075, "learning_rate": 6.050901773888115e-07, "loss": 1.664, "step": 335 }, { "epoch": 0.5283018867924528, "grad_norm": 2.039353542540568, "learning_rate": 6.044787561827507e-07, "loss": 1.7934, "step": 336 }, { "epoch": 0.529874213836478, "grad_norm": 2.0719588798830655, "learning_rate": 6.038656829202103e-07, "loss": 1.9121, "step": 337 }, { "epoch": 0.5314465408805031, "grad_norm": 1.9491996764419244, "learning_rate": 6.032509615812147e-07, "loss": 1.9204, "step": 338 }, { "epoch": 0.5330188679245284, "grad_norm": 2.1113652450511897, "learning_rate": 6.026345961564875e-07, "loss": 1.8414, "step": 339 }, { "epoch": 0.5345911949685535, "grad_norm": 2.2032131432104767, "learning_rate": 6.020165906474257e-07, "loss": 1.7114, "step": 340 }, { "epoch": 0.5361635220125787, "grad_norm": 2.1791413791352565, "learning_rate": 6.013969490660731e-07, "loss": 2.2435, "step": 341 }, { "epoch": 0.5377358490566038, "grad_norm": 2.0891510363928614, "learning_rate": 6.007756754350954e-07, "loss": 1.7701, "step": 342 }, { "epoch": 0.539308176100629, "grad_norm": 2.0697597215600196, "learning_rate": 6.001527737877532e-07, "loss": 1.8208, "step": 343 }, { "epoch": 0.5408805031446541, "grad_norm": 2.186994903601224, "learning_rate": 5.995282481678758e-07, "loss": 1.8373, "step": 344 }, { "epoch": 0.5424528301886793, "grad_norm": 2.2604938659796545, "learning_rate": 5.989021026298354e-07, "loss": 1.7995, "step": 345 }, { "epoch": 0.5440251572327044, "grad_norm": 2.265552317615986, "learning_rate": 5.982743412385207e-07, "loss": 1.7546, "step": 346 }, { "epoch": 0.5455974842767296, "grad_norm": 2.15564901479827, "learning_rate": 5.976449680693104e-07, "loss": 1.8596, "step": 347 }, { "epoch": 0.5471698113207547, "grad_norm": 2.0408737176330707, "learning_rate": 5.970139872080463e-07, "loss": 1.7603, "step": 348 }, { "epoch": 0.5487421383647799, "grad_norm": 2.0704318452346153, "learning_rate": 5.963814027510079e-07, "loss": 1.9277, "step": 349 }, { "epoch": 0.550314465408805, "grad_norm": 2.2298784446231243, "learning_rate": 5.957472188048845e-07, "loss": 1.7991, "step": 350 }, { "epoch": 0.5518867924528302, "grad_norm": 2.0268792215372455, "learning_rate": 5.951114394867494e-07, "loss": 1.7636, "step": 351 }, { "epoch": 0.5534591194968553, "grad_norm": 2.0724518345088527, "learning_rate": 5.944740689240333e-07, "loss": 1.8476, "step": 352 }, { "epoch": 0.5550314465408805, "grad_norm": 2.2252899856685375, "learning_rate": 5.938351112544964e-07, "loss": 1.9069, "step": 353 }, { "epoch": 0.5566037735849056, "grad_norm": 2.0991045116850784, "learning_rate": 5.931945706262028e-07, "loss": 1.8448, "step": 354 }, { "epoch": 0.5581761006289309, "grad_norm": 2.049481500083905, "learning_rate": 5.92552451197493e-07, "loss": 1.8783, "step": 355 }, { "epoch": 0.559748427672956, "grad_norm": 2.1757357613660893, "learning_rate": 5.919087571369567e-07, "loss": 1.8135, "step": 356 }, { "epoch": 0.5613207547169812, "grad_norm": 2.261669343627697, "learning_rate": 5.912634926234063e-07, "loss": 1.9118, "step": 357 }, { "epoch": 0.5628930817610063, "grad_norm": 2.3346020516981976, "learning_rate": 5.906166618458491e-07, "loss": 1.9321, "step": 358 }, { "epoch": 0.5644654088050315, "grad_norm": 1.9754635648300218, "learning_rate": 5.89968269003461e-07, "loss": 1.7947, "step": 359 }, { "epoch": 0.5660377358490566, "grad_norm": 2.1082779863127232, "learning_rate": 5.893183183055581e-07, "loss": 2.1433, "step": 360 }, { "epoch": 0.5676100628930818, "grad_norm": 2.2719804428628474, "learning_rate": 5.886668139715704e-07, "loss": 1.7544, "step": 361 }, { "epoch": 0.5691823899371069, "grad_norm": 2.063124532930253, "learning_rate": 5.880137602310138e-07, "loss": 1.8496, "step": 362 }, { "epoch": 0.5707547169811321, "grad_norm": 2.178454726609103, "learning_rate": 5.873591613234628e-07, "loss": 1.8731, "step": 363 }, { "epoch": 0.5723270440251572, "grad_norm": 2.2292005546265092, "learning_rate": 5.867030214985232e-07, "loss": 1.6673, "step": 364 }, { "epoch": 0.5738993710691824, "grad_norm": 2.1915150883178467, "learning_rate": 5.860453450158042e-07, "loss": 1.8147, "step": 365 }, { "epoch": 0.5754716981132075, "grad_norm": 2.068884145719646, "learning_rate": 5.853861361448906e-07, "loss": 1.8357, "step": 366 }, { "epoch": 0.5770440251572327, "grad_norm": 2.2335775263207656, "learning_rate": 5.847253991653161e-07, "loss": 1.7674, "step": 367 }, { "epoch": 0.5786163522012578, "grad_norm": 2.028099342557415, "learning_rate": 5.840631383665337e-07, "loss": 1.9504, "step": 368 }, { "epoch": 0.5801886792452831, "grad_norm": 2.171456280936142, "learning_rate": 5.833993580478899e-07, "loss": 1.7799, "step": 369 }, { "epoch": 0.5817610062893082, "grad_norm": 2.188440801596699, "learning_rate": 5.827340625185951e-07, "loss": 1.7664, "step": 370 }, { "epoch": 0.5833333333333334, "grad_norm": 2.061808710093214, "learning_rate": 5.820672560976968e-07, "loss": 1.8846, "step": 371 }, { "epoch": 0.5849056603773585, "grad_norm": 2.080838024759326, "learning_rate": 5.813989431140509e-07, "loss": 1.8208, "step": 372 }, { "epoch": 0.5864779874213837, "grad_norm": 2.1013196605531266, "learning_rate": 5.807291279062938e-07, "loss": 1.7698, "step": 373 }, { "epoch": 0.5880503144654088, "grad_norm": 2.1083157749408405, "learning_rate": 5.800578148228141e-07, "loss": 2.0087, "step": 374 }, { "epoch": 0.589622641509434, "grad_norm": 2.327452355557212, "learning_rate": 5.793850082217248e-07, "loss": 2.2253, "step": 375 }, { "epoch": 0.5911949685534591, "grad_norm": 2.2687807974688656, "learning_rate": 5.787107124708343e-07, "loss": 1.7986, "step": 376 }, { "epoch": 0.5927672955974843, "grad_norm": 2.0752387638298835, "learning_rate": 5.780349319476189e-07, "loss": 1.6402, "step": 377 }, { "epoch": 0.5943396226415094, "grad_norm": 2.2870254808353954, "learning_rate": 5.773576710391937e-07, "loss": 1.8418, "step": 378 }, { "epoch": 0.5959119496855346, "grad_norm": 2.146779959111506, "learning_rate": 5.766789341422841e-07, "loss": 1.8692, "step": 379 }, { "epoch": 0.5974842767295597, "grad_norm": 2.2844314903310585, "learning_rate": 5.75998725663198e-07, "loss": 1.713, "step": 380 }, { "epoch": 0.5990566037735849, "grad_norm": 2.08256027270516, "learning_rate": 5.753170500177962e-07, "loss": 2.0656, "step": 381 }, { "epoch": 0.60062893081761, "grad_norm": 2.1961855730334676, "learning_rate": 5.746339116314646e-07, "loss": 1.6188, "step": 382 }, { "epoch": 0.6022012578616353, "grad_norm": 2.0468929231050748, "learning_rate": 5.739493149390851e-07, "loss": 1.8984, "step": 383 }, { "epoch": 0.6037735849056604, "grad_norm": 2.0935907404848746, "learning_rate": 5.732632643850064e-07, "loss": 1.6483, "step": 384 }, { "epoch": 0.6053459119496856, "grad_norm": 2.0252018235406575, "learning_rate": 5.725757644230159e-07, "loss": 1.9005, "step": 385 }, { "epoch": 0.6069182389937107, "grad_norm": 2.001163026327367, "learning_rate": 5.718868195163106e-07, "loss": 1.829, "step": 386 }, { "epoch": 0.6084905660377359, "grad_norm": 2.1924828073774476, "learning_rate": 5.711964341374676e-07, "loss": 1.6139, "step": 387 }, { "epoch": 0.610062893081761, "grad_norm": 2.228468039969882, "learning_rate": 5.705046127684158e-07, "loss": 1.8581, "step": 388 }, { "epoch": 0.6116352201257862, "grad_norm": 2.1871856796652906, "learning_rate": 5.698113599004058e-07, "loss": 1.8884, "step": 389 }, { "epoch": 0.6132075471698113, "grad_norm": 2.119012481165798, "learning_rate": 5.691166800339823e-07, "loss": 1.7771, "step": 390 }, { "epoch": 0.6147798742138365, "grad_norm": 2.1752727454942677, "learning_rate": 5.684205776789531e-07, "loss": 1.7475, "step": 391 }, { "epoch": 0.6163522012578616, "grad_norm": 1.9703566361856617, "learning_rate": 5.677230573543612e-07, "loss": 1.995, "step": 392 }, { "epoch": 0.6179245283018868, "grad_norm": 1.9783136401244312, "learning_rate": 5.670241235884547e-07, "loss": 1.6578, "step": 393 }, { "epoch": 0.6194968553459119, "grad_norm": 2.1003352712507084, "learning_rate": 5.663237809186581e-07, "loss": 1.7801, "step": 394 }, { "epoch": 0.6210691823899371, "grad_norm": 2.2674486565881344, "learning_rate": 5.656220338915417e-07, "loss": 1.7259, "step": 395 }, { "epoch": 0.6226415094339622, "grad_norm": 2.1909929939479196, "learning_rate": 5.649188870627932e-07, "loss": 1.791, "step": 396 }, { "epoch": 0.6242138364779874, "grad_norm": 2.1847378593727855, "learning_rate": 5.642143449971877e-07, "loss": 1.6481, "step": 397 }, { "epoch": 0.6257861635220126, "grad_norm": 2.296413505588846, "learning_rate": 5.635084122685582e-07, "loss": 1.6404, "step": 398 }, { "epoch": 0.6273584905660378, "grad_norm": 2.3710100879322815, "learning_rate": 5.628010934597652e-07, "loss": 1.8934, "step": 399 }, { "epoch": 0.6289308176100629, "grad_norm": 2.1015013913914884, "learning_rate": 5.620923931626681e-07, "loss": 1.7094, "step": 400 }, { "epoch": 0.6305031446540881, "grad_norm": 2.2084477731115095, "learning_rate": 5.613823159780947e-07, "loss": 1.775, "step": 401 }, { "epoch": 0.6320754716981132, "grad_norm": 1.9632035650078112, "learning_rate": 5.606708665158114e-07, "loss": 1.8468, "step": 402 }, { "epoch": 0.6336477987421384, "grad_norm": 2.3789325797797045, "learning_rate": 5.59958049394493e-07, "loss": 1.9285, "step": 403 }, { "epoch": 0.6352201257861635, "grad_norm": 2.155496601332782, "learning_rate": 5.592438692416937e-07, "loss": 2.0406, "step": 404 }, { "epoch": 0.6367924528301887, "grad_norm": 2.237193083014261, "learning_rate": 5.585283306938159e-07, "loss": 1.7627, "step": 405 }, { "epoch": 0.6383647798742138, "grad_norm": 2.093924681792703, "learning_rate": 5.578114383960806e-07, "loss": 1.5612, "step": 406 }, { "epoch": 0.639937106918239, "grad_norm": 1.9118398198078217, "learning_rate": 5.570931970024976e-07, "loss": 1.7692, "step": 407 }, { "epoch": 0.6415094339622641, "grad_norm": 2.065885602117071, "learning_rate": 5.563736111758344e-07, "loss": 2.1028, "step": 408 }, { "epoch": 0.6430817610062893, "grad_norm": 2.0965149313714035, "learning_rate": 5.55652685587587e-07, "loss": 2.112, "step": 409 }, { "epoch": 0.6446540880503144, "grad_norm": 2.19620885008707, "learning_rate": 5.549304249179487e-07, "loss": 1.8514, "step": 410 }, { "epoch": 0.6462264150943396, "grad_norm": 2.157564335495825, "learning_rate": 5.542068338557801e-07, "loss": 1.9672, "step": 411 }, { "epoch": 0.6477987421383647, "grad_norm": 2.339822370018209, "learning_rate": 5.534819170985786e-07, "loss": 1.9801, "step": 412 }, { "epoch": 0.64937106918239, "grad_norm": 2.53967666106126, "learning_rate": 5.527556793524481e-07, "loss": 2.0856, "step": 413 }, { "epoch": 0.6509433962264151, "grad_norm": 2.027714420058619, "learning_rate": 5.520281253320678e-07, "loss": 1.92, "step": 414 }, { "epoch": 0.6525157232704403, "grad_norm": 2.2931114820343717, "learning_rate": 5.512992597606626e-07, "loss": 1.7635, "step": 415 }, { "epoch": 0.6540880503144654, "grad_norm": 2.103312242134212, "learning_rate": 5.505690873699716e-07, "loss": 1.8141, "step": 416 }, { "epoch": 0.6556603773584906, "grad_norm": 2.0874926904070032, "learning_rate": 5.498376129002176e-07, "loss": 1.6943, "step": 417 }, { "epoch": 0.6572327044025157, "grad_norm": 2.064459999298948, "learning_rate": 5.491048411000766e-07, "loss": 1.8251, "step": 418 }, { "epoch": 0.6588050314465409, "grad_norm": 2.043867970270658, "learning_rate": 5.483707767266467e-07, "loss": 1.9847, "step": 419 }, { "epoch": 0.660377358490566, "grad_norm": 2.0798717919218332, "learning_rate": 5.47635424545417e-07, "loss": 1.7778, "step": 420 }, { "epoch": 0.6619496855345912, "grad_norm": 2.2400310365423897, "learning_rate": 5.468987893302375e-07, "loss": 1.9559, "step": 421 }, { "epoch": 0.6635220125786163, "grad_norm": 2.099625244938322, "learning_rate": 5.461608758632872e-07, "loss": 1.736, "step": 422 }, { "epoch": 0.6650943396226415, "grad_norm": 2.1409980099649455, "learning_rate": 5.454216889350435e-07, "loss": 1.7748, "step": 423 }, { "epoch": 0.6666666666666666, "grad_norm": 2.0325650663365105, "learning_rate": 5.44681233344251e-07, "loss": 1.7192, "step": 424 }, { "epoch": 0.6682389937106918, "grad_norm": 2.0307579619789213, "learning_rate": 5.4393951389789e-07, "loss": 1.8139, "step": 425 }, { "epoch": 0.6698113207547169, "grad_norm": 1.9960345481446478, "learning_rate": 5.431965354111465e-07, "loss": 1.64, "step": 426 }, { "epoch": 0.6713836477987422, "grad_norm": 2.2621592250213154, "learning_rate": 5.424523027073794e-07, "loss": 1.8895, "step": 427 }, { "epoch": 0.6729559748427673, "grad_norm": 1.9753545804161157, "learning_rate": 5.417068206180899e-07, "loss": 1.7637, "step": 428 }, { "epoch": 0.6745283018867925, "grad_norm": 2.2178958295189606, "learning_rate": 5.409600939828906e-07, "loss": 2.0521, "step": 429 }, { "epoch": 0.6761006289308176, "grad_norm": 1.8923979616131885, "learning_rate": 5.402121276494731e-07, "loss": 1.7683, "step": 430 }, { "epoch": 0.6776729559748428, "grad_norm": 2.0677112128365893, "learning_rate": 5.394629264735772e-07, "loss": 1.8395, "step": 431 }, { "epoch": 0.6792452830188679, "grad_norm": 2.0153045160857603, "learning_rate": 5.387124953189594e-07, "loss": 1.8411, "step": 432 }, { "epoch": 0.6808176100628931, "grad_norm": 2.18275253208093, "learning_rate": 5.379608390573607e-07, "loss": 1.8287, "step": 433 }, { "epoch": 0.6823899371069182, "grad_norm": 2.091662651103415, "learning_rate": 5.372079625684757e-07, "loss": 1.8308, "step": 434 }, { "epoch": 0.6839622641509434, "grad_norm": 2.1060091304508495, "learning_rate": 5.364538707399207e-07, "loss": 1.7317, "step": 435 }, { "epoch": 0.6855345911949685, "grad_norm": 2.2183309272293545, "learning_rate": 5.356985684672016e-07, "loss": 1.7259, "step": 436 }, { "epoch": 0.6871069182389937, "grad_norm": 2.076351332712323, "learning_rate": 5.349420606536826e-07, "loss": 1.8949, "step": 437 }, { "epoch": 0.6886792452830188, "grad_norm": 2.133834879680037, "learning_rate": 5.341843522105541e-07, "loss": 1.881, "step": 438 }, { "epoch": 0.690251572327044, "grad_norm": 1.866508697696801, "learning_rate": 5.334254480568012e-07, "loss": 1.6093, "step": 439 }, { "epoch": 0.6918238993710691, "grad_norm": 2.026325564739705, "learning_rate": 5.326653531191709e-07, "loss": 1.7457, "step": 440 }, { "epoch": 0.6933962264150944, "grad_norm": 2.030947206901835, "learning_rate": 5.319040723321411e-07, "loss": 1.8016, "step": 441 }, { "epoch": 0.6949685534591195, "grad_norm": 2.2635231796009165, "learning_rate": 5.31141610637888e-07, "loss": 1.8274, "step": 442 }, { "epoch": 0.6965408805031447, "grad_norm": 2.160129609647997, "learning_rate": 5.303779729862541e-07, "loss": 1.841, "step": 443 }, { "epoch": 0.6981132075471698, "grad_norm": 2.3857343889060085, "learning_rate": 5.296131643347164e-07, "loss": 2.0521, "step": 444 }, { "epoch": 0.699685534591195, "grad_norm": 2.2373548879864, "learning_rate": 5.288471896483535e-07, "loss": 1.644, "step": 445 }, { "epoch": 0.7012578616352201, "grad_norm": 2.16265297512191, "learning_rate": 5.280800538998141e-07, "loss": 1.8585, "step": 446 }, { "epoch": 0.7028301886792453, "grad_norm": 2.0560711051498277, "learning_rate": 5.273117620692847e-07, "loss": 1.6482, "step": 447 }, { "epoch": 0.7044025157232704, "grad_norm": 1.9542683421441711, "learning_rate": 5.265423191444563e-07, "loss": 2.0183, "step": 448 }, { "epoch": 0.7059748427672956, "grad_norm": 2.1353978544627044, "learning_rate": 5.257717301204932e-07, "loss": 1.8363, "step": 449 }, { "epoch": 0.7075471698113207, "grad_norm": 2.1447188396187995, "learning_rate": 5.25e-07, "loss": 1.6944, "step": 450 }, { "epoch": 0.7091194968553459, "grad_norm": 2.3547091263057056, "learning_rate": 5.242271337929891e-07, "loss": 1.7432, "step": 451 }, { "epoch": 0.710691823899371, "grad_norm": 2.4270796532966337, "learning_rate": 5.234531365168486e-07, "loss": 1.6861, "step": 452 }, { "epoch": 0.7122641509433962, "grad_norm": 2.3217701732544302, "learning_rate": 5.22678013196309e-07, "loss": 1.6177, "step": 453 }, { "epoch": 0.7138364779874213, "grad_norm": 2.1886144838681982, "learning_rate": 5.219017688634111e-07, "loss": 1.6984, "step": 454 }, { "epoch": 0.7154088050314465, "grad_norm": 1.9865673398125758, "learning_rate": 5.211244085574735e-07, "loss": 1.7418, "step": 455 }, { "epoch": 0.7169811320754716, "grad_norm": 2.1224471005243677, "learning_rate": 5.203459373250593e-07, "loss": 1.7137, "step": 456 }, { "epoch": 0.7185534591194969, "grad_norm": 2.010328916041691, "learning_rate": 5.195663602199438e-07, "loss": 1.5998, "step": 457 }, { "epoch": 0.720125786163522, "grad_norm": 2.1617887359262964, "learning_rate": 5.187856823030815e-07, "loss": 1.7791, "step": 458 }, { "epoch": 0.7216981132075472, "grad_norm": 2.088298143518636, "learning_rate": 5.180039086425733e-07, "loss": 1.717, "step": 459 }, { "epoch": 0.7232704402515723, "grad_norm": 2.182318441419754, "learning_rate": 5.172210443136335e-07, "loss": 1.7386, "step": 460 }, { "epoch": 0.7248427672955975, "grad_norm": 2.24576929264229, "learning_rate": 5.164370943985573e-07, "loss": 1.6459, "step": 461 }, { "epoch": 0.7264150943396226, "grad_norm": 2.04417918075731, "learning_rate": 5.156520639866867e-07, "loss": 1.8024, "step": 462 }, { "epoch": 0.7279874213836478, "grad_norm": 2.112329388375878, "learning_rate": 5.14865958174379e-07, "loss": 2.0854, "step": 463 }, { "epoch": 0.7295597484276729, "grad_norm": 2.016676563210969, "learning_rate": 5.140787820649725e-07, "loss": 1.7524, "step": 464 }, { "epoch": 0.7311320754716981, "grad_norm": 2.60378450644644, "learning_rate": 5.132905407687537e-07, "loss": 2.0874, "step": 465 }, { "epoch": 0.7327044025157232, "grad_norm": 2.0550298802585427, "learning_rate": 5.125012394029245e-07, "loss": 1.7928, "step": 466 }, { "epoch": 0.7342767295597484, "grad_norm": 2.109873492408979, "learning_rate": 5.117108830915686e-07, "loss": 1.7396, "step": 467 }, { "epoch": 0.7358490566037735, "grad_norm": 2.075833532198868, "learning_rate": 5.109194769656182e-07, "loss": 1.8012, "step": 468 }, { "epoch": 0.7374213836477987, "grad_norm": 2.2968922617399143, "learning_rate": 5.10127026162821e-07, "loss": 1.677, "step": 469 }, { "epoch": 0.7389937106918238, "grad_norm": 2.1646580219404767, "learning_rate": 5.093335358277063e-07, "loss": 1.8778, "step": 470 }, { "epoch": 0.7405660377358491, "grad_norm": 2.077818575976857, "learning_rate": 5.085390111115525e-07, "loss": 1.7151, "step": 471 }, { "epoch": 0.7421383647798742, "grad_norm": 2.1792667307239117, "learning_rate": 5.077434571723527e-07, "loss": 1.7331, "step": 472 }, { "epoch": 0.7437106918238994, "grad_norm": 2.007840890946543, "learning_rate": 5.069468791747818e-07, "loss": 1.6337, "step": 473 }, { "epoch": 0.7452830188679245, "grad_norm": 2.0523492740975886, "learning_rate": 5.061492822901629e-07, "loss": 1.7339, "step": 474 }, { "epoch": 0.7468553459119497, "grad_norm": 2.1645747688452492, "learning_rate": 5.053506716964335e-07, "loss": 1.9892, "step": 475 }, { "epoch": 0.7484276729559748, "grad_norm": 2.30254590731602, "learning_rate": 5.04551052578112e-07, "loss": 1.8834, "step": 476 }, { "epoch": 0.75, "grad_norm": 2.3644242948478915, "learning_rate": 5.03750430126264e-07, "loss": 1.8034, "step": 477 }, { "epoch": 0.7515723270440252, "grad_norm": 2.1460399637911096, "learning_rate": 5.029488095384689e-07, "loss": 1.8283, "step": 478 }, { "epoch": 0.7531446540880503, "grad_norm": 2.178578800063943, "learning_rate": 5.021461960187858e-07, "loss": 1.6951, "step": 479 }, { "epoch": 0.7547169811320755, "grad_norm": 2.0849853089545154, "learning_rate": 5.013425947777198e-07, "loss": 1.8294, "step": 480 }, { "epoch": 0.7562893081761006, "grad_norm": 2.0454648568366327, "learning_rate": 5.005380110321882e-07, "loss": 1.8758, "step": 481 }, { "epoch": 0.7578616352201258, "grad_norm": 2.016873990373029, "learning_rate": 4.997324500054869e-07, "loss": 1.8153, "step": 482 }, { "epoch": 0.7594339622641509, "grad_norm": 2.3068593437311353, "learning_rate": 4.989259169272557e-07, "loss": 2.0439, "step": 483 }, { "epoch": 0.7610062893081762, "grad_norm": 2.23324104378393, "learning_rate": 4.981184170334456e-07, "loss": 1.6537, "step": 484 }, { "epoch": 0.7625786163522013, "grad_norm": 2.194801843258506, "learning_rate": 4.973099555662832e-07, "loss": 1.7096, "step": 485 }, { "epoch": 0.7641509433962265, "grad_norm": 2.031449083549838, "learning_rate": 4.965005377742386e-07, "loss": 1.774, "step": 486 }, { "epoch": 0.7657232704402516, "grad_norm": 2.1043645294914577, "learning_rate": 4.956901689119894e-07, "loss": 1.8458, "step": 487 }, { "epoch": 0.7672955974842768, "grad_norm": 1.953254357792575, "learning_rate": 4.948788542403877e-07, "loss": 1.8636, "step": 488 }, { "epoch": 0.7688679245283019, "grad_norm": 2.146268467480728, "learning_rate": 4.940665990264263e-07, "loss": 1.9093, "step": 489 }, { "epoch": 0.7704402515723271, "grad_norm": 1.9666125611299663, "learning_rate": 4.932534085432032e-07, "loss": 1.7225, "step": 490 }, { "epoch": 0.7720125786163522, "grad_norm": 2.318583475321128, "learning_rate": 4.924392880698882e-07, "loss": 2.1748, "step": 491 }, { "epoch": 0.7735849056603774, "grad_norm": 2.0092675503475004, "learning_rate": 4.91624242891689e-07, "loss": 1.9759, "step": 492 }, { "epoch": 0.7751572327044025, "grad_norm": 2.1097488696357076, "learning_rate": 4.90808278299816e-07, "loss": 1.9701, "step": 493 }, { "epoch": 0.7767295597484277, "grad_norm": 2.142041500023684, "learning_rate": 4.899913995914485e-07, "loss": 1.6193, "step": 494 }, { "epoch": 0.7783018867924528, "grad_norm": 2.2559396370443427, "learning_rate": 4.891736120696999e-07, "loss": 1.7115, "step": 495 }, { "epoch": 0.779874213836478, "grad_norm": 2.1488219924141596, "learning_rate": 4.883549210435841e-07, "loss": 1.9703, "step": 496 }, { "epoch": 0.7814465408805031, "grad_norm": 2.1397162032458232, "learning_rate": 4.8753533182798e-07, "loss": 1.7644, "step": 497 }, { "epoch": 0.7830188679245284, "grad_norm": 2.191504611692712, "learning_rate": 4.867148497435977e-07, "loss": 1.6881, "step": 498 }, { "epoch": 0.7845911949685535, "grad_norm": 2.000204406082109, "learning_rate": 4.858934801169436e-07, "loss": 1.6802, "step": 499 }, { "epoch": 0.7861635220125787, "grad_norm": 2.100996106775111, "learning_rate": 4.850712282802863e-07, "loss": 1.7683, "step": 500 }, { "epoch": 0.7861635220125787, "eval_sat2_MCTS_chains_SFT_val_loss": 1.7106590270996094, "eval_sat2_MCTS_chains_SFT_val_runtime": 92.0806, "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.164, "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.401, "step": 500 }, { "epoch": 0.7877358490566038, "grad_norm": 2.1566781241616413, "learning_rate": 4.842480995716212e-07, "loss": 1.7176, "step": 501 }, { "epoch": 0.789308176100629, "grad_norm": 2.1925936773589934, "learning_rate": 4.834240993346361e-07, "loss": 1.8595, "step": 502 }, { "epoch": 0.7908805031446541, "grad_norm": 2.249072634890456, "learning_rate": 4.825992329186777e-07, "loss": 1.7456, "step": 503 }, { "epoch": 0.7924528301886793, "grad_norm": 2.1147929947398025, "learning_rate": 4.817735056787149e-07, "loss": 1.6531, "step": 504 }, { "epoch": 0.7940251572327044, "grad_norm": 2.0853013840737935, "learning_rate": 4.809469229753053e-07, "loss": 1.707, "step": 505 }, { "epoch": 0.7955974842767296, "grad_norm": 1.9814812102879638, "learning_rate": 4.8011949017456e-07, "loss": 1.8545, "step": 506 }, { "epoch": 0.7971698113207547, "grad_norm": 2.242933290063341, "learning_rate": 4.792912126481094e-07, "loss": 1.7509, "step": 507 }, { "epoch": 0.7987421383647799, "grad_norm": 2.057417183985654, "learning_rate": 4.784620957730669e-07, "loss": 1.8915, "step": 508 }, { "epoch": 0.800314465408805, "grad_norm": 2.1269232658429, "learning_rate": 4.776321449319959e-07, "loss": 1.6437, "step": 509 }, { "epoch": 0.8018867924528302, "grad_norm": 2.0386799297363574, "learning_rate": 4.76801365512873e-07, "loss": 1.7157, "step": 510 }, { "epoch": 0.8034591194968553, "grad_norm": 2.1312494355819007, "learning_rate": 4.759697629090542e-07, "loss": 1.8856, "step": 511 }, { "epoch": 0.8050314465408805, "grad_norm": 2.036024812845296, "learning_rate": 4.751373425192395e-07, "loss": 1.6698, "step": 512 }, { "epoch": 0.8066037735849056, "grad_norm": 2.3974518115445242, "learning_rate": 4.743041097474381e-07, "loss": 1.9648, "step": 513 }, { "epoch": 0.8081761006289309, "grad_norm": 2.1241520800303846, "learning_rate": 4.734700700029329e-07, "loss": 1.7318, "step": 514 }, { "epoch": 0.809748427672956, "grad_norm": 2.1431917628718162, "learning_rate": 4.7263522870024566e-07, "loss": 1.6973, "step": 515 }, { "epoch": 0.8113207547169812, "grad_norm": 2.234624716136248, "learning_rate": 4.7179959125910164e-07, "loss": 1.7548, "step": 516 }, { "epoch": 0.8128930817610063, "grad_norm": 2.161266418044944, "learning_rate": 4.709631631043949e-07, "loss": 1.605, "step": 517 }, { "epoch": 0.8144654088050315, "grad_norm": 1.9836121455186402, "learning_rate": 4.701259496661527e-07, "loss": 1.8285, "step": 518 }, { "epoch": 0.8160377358490566, "grad_norm": 2.173970653905368, "learning_rate": 4.6928795637949986e-07, "loss": 1.7383, "step": 519 }, { "epoch": 0.8176100628930818, "grad_norm": 2.161721549869913, "learning_rate": 4.6844918868462445e-07, "loss": 1.5783, "step": 520 }, { "epoch": 0.8191823899371069, "grad_norm": 2.159956411342309, "learning_rate": 4.676096520267417e-07, "loss": 1.8492, "step": 521 }, { "epoch": 0.8207547169811321, "grad_norm": 2.0881573745933553, "learning_rate": 4.6676935185605884e-07, "loss": 1.7813, "step": 522 }, { "epoch": 0.8223270440251572, "grad_norm": 2.1031987642542935, "learning_rate": 4.659282936277399e-07, "loss": 2.0953, "step": 523 }, { "epoch": 0.8238993710691824, "grad_norm": 2.205744158912345, "learning_rate": 4.6508648280187014e-07, "loss": 1.7749, "step": 524 }, { "epoch": 0.8254716981132075, "grad_norm": 2.084552384171812, "learning_rate": 4.642439248434205e-07, "loss": 1.8273, "step": 525 }, { "epoch": 0.8270440251572327, "grad_norm": 2.061048329367607, "learning_rate": 4.6340062522221245e-07, "loss": 1.759, "step": 526 }, { "epoch": 0.8286163522012578, "grad_norm": 2.069167397462438, "learning_rate": 4.6255658941288197e-07, "loss": 1.8557, "step": 527 }, { "epoch": 0.8301886792452831, "grad_norm": 2.144369600536378, "learning_rate": 4.6171182289484464e-07, "loss": 1.7656, "step": 528 }, { "epoch": 0.8317610062893082, "grad_norm": 2.1514775909224375, "learning_rate": 4.608663311522597e-07, "loss": 1.8321, "step": 529 }, { "epoch": 0.8333333333333334, "grad_norm": 2.1170537274734507, "learning_rate": 4.6002011967399414e-07, "loss": 1.6458, "step": 530 }, { "epoch": 0.8349056603773585, "grad_norm": 2.357949753261125, "learning_rate": 4.591731939535879e-07, "loss": 1.6988, "step": 531 }, { "epoch": 0.8364779874213837, "grad_norm": 2.010756013038571, "learning_rate": 4.583255594892175e-07, "loss": 1.6346, "step": 532 }, { "epoch": 0.8380503144654088, "grad_norm": 2.0259740544494553, "learning_rate": 4.574772217836605e-07, "loss": 1.667, "step": 533 }, { "epoch": 0.839622641509434, "grad_norm": 2.3278850594778793, "learning_rate": 4.5662818634426e-07, "loss": 1.867, "step": 534 }, { "epoch": 0.8411949685534591, "grad_norm": 2.127352798058412, "learning_rate": 4.557784586828886e-07, "loss": 1.7475, "step": 535 }, { "epoch": 0.8427672955974843, "grad_norm": 2.0508704544909104, "learning_rate": 4.5492804431591275e-07, "loss": 1.9647, "step": 536 }, { "epoch": 0.8443396226415094, "grad_norm": 1.9847000051548063, "learning_rate": 4.5407694876415707e-07, "loss": 1.7637, "step": 537 }, { "epoch": 0.8459119496855346, "grad_norm": 2.146458433753693, "learning_rate": 4.532251775528685e-07, "loss": 1.9054, "step": 538 }, { "epoch": 0.8474842767295597, "grad_norm": 2.037086599135811, "learning_rate": 4.5237273621167977e-07, "loss": 1.9655, "step": 539 }, { "epoch": 0.8490566037735849, "grad_norm": 2.338765254967049, "learning_rate": 4.5151963027457467e-07, "loss": 1.9605, "step": 540 }, { "epoch": 0.85062893081761, "grad_norm": 2.2366764493368443, "learning_rate": 4.5066586527985104e-07, "loss": 1.8306, "step": 541 }, { "epoch": 0.8522012578616353, "grad_norm": 1.9395938337987975, "learning_rate": 4.498114467700855e-07, "loss": 1.8506, "step": 542 }, { "epoch": 0.8537735849056604, "grad_norm": 2.11126192377906, "learning_rate": 4.4895638029209737e-07, "loss": 2.0131, "step": 543 }, { "epoch": 0.8553459119496856, "grad_norm": 1.9448520113288943, "learning_rate": 4.48100671396912e-07, "loss": 1.6417, "step": 544 }, { "epoch": 0.8569182389937107, "grad_norm": 2.17917918305954, "learning_rate": 4.4724432563972586e-07, "loss": 1.596, "step": 545 }, { "epoch": 0.8584905660377359, "grad_norm": 2.2139144251618204, "learning_rate": 4.463873485798697e-07, "loss": 1.6793, "step": 546 }, { "epoch": 0.860062893081761, "grad_norm": 2.1238330080137815, "learning_rate": 4.455297457807724e-07, "loss": 2.0614, "step": 547 }, { "epoch": 0.8616352201257862, "grad_norm": 2.3023567382067873, "learning_rate": 4.4467152280992516e-07, "loss": 1.7894, "step": 548 }, { "epoch": 0.8632075471698113, "grad_norm": 2.041517573942539, "learning_rate": 4.4381268523884577e-07, "loss": 1.9789, "step": 549 }, { "epoch": 0.8647798742138365, "grad_norm": 2.295893853414247, "learning_rate": 4.4295323864304123e-07, "loss": 1.6548, "step": 550 }, { "epoch": 0.8663522012578616, "grad_norm": 2.1107449778410152, "learning_rate": 4.420931886019727e-07, "loss": 1.8173, "step": 551 }, { "epoch": 0.8679245283018868, "grad_norm": 1.9372269175313137, "learning_rate": 4.412325406990188e-07, "loss": 1.7247, "step": 552 }, { "epoch": 0.8694968553459119, "grad_norm": 1.8111270901761425, "learning_rate": 4.4037130052143943e-07, "loss": 1.7852, "step": 553 }, { "epoch": 0.8710691823899371, "grad_norm": 2.0430528853223997, "learning_rate": 4.3950947366033957e-07, "loss": 1.8733, "step": 554 }, { "epoch": 0.8726415094339622, "grad_norm": 2.1440645400252416, "learning_rate": 4.386470657106326e-07, "loss": 1.7904, "step": 555 }, { "epoch": 0.8742138364779874, "grad_norm": 2.1458981176838026, "learning_rate": 4.377840822710047e-07, "loss": 1.8855, "step": 556 }, { "epoch": 0.8757861635220126, "grad_norm": 2.986445993481532, "learning_rate": 4.3692052894387775e-07, "loss": 1.7439, "step": 557 }, { "epoch": 0.8773584905660378, "grad_norm": 1.9740082789953175, "learning_rate": 4.360564113353734e-07, "loss": 1.7215, "step": 558 }, { "epoch": 0.8789308176100629, "grad_norm": 2.0624678582939984, "learning_rate": 4.3519173505527654e-07, "loss": 1.7609, "step": 559 }, { "epoch": 0.8805031446540881, "grad_norm": 2.1742856227918868, "learning_rate": 4.3432650571699923e-07, "loss": 1.7963, "step": 560 }, { "epoch": 0.8820754716981132, "grad_norm": 2.328267217087019, "learning_rate": 4.334607289375434e-07, "loss": 1.8014, "step": 561 }, { "epoch": 0.8836477987421384, "grad_norm": 2.2240480876262256, "learning_rate": 4.325944103374652e-07, "loss": 1.9335, "step": 562 }, { "epoch": 0.8852201257861635, "grad_norm": 2.0812109009351483, "learning_rate": 4.3172755554083857e-07, "loss": 1.6568, "step": 563 }, { "epoch": 0.8867924528301887, "grad_norm": 2.419224994100306, "learning_rate": 4.308601701752178e-07, "loss": 1.8096, "step": 564 }, { "epoch": 0.8883647798742138, "grad_norm": 2.291019726545779, "learning_rate": 4.299922598716017e-07, "loss": 1.6433, "step": 565 }, { "epoch": 0.889937106918239, "grad_norm": 2.1682347425403634, "learning_rate": 4.2912383026439746e-07, "loss": 1.7372, "step": 566 }, { "epoch": 0.8915094339622641, "grad_norm": 2.2978680672025713, "learning_rate": 4.2825488699138295e-07, "loss": 1.8202, "step": 567 }, { "epoch": 0.8930817610062893, "grad_norm": 2.094968531097701, "learning_rate": 4.2738543569367104e-07, "loss": 1.6626, "step": 568 }, { "epoch": 0.8946540880503144, "grad_norm": 2.052297146112, "learning_rate": 4.2651548201567244e-07, "loss": 1.7142, "step": 569 }, { "epoch": 0.8962264150943396, "grad_norm": 2.318005287614226, "learning_rate": 4.2564503160505965e-07, "loss": 1.7043, "step": 570 }, { "epoch": 0.8977987421383647, "grad_norm": 2.2399641315697876, "learning_rate": 4.2477409011272947e-07, "loss": 1.7997, "step": 571 }, { "epoch": 0.89937106918239, "grad_norm": 2.2279042572355157, "learning_rate": 4.239026631927671e-07, "loss": 1.8412, "step": 572 }, { "epoch": 0.9009433962264151, "grad_norm": 2.2532345047567843, "learning_rate": 4.2303075650240874e-07, "loss": 1.7016, "step": 573 }, { "epoch": 0.9025157232704403, "grad_norm": 2.0723779334034917, "learning_rate": 4.221583757020058e-07, "loss": 1.7483, "step": 574 }, { "epoch": 0.9040880503144654, "grad_norm": 2.1869209764537527, "learning_rate": 4.21285526454987e-07, "loss": 1.7919, "step": 575 }, { "epoch": 0.9056603773584906, "grad_norm": 2.2496647478161544, "learning_rate": 4.2041221442782234e-07, "loss": 1.714, "step": 576 }, { "epoch": 0.9072327044025157, "grad_norm": 2.068857000999971, "learning_rate": 4.195384452899863e-07, "loss": 1.8463, "step": 577 }, { "epoch": 0.9088050314465409, "grad_norm": 2.0359248854430034, "learning_rate": 4.1866422471392097e-07, "loss": 1.7426, "step": 578 }, { "epoch": 0.910377358490566, "grad_norm": 2.228425159773193, "learning_rate": 4.1778955837499877e-07, "loss": 2.0427, "step": 579 }, { "epoch": 0.9119496855345912, "grad_norm": 2.284187655683882, "learning_rate": 4.1691445195148624e-07, "loss": 1.847, "step": 580 }, { "epoch": 0.9135220125786163, "grad_norm": 2.1771730650916448, "learning_rate": 4.1603891112450694e-07, "loss": 1.9315, "step": 581 }, { "epoch": 0.9150943396226415, "grad_norm": 2.1626615626582293, "learning_rate": 4.151629415780043e-07, "loss": 1.6456, "step": 582 }, { "epoch": 0.9166666666666666, "grad_norm": 2.305840039876449, "learning_rate": 4.142865489987052e-07, "loss": 1.6958, "step": 583 }, { "epoch": 0.9182389937106918, "grad_norm": 2.154256346851341, "learning_rate": 4.1340973907608293e-07, "loss": 2.0085, "step": 584 }, { "epoch": 0.9198113207547169, "grad_norm": 2.2041638948692537, "learning_rate": 4.125325175023197e-07, "loss": 2.1236, "step": 585 }, { "epoch": 0.9213836477987422, "grad_norm": 2.103411483270433, "learning_rate": 4.116548899722706e-07, "loss": 1.7799, "step": 586 }, { "epoch": 0.9229559748427673, "grad_norm": 2.0063388038701904, "learning_rate": 4.1077686218342565e-07, "loss": 1.6889, "step": 587 }, { "epoch": 0.9245283018867925, "grad_norm": 2.175245741274736, "learning_rate": 4.098984398358738e-07, "loss": 1.6925, "step": 588 }, { "epoch": 0.9261006289308176, "grad_norm": 2.0336428356844634, "learning_rate": 4.090196286322654e-07, "loss": 1.8607, "step": 589 }, { "epoch": 0.9276729559748428, "grad_norm": 2.037205174603859, "learning_rate": 4.0814043427777475e-07, "loss": 1.9117, "step": 590 }, { "epoch": 0.9292452830188679, "grad_norm": 2.30170118004087, "learning_rate": 4.072608624800641e-07, "loss": 1.9196, "step": 591 }, { "epoch": 0.9308176100628931, "grad_norm": 2.1986833773630816, "learning_rate": 4.063809189492457e-07, "loss": 2.1265, "step": 592 }, { "epoch": 0.9323899371069182, "grad_norm": 2.1016581569566033, "learning_rate": 4.055006093978452e-07, "loss": 1.8248, "step": 593 }, { "epoch": 0.9339622641509434, "grad_norm": 2.318998947765389, "learning_rate": 4.0461993954076433e-07, "loss": 1.9183, "step": 594 }, { "epoch": 0.9355345911949685, "grad_norm": 2.1280276120226396, "learning_rate": 4.037389150952441e-07, "loss": 1.5676, "step": 595 }, { "epoch": 0.9371069182389937, "grad_norm": 2.074586892066852, "learning_rate": 4.028575417808272e-07, "loss": 1.5338, "step": 596 }, { "epoch": 0.9386792452830188, "grad_norm": 2.0027829031033106, "learning_rate": 4.019758253193213e-07, "loss": 2.0435, "step": 597 }, { "epoch": 0.940251572327044, "grad_norm": 1.9483550382264585, "learning_rate": 4.010937714347617e-07, "loss": 1.8436, "step": 598 }, { "epoch": 0.9418238993710691, "grad_norm": 2.1386021253495495, "learning_rate": 4.002113858533744e-07, "loss": 1.8209, "step": 599 }, { "epoch": 0.9433962264150944, "grad_norm": 2.2603117565548883, "learning_rate": 3.9932867430353857e-07, "loss": 1.6182, "step": 600 }, { "epoch": 0.9449685534591195, "grad_norm": 2.1949498745608036, "learning_rate": 3.9844564251574946e-07, "loss": 1.7724, "step": 601 }, { "epoch": 0.9465408805031447, "grad_norm": 2.0756341905016047, "learning_rate": 3.9756229622258145e-07, "loss": 1.9824, "step": 602 }, { "epoch": 0.9481132075471698, "grad_norm": 2.211419527471263, "learning_rate": 3.9667864115865083e-07, "loss": 1.764, "step": 603 }, { "epoch": 0.949685534591195, "grad_norm": 1.9396778709562796, "learning_rate": 3.957946830605779e-07, "loss": 1.6183, "step": 604 }, { "epoch": 0.9512578616352201, "grad_norm": 2.036577606226067, "learning_rate": 3.949104276669506e-07, "loss": 1.9739, "step": 605 }, { "epoch": 0.9528301886792453, "grad_norm": 2.1523021934849496, "learning_rate": 3.940258807182871e-07, "loss": 1.8051, "step": 606 }, { "epoch": 0.9544025157232704, "grad_norm": 2.128010666946978, "learning_rate": 3.931410479569977e-07, "loss": 1.9472, "step": 607 }, { "epoch": 0.9559748427672956, "grad_norm": 2.101658911780381, "learning_rate": 3.9225593512734856e-07, "loss": 1.7936, "step": 608 }, { "epoch": 0.9575471698113207, "grad_norm": 2.0113616534057583, "learning_rate": 3.913705479754242e-07, "loss": 1.9101, "step": 609 }, { "epoch": 0.9591194968553459, "grad_norm": 2.047513346746328, "learning_rate": 3.9048489224908957e-07, "loss": 1.7896, "step": 610 }, { "epoch": 0.960691823899371, "grad_norm": 2.174472546503524, "learning_rate": 3.8959897369795354e-07, "loss": 1.8692, "step": 611 }, { "epoch": 0.9622641509433962, "grad_norm": 2.1246505746426827, "learning_rate": 3.8871279807333106e-07, "loss": 1.6824, "step": 612 }, { "epoch": 0.9638364779874213, "grad_norm": 2.1762242058390773, "learning_rate": 3.8782637112820597e-07, "loss": 1.9873, "step": 613 }, { "epoch": 0.9654088050314465, "grad_norm": 1.9373168302806898, "learning_rate": 3.8693969861719375e-07, "loss": 1.936, "step": 614 }, { "epoch": 0.9669811320754716, "grad_norm": 2.255844762606453, "learning_rate": 3.8605278629650403e-07, "loss": 1.7877, "step": 615 }, { "epoch": 0.9685534591194969, "grad_norm": 2.108356173918864, "learning_rate": 3.851656399239032e-07, "loss": 1.8975, "step": 616 }, { "epoch": 0.970125786163522, "grad_norm": 2.2742019781049225, "learning_rate": 3.8427826525867735e-07, "loss": 1.6503, "step": 617 }, { "epoch": 0.9716981132075472, "grad_norm": 2.2147090179932873, "learning_rate": 3.8339066806159426e-07, "loss": 1.8588, "step": 618 }, { "epoch": 0.9732704402515723, "grad_norm": 2.2344073830055646, "learning_rate": 3.825028540948665e-07, "loss": 2.0376, "step": 619 }, { "epoch": 0.9748427672955975, "grad_norm": 2.068870035023368, "learning_rate": 3.8161482912211413e-07, "loss": 1.8621, "step": 620 }, { "epoch": 0.9764150943396226, "grad_norm": 2.0716828751213687, "learning_rate": 3.807265989083269e-07, "loss": 1.9676, "step": 621 }, { "epoch": 0.9779874213836478, "grad_norm": 2.173084402473959, "learning_rate": 3.798381692198266e-07, "loss": 1.5456, "step": 622 }, { "epoch": 0.9795597484276729, "grad_norm": 2.123085754997638, "learning_rate": 3.7894954582423053e-07, "loss": 1.693, "step": 623 }, { "epoch": 0.9811320754716981, "grad_norm": 1.9313674111763137, "learning_rate": 3.780607344904134e-07, "loss": 1.8215, "step": 624 }, { "epoch": 0.9827044025157232, "grad_norm": 1.9446815492696663, "learning_rate": 3.7717174098846976e-07, "loss": 1.6901, "step": 625 }, { "epoch": 0.9842767295597484, "grad_norm": 2.1738358620885614, "learning_rate": 3.76282571089677e-07, "loss": 1.7188, "step": 626 }, { "epoch": 0.9858490566037735, "grad_norm": 1.9579450319413363, "learning_rate": 3.753932305664576e-07, "loss": 1.8272, "step": 627 }, { "epoch": 0.9874213836477987, "grad_norm": 2.12572045051787, "learning_rate": 3.7450372519234174e-07, "loss": 1.742, "step": 628 }, { "epoch": 0.9889937106918238, "grad_norm": 2.191799672435851, "learning_rate": 3.736140607419297e-07, "loss": 1.5517, "step": 629 }, { "epoch": 0.9905660377358491, "grad_norm": 1.8922363202506955, "learning_rate": 3.7272424299085454e-07, "loss": 1.8464, "step": 630 }, { "epoch": 0.9921383647798742, "grad_norm": 2.179698661804858, "learning_rate": 3.7183427771574476e-07, "loss": 1.6617, "step": 631 }, { "epoch": 0.9937106918238994, "grad_norm": 2.2771710047319074, "learning_rate": 3.7094417069418617e-07, "loss": 2.0336, "step": 632 }, { "epoch": 0.9952830188679245, "grad_norm": 2.006071135340339, "learning_rate": 3.7005392770468494e-07, "loss": 1.5935, "step": 633 }, { "epoch": 0.9968553459119497, "grad_norm": 2.1112816678317468, "learning_rate": 3.691635545266301e-07, "loss": 1.9101, "step": 634 }, { "epoch": 0.9984276729559748, "grad_norm": 2.2992568121548644, "learning_rate": 3.682730569402557e-07, "loss": 1.7981, "step": 635 }, { "epoch": 1.0, "grad_norm": 2.0674766445976664, "learning_rate": 3.6738244072660335e-07, "loss": 1.8885, "step": 636 }, { "epoch": 1.001572327044025, "grad_norm": 2.0323458623421025, "learning_rate": 3.6649171166748505e-07, "loss": 1.7354, "step": 637 }, { "epoch": 1.0031446540880504, "grad_norm": 2.3263404881491803, "learning_rate": 3.656008755454452e-07, "loss": 1.8682, "step": 638 }, { "epoch": 1.0047169811320755, "grad_norm": 2.0374703234555405, "learning_rate": 3.647099381437232e-07, "loss": 1.8107, "step": 639 }, { "epoch": 1.0062893081761006, "grad_norm": 2.1124967616004855, "learning_rate": 3.6381890524621594e-07, "loss": 1.8595, "step": 640 }, { "epoch": 1.0078616352201257, "grad_norm": 1.9775535226958436, "learning_rate": 3.629277826374406e-07, "loss": 1.6904, "step": 641 }, { "epoch": 1.009433962264151, "grad_norm": 2.0203547455163844, "learning_rate": 3.6203657610249633e-07, "loss": 1.825, "step": 642 }, { "epoch": 1.0110062893081762, "grad_norm": 2.0758681388805025, "learning_rate": 3.6114529142702736e-07, "loss": 1.7352, "step": 643 }, { "epoch": 1.0125786163522013, "grad_norm": 2.0224811786868906, "learning_rate": 3.602539343971851e-07, "loss": 1.6538, "step": 644 }, { "epoch": 1.0141509433962264, "grad_norm": 2.2197437087910217, "learning_rate": 3.593625107995906e-07, "loss": 1.83, "step": 645 }, { "epoch": 1.0157232704402517, "grad_norm": 1.9889586059387878, "learning_rate": 3.5847102642129743e-07, "loss": 1.7737, "step": 646 }, { "epoch": 1.0172955974842768, "grad_norm": 2.19853180662544, "learning_rate": 3.575794870497533e-07, "loss": 1.8649, "step": 647 }, { "epoch": 1.0188679245283019, "grad_norm": 2.0550115124535187, "learning_rate": 3.566878984727632e-07, "loss": 1.6925, "step": 648 }, { "epoch": 1.020440251572327, "grad_norm": 2.2183115138318015, "learning_rate": 3.557962664784515e-07, "loss": 1.6351, "step": 649 }, { "epoch": 1.0220125786163523, "grad_norm": 1.9968891207031567, "learning_rate": 3.549045968552242e-07, "loss": 1.8444, "step": 650 }, { "epoch": 1.0235849056603774, "grad_norm": 2.0649732438966435, "learning_rate": 3.540128953917321e-07, "loss": 1.7249, "step": 651 }, { "epoch": 1.0251572327044025, "grad_norm": 2.137990641187568, "learning_rate": 3.531211678768324e-07, "loss": 1.8187, "step": 652 }, { "epoch": 1.0267295597484276, "grad_norm": 2.228866151054972, "learning_rate": 3.5222942009955113e-07, "loss": 1.6852, "step": 653 }, { "epoch": 1.028301886792453, "grad_norm": 2.0107405877299294, "learning_rate": 3.513376578490464e-07, "loss": 1.8454, "step": 654 }, { "epoch": 1.029874213836478, "grad_norm": 2.0486309547186834, "learning_rate": 3.5044588691457e-07, "loss": 1.9823, "step": 655 }, { "epoch": 1.0314465408805031, "grad_norm": 2.1373140135446995, "learning_rate": 3.4955411308543004e-07, "loss": 1.5547, "step": 656 }, { "epoch": 1.0330188679245282, "grad_norm": 1.9722852672562703, "learning_rate": 3.486623421509536e-07, "loss": 1.6565, "step": 657 }, { "epoch": 1.0345911949685536, "grad_norm": 2.071392217463609, "learning_rate": 3.477705799004489e-07, "loss": 1.6954, "step": 658 }, { "epoch": 1.0361635220125787, "grad_norm": 2.1424736101079196, "learning_rate": 3.4687883212316763e-07, "loss": 1.6984, "step": 659 }, { "epoch": 1.0377358490566038, "grad_norm": 2.17329780648581, "learning_rate": 3.459871046082678e-07, "loss": 1.733, "step": 660 }, { "epoch": 1.0393081761006289, "grad_norm": 2.0114875931787335, "learning_rate": 3.4509540314477576e-07, "loss": 1.7247, "step": 661 }, { "epoch": 1.0408805031446542, "grad_norm": 2.1612750227336828, "learning_rate": 3.442037335215486e-07, "loss": 1.8257, "step": 662 }, { "epoch": 1.0424528301886793, "grad_norm": 2.09654475278469, "learning_rate": 3.433121015272368e-07, "loss": 1.6016, "step": 663 }, { "epoch": 1.0440251572327044, "grad_norm": 2.112814527327424, "learning_rate": 3.4242051295024674e-07, "loss": 1.8498, "step": 664 }, { "epoch": 1.0455974842767295, "grad_norm": 2.137137873151086, "learning_rate": 3.4152897357870254e-07, "loss": 1.9413, "step": 665 }, { "epoch": 1.0471698113207548, "grad_norm": 1.9781507699983039, "learning_rate": 3.406374892004093e-07, "loss": 2.0358, "step": 666 }, { "epoch": 1.04874213836478, "grad_norm": 2.1445450846607037, "learning_rate": 3.39746065602815e-07, "loss": 1.5967, "step": 667 }, { "epoch": 1.050314465408805, "grad_norm": 2.2106487525905156, "learning_rate": 3.388547085729726e-07, "loss": 1.6929, "step": 668 }, { "epoch": 1.0518867924528301, "grad_norm": 2.1632446260742157, "learning_rate": 3.379634238975036e-07, "loss": 1.9754, "step": 669 }, { "epoch": 1.0534591194968554, "grad_norm": 2.2423152444572385, "learning_rate": 3.370722173625594e-07, "loss": 1.8086, "step": 670 }, { "epoch": 1.0550314465408805, "grad_norm": 2.086454515712129, "learning_rate": 3.36181094753784e-07, "loss": 1.7976, "step": 671 }, { "epoch": 1.0566037735849056, "grad_norm": 2.338596181941415, "learning_rate": 3.3529006185627677e-07, "loss": 1.8634, "step": 672 }, { "epoch": 1.0581761006289307, "grad_norm": 2.2541670873895265, "learning_rate": 3.343991244545549e-07, "loss": 1.6529, "step": 673 }, { "epoch": 1.059748427672956, "grad_norm": 2.2022765709519017, "learning_rate": 3.3350828833251497e-07, "loss": 1.9213, "step": 674 }, { "epoch": 1.0613207547169812, "grad_norm": 2.033074601173319, "learning_rate": 3.326175592733966e-07, "loss": 1.8121, "step": 675 }, { "epoch": 1.0628930817610063, "grad_norm": 2.0295160070279077, "learning_rate": 3.3172694305974435e-07, "loss": 1.8344, "step": 676 }, { "epoch": 1.0644654088050314, "grad_norm": 2.3057324232200007, "learning_rate": 3.308364454733699e-07, "loss": 1.6693, "step": 677 }, { "epoch": 1.0660377358490567, "grad_norm": 2.078695505755079, "learning_rate": 3.29946072295315e-07, "loss": 1.6955, "step": 678 }, { "epoch": 1.0676100628930818, "grad_norm": 2.2250857375132016, "learning_rate": 3.290558293058139e-07, "loss": 1.775, "step": 679 }, { "epoch": 1.069182389937107, "grad_norm": 1.9629837170183118, "learning_rate": 3.2816572228425526e-07, "loss": 1.5546, "step": 680 }, { "epoch": 1.070754716981132, "grad_norm": 2.124163961606303, "learning_rate": 3.272757570091453e-07, "loss": 1.6283, "step": 681 }, { "epoch": 1.0723270440251573, "grad_norm": 1.9918668591962903, "learning_rate": 3.2638593925807033e-07, "loss": 1.72, "step": 682 }, { "epoch": 1.0738993710691824, "grad_norm": 2.264977591391123, "learning_rate": 3.2549627480765834e-07, "loss": 1.8045, "step": 683 }, { "epoch": 1.0754716981132075, "grad_norm": 2.02537032266525, "learning_rate": 3.246067694335424e-07, "loss": 1.8902, "step": 684 }, { "epoch": 1.0770440251572326, "grad_norm": 2.4603365936273995, "learning_rate": 3.23717428910323e-07, "loss": 1.6676, "step": 685 }, { "epoch": 1.078616352201258, "grad_norm": 2.2248140600403277, "learning_rate": 3.2282825901153026e-07, "loss": 1.7778, "step": 686 }, { "epoch": 1.080188679245283, "grad_norm": 2.115407517557494, "learning_rate": 3.219392655095866e-07, "loss": 1.7059, "step": 687 }, { "epoch": 1.0817610062893082, "grad_norm": 2.1771102044112136, "learning_rate": 3.2105045417576954e-07, "loss": 1.7673, "step": 688 }, { "epoch": 1.0833333333333333, "grad_norm": 2.1464674623014672, "learning_rate": 3.2016183078017346e-07, "loss": 1.8022, "step": 689 }, { "epoch": 1.0849056603773586, "grad_norm": 2.0997445957592307, "learning_rate": 3.192734010916732e-07, "loss": 1.8693, "step": 690 }, { "epoch": 1.0864779874213837, "grad_norm": 2.094025022111356, "learning_rate": 3.1838517087788595e-07, "loss": 1.7098, "step": 691 }, { "epoch": 1.0880503144654088, "grad_norm": 2.070698100046358, "learning_rate": 3.1749714590513353e-07, "loss": 1.8247, "step": 692 }, { "epoch": 1.0896226415094339, "grad_norm": 2.062398740678803, "learning_rate": 3.166093319384057e-07, "loss": 1.5966, "step": 693 }, { "epoch": 1.0911949685534592, "grad_norm": 2.087955408161737, "learning_rate": 3.157217347413226e-07, "loss": 1.7022, "step": 694 }, { "epoch": 1.0927672955974843, "grad_norm": 2.1423902860765303, "learning_rate": 3.1483436007609676e-07, "loss": 1.847, "step": 695 }, { "epoch": 1.0943396226415094, "grad_norm": 2.115585692766892, "learning_rate": 3.13947213703496e-07, "loss": 1.7229, "step": 696 }, { "epoch": 1.0959119496855345, "grad_norm": 2.064012528421271, "learning_rate": 3.1306030138280617e-07, "loss": 1.7403, "step": 697 }, { "epoch": 1.0974842767295598, "grad_norm": 2.2178418206871275, "learning_rate": 3.1217362887179405e-07, "loss": 1.7121, "step": 698 }, { "epoch": 1.099056603773585, "grad_norm": 2.267368653411109, "learning_rate": 3.1128720192666896e-07, "loss": 1.6441, "step": 699 }, { "epoch": 1.10062893081761, "grad_norm": 2.0531341543445674, "learning_rate": 3.1040102630204643e-07, "loss": 1.8517, "step": 700 }, { "epoch": 1.1022012578616351, "grad_norm": 2.090184163075423, "learning_rate": 3.0951510775091045e-07, "loss": 1.7339, "step": 701 }, { "epoch": 1.1037735849056605, "grad_norm": 2.271451855248717, "learning_rate": 3.086294520245758e-07, "loss": 1.6841, "step": 702 }, { "epoch": 1.1053459119496856, "grad_norm": 2.1230777563905647, "learning_rate": 3.0774406487265135e-07, "loss": 1.6153, "step": 703 }, { "epoch": 1.1069182389937107, "grad_norm": 2.129575845582385, "learning_rate": 3.0685895204300237e-07, "loss": 1.8306, "step": 704 }, { "epoch": 1.1084905660377358, "grad_norm": 2.127822835214652, "learning_rate": 3.0597411928171293e-07, "loss": 1.8781, "step": 705 }, { "epoch": 1.110062893081761, "grad_norm": 2.0801378059307445, "learning_rate": 3.0508957233304925e-07, "loss": 1.7194, "step": 706 }, { "epoch": 1.1116352201257862, "grad_norm": 2.0975124821955724, "learning_rate": 3.042053169394221e-07, "loss": 1.713, "step": 707 }, { "epoch": 1.1132075471698113, "grad_norm": 1.9540390287342302, "learning_rate": 3.033213588413492e-07, "loss": 1.8067, "step": 708 }, { "epoch": 1.1147798742138364, "grad_norm": 2.1885065328152775, "learning_rate": 3.0243770377741847e-07, "loss": 1.6779, "step": 709 }, { "epoch": 1.1163522012578617, "grad_norm": 2.016792783037323, "learning_rate": 3.0155435748425056e-07, "loss": 1.8584, "step": 710 }, { "epoch": 1.1179245283018868, "grad_norm": 2.0532056644149304, "learning_rate": 3.006713256964614e-07, "loss": 1.7892, "step": 711 }, { "epoch": 1.119496855345912, "grad_norm": 2.1717833645402522, "learning_rate": 2.9978861414662555e-07, "loss": 1.7875, "step": 712 }, { "epoch": 1.121069182389937, "grad_norm": 2.329982527967294, "learning_rate": 2.989062285652383e-07, "loss": 1.716, "step": 713 }, { "epoch": 1.1226415094339623, "grad_norm": 2.102564151366302, "learning_rate": 2.9802417468067866e-07, "loss": 1.725, "step": 714 }, { "epoch": 1.1242138364779874, "grad_norm": 2.005243042670392, "learning_rate": 2.9714245821917273e-07, "loss": 1.694, "step": 715 }, { "epoch": 1.1257861635220126, "grad_norm": 2.209440331648405, "learning_rate": 2.9626108490475596e-07, "loss": 1.7861, "step": 716 }, { "epoch": 1.1273584905660377, "grad_norm": 2.161682497833542, "learning_rate": 2.9538006045923564e-07, "loss": 1.9542, "step": 717 }, { "epoch": 1.128930817610063, "grad_norm": 2.0410927204511093, "learning_rate": 2.944993906021547e-07, "loss": 2.0458, "step": 718 }, { "epoch": 1.130503144654088, "grad_norm": 2.2980810899615585, "learning_rate": 2.936190810507544e-07, "loss": 1.6057, "step": 719 }, { "epoch": 1.1320754716981132, "grad_norm": 2.1147420056792545, "learning_rate": 2.92739137519936e-07, "loss": 1.7409, "step": 720 }, { "epoch": 1.1336477987421383, "grad_norm": 2.2130777267174326, "learning_rate": 2.9185956572222527e-07, "loss": 1.7651, "step": 721 }, { "epoch": 1.1352201257861636, "grad_norm": 2.2140788506158184, "learning_rate": 2.9098037136773475e-07, "loss": 1.7143, "step": 722 }, { "epoch": 1.1367924528301887, "grad_norm": 2.1122237127505117, "learning_rate": 2.9010156016412616e-07, "loss": 1.8633, "step": 723 }, { "epoch": 1.1383647798742138, "grad_norm": 1.8051995194002572, "learning_rate": 2.8922313781657437e-07, "loss": 1.7253, "step": 724 }, { "epoch": 1.139937106918239, "grad_norm": 2.078502586781223, "learning_rate": 2.8834511002772954e-07, "loss": 1.7383, "step": 725 }, { "epoch": 1.1415094339622642, "grad_norm": 2.0343412709026163, "learning_rate": 2.8746748249768034e-07, "loss": 1.6715, "step": 726 }, { "epoch": 1.1430817610062893, "grad_norm": 2.3354289699172415, "learning_rate": 2.865902609239171e-07, "loss": 1.5695, "step": 727 }, { "epoch": 1.1446540880503144, "grad_norm": 2.012287905864566, "learning_rate": 2.8571345100129475e-07, "loss": 1.5644, "step": 728 }, { "epoch": 1.1462264150943395, "grad_norm": 2.1335991982813343, "learning_rate": 2.848370584219957e-07, "loss": 1.7191, "step": 729 }, { "epoch": 1.1477987421383649, "grad_norm": 2.042910279341824, "learning_rate": 2.839610888754931e-07, "loss": 1.8231, "step": 730 }, { "epoch": 1.14937106918239, "grad_norm": 2.104058986286464, "learning_rate": 2.8308554804851373e-07, "loss": 1.9903, "step": 731 }, { "epoch": 1.150943396226415, "grad_norm": 2.2528134865633334, "learning_rate": 2.8221044162500126e-07, "loss": 1.618, "step": 732 }, { "epoch": 1.1525157232704402, "grad_norm": 2.169090932499384, "learning_rate": 2.81335775286079e-07, "loss": 1.8708, "step": 733 }, { "epoch": 1.1540880503144655, "grad_norm": 2.129509499660428, "learning_rate": 2.804615547100136e-07, "loss": 1.8978, "step": 734 }, { "epoch": 1.1556603773584906, "grad_norm": 2.1659278567954, "learning_rate": 2.795877855721777e-07, "loss": 1.5827, "step": 735 }, { "epoch": 1.1572327044025157, "grad_norm": 2.1781926760780324, "learning_rate": 2.7871447354501304e-07, "loss": 2.1652, "step": 736 }, { "epoch": 1.1588050314465408, "grad_norm": 2.281659140467624, "learning_rate": 2.7784162429799415e-07, "loss": 1.68, "step": 737 }, { "epoch": 1.1603773584905661, "grad_norm": 2.379387037873376, "learning_rate": 2.7696924349759123e-07, "loss": 1.6087, "step": 738 }, { "epoch": 1.1619496855345912, "grad_norm": 2.0112044543536616, "learning_rate": 2.7609733680723295e-07, "loss": 1.5596, "step": 739 }, { "epoch": 1.1635220125786163, "grad_norm": 2.053191724834195, "learning_rate": 2.7522590988727045e-07, "loss": 1.8064, "step": 740 }, { "epoch": 1.1650943396226414, "grad_norm": 2.0861593681907804, "learning_rate": 2.743549683949404e-07, "loss": 1.7197, "step": 741 }, { "epoch": 1.1666666666666667, "grad_norm": 2.139575160657706, "learning_rate": 2.734845179843275e-07, "loss": 1.7399, "step": 742 }, { "epoch": 1.1682389937106918, "grad_norm": 2.020388423393523, "learning_rate": 2.726145643063289e-07, "loss": 1.9182, "step": 743 }, { "epoch": 1.169811320754717, "grad_norm": 2.0021432569973623, "learning_rate": 2.717451130086171e-07, "loss": 1.8025, "step": 744 }, { "epoch": 1.171383647798742, "grad_norm": 2.166154517705087, "learning_rate": 2.7087616973560256e-07, "loss": 1.76, "step": 745 }, { "epoch": 1.1729559748427674, "grad_norm": 2.172540185241632, "learning_rate": 2.7000774012839826e-07, "loss": 1.739, "step": 746 }, { "epoch": 1.1745283018867925, "grad_norm": 2.1123768460041568, "learning_rate": 2.6913982982478235e-07, "loss": 1.9205, "step": 747 }, { "epoch": 1.1761006289308176, "grad_norm": 2.064019777362976, "learning_rate": 2.6827244445916145e-07, "loss": 1.6711, "step": 748 }, { "epoch": 1.1776729559748427, "grad_norm": 2.4454300470632706, "learning_rate": 2.6740558966253464e-07, "loss": 1.9951, "step": 749 }, { "epoch": 1.179245283018868, "grad_norm": 2.202888650923341, "learning_rate": 2.665392710624566e-07, "loss": 1.8166, "step": 750 }, { "epoch": 1.179245283018868, "eval_sat2_MCTS_chains_SFT_val_loss": 1.7027679681777954, "eval_sat2_MCTS_chains_SFT_val_runtime": 91.7741, "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.201, "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406, "step": 750 }, { "epoch": 1.180817610062893, "grad_norm": 2.1197579207922765, "learning_rate": 2.656734942830008e-07, "loss": 1.789, "step": 751 }, { "epoch": 1.1823899371069182, "grad_norm": 2.0955019518228646, "learning_rate": 2.648082649447234e-07, "loss": 1.8041, "step": 752 }, { "epoch": 1.1839622641509433, "grad_norm": 2.286056725260083, "learning_rate": 2.639435886646267e-07, "loss": 1.9274, "step": 753 }, { "epoch": 1.1855345911949686, "grad_norm": 2.078238808064587, "learning_rate": 2.630794710561223e-07, "loss": 1.841, "step": 754 }, { "epoch": 1.1871069182389937, "grad_norm": 2.0466496074566414, "learning_rate": 2.622159177289953e-07, "loss": 1.8408, "step": 755 }, { "epoch": 1.1886792452830188, "grad_norm": 2.0829604905191603, "learning_rate": 2.6135293428936736e-07, "loss": 1.7203, "step": 756 }, { "epoch": 1.190251572327044, "grad_norm": 1.9719112678002333, "learning_rate": 2.604905263396604e-07, "loss": 1.8256, "step": 757 }, { "epoch": 1.1918238993710693, "grad_norm": 2.212861952610994, "learning_rate": 2.596286994785605e-07, "loss": 1.7089, "step": 758 }, { "epoch": 1.1933962264150944, "grad_norm": 2.1479617774562176, "learning_rate": 2.5876745930098133e-07, "loss": 1.788, "step": 759 }, { "epoch": 1.1949685534591195, "grad_norm": 2.180325040687691, "learning_rate": 2.5790681139802733e-07, "loss": 1.8424, "step": 760 }, { "epoch": 1.1965408805031448, "grad_norm": 2.0959805506163347, "learning_rate": 2.570467613569588e-07, "loss": 1.9297, "step": 761 }, { "epoch": 1.1981132075471699, "grad_norm": 2.2360792245611174, "learning_rate": 2.5618731476115436e-07, "loss": 1.7221, "step": 762 }, { "epoch": 1.199685534591195, "grad_norm": 2.1402022569964885, "learning_rate": 2.553284771900748e-07, "loss": 1.7152, "step": 763 }, { "epoch": 1.20125786163522, "grad_norm": 2.347076356695893, "learning_rate": 2.544702542192276e-07, "loss": 1.9218, "step": 764 }, { "epoch": 1.2028301886792452, "grad_norm": 2.1235602631273194, "learning_rate": 2.5361265142013034e-07, "loss": 1.7872, "step": 765 }, { "epoch": 1.2044025157232705, "grad_norm": 2.178942765064468, "learning_rate": 2.527556743602741e-07, "loss": 1.7043, "step": 766 }, { "epoch": 1.2059748427672956, "grad_norm": 2.180716726403436, "learning_rate": 2.5189932860308796e-07, "loss": 1.7261, "step": 767 }, { "epoch": 1.2075471698113207, "grad_norm": 2.1659838220822016, "learning_rate": 2.510436197079026e-07, "loss": 1.5518, "step": 768 }, { "epoch": 1.209119496855346, "grad_norm": 2.0823803015545144, "learning_rate": 2.501885532299145e-07, "loss": 1.6306, "step": 769 }, { "epoch": 1.2106918238993711, "grad_norm": 2.0975682440855237, "learning_rate": 2.4933413472014904e-07, "loss": 1.7477, "step": 770 }, { "epoch": 1.2122641509433962, "grad_norm": 2.3304268703171895, "learning_rate": 2.4848036972542535e-07, "loss": 1.9249, "step": 771 }, { "epoch": 1.2138364779874213, "grad_norm": 2.158526522953037, "learning_rate": 2.476272637883203e-07, "loss": 1.9818, "step": 772 }, { "epoch": 1.2154088050314464, "grad_norm": 2.4996434803308887, "learning_rate": 2.467748224471316e-07, "loss": 1.8969, "step": 773 }, { "epoch": 1.2169811320754718, "grad_norm": 2.124488199922616, "learning_rate": 2.4592305123584285e-07, "loss": 1.6033, "step": 774 }, { "epoch": 1.2185534591194969, "grad_norm": 2.3781806613712124, "learning_rate": 2.4507195568408727e-07, "loss": 1.8226, "step": 775 }, { "epoch": 1.220125786163522, "grad_norm": 2.021004570791569, "learning_rate": 2.4422154131711143e-07, "loss": 1.7341, "step": 776 }, { "epoch": 1.2216981132075473, "grad_norm": 2.1594846096039277, "learning_rate": 2.4337181365574e-07, "loss": 1.9486, "step": 777 }, { "epoch": 1.2232704402515724, "grad_norm": 2.1845057975898987, "learning_rate": 2.4252277821633946e-07, "loss": 1.543, "step": 778 }, { "epoch": 1.2248427672955975, "grad_norm": 2.344507158866424, "learning_rate": 2.4167444051078245e-07, "loss": 1.8528, "step": 779 }, { "epoch": 1.2264150943396226, "grad_norm": 2.0236986373716004, "learning_rate": 2.40826806046412e-07, "loss": 1.7242, "step": 780 }, { "epoch": 1.2279874213836477, "grad_norm": 2.078201977490899, "learning_rate": 2.399798803260058e-07, "loss": 1.9902, "step": 781 }, { "epoch": 1.229559748427673, "grad_norm": 2.0183020857677727, "learning_rate": 2.3913366884774034e-07, "loss": 1.7074, "step": 782 }, { "epoch": 1.2311320754716981, "grad_norm": 2.1592329303327453, "learning_rate": 2.382881771051553e-07, "loss": 1.8272, "step": 783 }, { "epoch": 1.2327044025157232, "grad_norm": 1.920219447662245, "learning_rate": 2.3744341058711808e-07, "loss": 1.9844, "step": 784 }, { "epoch": 1.2342767295597485, "grad_norm": 2.210906315130372, "learning_rate": 2.3659937477778755e-07, "loss": 1.7823, "step": 785 }, { "epoch": 1.2358490566037736, "grad_norm": 2.1278759914686547, "learning_rate": 2.3575607515657943e-07, "loss": 1.9001, "step": 786 }, { "epoch": 1.2374213836477987, "grad_norm": 2.1057924683593314, "learning_rate": 2.3491351719812993e-07, "loss": 1.6949, "step": 787 }, { "epoch": 1.2389937106918238, "grad_norm": 2.0673174239605117, "learning_rate": 2.3407170637226013e-07, "loss": 1.8988, "step": 788 }, { "epoch": 1.240566037735849, "grad_norm": 1.9808848675265713, "learning_rate": 2.332306481439411e-07, "loss": 2.0551, "step": 789 }, { "epoch": 1.2421383647798743, "grad_norm": 2.1946700318779704, "learning_rate": 2.3239034797325837e-07, "loss": 1.8481, "step": 790 }, { "epoch": 1.2437106918238994, "grad_norm": 2.145296214864516, "learning_rate": 2.3155081131537557e-07, "loss": 1.601, "step": 791 }, { "epoch": 1.2452830188679245, "grad_norm": 2.21099564914462, "learning_rate": 2.3071204362050016e-07, "loss": 1.6916, "step": 792 }, { "epoch": 1.2468553459119498, "grad_norm": 2.149817455185584, "learning_rate": 2.298740503338474e-07, "loss": 1.7328, "step": 793 }, { "epoch": 1.248427672955975, "grad_norm": 2.083870636996782, "learning_rate": 2.290368368956051e-07, "loss": 1.7316, "step": 794 }, { "epoch": 1.25, "grad_norm": 2.2190350027051884, "learning_rate": 2.2820040874089833e-07, "loss": 1.7335, "step": 795 }, { "epoch": 1.251572327044025, "grad_norm": 2.0847463644872732, "learning_rate": 2.2736477129975447e-07, "loss": 1.862, "step": 796 }, { "epoch": 1.2531446540880502, "grad_norm": 2.1940464195440397, "learning_rate": 2.2652992999706714e-07, "loss": 1.909, "step": 797 }, { "epoch": 1.2547169811320755, "grad_norm": 2.0762267573536723, "learning_rate": 2.2569589025256186e-07, "loss": 1.7823, "step": 798 }, { "epoch": 1.2562893081761006, "grad_norm": 2.2023094376465124, "learning_rate": 2.248626574807605e-07, "loss": 1.7458, "step": 799 }, { "epoch": 1.2578616352201257, "grad_norm": 2.093984864757531, "learning_rate": 2.2403023709094586e-07, "loss": 1.7313, "step": 800 }, { "epoch": 1.259433962264151, "grad_norm": 2.092496952956971, "learning_rate": 2.2319863448712701e-07, "loss": 1.6546, "step": 801 }, { "epoch": 1.2610062893081762, "grad_norm": 2.000639985772567, "learning_rate": 2.2236785506800412e-07, "loss": 1.7073, "step": 802 }, { "epoch": 1.2625786163522013, "grad_norm": 1.948987350292626, "learning_rate": 2.21537904226933e-07, "loss": 2.023, "step": 803 }, { "epoch": 1.2641509433962264, "grad_norm": 2.112242215195991, "learning_rate": 2.2070878735189064e-07, "loss": 1.7933, "step": 804 }, { "epoch": 1.2657232704402515, "grad_norm": 2.055178325234487, "learning_rate": 2.1988050982543993e-07, "loss": 1.8308, "step": 805 }, { "epoch": 1.2672955974842768, "grad_norm": 2.169844090849057, "learning_rate": 2.190530770246948e-07, "loss": 1.7516, "step": 806 }, { "epoch": 1.2688679245283019, "grad_norm": 2.0219060313319077, "learning_rate": 2.1822649432128516e-07, "loss": 1.5981, "step": 807 }, { "epoch": 1.270440251572327, "grad_norm": 2.201206530110713, "learning_rate": 2.1740076708132223e-07, "loss": 1.8104, "step": 808 }, { "epoch": 1.2720125786163523, "grad_norm": 2.1652601552896447, "learning_rate": 2.165759006653639e-07, "loss": 1.7986, "step": 809 }, { "epoch": 1.2735849056603774, "grad_norm": 2.0415201426832197, "learning_rate": 2.1575190042837886e-07, "loss": 1.7244, "step": 810 }, { "epoch": 1.2751572327044025, "grad_norm": 2.1143245858070174, "learning_rate": 2.1492877171971362e-07, "loss": 1.4419, "step": 811 }, { "epoch": 1.2767295597484276, "grad_norm": 2.0485144393529806, "learning_rate": 2.141065198830563e-07, "loss": 1.7491, "step": 812 }, { "epoch": 1.2783018867924527, "grad_norm": 1.8955436328860988, "learning_rate": 2.1328515025640226e-07, "loss": 1.8095, "step": 813 }, { "epoch": 1.279874213836478, "grad_norm": 2.2901583680374036, "learning_rate": 2.124646681720199e-07, "loss": 1.6575, "step": 814 }, { "epoch": 1.2814465408805031, "grad_norm": 2.275548594033002, "learning_rate": 2.116450789564159e-07, "loss": 1.6958, "step": 815 }, { "epoch": 1.2830188679245282, "grad_norm": 2.3449017025523737, "learning_rate": 2.1082638793030008e-07, "loss": 1.5706, "step": 816 }, { "epoch": 1.2845911949685536, "grad_norm": 2.317328281003811, "learning_rate": 2.100086004085516e-07, "loss": 1.7318, "step": 817 }, { "epoch": 1.2861635220125787, "grad_norm": 2.226957187409235, "learning_rate": 2.0919172170018401e-07, "loss": 1.7858, "step": 818 }, { "epoch": 1.2877358490566038, "grad_norm": 2.117343302028277, "learning_rate": 2.0837575710831098e-07, "loss": 1.6472, "step": 819 }, { "epoch": 1.2893081761006289, "grad_norm": 2.2741664788753795, "learning_rate": 2.0756071193011176e-07, "loss": 2.1258, "step": 820 }, { "epoch": 1.290880503144654, "grad_norm": 1.9084045733401307, "learning_rate": 2.0674659145679692e-07, "loss": 1.7781, "step": 821 }, { "epoch": 1.2924528301886793, "grad_norm": 1.995611639444785, "learning_rate": 2.0593340097357373e-07, "loss": 1.8206, "step": 822 }, { "epoch": 1.2940251572327044, "grad_norm": 2.244777578569415, "learning_rate": 2.051211457596122e-07, "loss": 1.6349, "step": 823 }, { "epoch": 1.2955974842767295, "grad_norm": 2.167997702023013, "learning_rate": 2.043098310880107e-07, "loss": 1.8128, "step": 824 }, { "epoch": 1.2971698113207548, "grad_norm": 2.0751468176787387, "learning_rate": 2.034994622257615e-07, "loss": 1.7242, "step": 825 }, { "epoch": 1.29874213836478, "grad_norm": 2.0555412382564757, "learning_rate": 2.0269004443371673e-07, "loss": 1.6657, "step": 826 }, { "epoch": 1.300314465408805, "grad_norm": 2.161584510716646, "learning_rate": 2.018815829665546e-07, "loss": 1.6663, "step": 827 }, { "epoch": 1.3018867924528301, "grad_norm": 2.201251049412695, "learning_rate": 2.0107408307274428e-07, "loss": 1.6481, "step": 828 }, { "epoch": 1.3034591194968552, "grad_norm": 2.0149941921725962, "learning_rate": 2.0026754999451317e-07, "loss": 1.6969, "step": 829 }, { "epoch": 1.3050314465408805, "grad_norm": 2.242230337987245, "learning_rate": 1.9946198896781174e-07, "loss": 1.9959, "step": 830 }, { "epoch": 1.3066037735849056, "grad_norm": 2.15895547407779, "learning_rate": 1.986574052222802e-07, "loss": 1.7761, "step": 831 }, { "epoch": 1.3081761006289307, "grad_norm": 2.181310101596831, "learning_rate": 1.9785380398121416e-07, "loss": 1.6648, "step": 832 }, { "epoch": 1.309748427672956, "grad_norm": 1.9416146895135635, "learning_rate": 1.9705119046153114e-07, "loss": 1.7318, "step": 833 }, { "epoch": 1.3113207547169812, "grad_norm": 2.0663038260287467, "learning_rate": 1.9624956987373606e-07, "loss": 1.9148, "step": 834 }, { "epoch": 1.3128930817610063, "grad_norm": 2.293091774940269, "learning_rate": 1.9544894742188804e-07, "loss": 1.8369, "step": 835 }, { "epoch": 1.3144654088050314, "grad_norm": 2.3609086075602206, "learning_rate": 1.9464932830356648e-07, "loss": 1.7337, "step": 836 }, { "epoch": 1.3160377358490565, "grad_norm": 2.060684097800172, "learning_rate": 1.9385071770983697e-07, "loss": 1.5396, "step": 837 }, { "epoch": 1.3176100628930818, "grad_norm": 1.995153844021168, "learning_rate": 1.93053120825218e-07, "loss": 1.6491, "step": 838 }, { "epoch": 1.319182389937107, "grad_norm": 2.305659266839101, "learning_rate": 1.9225654282764733e-07, "loss": 1.8602, "step": 839 }, { "epoch": 1.320754716981132, "grad_norm": 1.9550425262609439, "learning_rate": 1.9146098888844752e-07, "loss": 1.7687, "step": 840 }, { "epoch": 1.3223270440251573, "grad_norm": 2.1752564296945143, "learning_rate": 1.9066646417229369e-07, "loss": 1.954, "step": 841 }, { "epoch": 1.3238993710691824, "grad_norm": 2.2819270079625387, "learning_rate": 1.8987297383717918e-07, "loss": 1.6462, "step": 842 }, { "epoch": 1.3254716981132075, "grad_norm": 2.160953368673478, "learning_rate": 1.8908052303438188e-07, "loss": 1.6413, "step": 843 }, { "epoch": 1.3270440251572326, "grad_norm": 2.0789012158742803, "learning_rate": 1.882891169084313e-07, "loss": 1.827, "step": 844 }, { "epoch": 1.3286163522012577, "grad_norm": 2.0716992608690448, "learning_rate": 1.8749876059707536e-07, "loss": 1.7414, "step": 845 }, { "epoch": 1.330188679245283, "grad_norm": 2.103884856783866, "learning_rate": 1.867094592312463e-07, "loss": 2.0534, "step": 846 }, { "epoch": 1.3317610062893082, "grad_norm": 2.4629953048647324, "learning_rate": 1.8592121793502755e-07, "loss": 1.7296, "step": 847 }, { "epoch": 1.3333333333333333, "grad_norm": 1.9458332362620918, "learning_rate": 1.8513404182562097e-07, "loss": 1.8213, "step": 848 }, { "epoch": 1.3349056603773586, "grad_norm": 2.1510489748789583, "learning_rate": 1.8434793601331336e-07, "loss": 1.8064, "step": 849 }, { "epoch": 1.3364779874213837, "grad_norm": 2.2594694003824567, "learning_rate": 1.8356290560144285e-07, "loss": 1.763, "step": 850 }, { "epoch": 1.3380503144654088, "grad_norm": 2.2650014550444606, "learning_rate": 1.8277895568636646e-07, "loss": 1.6519, "step": 851 }, { "epoch": 1.3396226415094339, "grad_norm": 2.279277485826785, "learning_rate": 1.8199609135742672e-07, "loss": 1.7316, "step": 852 }, { "epoch": 1.341194968553459, "grad_norm": 2.113275805679697, "learning_rate": 1.812143176969185e-07, "loss": 1.7905, "step": 853 }, { "epoch": 1.3427672955974843, "grad_norm": 1.9917783424168132, "learning_rate": 1.8043363978005617e-07, "loss": 1.7985, "step": 854 }, { "epoch": 1.3443396226415094, "grad_norm": 1.8965972111176195, "learning_rate": 1.7965406267494078e-07, "loss": 1.8407, "step": 855 }, { "epoch": 1.3459119496855345, "grad_norm": 2.2071694500169174, "learning_rate": 1.7887559144252658e-07, "loss": 1.7755, "step": 856 }, { "epoch": 1.3474842767295598, "grad_norm": 2.2472976512202, "learning_rate": 1.7809823113658896e-07, "loss": 1.8221, "step": 857 }, { "epoch": 1.349056603773585, "grad_norm": 2.0844792244919876, "learning_rate": 1.7732198680369107e-07, "loss": 1.8871, "step": 858 }, { "epoch": 1.35062893081761, "grad_norm": 2.027073391928856, "learning_rate": 1.765468634831514e-07, "loss": 1.7576, "step": 859 }, { "epoch": 1.3522012578616351, "grad_norm": 2.2937069673682906, "learning_rate": 1.757728662070108e-07, "loss": 1.6525, "step": 860 }, { "epoch": 1.3537735849056602, "grad_norm": 2.1305400050893972, "learning_rate": 1.7500000000000007e-07, "loss": 1.6292, "step": 861 }, { "epoch": 1.3553459119496856, "grad_norm": 2.093411160599093, "learning_rate": 1.7422826987950683e-07, "loss": 2.0316, "step": 862 }, { "epoch": 1.3569182389937107, "grad_norm": 2.053081781406045, "learning_rate": 1.7345768085554372e-07, "loss": 1.6936, "step": 863 }, { "epoch": 1.3584905660377358, "grad_norm": 2.1226077695084555, "learning_rate": 1.726882379307153e-07, "loss": 1.7328, "step": 864 }, { "epoch": 1.360062893081761, "grad_norm": 2.162295283463786, "learning_rate": 1.7191994610018574e-07, "loss": 1.75, "step": 865 }, { "epoch": 1.3616352201257862, "grad_norm": 2.2966135587303946, "learning_rate": 1.711528103516464e-07, "loss": 1.7858, "step": 866 }, { "epoch": 1.3632075471698113, "grad_norm": 2.13860142749209, "learning_rate": 1.703868356652837e-07, "loss": 1.9188, "step": 867 }, { "epoch": 1.3647798742138364, "grad_norm": 2.1291702490610054, "learning_rate": 1.6962202701374592e-07, "loss": 1.6769, "step": 868 }, { "epoch": 1.3663522012578615, "grad_norm": 2.0330576038157524, "learning_rate": 1.6885838936211206e-07, "loss": 1.7028, "step": 869 }, { "epoch": 1.3679245283018868, "grad_norm": 2.174657226535088, "learning_rate": 1.6809592766785903e-07, "loss": 1.7184, "step": 870 }, { "epoch": 1.369496855345912, "grad_norm": 2.1336668424018463, "learning_rate": 1.673346468808292e-07, "loss": 1.5666, "step": 871 }, { "epoch": 1.371069182389937, "grad_norm": 2.0664898739681403, "learning_rate": 1.6657455194319875e-07, "loss": 1.6633, "step": 872 }, { "epoch": 1.3726415094339623, "grad_norm": 2.07966395343747, "learning_rate": 1.6581564778944585e-07, "loss": 1.9728, "step": 873 }, { "epoch": 1.3742138364779874, "grad_norm": 2.0802230341676493, "learning_rate": 1.6505793934631743e-07, "loss": 1.806, "step": 874 }, { "epoch": 1.3757861635220126, "grad_norm": 2.0585446511098855, "learning_rate": 1.6430143153279843e-07, "loss": 1.7467, "step": 875 }, { "epoch": 1.3773584905660377, "grad_norm": 2.0670814111420457, "learning_rate": 1.6354612926007947e-07, "loss": 1.8074, "step": 876 }, { "epoch": 1.378930817610063, "grad_norm": 2.2448398015661977, "learning_rate": 1.6279203743152437e-07, "loss": 1.792, "step": 877 }, { "epoch": 1.380503144654088, "grad_norm": 2.1936825808205476, "learning_rate": 1.620391609426394e-07, "loss": 1.7086, "step": 878 }, { "epoch": 1.3820754716981132, "grad_norm": 2.0481925632804896, "learning_rate": 1.6128750468104068e-07, "loss": 1.6359, "step": 879 }, { "epoch": 1.3836477987421385, "grad_norm": 2.307548887007293, "learning_rate": 1.6053707352642275e-07, "loss": 1.8802, "step": 880 }, { "epoch": 1.3852201257861636, "grad_norm": 2.212178445396311, "learning_rate": 1.5978787235052684e-07, "loss": 1.6298, "step": 881 }, { "epoch": 1.3867924528301887, "grad_norm": 2.4275019833470357, "learning_rate": 1.5903990601710933e-07, "loss": 1.6078, "step": 882 }, { "epoch": 1.3883647798742138, "grad_norm": 2.097258459403789, "learning_rate": 1.5829317938191007e-07, "loss": 1.7955, "step": 883 }, { "epoch": 1.389937106918239, "grad_norm": 2.2433716966343074, "learning_rate": 1.5754769729262068e-07, "loss": 1.841, "step": 884 }, { "epoch": 1.3915094339622642, "grad_norm": 1.9611279725874884, "learning_rate": 1.5680346458885351e-07, "loss": 1.8903, "step": 885 }, { "epoch": 1.3930817610062893, "grad_norm": 2.1857460166910703, "learning_rate": 1.560604861021099e-07, "loss": 1.8461, "step": 886 }, { "epoch": 1.3946540880503144, "grad_norm": 1.9802850293847218, "learning_rate": 1.5531876665574905e-07, "loss": 1.6594, "step": 887 }, { "epoch": 1.3962264150943398, "grad_norm": 2.044592124136331, "learning_rate": 1.5457831106495645e-07, "loss": 1.8477, "step": 888 }, { "epoch": 1.3977987421383649, "grad_norm": 2.1730178709565027, "learning_rate": 1.538391241367128e-07, "loss": 1.8571, "step": 889 }, { "epoch": 1.39937106918239, "grad_norm": 2.226125612813974, "learning_rate": 1.5310121066976246e-07, "loss": 1.8246, "step": 890 }, { "epoch": 1.400943396226415, "grad_norm": 2.110513299587775, "learning_rate": 1.52364575454583e-07, "loss": 1.5938, "step": 891 }, { "epoch": 1.4025157232704402, "grad_norm": 2.0945323670932483, "learning_rate": 1.5162922327335352e-07, "loss": 1.9624, "step": 892 }, { "epoch": 1.4040880503144655, "grad_norm": 2.031350721397557, "learning_rate": 1.5089515889992337e-07, "loss": 1.5905, "step": 893 }, { "epoch": 1.4056603773584906, "grad_norm": 2.5190983077756, "learning_rate": 1.5016238709978235e-07, "loss": 1.6355, "step": 894 }, { "epoch": 1.4072327044025157, "grad_norm": 2.0786833230760946, "learning_rate": 1.4943091263002846e-07, "loss": 2.4022, "step": 895 }, { "epoch": 1.408805031446541, "grad_norm": 2.2624991107175076, "learning_rate": 1.487007402393374e-07, "loss": 2.0047, "step": 896 }, { "epoch": 1.4103773584905661, "grad_norm": 2.13658012100152, "learning_rate": 1.4797187466793216e-07, "loss": 2.0784, "step": 897 }, { "epoch": 1.4119496855345912, "grad_norm": 2.154584941501371, "learning_rate": 1.4724432064755204e-07, "loss": 1.9006, "step": 898 }, { "epoch": 1.4135220125786163, "grad_norm": 2.2513090599810224, "learning_rate": 1.4651808290142143e-07, "loss": 1.87, "step": 899 }, { "epoch": 1.4150943396226414, "grad_norm": 2.1884972541614336, "learning_rate": 1.457931661442199e-07, "loss": 1.8312, "step": 900 }, { "epoch": 1.4166666666666667, "grad_norm": 2.165721743117902, "learning_rate": 1.450695750820513e-07, "loss": 1.5696, "step": 901 }, { "epoch": 1.4182389937106918, "grad_norm": 2.0366037954150964, "learning_rate": 1.4434731441241295e-07, "loss": 1.864, "step": 902 }, { "epoch": 1.419811320754717, "grad_norm": 2.0359358262558436, "learning_rate": 1.4362638882416552e-07, "loss": 1.9211, "step": 903 }, { "epoch": 1.4213836477987423, "grad_norm": 2.2277853781005144, "learning_rate": 1.429068029975025e-07, "loss": 1.6469, "step": 904 }, { "epoch": 1.4229559748427674, "grad_norm": 2.1703686899029937, "learning_rate": 1.421885616039194e-07, "loss": 1.7961, "step": 905 }, { "epoch": 1.4245283018867925, "grad_norm": 2.2098194543382097, "learning_rate": 1.4147166930618412e-07, "loss": 1.7475, "step": 906 }, { "epoch": 1.4261006289308176, "grad_norm": 2.0638951952196156, "learning_rate": 1.4075613075830626e-07, "loss": 1.5421, "step": 907 }, { "epoch": 1.4276729559748427, "grad_norm": 2.2638501605807786, "learning_rate": 1.400419506055069e-07, "loss": 2.0258, "step": 908 }, { "epoch": 1.429245283018868, "grad_norm": 2.026762327168469, "learning_rate": 1.393291334841886e-07, "loss": 1.7273, "step": 909 }, { "epoch": 1.430817610062893, "grad_norm": 2.0850924373321655, "learning_rate": 1.3861768402190533e-07, "loss": 1.6415, "step": 910 }, { "epoch": 1.4323899371069182, "grad_norm": 2.105162873756192, "learning_rate": 1.379076068373319e-07, "loss": 1.7626, "step": 911 }, { "epoch": 1.4339622641509435, "grad_norm": 2.2125942477293474, "learning_rate": 1.3719890654023485e-07, "loss": 1.6857, "step": 912 }, { "epoch": 1.4355345911949686, "grad_norm": 2.1153484222332173, "learning_rate": 1.36491587731442e-07, "loss": 1.7955, "step": 913 }, { "epoch": 1.4371069182389937, "grad_norm": 2.2997509014276742, "learning_rate": 1.3578565500281222e-07, "loss": 2.0574, "step": 914 }, { "epoch": 1.4386792452830188, "grad_norm": 2.078107647008167, "learning_rate": 1.3508111293720675e-07, "loss": 2.0042, "step": 915 }, { "epoch": 1.440251572327044, "grad_norm": 2.0399439337966787, "learning_rate": 1.343779661084584e-07, "loss": 1.574, "step": 916 }, { "epoch": 1.4418238993710693, "grad_norm": 2.2889011438788063, "learning_rate": 1.33676219081342e-07, "loss": 1.7344, "step": 917 }, { "epoch": 1.4433962264150944, "grad_norm": 2.333332062321592, "learning_rate": 1.329758764115452e-07, "loss": 1.5233, "step": 918 }, { "epoch": 1.4449685534591195, "grad_norm": 2.1331182619855857, "learning_rate": 1.322769426456388e-07, "loss": 1.6755, "step": 919 }, { "epoch": 1.4465408805031448, "grad_norm": 2.1531518116333745, "learning_rate": 1.3157942232104702e-07, "loss": 2.0816, "step": 920 }, { "epoch": 1.4481132075471699, "grad_norm": 2.088795076446534, "learning_rate": 1.308833199660178e-07, "loss": 2.0221, "step": 921 }, { "epoch": 1.449685534591195, "grad_norm": 2.00003816402544, "learning_rate": 1.3018864009959402e-07, "loss": 1.8191, "step": 922 }, { "epoch": 1.45125786163522, "grad_norm": 2.0551941845228403, "learning_rate": 1.2949538723158427e-07, "loss": 1.779, "step": 923 }, { "epoch": 1.4528301886792452, "grad_norm": 2.2468743977572547, "learning_rate": 1.288035658625323e-07, "loss": 1.7847, "step": 924 }, { "epoch": 1.4544025157232705, "grad_norm": 2.2719489416819254, "learning_rate": 1.2811318048368927e-07, "loss": 1.6929, "step": 925 }, { "epoch": 1.4559748427672956, "grad_norm": 2.0990742193527434, "learning_rate": 1.2742423557698407e-07, "loss": 1.6888, "step": 926 }, { "epoch": 1.4575471698113207, "grad_norm": 2.21584285821969, "learning_rate": 1.2673673561499367e-07, "loss": 1.8427, "step": 927 }, { "epoch": 1.459119496855346, "grad_norm": 2.145727226704997, "learning_rate": 1.2605068506091503e-07, "loss": 1.854, "step": 928 }, { "epoch": 1.4606918238993711, "grad_norm": 2.2012601813217554, "learning_rate": 1.2536608836853537e-07, "loss": 1.6569, "step": 929 }, { "epoch": 1.4622641509433962, "grad_norm": 2.1152395275847233, "learning_rate": 1.2468294998220374e-07, "loss": 1.8322, "step": 930 }, { "epoch": 1.4638364779874213, "grad_norm": 2.1134882928957226, "learning_rate": 1.2400127433680197e-07, "loss": 1.54, "step": 931 }, { "epoch": 1.4654088050314464, "grad_norm": 2.058732975760006, "learning_rate": 1.2332106585771588e-07, "loss": 1.8464, "step": 932 }, { "epoch": 1.4669811320754718, "grad_norm": 1.9450661693276792, "learning_rate": 1.226423289608063e-07, "loss": 1.6493, "step": 933 }, { "epoch": 1.4685534591194969, "grad_norm": 2.162745466557572, "learning_rate": 1.2196506805238097e-07, "loss": 1.7669, "step": 934 }, { "epoch": 1.470125786163522, "grad_norm": 1.9865409540246532, "learning_rate": 1.2128928752916557e-07, "loss": 1.9032, "step": 935 }, { "epoch": 1.4716981132075473, "grad_norm": 2.1486068004112098, "learning_rate": 1.2061499177827517e-07, "loss": 1.7487, "step": 936 }, { "epoch": 1.4732704402515724, "grad_norm": 2.1353688348778244, "learning_rate": 1.199421851771858e-07, "loss": 1.9648, "step": 937 }, { "epoch": 1.4748427672955975, "grad_norm": 2.203719714639102, "learning_rate": 1.1927087209370627e-07, "loss": 2.2411, "step": 938 }, { "epoch": 1.4764150943396226, "grad_norm": 2.1007559098440707, "learning_rate": 1.1860105688594913e-07, "loss": 1.9012, "step": 939 }, { "epoch": 1.4779874213836477, "grad_norm": 2.0112830813880724, "learning_rate": 1.179327439023032e-07, "loss": 1.7904, "step": 940 }, { "epoch": 1.479559748427673, "grad_norm": 2.147891306593028, "learning_rate": 1.1726593748140503e-07, "loss": 1.849, "step": 941 }, { "epoch": 1.4811320754716981, "grad_norm": 2.1951970364454167, "learning_rate": 1.1660064195211026e-07, "loss": 1.8406, "step": 942 }, { "epoch": 1.4827044025157232, "grad_norm": 2.251395431799435, "learning_rate": 1.1593686163346624e-07, "loss": 1.8115, "step": 943 }, { "epoch": 1.4842767295597485, "grad_norm": 2.2374065524605915, "learning_rate": 1.1527460083468404e-07, "loss": 1.7597, "step": 944 }, { "epoch": 1.4858490566037736, "grad_norm": 2.2516356519637153, "learning_rate": 1.1461386385510934e-07, "loss": 1.7996, "step": 945 }, { "epoch": 1.4874213836477987, "grad_norm": 1.9259732674286747, "learning_rate": 1.1395465498419584e-07, "loss": 1.7016, "step": 946 }, { "epoch": 1.4889937106918238, "grad_norm": 2.0151236267786943, "learning_rate": 1.1329697850147684e-07, "loss": 1.8591, "step": 947 }, { "epoch": 1.490566037735849, "grad_norm": 2.1157729525808, "learning_rate": 1.1264083867653721e-07, "loss": 1.7659, "step": 948 }, { "epoch": 1.4921383647798743, "grad_norm": 2.2039873137986485, "learning_rate": 1.1198623976898626e-07, "loss": 1.8312, "step": 949 }, { "epoch": 1.4937106918238994, "grad_norm": 2.139918541343564, "learning_rate": 1.1133318602842961e-07, "loss": 1.8547, "step": 950 }, { "epoch": 1.4952830188679245, "grad_norm": 2.0927341451744716, "learning_rate": 1.1068168169444187e-07, "loss": 1.6786, "step": 951 }, { "epoch": 1.4968553459119498, "grad_norm": 2.22996580434956, "learning_rate": 1.1003173099653898e-07, "loss": 1.9014, "step": 952 }, { "epoch": 1.498427672955975, "grad_norm": 2.2224019206058707, "learning_rate": 1.093833381541509e-07, "loss": 1.9734, "step": 953 }, { "epoch": 1.5, "grad_norm": 2.292687450843933, "learning_rate": 1.087365073765938e-07, "loss": 1.6376, "step": 954 }, { "epoch": 1.501572327044025, "grad_norm": 1.933890837573844, "learning_rate": 1.0809124286304334e-07, "loss": 1.6966, "step": 955 }, { "epoch": 1.5031446540880502, "grad_norm": 2.2347334629753557, "learning_rate": 1.0744754880250704e-07, "loss": 1.9026, "step": 956 }, { "epoch": 1.5047169811320755, "grad_norm": 2.0708848593951084, "learning_rate": 1.0680542937379719e-07, "loss": 1.7771, "step": 957 }, { "epoch": 1.5062893081761006, "grad_norm": 2.102938366432825, "learning_rate": 1.061648887455036e-07, "loss": 1.7984, "step": 958 }, { "epoch": 1.507861635220126, "grad_norm": 2.2787001713384467, "learning_rate": 1.0552593107596671e-07, "loss": 1.7934, "step": 959 }, { "epoch": 1.509433962264151, "grad_norm": 2.1532789357045794, "learning_rate": 1.0488856051325056e-07, "loss": 1.6814, "step": 960 }, { "epoch": 1.5110062893081762, "grad_norm": 2.253404213973793, "learning_rate": 1.0425278119511557e-07, "loss": 1.5369, "step": 961 }, { "epoch": 1.5125786163522013, "grad_norm": 2.1276754628326904, "learning_rate": 1.0361859724899213e-07, "loss": 1.6983, "step": 962 }, { "epoch": 1.5141509433962264, "grad_norm": 2.04685261141141, "learning_rate": 1.0298601279195375e-07, "loss": 1.9189, "step": 963 }, { "epoch": 1.5157232704402515, "grad_norm": 2.334536087166455, "learning_rate": 1.0235503193068961e-07, "loss": 1.9152, "step": 964 }, { "epoch": 1.5172955974842768, "grad_norm": 2.1800867522294975, "learning_rate": 1.0172565876147919e-07, "loss": 1.8854, "step": 965 }, { "epoch": 1.5188679245283019, "grad_norm": 2.235966102337266, "learning_rate": 1.0109789737016459e-07, "loss": 1.8736, "step": 966 }, { "epoch": 1.5204402515723272, "grad_norm": 2.3031366168949385, "learning_rate": 1.0047175183212424e-07, "loss": 1.8837, "step": 967 }, { "epoch": 1.5220125786163523, "grad_norm": 1.9731203822876688, "learning_rate": 9.984722621224678e-08, "loss": 1.6989, "step": 968 }, { "epoch": 1.5235849056603774, "grad_norm": 2.0967189530567043, "learning_rate": 9.922432456490459e-08, "loss": 1.7385, "step": 969 }, { "epoch": 1.5251572327044025, "grad_norm": 2.2284300025544, "learning_rate": 9.86030509339269e-08, "loss": 1.5122, "step": 970 }, { "epoch": 1.5267295597484276, "grad_norm": 2.0075774050458017, "learning_rate": 9.798340935257439e-08, "loss": 1.7742, "step": 971 }, { "epoch": 1.5283018867924527, "grad_norm": 2.253336294321935, "learning_rate": 9.736540384351247e-08, "loss": 1.8329, "step": 972 }, { "epoch": 1.529874213836478, "grad_norm": 2.0229026010397795, "learning_rate": 9.674903841878527e-08, "loss": 1.8612, "step": 973 }, { "epoch": 1.5314465408805031, "grad_norm": 2.1682039050875384, "learning_rate": 9.613431707978969e-08, "loss": 1.6209, "step": 974 }, { "epoch": 1.5330188679245285, "grad_norm": 2.17108676879446, "learning_rate": 9.55212438172494e-08, "loss": 1.7289, "step": 975 }, { "epoch": 1.5345911949685536, "grad_norm": 2.0449199036226466, "learning_rate": 9.49098226111885e-08, "loss": 1.7313, "step": 976 }, { "epoch": 1.5361635220125787, "grad_norm": 2.0509938124397658, "learning_rate": 9.430005743090654e-08, "loss": 1.859, "step": 977 }, { "epoch": 1.5377358490566038, "grad_norm": 2.1533522799374016, "learning_rate": 9.369195223495212e-08, "loss": 1.5909, "step": 978 }, { "epoch": 1.5393081761006289, "grad_norm": 2.4410463221304957, "learning_rate": 9.308551097109723e-08, "loss": 1.9236, "step": 979 }, { "epoch": 1.540880503144654, "grad_norm": 2.1289270496387256, "learning_rate": 9.248073757631187e-08, "loss": 1.6905, "step": 980 }, { "epoch": 1.5424528301886793, "grad_norm": 2.160163073483539, "learning_rate": 9.187763597673842e-08, "loss": 1.572, "step": 981 }, { "epoch": 1.5440251572327044, "grad_norm": 2.1864157152356247, "learning_rate": 9.127621008766583e-08, "loss": 1.486, "step": 982 }, { "epoch": 1.5455974842767297, "grad_norm": 2.228493447513863, "learning_rate": 9.067646381350473e-08, "loss": 1.8109, "step": 983 }, { "epoch": 1.5471698113207548, "grad_norm": 2.21672107568335, "learning_rate": 9.007840104776179e-08, "loss": 1.7224, "step": 984 }, { "epoch": 1.54874213836478, "grad_norm": 2.0906671637683383, "learning_rate": 8.948202567301416e-08, "loss": 1.7993, "step": 985 }, { "epoch": 1.550314465408805, "grad_norm": 2.103005760268071, "learning_rate": 8.888734156088509e-08, "loss": 1.7734, "step": 986 }, { "epoch": 1.5518867924528301, "grad_norm": 2.1048717139473534, "learning_rate": 8.829435257201803e-08, "loss": 1.6411, "step": 987 }, { "epoch": 1.5534591194968552, "grad_norm": 2.244151500892785, "learning_rate": 8.77030625560516e-08, "loss": 1.9157, "step": 988 }, { "epoch": 1.5550314465408805, "grad_norm": 2.3422640414896407, "learning_rate": 8.711347535159517e-08, "loss": 1.446, "step": 989 }, { "epoch": 1.5566037735849056, "grad_norm": 2.21179768023497, "learning_rate": 8.652559478620349e-08, "loss": 1.7682, "step": 990 }, { "epoch": 1.558176100628931, "grad_norm": 2.130587628281344, "learning_rate": 8.593942467635173e-08, "loss": 1.9265, "step": 991 }, { "epoch": 1.559748427672956, "grad_norm": 2.2427742208191384, "learning_rate": 8.535496882741118e-08, "loss": 1.8189, "step": 992 }, { "epoch": 1.5613207547169812, "grad_norm": 2.319870729623824, "learning_rate": 8.47722310336241e-08, "loss": 1.7268, "step": 993 }, { "epoch": 1.5628930817610063, "grad_norm": 2.0786710848008214, "learning_rate": 8.419121507807966e-08, "loss": 1.6414, "step": 994 }, { "epoch": 1.5644654088050314, "grad_norm": 2.1766614925100805, "learning_rate": 8.361192473268831e-08, "loss": 1.7614, "step": 995 }, { "epoch": 1.5660377358490565, "grad_norm": 2.0644913614642784, "learning_rate": 8.30343637581585e-08, "loss": 1.7658, "step": 996 }, { "epoch": 1.5676100628930818, "grad_norm": 1.9942721896362847, "learning_rate": 8.245853590397171e-08, "loss": 1.5864, "step": 997 }, { "epoch": 1.569182389937107, "grad_norm": 2.107035516490154, "learning_rate": 8.188444490835773e-08, "loss": 1.5109, "step": 998 }, { "epoch": 1.5707547169811322, "grad_norm": 2.157536925590625, "learning_rate": 8.131209449827121e-08, "loss": 1.8098, "step": 999 }, { "epoch": 1.5723270440251573, "grad_norm": 2.3169442855875535, "learning_rate": 8.074148838936693e-08, "loss": 1.718, "step": 1000 }, { "epoch": 1.5723270440251573, "eval_sat2_MCTS_chains_SFT_val_loss": 1.7004035711288452, "eval_sat2_MCTS_chains_SFT_val_runtime": 92.1151, "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.16, "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.4, "step": 1000 }, { "epoch": 1.5738993710691824, "grad_norm": 1.9653337976940755, "learning_rate": 8.017263028597577e-08, "loss": 1.6755, "step": 1001 }, { "epoch": 1.5754716981132075, "grad_norm": 2.259220567901995, "learning_rate": 7.960552388108074e-08, "loss": 1.6192, "step": 1002 }, { "epoch": 1.5770440251572326, "grad_norm": 2.172629330750012, "learning_rate": 7.9040172856293e-08, "loss": 1.7591, "step": 1003 }, { "epoch": 1.5786163522012577, "grad_norm": 2.235449144986895, "learning_rate": 7.847658088182764e-08, "loss": 1.6464, "step": 1004 }, { "epoch": 1.580188679245283, "grad_norm": 2.1331939859889766, "learning_rate": 7.791475161648044e-08, "loss": 1.7274, "step": 1005 }, { "epoch": 1.5817610062893082, "grad_norm": 2.11798299275123, "learning_rate": 7.735468870760373e-08, "loss": 1.9111, "step": 1006 }, { "epoch": 1.5833333333333335, "grad_norm": 2.1205150272186266, "learning_rate": 7.679639579108278e-08, "loss": 1.8506, "step": 1007 }, { "epoch": 1.5849056603773586, "grad_norm": 2.0605259082135428, "learning_rate": 7.623987649131212e-08, "loss": 1.5979, "step": 1008 }, { "epoch": 1.5864779874213837, "grad_norm": 2.366698058592538, "learning_rate": 7.568513442117235e-08, "loss": 1.6993, "step": 1009 }, { "epoch": 1.5880503144654088, "grad_norm": 2.218013241692686, "learning_rate": 7.513217318200599e-08, "loss": 1.7854, "step": 1010 }, { "epoch": 1.5896226415094339, "grad_norm": 1.9909585694013383, "learning_rate": 7.458099636359496e-08, "loss": 1.6368, "step": 1011 }, { "epoch": 1.591194968553459, "grad_norm": 2.290605549139274, "learning_rate": 7.403160754413676e-08, "loss": 1.7737, "step": 1012 }, { "epoch": 1.5927672955974843, "grad_norm": 2.1367293795429947, "learning_rate": 7.348401029022108e-08, "loss": 1.6134, "step": 1013 }, { "epoch": 1.5943396226415094, "grad_norm": 2.01820143740589, "learning_rate": 7.293820815680712e-08, "loss": 1.7256, "step": 1014 }, { "epoch": 1.5959119496855347, "grad_norm": 2.276755773858234, "learning_rate": 7.239420468720059e-08, "loss": 1.9804, "step": 1015 }, { "epoch": 1.5974842767295598, "grad_norm": 2.330331102976199, "learning_rate": 7.185200341302975e-08, "loss": 1.7043, "step": 1016 }, { "epoch": 1.599056603773585, "grad_norm": 2.1276439937862306, "learning_rate": 7.131160785422365e-08, "loss": 1.9397, "step": 1017 }, { "epoch": 1.60062893081761, "grad_norm": 2.172507453951336, "learning_rate": 7.077302151898875e-08, "loss": 1.9139, "step": 1018 }, { "epoch": 1.6022012578616351, "grad_norm": 2.1478223685971356, "learning_rate": 7.023624790378576e-08, "loss": 1.6555, "step": 1019 }, { "epoch": 1.6037735849056602, "grad_norm": 2.3969459707002256, "learning_rate": 6.97012904933078e-08, "loss": 1.9195, "step": 1020 }, { "epoch": 1.6053459119496856, "grad_norm": 1.9248844302089616, "learning_rate": 6.916815276045719e-08, "loss": 1.8894, "step": 1021 }, { "epoch": 1.6069182389937107, "grad_norm": 2.0212176784028717, "learning_rate": 6.863683816632293e-08, "loss": 1.7218, "step": 1022 }, { "epoch": 1.608490566037736, "grad_norm": 2.094474217163886, "learning_rate": 6.810735016015846e-08, "loss": 1.6663, "step": 1023 }, { "epoch": 1.610062893081761, "grad_norm": 2.0244431225363697, "learning_rate": 6.757969217935929e-08, "loss": 1.6878, "step": 1024 }, { "epoch": 1.6116352201257862, "grad_norm": 2.1281168867505014, "learning_rate": 6.705386764944006e-08, "loss": 1.8226, "step": 1025 }, { "epoch": 1.6132075471698113, "grad_norm": 2.0886881066721625, "learning_rate": 6.652987998401334e-08, "loss": 1.655, "step": 1026 }, { "epoch": 1.6147798742138364, "grad_norm": 2.0367504844576247, "learning_rate": 6.60077325847666e-08, "loss": 1.7722, "step": 1027 }, { "epoch": 1.6163522012578615, "grad_norm": 1.9930526077088684, "learning_rate": 6.548742884144054e-08, "loss": 1.7073, "step": 1028 }, { "epoch": 1.6179245283018868, "grad_norm": 2.159190917587176, "learning_rate": 6.4968972131807e-08, "loss": 1.8479, "step": 1029 }, { "epoch": 1.619496855345912, "grad_norm": 2.2749098562974903, "learning_rate": 6.445236582164699e-08, "loss": 1.9923, "step": 1030 }, { "epoch": 1.6210691823899372, "grad_norm": 2.2025296280904225, "learning_rate": 6.393761326472898e-08, "loss": 1.6454, "step": 1031 }, { "epoch": 1.6226415094339623, "grad_norm": 2.1334919488245414, "learning_rate": 6.342471780278667e-08, "loss": 1.6965, "step": 1032 }, { "epoch": 1.6242138364779874, "grad_norm": 2.322316370670461, "learning_rate": 6.291368276549802e-08, "loss": 1.7228, "step": 1033 }, { "epoch": 1.6257861635220126, "grad_norm": 1.995542145637434, "learning_rate": 6.240451147046318e-08, "loss": 1.595, "step": 1034 }, { "epoch": 1.6273584905660377, "grad_norm": 2.196444636496991, "learning_rate": 6.189720722318278e-08, "loss": 1.8758, "step": 1035 }, { "epoch": 1.6289308176100628, "grad_norm": 2.0927387390257657, "learning_rate": 6.139177331703707e-08, "loss": 2.1127, "step": 1036 }, { "epoch": 1.630503144654088, "grad_norm": 1.9307021857625342, "learning_rate": 6.088821303326411e-08, "loss": 1.803, "step": 1037 }, { "epoch": 1.6320754716981132, "grad_norm": 2.082620089850429, "learning_rate": 6.038652964093827e-08, "loss": 1.6595, "step": 1038 }, { "epoch": 1.6336477987421385, "grad_norm": 2.067695395804015, "learning_rate": 5.988672639694953e-08, "loss": 1.8777, "step": 1039 }, { "epoch": 1.6352201257861636, "grad_norm": 2.042833868758026, "learning_rate": 5.938880654598219e-08, "loss": 1.7071, "step": 1040 }, { "epoch": 1.6367924528301887, "grad_norm": 2.117785030809429, "learning_rate": 5.889277332049334e-08, "loss": 1.7538, "step": 1041 }, { "epoch": 1.6383647798742138, "grad_norm": 2.0095241417925265, "learning_rate": 5.839862994069262e-08, "loss": 1.8899, "step": 1042 }, { "epoch": 1.639937106918239, "grad_norm": 2.2478546507042405, "learning_rate": 5.79063796145207e-08, "loss": 1.8472, "step": 1043 }, { "epoch": 1.641509433962264, "grad_norm": 2.305709446534207, "learning_rate": 5.74160255376288e-08, "loss": 1.7277, "step": 1044 }, { "epoch": 1.6430817610062893, "grad_norm": 2.18183263977078, "learning_rate": 5.692757089335781e-08, "loss": 1.9153, "step": 1045 }, { "epoch": 1.6446540880503144, "grad_norm": 2.0254378536986386, "learning_rate": 5.644101885271778e-08, "loss": 1.8602, "step": 1046 }, { "epoch": 1.6462264150943398, "grad_norm": 2.2161096727855085, "learning_rate": 5.5956372574366835e-08, "loss": 1.6629, "step": 1047 }, { "epoch": 1.6477987421383649, "grad_norm": 2.301864500140701, "learning_rate": 5.547363520459137e-08, "loss": 1.7943, "step": 1048 }, { "epoch": 1.64937106918239, "grad_norm": 2.092542135751507, "learning_rate": 5.4992809877285235e-08, "loss": 1.7474, "step": 1049 }, { "epoch": 1.650943396226415, "grad_norm": 2.148759273760875, "learning_rate": 5.4513899713929394e-08, "loss": 1.9493, "step": 1050 }, { "epoch": 1.6525157232704402, "grad_norm": 2.103715691929771, "learning_rate": 5.403690782357175e-08, "loss": 1.8557, "step": 1051 }, { "epoch": 1.6540880503144653, "grad_norm": 2.099977822828033, "learning_rate": 5.3561837302806944e-08, "loss": 1.7166, "step": 1052 }, { "epoch": 1.6556603773584906, "grad_norm": 1.9796989676905823, "learning_rate": 5.3088691235756094e-08, "loss": 1.9712, "step": 1053 }, { "epoch": 1.6572327044025157, "grad_norm": 2.119765004058249, "learning_rate": 5.2617472694047037e-08, "loss": 1.8249, "step": 1054 }, { "epoch": 1.658805031446541, "grad_norm": 2.0993720478618387, "learning_rate": 5.2148184736794346e-08, "loss": 1.8525, "step": 1055 }, { "epoch": 1.6603773584905661, "grad_norm": 2.023540938803579, "learning_rate": 5.1680830410579055e-08, "loss": 2.0546, "step": 1056 }, { "epoch": 1.6619496855345912, "grad_norm": 2.0699562130101015, "learning_rate": 5.121541274942966e-08, "loss": 1.9134, "step": 1057 }, { "epoch": 1.6635220125786163, "grad_norm": 2.144350196893977, "learning_rate": 5.07519347748018e-08, "loss": 1.7238, "step": 1058 }, { "epoch": 1.6650943396226414, "grad_norm": 2.0183983742920075, "learning_rate": 5.029039949555856e-08, "loss": 1.8309, "step": 1059 }, { "epoch": 1.6666666666666665, "grad_norm": 2.1227831992403043, "learning_rate": 4.983080990795154e-08, "loss": 1.7035, "step": 1060 }, { "epoch": 1.6682389937106918, "grad_norm": 2.2858740994961506, "learning_rate": 4.937316899560099e-08, "loss": 1.5596, "step": 1061 }, { "epoch": 1.669811320754717, "grad_norm": 2.1280194157468344, "learning_rate": 4.891747972947634e-08, "loss": 1.6423, "step": 1062 }, { "epoch": 1.6713836477987423, "grad_norm": 2.2102497033701543, "learning_rate": 4.846374506787724e-08, "loss": 1.6832, "step": 1063 }, { "epoch": 1.6729559748427674, "grad_norm": 2.238490260804657, "learning_rate": 4.8011967956414156e-08, "loss": 1.6306, "step": 1064 }, { "epoch": 1.6745283018867925, "grad_norm": 2.200290791301945, "learning_rate": 4.756215132798929e-08, "loss": 1.778, "step": 1065 }, { "epoch": 1.6761006289308176, "grad_norm": 2.0985204995078024, "learning_rate": 4.7114298102777545e-08, "loss": 1.7058, "step": 1066 }, { "epoch": 1.6776729559748427, "grad_norm": 2.087398311229735, "learning_rate": 4.666841118820755e-08, "loss": 1.7865, "step": 1067 }, { "epoch": 1.6792452830188678, "grad_norm": 2.1078417553705817, "learning_rate": 4.622449347894291e-08, "loss": 1.7158, "step": 1068 }, { "epoch": 1.680817610062893, "grad_norm": 2.116464602679099, "learning_rate": 4.578254785686302e-08, "loss": 1.8466, "step": 1069 }, { "epoch": 1.6823899371069182, "grad_norm": 2.089237238472885, "learning_rate": 4.5342577191044845e-08, "loss": 1.6295, "step": 1070 }, { "epoch": 1.6839622641509435, "grad_norm": 2.0547646711843988, "learning_rate": 4.4904584337744134e-08, "loss": 1.7459, "step": 1071 }, { "epoch": 1.6855345911949686, "grad_norm": 2.0054803327980335, "learning_rate": 4.4468572140376675e-08, "loss": 1.6197, "step": 1072 }, { "epoch": 1.6871069182389937, "grad_norm": 2.0697707743408293, "learning_rate": 4.403454342950009e-08, "loss": 1.829, "step": 1073 }, { "epoch": 1.6886792452830188, "grad_norm": 2.3284003484454416, "learning_rate": 4.360250102279542e-08, "loss": 1.8744, "step": 1074 }, { "epoch": 1.690251572327044, "grad_norm": 2.242850396768619, "learning_rate": 4.317244772504851e-08, "loss": 1.7455, "step": 1075 }, { "epoch": 1.691823899371069, "grad_norm": 2.1997671073539204, "learning_rate": 4.274438632813232e-08, "loss": 2.0059, "step": 1076 }, { "epoch": 1.6933962264150944, "grad_norm": 2.1566093126536643, "learning_rate": 4.2318319610988444e-08, "loss": 1.5531, "step": 1077 }, { "epoch": 1.6949685534591195, "grad_norm": 2.13597178016107, "learning_rate": 4.1894250339609196e-08, "loss": 1.8328, "step": 1078 }, { "epoch": 1.6965408805031448, "grad_norm": 2.07918842349438, "learning_rate": 4.1472181267019636e-08, "loss": 1.7407, "step": 1079 }, { "epoch": 1.6981132075471699, "grad_norm": 2.028781264262438, "learning_rate": 4.1052115133259726e-08, "loss": 1.8737, "step": 1080 }, { "epoch": 1.699685534591195, "grad_norm": 2.0355607974120016, "learning_rate": 4.063405466536631e-08, "loss": 1.5415, "step": 1081 }, { "epoch": 1.70125786163522, "grad_norm": 1.9226221416432272, "learning_rate": 4.021800257735578e-08, "loss": 1.9198, "step": 1082 }, { "epoch": 1.7028301886792452, "grad_norm": 1.8732770572866655, "learning_rate": 3.9803961570206315e-08, "loss": 1.8087, "step": 1083 }, { "epoch": 1.7044025157232703, "grad_norm": 2.346364883253458, "learning_rate": 3.9391934331840104e-08, "loss": 1.9382, "step": 1084 }, { "epoch": 1.7059748427672956, "grad_norm": 2.1529023946779433, "learning_rate": 3.898192353710623e-08, "loss": 1.8482, "step": 1085 }, { "epoch": 1.7075471698113207, "grad_norm": 2.2018108847525646, "learning_rate": 3.857393184776341e-08, "loss": 1.672, "step": 1086 }, { "epoch": 1.709119496855346, "grad_norm": 2.2290874113828902, "learning_rate": 3.8167961912462046e-08, "loss": 1.9239, "step": 1087 }, { "epoch": 1.7106918238993711, "grad_norm": 2.100849188665365, "learning_rate": 3.7764016366727704e-08, "loss": 1.8664, "step": 1088 }, { "epoch": 1.7122641509433962, "grad_norm": 2.1510675871347975, "learning_rate": 3.73620978329439e-08, "loss": 1.7952, "step": 1089 }, { "epoch": 1.7138364779874213, "grad_norm": 2.109222961162139, "learning_rate": 3.6962208920334554e-08, "loss": 1.7452, "step": 1090 }, { "epoch": 1.7154088050314464, "grad_norm": 2.121301963046306, "learning_rate": 3.656435222494782e-08, "loss": 1.8288, "step": 1091 }, { "epoch": 1.7169811320754715, "grad_norm": 2.0337417924569, "learning_rate": 3.61685303296387e-08, "loss": 1.8223, "step": 1092 }, { "epoch": 1.7185534591194969, "grad_norm": 2.1339663352283913, "learning_rate": 3.577474580405245e-08, "loss": 1.5421, "step": 1093 }, { "epoch": 1.720125786163522, "grad_norm": 2.049959891378422, "learning_rate": 3.5383001204607826e-08, "loss": 1.8102, "step": 1094 }, { "epoch": 1.7216981132075473, "grad_norm": 2.1813938906584847, "learning_rate": 3.499329907448072e-08, "loss": 2.1207, "step": 1095 }, { "epoch": 1.7232704402515724, "grad_norm": 2.0705000827955558, "learning_rate": 3.4605641943587113e-08, "loss": 1.8636, "step": 1096 }, { "epoch": 1.7248427672955975, "grad_norm": 2.238011733888544, "learning_rate": 3.4220032328567384e-08, "loss": 1.5974, "step": 1097 }, { "epoch": 1.7264150943396226, "grad_norm": 2.2247891478941857, "learning_rate": 3.383647273276945e-08, "loss": 1.8494, "step": 1098 }, { "epoch": 1.7279874213836477, "grad_norm": 2.312396508453329, "learning_rate": 3.345496564623257e-08, "loss": 1.885, "step": 1099 }, { "epoch": 1.7295597484276728, "grad_norm": 2.066339746833989, "learning_rate": 3.3075513545671434e-08, "loss": 1.7994, "step": 1100 }, { "epoch": 1.7311320754716981, "grad_norm": 2.0879972636931314, "learning_rate": 3.269811889445988e-08, "loss": 1.6402, "step": 1101 }, { "epoch": 1.7327044025157232, "grad_norm": 2.175814596182231, "learning_rate": 3.232278414261481e-08, "loss": 1.5661, "step": 1102 }, { "epoch": 1.7342767295597485, "grad_norm": 2.1194787082555018, "learning_rate": 3.194951172678054e-08, "loss": 1.7645, "step": 1103 }, { "epoch": 1.7358490566037736, "grad_norm": 1.8763626024150262, "learning_rate": 3.157830407021283e-08, "loss": 1.596, "step": 1104 }, { "epoch": 1.7374213836477987, "grad_norm": 1.9594370469295614, "learning_rate": 3.120916358276331e-08, "loss": 1.6861, "step": 1105 }, { "epoch": 1.7389937106918238, "grad_norm": 2.3713769253326453, "learning_rate": 3.084209266086331e-08, "loss": 1.5862, "step": 1106 }, { "epoch": 1.740566037735849, "grad_norm": 2.1938199292645937, "learning_rate": 3.047709368750924e-08, "loss": 1.6771, "step": 1107 }, { "epoch": 1.742138364779874, "grad_norm": 2.3215493957832267, "learning_rate": 3.01141690322463e-08, "loss": 1.7517, "step": 1108 }, { "epoch": 1.7437106918238994, "grad_norm": 2.221134746509961, "learning_rate": 2.9753321051153258e-08, "loss": 1.7712, "step": 1109 }, { "epoch": 1.7452830188679245, "grad_norm": 2.1030740049013517, "learning_rate": 2.9394552086827434e-08, "loss": 1.7837, "step": 1110 }, { "epoch": 1.7468553459119498, "grad_norm": 2.178087587225378, "learning_rate": 2.9037864468369417e-08, "loss": 1.7186, "step": 1111 }, { "epoch": 1.748427672955975, "grad_norm": 2.1574371301384434, "learning_rate": 2.8683260511367614e-08, "loss": 1.5805, "step": 1112 }, { "epoch": 1.75, "grad_norm": 2.042041045810803, "learning_rate": 2.8330742517883645e-08, "loss": 1.7781, "step": 1113 }, { "epoch": 1.751572327044025, "grad_norm": 1.996501467635451, "learning_rate": 2.7980312776437142e-08, "loss": 1.8566, "step": 1114 }, { "epoch": 1.7531446540880502, "grad_norm": 1.9640009019290934, "learning_rate": 2.7631973561990995e-08, "loss": 2.0415, "step": 1115 }, { "epoch": 1.7547169811320755, "grad_norm": 2.181588054812827, "learning_rate": 2.7285727135936608e-08, "loss": 1.6838, "step": 1116 }, { "epoch": 1.7562893081761006, "grad_norm": 2.1470396103954705, "learning_rate": 2.6941575746079108e-08, "loss": 1.9552, "step": 1117 }, { "epoch": 1.757861635220126, "grad_norm": 2.064818182873414, "learning_rate": 2.659952162662269e-08, "loss": 1.7339, "step": 1118 }, { "epoch": 1.759433962264151, "grad_norm": 2.39097255770087, "learning_rate": 2.625956699815639e-08, "loss": 1.6014, "step": 1119 }, { "epoch": 1.7610062893081762, "grad_norm": 2.08931618388101, "learning_rate": 2.592171406763949e-08, "loss": 1.6226, "step": 1120 }, { "epoch": 1.7625786163522013, "grad_norm": 2.085612329162341, "learning_rate": 2.5585965028387198e-08, "loss": 1.6741, "step": 1121 }, { "epoch": 1.7641509433962264, "grad_norm": 2.0859266300459067, "learning_rate": 2.5252322060056403e-08, "loss": 1.8141, "step": 1122 }, { "epoch": 1.7657232704402515, "grad_norm": 2.063805491342782, "learning_rate": 2.4920787328631565e-08, "loss": 1.6166, "step": 1123 }, { "epoch": 1.7672955974842768, "grad_norm": 2.1187493282183016, "learning_rate": 2.459136298641057e-08, "loss": 1.6022, "step": 1124 }, { "epoch": 1.7688679245283019, "grad_norm": 2.1979153283650414, "learning_rate": 2.426405117199089e-08, "loss": 1.6834, "step": 1125 }, { "epoch": 1.7704402515723272, "grad_norm": 2.20082255588697, "learning_rate": 2.393885401025565e-08, "loss": 1.9188, "step": 1126 }, { "epoch": 1.7720125786163523, "grad_norm": 2.1540464614977504, "learning_rate": 2.361577361235962e-08, "loss": 1.5527, "step": 1127 }, { "epoch": 1.7735849056603774, "grad_norm": 2.0788864004074923, "learning_rate": 2.3294812075716015e-08, "loss": 1.9392, "step": 1128 }, { "epoch": 1.7751572327044025, "grad_norm": 2.04615578653692, "learning_rate": 2.2975971483982428e-08, "loss": 1.8391, "step": 1129 }, { "epoch": 1.7767295597484276, "grad_norm": 2.034007984738505, "learning_rate": 2.265925390704726e-08, "loss": 1.8705, "step": 1130 }, { "epoch": 1.7783018867924527, "grad_norm": 2.146991438580307, "learning_rate": 2.2344661401016678e-08, "loss": 1.9585, "step": 1131 }, { "epoch": 1.779874213836478, "grad_norm": 2.279053736154095, "learning_rate": 2.203219600820112e-08, "loss": 2.1532, "step": 1132 }, { "epoch": 1.7814465408805031, "grad_norm": 2.0190752770101548, "learning_rate": 2.1721859757101658e-08, "loss": 1.6968, "step": 1133 }, { "epoch": 1.7830188679245285, "grad_norm": 2.28515721518105, "learning_rate": 2.1413654662397408e-08, "loss": 1.626, "step": 1134 }, { "epoch": 1.7845911949685536, "grad_norm": 2.135849516978255, "learning_rate": 2.1107582724932088e-08, "loss": 1.7029, "step": 1135 }, { "epoch": 1.7861635220125787, "grad_norm": 2.418793471259512, "learning_rate": 2.0803645931701158e-08, "loss": 1.759, "step": 1136 }, { "epoch": 1.7877358490566038, "grad_norm": 2.322445509938412, "learning_rate": 2.0501846255838835e-08, "loss": 1.6907, "step": 1137 }, { "epoch": 1.7893081761006289, "grad_norm": 2.115867407487902, "learning_rate": 2.0202185656605426e-08, "loss": 1.8523, "step": 1138 }, { "epoch": 1.790880503144654, "grad_norm": 2.2369615207847096, "learning_rate": 1.9904666079374393e-08, "loss": 1.8127, "step": 1139 }, { "epoch": 1.7924528301886793, "grad_norm": 2.0960658556245133, "learning_rate": 1.9609289455619883e-08, "loss": 1.5551, "step": 1140 }, { "epoch": 1.7940251572327044, "grad_norm": 2.085123820184512, "learning_rate": 1.9316057702904277e-08, "loss": 1.8461, "step": 1141 }, { "epoch": 1.7955974842767297, "grad_norm": 2.082494020984376, "learning_rate": 1.9024972724865423e-08, "loss": 1.9352, "step": 1142 }, { "epoch": 1.7971698113207548, "grad_norm": 2.093869902799492, "learning_rate": 1.8736036411204626e-08, "loss": 1.7042, "step": 1143 }, { "epoch": 1.79874213836478, "grad_norm": 2.3137806461183845, "learning_rate": 1.8449250637674162e-08, "loss": 1.7895, "step": 1144 }, { "epoch": 1.800314465408805, "grad_norm": 2.1172529387064563, "learning_rate": 1.8164617266065252e-08, "loss": 1.8503, "step": 1145 }, { "epoch": 1.8018867924528301, "grad_norm": 2.1273968333527282, "learning_rate": 1.7882138144195685e-08, "loss": 1.6297, "step": 1146 }, { "epoch": 1.8034591194968552, "grad_norm": 2.268804643140494, "learning_rate": 1.7601815105898215e-08, "loss": 1.73, "step": 1147 }, { "epoch": 1.8050314465408805, "grad_norm": 2.113208754031722, "learning_rate": 1.7323649971008393e-08, "loss": 1.6516, "step": 1148 }, { "epoch": 1.8066037735849056, "grad_norm": 2.3541202063911695, "learning_rate": 1.7047644545352903e-08, "loss": 1.681, "step": 1149 }, { "epoch": 1.808176100628931, "grad_norm": 2.248100742975873, "learning_rate": 1.6773800620737644e-08, "loss": 1.8295, "step": 1150 }, { "epoch": 1.809748427672956, "grad_norm": 1.9422747223256194, "learning_rate": 1.650211997493634e-08, "loss": 1.6425, "step": 1151 }, { "epoch": 1.8113207547169812, "grad_norm": 2.1573082083805577, "learning_rate": 1.6232604371678726e-08, "loss": 1.923, "step": 1152 }, { "epoch": 1.8128930817610063, "grad_norm": 2.1325777796205254, "learning_rate": 1.5965255560639394e-08, "loss": 1.7978, "step": 1153 }, { "epoch": 1.8144654088050314, "grad_norm": 2.002442725649601, "learning_rate": 1.5700075277426262e-08, "loss": 1.7531, "step": 1154 }, { "epoch": 1.8160377358490565, "grad_norm": 1.9280913979981322, "learning_rate": 1.543706524356917e-08, "loss": 1.8257, "step": 1155 }, { "epoch": 1.8176100628930818, "grad_norm": 2.1344422655015496, "learning_rate": 1.5176227166509058e-08, "loss": 1.7187, "step": 1156 }, { "epoch": 1.819182389937107, "grad_norm": 2.2004037593468224, "learning_rate": 1.491756273958673e-08, "loss": 1.9901, "step": 1157 }, { "epoch": 1.8207547169811322, "grad_norm": 2.0033561732048955, "learning_rate": 1.466107364203158e-08, "loss": 1.679, "step": 1158 }, { "epoch": 1.8223270440251573, "grad_norm": 2.060413883738862, "learning_rate": 1.440676153895114e-08, "loss": 1.7332, "step": 1159 }, { "epoch": 1.8238993710691824, "grad_norm": 2.2111387133657314, "learning_rate": 1.4154628081320014e-08, "loss": 1.7536, "step": 1160 }, { "epoch": 1.8254716981132075, "grad_norm": 2.131048492611576, "learning_rate": 1.3904674905969066e-08, "loss": 1.8556, "step": 1161 }, { "epoch": 1.8270440251572326, "grad_norm": 2.0610846483559664, "learning_rate": 1.3656903635575167e-08, "loss": 1.8077, "step": 1162 }, { "epoch": 1.8286163522012577, "grad_norm": 2.1736177189318733, "learning_rate": 1.3411315878650237e-08, "loss": 1.6993, "step": 1163 }, { "epoch": 1.830188679245283, "grad_norm": 2.2099683077755583, "learning_rate": 1.3167913229531135e-08, "loss": 1.7427, "step": 1164 }, { "epoch": 1.8317610062893082, "grad_norm": 2.222331633508216, "learning_rate": 1.2926697268369101e-08, "loss": 1.6154, "step": 1165 }, { "epoch": 1.8333333333333335, "grad_norm": 2.056907346969074, "learning_rate": 1.2687669561119568e-08, "loss": 1.5743, "step": 1166 }, { "epoch": 1.8349056603773586, "grad_norm": 2.2254294416157134, "learning_rate": 1.245083165953194e-08, "loss": 1.6526, "step": 1167 }, { "epoch": 1.8364779874213837, "grad_norm": 1.975892358409506, "learning_rate": 1.2216185101139692e-08, "loss": 1.6919, "step": 1168 }, { "epoch": 1.8380503144654088, "grad_norm": 1.9887040123280215, "learning_rate": 1.1983731409250181e-08, "loss": 1.9421, "step": 1169 }, { "epoch": 1.8396226415094339, "grad_norm": 2.065623115774402, "learning_rate": 1.1753472092934858e-08, "loss": 1.664, "step": 1170 }, { "epoch": 1.841194968553459, "grad_norm": 2.0212401073533495, "learning_rate": 1.1525408647019474e-08, "loss": 1.9716, "step": 1171 }, { "epoch": 1.8427672955974843, "grad_norm": 2.0410818459050075, "learning_rate": 1.129954255207441e-08, "loss": 1.647, "step": 1172 }, { "epoch": 1.8443396226415094, "grad_norm": 2.2637429629509964, "learning_rate": 1.1075875274404834e-08, "loss": 1.9561, "step": 1173 }, { "epoch": 1.8459119496855347, "grad_norm": 2.1445378180592356, "learning_rate": 1.0854408266041543e-08, "loss": 1.8834, "step": 1174 }, { "epoch": 1.8474842767295598, "grad_norm": 2.171320431386727, "learning_rate": 1.063514296473132e-08, "loss": 1.8161, "step": 1175 }, { "epoch": 1.849056603773585, "grad_norm": 2.0483569709125966, "learning_rate": 1.041808079392753e-08, "loss": 1.811, "step": 1176 }, { "epoch": 1.85062893081761, "grad_norm": 2.1123320054916697, "learning_rate": 1.020322316278111e-08, "loss": 1.8621, "step": 1177 }, { "epoch": 1.8522012578616351, "grad_norm": 1.9907743276040535, "learning_rate": 9.990571466131276e-09, "loss": 1.8181, "step": 1178 }, { "epoch": 1.8537735849056602, "grad_norm": 2.2022266952576572, "learning_rate": 9.780127084496431e-09, "loss": 1.5795, "step": 1179 }, { "epoch": 1.8553459119496856, "grad_norm": 2.1497279507664535, "learning_rate": 9.571891384065272e-09, "loss": 1.6334, "step": 1180 }, { "epoch": 1.8569182389937107, "grad_norm": 2.2214098590343028, "learning_rate": 9.365865716687965e-09, "loss": 2.0922, "step": 1181 }, { "epoch": 1.858490566037736, "grad_norm": 2.064033723821905, "learning_rate": 9.162051419867245e-09, "loss": 1.6108, "step": 1182 }, { "epoch": 1.860062893081761, "grad_norm": 2.426092174363886, "learning_rate": 8.960449816749832e-09, "loss": 1.5644, "step": 1183 }, { "epoch": 1.8616352201257862, "grad_norm": 2.074214131520031, "learning_rate": 8.761062216117765e-09, "loss": 1.5808, "step": 1184 }, { "epoch": 1.8632075471698113, "grad_norm": 2.1958655328363146, "learning_rate": 8.563889912380046e-09, "loss": 1.8186, "step": 1185 }, { "epoch": 1.8647798742138364, "grad_norm": 2.2651556186496626, "learning_rate": 8.368934185564013e-09, "loss": 1.6952, "step": 1186 }, { "epoch": 1.8663522012578615, "grad_norm": 2.1256908151950915, "learning_rate": 8.176196301307264e-09, "loss": 1.7424, "step": 1187 }, { "epoch": 1.8679245283018868, "grad_norm": 2.0538768988218536, "learning_rate": 7.985677510849332e-09, "loss": 1.923, "step": 1188 }, { "epoch": 1.869496855345912, "grad_norm": 2.061086071767639, "learning_rate": 7.79737905102349e-09, "loss": 2.0386, "step": 1189 }, { "epoch": 1.8710691823899372, "grad_norm": 2.170281435880277, "learning_rate": 7.611302144248788e-09, "loss": 1.979, "step": 1190 }, { "epoch": 1.8726415094339623, "grad_norm": 2.0702664036289944, "learning_rate": 7.427447998522241e-09, "loss": 1.9203, "step": 1191 }, { "epoch": 1.8742138364779874, "grad_norm": 2.186437061351241, "learning_rate": 7.245817807410742e-09, "loss": 2.0204, "step": 1192 }, { "epoch": 1.8757861635220126, "grad_norm": 2.403033320304919, "learning_rate": 7.066412750043532e-09, "loss": 1.8169, "step": 1193 }, { "epoch": 1.8773584905660377, "grad_norm": 2.113165661681811, "learning_rate": 6.889233991104421e-09, "loss": 1.8014, "step": 1194 }, { "epoch": 1.8789308176100628, "grad_norm": 2.1364558866941907, "learning_rate": 6.714282680824252e-09, "loss": 1.8172, "step": 1195 }, { "epoch": 1.880503144654088, "grad_norm": 2.085369574271752, "learning_rate": 6.54155995497348e-09, "loss": 1.9062, "step": 1196 }, { "epoch": 1.8820754716981132, "grad_norm": 2.158776809363429, "learning_rate": 6.371066934854713e-09, "loss": 1.7571, "step": 1197 }, { "epoch": 1.8836477987421385, "grad_norm": 2.1204855812030936, "learning_rate": 6.202804727295441e-09, "loss": 1.4898, "step": 1198 }, { "epoch": 1.8852201257861636, "grad_norm": 1.9588917914820057, "learning_rate": 6.036774424641044e-09, "loss": 1.9212, "step": 1199 }, { "epoch": 1.8867924528301887, "grad_norm": 2.289926267202458, "learning_rate": 5.872977104747451e-09, "loss": 1.8261, "step": 1200 }, { "epoch": 1.8883647798742138, "grad_norm": 2.248912074374657, "learning_rate": 5.711413830974177e-09, "loss": 1.5361, "step": 1201 }, { "epoch": 1.889937106918239, "grad_norm": 2.1042256295937656, "learning_rate": 5.5520856521775685e-09, "loss": 1.8066, "step": 1202 }, { "epoch": 1.891509433962264, "grad_norm": 2.180029267811429, "learning_rate": 5.3949936027039625e-09, "loss": 2.1126, "step": 1203 }, { "epoch": 1.8930817610062893, "grad_norm": 2.1625082758181744, "learning_rate": 5.240138702382729e-09, "loss": 1.7901, "step": 1204 }, { "epoch": 1.8946540880503144, "grad_norm": 2.1665119765928, "learning_rate": 5.087521956520058e-09, "loss": 1.8569, "step": 1205 }, { "epoch": 1.8962264150943398, "grad_norm": 2.0777861736774725, "learning_rate": 4.937144355891998e-09, "loss": 1.793, "step": 1206 }, { "epoch": 1.8977987421383649, "grad_norm": 2.066304356371116, "learning_rate": 4.789006876738438e-09, "loss": 1.7053, "step": 1207 }, { "epoch": 1.89937106918239, "grad_norm": 2.0957159756988006, "learning_rate": 4.643110480756423e-09, "loss": 1.7215, "step": 1208 }, { "epoch": 1.900943396226415, "grad_norm": 2.111256856709086, "learning_rate": 4.499456115094169e-09, "loss": 1.8245, "step": 1209 }, { "epoch": 1.9025157232704402, "grad_norm": 1.8782104151978112, "learning_rate": 4.358044712344688e-09, "loss": 1.6741, "step": 1210 }, { "epoch": 1.9040880503144653, "grad_norm": 2.141467865299451, "learning_rate": 4.218877190539927e-09, "loss": 1.7832, "step": 1211 }, { "epoch": 1.9056603773584906, "grad_norm": 2.0679767608171584, "learning_rate": 4.081954453144737e-09, "loss": 1.8032, "step": 1212 }, { "epoch": 1.9072327044025157, "grad_norm": 2.044401061483079, "learning_rate": 3.947277389051013e-09, "loss": 1.5991, "step": 1213 }, { "epoch": 1.908805031446541, "grad_norm": 2.3204022598290424, "learning_rate": 3.814846872571781e-09, "loss": 2.1206, "step": 1214 }, { "epoch": 1.9103773584905661, "grad_norm": 2.194698485486922, "learning_rate": 3.68466376343588e-09, "loss": 1.8517, "step": 1215 }, { "epoch": 1.9119496855345912, "grad_norm": 2.116787406844949, "learning_rate": 3.556728906781897e-09, "loss": 1.751, "step": 1216 }, { "epoch": 1.9135220125786163, "grad_norm": 2.105786362411949, "learning_rate": 3.4310431331531553e-09, "loss": 1.6647, "step": 1217 }, { "epoch": 1.9150943396226414, "grad_norm": 2.23499471842287, "learning_rate": 3.307607258491962e-09, "loss": 1.7705, "step": 1218 }, { "epoch": 1.9166666666666665, "grad_norm": 2.131569173737192, "learning_rate": 3.18642208413456e-09, "loss": 1.889, "step": 1219 }, { "epoch": 1.9182389937106918, "grad_norm": 2.0221410280515, "learning_rate": 3.067488396805684e-09, "loss": 1.5895, "step": 1220 }, { "epoch": 1.919811320754717, "grad_norm": 2.000235641315321, "learning_rate": 2.950806968613745e-09, "loss": 1.8866, "step": 1221 }, { "epoch": 1.9213836477987423, "grad_norm": 2.6217655796510377, "learning_rate": 2.8363785570455436e-09, "loss": 1.7802, "step": 1222 }, { "epoch": 1.9229559748427674, "grad_norm": 2.3713251232928996, "learning_rate": 2.724203904961531e-09, "loss": 1.7429, "step": 1223 }, { "epoch": 1.9245283018867925, "grad_norm": 2.0162450187498515, "learning_rate": 2.6142837405909113e-09, "loss": 1.6852, "step": 1224 }, { "epoch": 1.9261006289308176, "grad_norm": 1.9184165558043198, "learning_rate": 2.5066187775269034e-09, "loss": 2.0859, "step": 1225 }, { "epoch": 1.9276729559748427, "grad_norm": 2.0980898585416403, "learning_rate": 2.401209714722152e-09, "loss": 1.776, "step": 1226 }, { "epoch": 1.9292452830188678, "grad_norm": 2.102737428792809, "learning_rate": 2.2980572364841854e-09, "loss": 1.8705, "step": 1227 }, { "epoch": 1.930817610062893, "grad_norm": 2.235581223829696, "learning_rate": 2.1971620124709435e-09, "loss": 1.8633, "step": 1228 }, { "epoch": 1.9323899371069182, "grad_norm": 2.1003684288920006, "learning_rate": 2.098524697686427e-09, "loss": 1.8356, "step": 1229 }, { "epoch": 1.9339622641509435, "grad_norm": 2.085896244612752, "learning_rate": 2.002145932476501e-09, "loss": 1.6605, "step": 1230 }, { "epoch": 1.9355345911949686, "grad_norm": 2.249282131879236, "learning_rate": 1.908026342524738e-09, "loss": 1.572, "step": 1231 }, { "epoch": 1.9371069182389937, "grad_norm": 2.133881200071366, "learning_rate": 1.8161665388481796e-09, "loss": 1.7188, "step": 1232 }, { "epoch": 1.9386792452830188, "grad_norm": 2.0523111952597124, "learning_rate": 1.7265671177936092e-09, "loss": 2.0153, "step": 1233 }, { "epoch": 1.940251572327044, "grad_norm": 2.095543253062672, "learning_rate": 1.639228661033587e-09, "loss": 1.5568, "step": 1234 }, { "epoch": 1.941823899371069, "grad_norm": 2.2647929929993764, "learning_rate": 1.554151735562642e-09, "loss": 2.0448, "step": 1235 }, { "epoch": 1.9433962264150944, "grad_norm": 2.1607959179153404, "learning_rate": 1.47133689369362e-09, "loss": 1.6183, "step": 1236 }, { "epoch": 1.9449685534591195, "grad_norm": 2.069314519936326, "learning_rate": 1.3907846730541073e-09, "loss": 1.7316, "step": 1237 }, { "epoch": 1.9465408805031448, "grad_norm": 2.402914241865446, "learning_rate": 1.3124955965828966e-09, "loss": 1.6961, "step": 1238 }, { "epoch": 1.9481132075471699, "grad_norm": 2.418179213383202, "learning_rate": 1.2364701725266436e-09, "loss": 1.8602, "step": 1239 }, { "epoch": 1.949685534591195, "grad_norm": 2.0182001003309122, "learning_rate": 1.162708894436526e-09, "loss": 1.8426, "step": 1240 }, { "epoch": 1.95125786163522, "grad_norm": 1.9544224426157475, "learning_rate": 1.0912122411651348e-09, "loss": 1.912, "step": 1241 }, { "epoch": 1.9528301886792452, "grad_norm": 2.1284678150486975, "learning_rate": 1.0219806768631712e-09, "loss": 1.7659, "step": 1242 }, { "epoch": 1.9544025157232703, "grad_norm": 2.103380584288797, "learning_rate": 9.550146509766489e-10, "loss": 1.6285, "step": 1243 }, { "epoch": 1.9559748427672956, "grad_norm": 2.318883943366702, "learning_rate": 8.903145982438242e-10, "loss": 1.7361, "step": 1244 }, { "epoch": 1.9575471698113207, "grad_norm": 2.0210439137006007, "learning_rate": 8.278809386924767e-10, "loss": 1.8383, "step": 1245 }, { "epoch": 1.959119496855346, "grad_norm": 2.122214078228037, "learning_rate": 7.677140776371494e-10, "loss": 1.6549, "step": 1246 }, { "epoch": 1.9606918238993711, "grad_norm": 2.148904360940334, "learning_rate": 7.0981440567639e-10, "loss": 1.777, "step": 1247 }, { "epoch": 1.9622641509433962, "grad_norm": 2.157751893993133, "learning_rate": 6.541822986904589e-10, "loss": 1.7056, "step": 1248 }, { "epoch": 1.9638364779874213, "grad_norm": 1.9449801123342445, "learning_rate": 6.00818117838725e-10, "loss": 1.7219, "step": 1249 }, { "epoch": 1.9654088050314464, "grad_norm": 2.054951250857022, "learning_rate": 5.497222095572962e-10, "loss": 1.6673, "step": 1250 }, { "epoch": 1.9654088050314464, "eval_sat2_MCTS_chains_SFT_val_loss": 1.6998926401138306, "eval_sat2_MCTS_chains_SFT_val_runtime": 91.7731, "eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.202, "eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406, "step": 1250 }, { "epoch": 1.9669811320754715, "grad_norm": 2.05069047537442, "learning_rate": 5.008949055568812e-10, "loss": 1.7608, "step": 1251 }, { "epoch": 1.9685534591194969, "grad_norm": 2.154702108749229, "learning_rate": 4.543365228205753e-10, "loss": 1.6858, "step": 1252 }, { "epoch": 1.970125786163522, "grad_norm": 2.075168647544607, "learning_rate": 4.1004736360183976e-10, "loss": 1.6641, "step": 1253 }, { "epoch": 1.9716981132075473, "grad_norm": 2.2047735011240226, "learning_rate": 3.6802771542244204e-10, "loss": 1.7977, "step": 1254 }, { "epoch": 1.9732704402515724, "grad_norm": 2.128359624030826, "learning_rate": 3.2827785107074623e-10, "loss": 1.5849, "step": 1255 }, { "epoch": 1.9748427672955975, "grad_norm": 2.1524898340309595, "learning_rate": 2.907980285997702e-10, "loss": 1.6826, "step": 1256 }, { "epoch": 1.9764150943396226, "grad_norm": 2.0578150120619005, "learning_rate": 2.555884913256312e-10, "loss": 1.9806, "step": 1257 }, { "epoch": 1.9779874213836477, "grad_norm": 2.0645335423876263, "learning_rate": 2.2264946782599158e-10, "loss": 1.7219, "step": 1258 }, { "epoch": 1.9795597484276728, "grad_norm": 2.1474076116693337, "learning_rate": 1.9198117193838791e-10, "loss": 1.853, "step": 1259 }, { "epoch": 1.9811320754716981, "grad_norm": 2.0521508824628105, "learning_rate": 1.6358380275906524e-10, "loss": 1.6023, "step": 1260 }, { "epoch": 1.9827044025157232, "grad_norm": 2.3303244823585625, "learning_rate": 1.3745754464157823e-10, "loss": 1.7856, "step": 1261 }, { "epoch": 1.9842767295597485, "grad_norm": 2.112760564566335, "learning_rate": 1.1360256719554762e-10, "loss": 2.052, "step": 1262 }, { "epoch": 1.9858490566037736, "grad_norm": 2.0973660824743323, "learning_rate": 9.201902528561123e-11, "loss": 1.8666, "step": 1263 }, { "epoch": 1.9874213836477987, "grad_norm": 2.1164187723139043, "learning_rate": 7.270705903056895e-11, "loss": 1.7619, "step": 1264 }, { "epoch": 1.9889937106918238, "grad_norm": 2.184592687671629, "learning_rate": 5.566679380210049e-11, "loss": 1.8967, "step": 1265 }, { "epoch": 1.990566037735849, "grad_norm": 2.0610164571083756, "learning_rate": 4.089834022437677e-11, "loss": 1.8686, "step": 1266 }, { "epoch": 1.992138364779874, "grad_norm": 2.176062181352369, "learning_rate": 2.8401794173049666e-11, "loss": 1.5427, "step": 1267 }, { "epoch": 1.9937106918238994, "grad_norm": 2.0675804076152313, "learning_rate": 1.8177236774707948e-11, "loss": 1.6378, "step": 1268 }, { "epoch": 1.9952830188679245, "grad_norm": 2.290052278929668, "learning_rate": 1.022473440637217e-11, "loss": 1.656, "step": 1269 }, { "epoch": 1.9968553459119498, "grad_norm": 2.287180872342125, "learning_rate": 4.544338695106064e-12, "loss": 1.8058, "step": 1270 }, { "epoch": 1.998427672955975, "grad_norm": 2.0048323595450275, "learning_rate": 1.1360865176279766e-12, "loss": 1.6803, "step": 1271 }, { "epoch": 2.0, "grad_norm": 2.049630174327001, "learning_rate": 0.0, "loss": 1.7973, "step": 1272 } ], "logging_steps": 1, "max_steps": 1272, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 214395832958976.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }