ViGoRL-MCTS-SFT-3b-Spatial / trainer_state.json
gsarch's picture
Initial checkpoint upload
31da275 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 250,
"global_step": 1272,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015723270440251573,
"grad_norm": 6.609381042259111,
"learning_rate": 1.794871794871795e-08,
"loss": 2.2121,
"step": 1
},
{
"epoch": 0.0031446540880503146,
"grad_norm": 6.5863108918091955,
"learning_rate": 3.58974358974359e-08,
"loss": 2.1816,
"step": 2
},
{
"epoch": 0.0047169811320754715,
"grad_norm": 6.617655516365913,
"learning_rate": 5.384615384615385e-08,
"loss": 2.0071,
"step": 3
},
{
"epoch": 0.006289308176100629,
"grad_norm": 6.602018175612389,
"learning_rate": 7.17948717948718e-08,
"loss": 2.1686,
"step": 4
},
{
"epoch": 0.007861635220125786,
"grad_norm": 6.591379792761674,
"learning_rate": 8.974358974358973e-08,
"loss": 2.1805,
"step": 5
},
{
"epoch": 0.009433962264150943,
"grad_norm": 6.7248109628555,
"learning_rate": 1.076923076923077e-07,
"loss": 2.2243,
"step": 6
},
{
"epoch": 0.0110062893081761,
"grad_norm": 6.3995094275133715,
"learning_rate": 1.2564102564102563e-07,
"loss": 2.3763,
"step": 7
},
{
"epoch": 0.012578616352201259,
"grad_norm": 6.296067719776998,
"learning_rate": 1.435897435897436e-07,
"loss": 2.298,
"step": 8
},
{
"epoch": 0.014150943396226415,
"grad_norm": 6.778527459516128,
"learning_rate": 1.6153846153846155e-07,
"loss": 2.1494,
"step": 9
},
{
"epoch": 0.015723270440251572,
"grad_norm": 6.594906718693546,
"learning_rate": 1.7948717948717946e-07,
"loss": 2.0825,
"step": 10
},
{
"epoch": 0.01729559748427673,
"grad_norm": 6.7082225379425,
"learning_rate": 1.9743589743589741e-07,
"loss": 2.3567,
"step": 11
},
{
"epoch": 0.018867924528301886,
"grad_norm": 6.907609205305907,
"learning_rate": 2.153846153846154e-07,
"loss": 1.9809,
"step": 12
},
{
"epoch": 0.020440251572327043,
"grad_norm": 5.9359307337140335,
"learning_rate": 2.333333333333333e-07,
"loss": 2.226,
"step": 13
},
{
"epoch": 0.0220125786163522,
"grad_norm": 6.466867096117202,
"learning_rate": 2.5128205128205126e-07,
"loss": 2.1425,
"step": 14
},
{
"epoch": 0.02358490566037736,
"grad_norm": 6.755674157390508,
"learning_rate": 2.692307692307692e-07,
"loss": 2.2433,
"step": 15
},
{
"epoch": 0.025157232704402517,
"grad_norm": 6.002945550670174,
"learning_rate": 2.871794871794872e-07,
"loss": 2.3029,
"step": 16
},
{
"epoch": 0.026729559748427674,
"grad_norm": 6.733820715319282,
"learning_rate": 3.0512820512820514e-07,
"loss": 2.6192,
"step": 17
},
{
"epoch": 0.02830188679245283,
"grad_norm": 6.814207366203309,
"learning_rate": 3.230769230769231e-07,
"loss": 2.1716,
"step": 18
},
{
"epoch": 0.029874213836477988,
"grad_norm": 6.649348126638431,
"learning_rate": 3.41025641025641e-07,
"loss": 2.0726,
"step": 19
},
{
"epoch": 0.031446540880503145,
"grad_norm": 6.419164036165898,
"learning_rate": 3.589743589743589e-07,
"loss": 2.2484,
"step": 20
},
{
"epoch": 0.0330188679245283,
"grad_norm": 6.381550340344472,
"learning_rate": 3.7692307692307687e-07,
"loss": 2.0979,
"step": 21
},
{
"epoch": 0.03459119496855346,
"grad_norm": 6.470517353012837,
"learning_rate": 3.9487179487179483e-07,
"loss": 2.0564,
"step": 22
},
{
"epoch": 0.036163522012578615,
"grad_norm": 6.817629340287558,
"learning_rate": 4.128205128205128e-07,
"loss": 2.2869,
"step": 23
},
{
"epoch": 0.03773584905660377,
"grad_norm": 6.415896720614442,
"learning_rate": 4.307692307692308e-07,
"loss": 2.1393,
"step": 24
},
{
"epoch": 0.03930817610062893,
"grad_norm": 6.758474027008109,
"learning_rate": 4.4871794871794876e-07,
"loss": 1.9729,
"step": 25
},
{
"epoch": 0.040880503144654086,
"grad_norm": 5.796971726063335,
"learning_rate": 4.666666666666666e-07,
"loss": 2.1908,
"step": 26
},
{
"epoch": 0.04245283018867924,
"grad_norm": 5.790656105640873,
"learning_rate": 4.846153846153846e-07,
"loss": 2.0891,
"step": 27
},
{
"epoch": 0.0440251572327044,
"grad_norm": 6.190761522380566,
"learning_rate": 5.025641025641025e-07,
"loss": 1.962,
"step": 28
},
{
"epoch": 0.04559748427672956,
"grad_norm": 6.653844953656282,
"learning_rate": 5.205128205128205e-07,
"loss": 2.1133,
"step": 29
},
{
"epoch": 0.04716981132075472,
"grad_norm": 6.3047026890345395,
"learning_rate": 5.384615384615384e-07,
"loss": 2.2912,
"step": 30
},
{
"epoch": 0.04874213836477988,
"grad_norm": 6.8087836989796875,
"learning_rate": 5.564102564102564e-07,
"loss": 2.2732,
"step": 31
},
{
"epoch": 0.050314465408805034,
"grad_norm": 6.756218951330409,
"learning_rate": 5.743589743589744e-07,
"loss": 2.1865,
"step": 32
},
{
"epoch": 0.05188679245283019,
"grad_norm": 5.892752864062046,
"learning_rate": 5.923076923076923e-07,
"loss": 1.9986,
"step": 33
},
{
"epoch": 0.05345911949685535,
"grad_norm": 5.9596785550097495,
"learning_rate": 6.102564102564103e-07,
"loss": 2.3515,
"step": 34
},
{
"epoch": 0.055031446540880505,
"grad_norm": 5.817501243045476,
"learning_rate": 6.282051282051282e-07,
"loss": 2.1328,
"step": 35
},
{
"epoch": 0.05660377358490566,
"grad_norm": 5.555025157362233,
"learning_rate": 6.461538461538462e-07,
"loss": 2.0956,
"step": 36
},
{
"epoch": 0.05817610062893082,
"grad_norm": 4.66564680385535,
"learning_rate": 6.64102564102564e-07,
"loss": 2.1224,
"step": 37
},
{
"epoch": 0.059748427672955975,
"grad_norm": 4.682038309064788,
"learning_rate": 6.82051282051282e-07,
"loss": 1.8465,
"step": 38
},
{
"epoch": 0.06132075471698113,
"grad_norm": 4.520420535983458,
"learning_rate": 7e-07,
"loss": 2.2189,
"step": 39
},
{
"epoch": 0.06289308176100629,
"grad_norm": 4.7915678520719105,
"learning_rate": 6.999988639134823e-07,
"loss": 2.1286,
"step": 40
},
{
"epoch": 0.06446540880503145,
"grad_norm": 4.612604930012734,
"learning_rate": 6.999954556613048e-07,
"loss": 2.0893,
"step": 41
},
{
"epoch": 0.0660377358490566,
"grad_norm": 4.247600840809702,
"learning_rate": 6.999897752655936e-07,
"loss": 2.1122,
"step": 42
},
{
"epoch": 0.06761006289308176,
"grad_norm": 3.9712862450759245,
"learning_rate": 6.999818227632253e-07,
"loss": 2.1013,
"step": 43
},
{
"epoch": 0.06918238993710692,
"grad_norm": 4.081791414690391,
"learning_rate": 6.99971598205827e-07,
"loss": 2.0214,
"step": 44
},
{
"epoch": 0.07075471698113207,
"grad_norm": 3.7509350102589827,
"learning_rate": 6.999591016597756e-07,
"loss": 2.1367,
"step": 45
},
{
"epoch": 0.07232704402515723,
"grad_norm": 3.2808902294971616,
"learning_rate": 6.999443332061978e-07,
"loss": 2.2945,
"step": 46
},
{
"epoch": 0.07389937106918239,
"grad_norm": 3.3036022925167607,
"learning_rate": 6.999272929409694e-07,
"loss": 2.0151,
"step": 47
},
{
"epoch": 0.07547169811320754,
"grad_norm": 3.3210132249336812,
"learning_rate": 6.999079809747144e-07,
"loss": 2.0188,
"step": 48
},
{
"epoch": 0.0770440251572327,
"grad_norm": 3.0229418180275007,
"learning_rate": 6.998863974328045e-07,
"loss": 2.0217,
"step": 49
},
{
"epoch": 0.07861635220125786,
"grad_norm": 2.9951351846113115,
"learning_rate": 6.998625424553584e-07,
"loss": 2.0772,
"step": 50
},
{
"epoch": 0.08018867924528301,
"grad_norm": 2.991842161373637,
"learning_rate": 6.99836416197241e-07,
"loss": 1.9351,
"step": 51
},
{
"epoch": 0.08176100628930817,
"grad_norm": 3.330478724889984,
"learning_rate": 6.998080188280617e-07,
"loss": 2.2126,
"step": 52
},
{
"epoch": 0.08333333333333333,
"grad_norm": 2.9629299494694283,
"learning_rate": 6.99777350532174e-07,
"loss": 2.3491,
"step": 53
},
{
"epoch": 0.08490566037735849,
"grad_norm": 2.9627317429014983,
"learning_rate": 6.997444115086743e-07,
"loss": 1.9852,
"step": 54
},
{
"epoch": 0.08647798742138364,
"grad_norm": 2.840879727132188,
"learning_rate": 6.997092019714002e-07,
"loss": 1.939,
"step": 55
},
{
"epoch": 0.0880503144654088,
"grad_norm": 2.9520960895336614,
"learning_rate": 6.996717221489292e-07,
"loss": 2.1034,
"step": 56
},
{
"epoch": 0.08962264150943396,
"grad_norm": 3.015493625809345,
"learning_rate": 6.996319722845775e-07,
"loss": 1.9913,
"step": 57
},
{
"epoch": 0.09119496855345911,
"grad_norm": 2.694895109102356,
"learning_rate": 6.995899526363981e-07,
"loss": 1.9909,
"step": 58
},
{
"epoch": 0.09276729559748427,
"grad_norm": 2.746013591310517,
"learning_rate": 6.995456634771794e-07,
"loss": 2.0852,
"step": 59
},
{
"epoch": 0.09433962264150944,
"grad_norm": 2.5054959395703302,
"learning_rate": 6.994991050944431e-07,
"loss": 2.1756,
"step": 60
},
{
"epoch": 0.0959119496855346,
"grad_norm": 2.7415517103140172,
"learning_rate": 6.994502777904428e-07,
"loss": 2.0021,
"step": 61
},
{
"epoch": 0.09748427672955975,
"grad_norm": 2.591902596902185,
"learning_rate": 6.993991818821612e-07,
"loss": 1.8463,
"step": 62
},
{
"epoch": 0.09905660377358491,
"grad_norm": 2.5850508327560617,
"learning_rate": 6.993458177013095e-07,
"loss": 1.7361,
"step": 63
},
{
"epoch": 0.10062893081761007,
"grad_norm": 2.74240553360653,
"learning_rate": 6.992901855943236e-07,
"loss": 1.9652,
"step": 64
},
{
"epoch": 0.10220125786163523,
"grad_norm": 2.7901671418215286,
"learning_rate": 6.992322859223628e-07,
"loss": 1.9407,
"step": 65
},
{
"epoch": 0.10377358490566038,
"grad_norm": 2.592826985818942,
"learning_rate": 6.991721190613075e-07,
"loss": 1.8256,
"step": 66
},
{
"epoch": 0.10534591194968554,
"grad_norm": 2.4593316329973978,
"learning_rate": 6.991096854017562e-07,
"loss": 1.9612,
"step": 67
},
{
"epoch": 0.1069182389937107,
"grad_norm": 2.731581032864078,
"learning_rate": 6.990449853490233e-07,
"loss": 1.8444,
"step": 68
},
{
"epoch": 0.10849056603773585,
"grad_norm": 2.8559336852628707,
"learning_rate": 6.989780193231367e-07,
"loss": 1.8695,
"step": 69
},
{
"epoch": 0.11006289308176101,
"grad_norm": 2.7165119219767035,
"learning_rate": 6.989087877588348e-07,
"loss": 1.9658,
"step": 70
},
{
"epoch": 0.11163522012578617,
"grad_norm": 2.829079631550892,
"learning_rate": 6.988372911055634e-07,
"loss": 2.1087,
"step": 71
},
{
"epoch": 0.11320754716981132,
"grad_norm": 2.507099458574326,
"learning_rate": 6.987635298274733e-07,
"loss": 1.8418,
"step": 72
},
{
"epoch": 0.11477987421383648,
"grad_norm": 2.4685464129673726,
"learning_rate": 6.986875044034171e-07,
"loss": 1.8306,
"step": 73
},
{
"epoch": 0.11635220125786164,
"grad_norm": 2.5785937060649187,
"learning_rate": 6.986092153269459e-07,
"loss": 1.9845,
"step": 74
},
{
"epoch": 0.1179245283018868,
"grad_norm": 2.4185902663784278,
"learning_rate": 6.985286631063063e-07,
"loss": 2.1262,
"step": 75
},
{
"epoch": 0.11949685534591195,
"grad_norm": 2.6484961851922395,
"learning_rate": 6.984458482644373e-07,
"loss": 1.9226,
"step": 76
},
{
"epoch": 0.12106918238993711,
"grad_norm": 2.624404494631164,
"learning_rate": 6.983607713389663e-07,
"loss": 1.9014,
"step": 77
},
{
"epoch": 0.12264150943396226,
"grad_norm": 2.5143123431618717,
"learning_rate": 6.982734328822063e-07,
"loss": 1.7563,
"step": 78
},
{
"epoch": 0.12421383647798742,
"grad_norm": 2.288866534494279,
"learning_rate": 6.981838334611518e-07,
"loss": 2.1576,
"step": 79
},
{
"epoch": 0.12578616352201258,
"grad_norm": 2.4924549190125225,
"learning_rate": 6.980919736574753e-07,
"loss": 1.9265,
"step": 80
},
{
"epoch": 0.12735849056603774,
"grad_norm": 2.6450039556661724,
"learning_rate": 6.979978540675234e-07,
"loss": 2.2224,
"step": 81
},
{
"epoch": 0.1289308176100629,
"grad_norm": 2.3901087100576786,
"learning_rate": 6.979014753023135e-07,
"loss": 1.7917,
"step": 82
},
{
"epoch": 0.13050314465408805,
"grad_norm": 2.2742892989609835,
"learning_rate": 6.978028379875291e-07,
"loss": 1.7802,
"step": 83
},
{
"epoch": 0.1320754716981132,
"grad_norm": 2.314703480458537,
"learning_rate": 6.977019427635158e-07,
"loss": 2.0916,
"step": 84
},
{
"epoch": 0.13364779874213836,
"grad_norm": 2.304575288629416,
"learning_rate": 6.975987902852778e-07,
"loss": 2.0544,
"step": 85
},
{
"epoch": 0.13522012578616352,
"grad_norm": 2.2373974568043353,
"learning_rate": 6.974933812224731e-07,
"loss": 1.8365,
"step": 86
},
{
"epoch": 0.13679245283018868,
"grad_norm": 2.3841316516671265,
"learning_rate": 6.973857162594091e-07,
"loss": 1.9519,
"step": 87
},
{
"epoch": 0.13836477987421383,
"grad_norm": 2.268505399665297,
"learning_rate": 6.972757960950384e-07,
"loss": 2.0843,
"step": 88
},
{
"epoch": 0.139937106918239,
"grad_norm": 2.389940820234018,
"learning_rate": 6.971636214429544e-07,
"loss": 2.1255,
"step": 89
},
{
"epoch": 0.14150943396226415,
"grad_norm": 2.2702701401800294,
"learning_rate": 6.970491930313862e-07,
"loss": 1.7951,
"step": 90
},
{
"epoch": 0.1430817610062893,
"grad_norm": 2.34707263205848,
"learning_rate": 6.969325116031943e-07,
"loss": 2.0553,
"step": 91
},
{
"epoch": 0.14465408805031446,
"grad_norm": 2.138066100619543,
"learning_rate": 6.968135779158653e-07,
"loss": 1.8837,
"step": 92
},
{
"epoch": 0.14622641509433962,
"grad_norm": 2.148707619444591,
"learning_rate": 6.96692392741508e-07,
"loss": 1.8783,
"step": 93
},
{
"epoch": 0.14779874213836477,
"grad_norm": 2.2157095129853266,
"learning_rate": 6.965689568668468e-07,
"loss": 1.9493,
"step": 94
},
{
"epoch": 0.14937106918238993,
"grad_norm": 2.6174396924182117,
"learning_rate": 6.964432710932181e-07,
"loss": 1.9476,
"step": 95
},
{
"epoch": 0.1509433962264151,
"grad_norm": 2.423080479151295,
"learning_rate": 6.963153362365641e-07,
"loss": 1.9149,
"step": 96
},
{
"epoch": 0.15251572327044025,
"grad_norm": 2.305911644784875,
"learning_rate": 6.961851531274282e-07,
"loss": 1.8686,
"step": 97
},
{
"epoch": 0.1540880503144654,
"grad_norm": 2.5427101543409116,
"learning_rate": 6.960527226109489e-07,
"loss": 1.8722,
"step": 98
},
{
"epoch": 0.15566037735849056,
"grad_norm": 2.2044082229047075,
"learning_rate": 6.959180455468553e-07,
"loss": 1.9485,
"step": 99
},
{
"epoch": 0.15723270440251572,
"grad_norm": 2.337924598657549,
"learning_rate": 6.9578112280946e-07,
"loss": 1.8701,
"step": 100
},
{
"epoch": 0.15880503144654087,
"grad_norm": 2.231091232316695,
"learning_rate": 6.956419552876552e-07,
"loss": 2.1111,
"step": 101
},
{
"epoch": 0.16037735849056603,
"grad_norm": 2.1902798087554647,
"learning_rate": 6.955005438849058e-07,
"loss": 1.9696,
"step": 102
},
{
"epoch": 0.1619496855345912,
"grad_norm": 2.391741538055505,
"learning_rate": 6.953568895192436e-07,
"loss": 1.9091,
"step": 103
},
{
"epoch": 0.16352201257861634,
"grad_norm": 2.440486302600468,
"learning_rate": 6.952109931232616e-07,
"loss": 1.8899,
"step": 104
},
{
"epoch": 0.1650943396226415,
"grad_norm": 3.1036603066171664,
"learning_rate": 6.95062855644108e-07,
"loss": 1.9706,
"step": 105
},
{
"epoch": 0.16666666666666666,
"grad_norm": 2.176053941318332,
"learning_rate": 6.9491247804348e-07,
"loss": 2.0294,
"step": 106
},
{
"epoch": 0.16823899371069181,
"grad_norm": 2.2955252073229415,
"learning_rate": 6.947598612976173e-07,
"loss": 1.8521,
"step": 107
},
{
"epoch": 0.16981132075471697,
"grad_norm": 2.3179610819374856,
"learning_rate": 6.946050063972961e-07,
"loss": 2.0428,
"step": 108
},
{
"epoch": 0.17138364779874213,
"grad_norm": 2.3353177807842442,
"learning_rate": 6.944479143478225e-07,
"loss": 1.6969,
"step": 109
},
{
"epoch": 0.17295597484276728,
"grad_norm": 2.1751580486972903,
"learning_rate": 6.942885861690258e-07,
"loss": 2.1661,
"step": 110
},
{
"epoch": 0.17452830188679244,
"grad_norm": 2.3502010826835873,
"learning_rate": 6.941270228952526e-07,
"loss": 1.8967,
"step": 111
},
{
"epoch": 0.1761006289308176,
"grad_norm": 2.080568954254506,
"learning_rate": 6.939632255753589e-07,
"loss": 2.0775,
"step": 112
},
{
"epoch": 0.17767295597484276,
"grad_norm": 2.0874560399766278,
"learning_rate": 6.937971952727045e-07,
"loss": 1.8397,
"step": 113
},
{
"epoch": 0.1792452830188679,
"grad_norm": 2.235119037617934,
"learning_rate": 6.936289330651452e-07,
"loss": 2.0157,
"step": 114
},
{
"epoch": 0.18081761006289307,
"grad_norm": 2.21645540026438,
"learning_rate": 6.934584400450265e-07,
"loss": 1.7553,
"step": 115
},
{
"epoch": 0.18238993710691823,
"grad_norm": 2.278410487078488,
"learning_rate": 6.932857173191757e-07,
"loss": 1.8963,
"step": 116
},
{
"epoch": 0.18396226415094338,
"grad_norm": 2.288449048592226,
"learning_rate": 6.931107660088955e-07,
"loss": 2.0707,
"step": 117
},
{
"epoch": 0.18553459119496854,
"grad_norm": 2.540412196668586,
"learning_rate": 6.929335872499565e-07,
"loss": 1.9994,
"step": 118
},
{
"epoch": 0.1871069182389937,
"grad_norm": 2.295470373319651,
"learning_rate": 6.927541821925892e-07,
"loss": 1.9994,
"step": 119
},
{
"epoch": 0.18867924528301888,
"grad_norm": 2.4658706344060457,
"learning_rate": 6.925725520014778e-07,
"loss": 2.1002,
"step": 120
},
{
"epoch": 0.19025157232704404,
"grad_norm": 1.9874120823661252,
"learning_rate": 6.923886978557511e-07,
"loss": 1.891,
"step": 121
},
{
"epoch": 0.1918238993710692,
"grad_norm": 2.1896911235137266,
"learning_rate": 6.922026209489765e-07,
"loss": 2.0324,
"step": 122
},
{
"epoch": 0.19339622641509435,
"grad_norm": 2.3459367974500687,
"learning_rate": 6.920143224891506e-07,
"loss": 1.9346,
"step": 123
},
{
"epoch": 0.1949685534591195,
"grad_norm": 2.231755148378703,
"learning_rate": 6.918238036986926e-07,
"loss": 1.8345,
"step": 124
},
{
"epoch": 0.19654088050314467,
"grad_norm": 2.279350328924178,
"learning_rate": 6.91631065814436e-07,
"loss": 1.8852,
"step": 125
},
{
"epoch": 0.19811320754716982,
"grad_norm": 2.411938466651685,
"learning_rate": 6.914361100876199e-07,
"loss": 1.8085,
"step": 126
},
{
"epoch": 0.19968553459119498,
"grad_norm": 2.2484290713562403,
"learning_rate": 6.912389377838822e-07,
"loss": 1.7151,
"step": 127
},
{
"epoch": 0.20125786163522014,
"grad_norm": 2.27114158109643,
"learning_rate": 6.910395501832502e-07,
"loss": 1.9463,
"step": 128
},
{
"epoch": 0.2028301886792453,
"grad_norm": 2.1360495919260765,
"learning_rate": 6.908379485801327e-07,
"loss": 2.0075,
"step": 129
},
{
"epoch": 0.20440251572327045,
"grad_norm": 2.234634251477468,
"learning_rate": 6.906341342833119e-07,
"loss": 1.8222,
"step": 130
},
{
"epoch": 0.2059748427672956,
"grad_norm": 2.354620761146764,
"learning_rate": 6.904281086159346e-07,
"loss": 1.8145,
"step": 131
},
{
"epoch": 0.20754716981132076,
"grad_norm": 2.009458317182769,
"learning_rate": 6.902198729155034e-07,
"loss": 1.8962,
"step": 132
},
{
"epoch": 0.20911949685534592,
"grad_norm": 2.2143437574616938,
"learning_rate": 6.900094285338686e-07,
"loss": 1.9396,
"step": 133
},
{
"epoch": 0.21069182389937108,
"grad_norm": 2.2176657548464904,
"learning_rate": 6.897967768372188e-07,
"loss": 1.8453,
"step": 134
},
{
"epoch": 0.21226415094339623,
"grad_norm": 2.30003499713903,
"learning_rate": 6.895819192060725e-07,
"loss": 1.899,
"step": 135
},
{
"epoch": 0.2138364779874214,
"grad_norm": 2.090050155082603,
"learning_rate": 6.893648570352687e-07,
"loss": 1.899,
"step": 136
},
{
"epoch": 0.21540880503144655,
"grad_norm": 2.2138416672788406,
"learning_rate": 6.891455917339585e-07,
"loss": 1.6803,
"step": 137
},
{
"epoch": 0.2169811320754717,
"grad_norm": 2.275731103613008,
"learning_rate": 6.889241247255951e-07,
"loss": 1.8866,
"step": 138
},
{
"epoch": 0.21855345911949686,
"grad_norm": 2.2018144326894764,
"learning_rate": 6.887004574479256e-07,
"loss": 1.7657,
"step": 139
},
{
"epoch": 0.22012578616352202,
"grad_norm": 2.2574275757278586,
"learning_rate": 6.884745913529804e-07,
"loss": 1.8978,
"step": 140
},
{
"epoch": 0.22169811320754718,
"grad_norm": 2.4947452295545323,
"learning_rate": 6.882465279070651e-07,
"loss": 2.1121,
"step": 141
},
{
"epoch": 0.22327044025157233,
"grad_norm": 2.321836578962341,
"learning_rate": 6.880162685907497e-07,
"loss": 1.8622,
"step": 142
},
{
"epoch": 0.2248427672955975,
"grad_norm": 2.234624773604752,
"learning_rate": 6.877838148988602e-07,
"loss": 1.8192,
"step": 143
},
{
"epoch": 0.22641509433962265,
"grad_norm": 2.1827223385186834,
"learning_rate": 6.87549168340468e-07,
"loss": 1.9317,
"step": 144
},
{
"epoch": 0.2279874213836478,
"grad_norm": 2.2058319167479987,
"learning_rate": 6.873123304388804e-07,
"loss": 2.1388,
"step": 145
},
{
"epoch": 0.22955974842767296,
"grad_norm": 2.336708828184526,
"learning_rate": 6.870733027316308e-07,
"loss": 2.0724,
"step": 146
},
{
"epoch": 0.23113207547169812,
"grad_norm": 2.352880258314341,
"learning_rate": 6.868320867704689e-07,
"loss": 1.7697,
"step": 147
},
{
"epoch": 0.23270440251572327,
"grad_norm": 2.062384155984072,
"learning_rate": 6.865886841213497e-07,
"loss": 2.1101,
"step": 148
},
{
"epoch": 0.23427672955974843,
"grad_norm": 2.1530391868129626,
"learning_rate": 6.863430963644248e-07,
"loss": 1.9232,
"step": 149
},
{
"epoch": 0.2358490566037736,
"grad_norm": 2.1758733285873113,
"learning_rate": 6.860953250940309e-07,
"loss": 1.8156,
"step": 150
},
{
"epoch": 0.23742138364779874,
"grad_norm": 2.3117896294416593,
"learning_rate": 6.8584537191868e-07,
"loss": 1.8781,
"step": 151
},
{
"epoch": 0.2389937106918239,
"grad_norm": 2.00639335375149,
"learning_rate": 6.855932384610488e-07,
"loss": 1.8219,
"step": 152
},
{
"epoch": 0.24056603773584906,
"grad_norm": 2.10561171383,
"learning_rate": 6.853389263579684e-07,
"loss": 1.7392,
"step": 153
},
{
"epoch": 0.24213836477987422,
"grad_norm": 2.4444601099856795,
"learning_rate": 6.850824372604132e-07,
"loss": 1.9414,
"step": 154
},
{
"epoch": 0.24371069182389937,
"grad_norm": 2.4242659961108495,
"learning_rate": 6.848237728334909e-07,
"loss": 1.839,
"step": 155
},
{
"epoch": 0.24528301886792453,
"grad_norm": 2.2032265431848597,
"learning_rate": 6.845629347564309e-07,
"loss": 1.8687,
"step": 156
},
{
"epoch": 0.2468553459119497,
"grad_norm": 2.2129417804509024,
"learning_rate": 6.842999247225737e-07,
"loss": 1.8612,
"step": 157
},
{
"epoch": 0.24842767295597484,
"grad_norm": 2.305447975246766,
"learning_rate": 6.840347444393605e-07,
"loss": 2.0208,
"step": 158
},
{
"epoch": 0.25,
"grad_norm": 2.122283205654406,
"learning_rate": 6.837673956283212e-07,
"loss": 1.8356,
"step": 159
},
{
"epoch": 0.25157232704402516,
"grad_norm": 2.332633354247406,
"learning_rate": 6.834978800250636e-07,
"loss": 1.902,
"step": 160
},
{
"epoch": 0.2531446540880503,
"grad_norm": 2.376667609853809,
"learning_rate": 6.832261993792623e-07,
"loss": 2.062,
"step": 161
},
{
"epoch": 0.25471698113207547,
"grad_norm": 2.220933142644897,
"learning_rate": 6.829523554546471e-07,
"loss": 2.0558,
"step": 162
},
{
"epoch": 0.2562893081761006,
"grad_norm": 2.033225497961393,
"learning_rate": 6.826763500289916e-07,
"loss": 1.8403,
"step": 163
},
{
"epoch": 0.2578616352201258,
"grad_norm": 2.1916641547727576,
"learning_rate": 6.823981848941018e-07,
"loss": 2.0203,
"step": 164
},
{
"epoch": 0.25943396226415094,
"grad_norm": 2.247501037485472,
"learning_rate": 6.821178618558043e-07,
"loss": 1.775,
"step": 165
},
{
"epoch": 0.2610062893081761,
"grad_norm": 2.1659369555105723,
"learning_rate": 6.818353827339348e-07,
"loss": 2.1447,
"step": 166
},
{
"epoch": 0.26257861635220126,
"grad_norm": 1.8752494947917147,
"learning_rate": 6.815507493623258e-07,
"loss": 1.8795,
"step": 167
},
{
"epoch": 0.2641509433962264,
"grad_norm": 2.1874497821008663,
"learning_rate": 6.812639635887953e-07,
"loss": 1.9524,
"step": 168
},
{
"epoch": 0.26572327044025157,
"grad_norm": 2.1976673497867862,
"learning_rate": 6.809750272751346e-07,
"loss": 1.8337,
"step": 169
},
{
"epoch": 0.2672955974842767,
"grad_norm": 2.2148302103458386,
"learning_rate": 6.806839422970957e-07,
"loss": 1.8816,
"step": 170
},
{
"epoch": 0.2688679245283019,
"grad_norm": 2.1811128155566775,
"learning_rate": 6.803907105443801e-07,
"loss": 1.8994,
"step": 171
},
{
"epoch": 0.27044025157232704,
"grad_norm": 2.309399739378513,
"learning_rate": 6.800953339206256e-07,
"loss": 1.8565,
"step": 172
},
{
"epoch": 0.2720125786163522,
"grad_norm": 2.2522360613496737,
"learning_rate": 6.797978143433946e-07,
"loss": 1.827,
"step": 173
},
{
"epoch": 0.27358490566037735,
"grad_norm": 2.1425200823912856,
"learning_rate": 6.794981537441612e-07,
"loss": 1.8159,
"step": 174
},
{
"epoch": 0.2751572327044025,
"grad_norm": 2.2841020729940946,
"learning_rate": 6.791963540682988e-07,
"loss": 1.8514,
"step": 175
},
{
"epoch": 0.27672955974842767,
"grad_norm": 2.225194894385099,
"learning_rate": 6.788924172750679e-07,
"loss": 1.9328,
"step": 176
},
{
"epoch": 0.2783018867924528,
"grad_norm": 2.09421585410522,
"learning_rate": 6.785863453376026e-07,
"loss": 1.7486,
"step": 177
},
{
"epoch": 0.279874213836478,
"grad_norm": 2.333617044046103,
"learning_rate": 6.782781402428983e-07,
"loss": 1.772,
"step": 178
},
{
"epoch": 0.28144654088050314,
"grad_norm": 2.178844778690441,
"learning_rate": 6.779678039917989e-07,
"loss": 1.7756,
"step": 179
},
{
"epoch": 0.2830188679245283,
"grad_norm": 2.1517711064133644,
"learning_rate": 6.776553385989832e-07,
"loss": 1.9176,
"step": 180
},
{
"epoch": 0.28459119496855345,
"grad_norm": 2.2680430076464133,
"learning_rate": 6.773407460929527e-07,
"loss": 1.893,
"step": 181
},
{
"epoch": 0.2861635220125786,
"grad_norm": 2.3746361775333686,
"learning_rate": 6.770240285160175e-07,
"loss": 1.943,
"step": 182
},
{
"epoch": 0.28773584905660377,
"grad_norm": 2.1349785564403216,
"learning_rate": 6.76705187924284e-07,
"loss": 1.7189,
"step": 183
},
{
"epoch": 0.2893081761006289,
"grad_norm": 2.1525208745450097,
"learning_rate": 6.763842263876403e-07,
"loss": 1.9378,
"step": 184
},
{
"epoch": 0.2908805031446541,
"grad_norm": 2.1312473298021097,
"learning_rate": 6.760611459897444e-07,
"loss": 1.6941,
"step": 185
},
{
"epoch": 0.29245283018867924,
"grad_norm": 2.1276458698264302,
"learning_rate": 6.757359488280091e-07,
"loss": 1.8998,
"step": 186
},
{
"epoch": 0.2940251572327044,
"grad_norm": 2.304634105409158,
"learning_rate": 6.754086370135895e-07,
"loss": 1.9326,
"step": 187
},
{
"epoch": 0.29559748427672955,
"grad_norm": 1.9930028532322692,
"learning_rate": 6.750792126713684e-07,
"loss": 1.8537,
"step": 188
},
{
"epoch": 0.2971698113207547,
"grad_norm": 2.1595911760439668,
"learning_rate": 6.747476779399436e-07,
"loss": 1.8215,
"step": 189
},
{
"epoch": 0.29874213836477986,
"grad_norm": 2.218806657186074,
"learning_rate": 6.744140349716127e-07,
"loss": 1.7037,
"step": 190
},
{
"epoch": 0.300314465408805,
"grad_norm": 2.216745170195917,
"learning_rate": 6.740782859323604e-07,
"loss": 2.0752,
"step": 191
},
{
"epoch": 0.3018867924528302,
"grad_norm": 2.077203113815276,
"learning_rate": 6.737404330018436e-07,
"loss": 1.9836,
"step": 192
},
{
"epoch": 0.30345911949685533,
"grad_norm": 2.192942695759785,
"learning_rate": 6.734004783733772e-07,
"loss": 1.6992,
"step": 193
},
{
"epoch": 0.3050314465408805,
"grad_norm": 2.5819835895165806,
"learning_rate": 6.730584242539209e-07,
"loss": 2.4884,
"step": 194
},
{
"epoch": 0.30660377358490565,
"grad_norm": 2.127248431265692,
"learning_rate": 6.727142728640633e-07,
"loss": 1.9178,
"step": 195
},
{
"epoch": 0.3081761006289308,
"grad_norm": 1.9502319695954797,
"learning_rate": 6.72368026438009e-07,
"loss": 1.871,
"step": 196
},
{
"epoch": 0.30974842767295596,
"grad_norm": 2.2313690170600045,
"learning_rate": 6.720196872235629e-07,
"loss": 1.6974,
"step": 197
},
{
"epoch": 0.3113207547169811,
"grad_norm": 2.1737546254948574,
"learning_rate": 6.716692574821164e-07,
"loss": 1.9516,
"step": 198
},
{
"epoch": 0.3128930817610063,
"grad_norm": 2.089055152215892,
"learning_rate": 6.713167394886324e-07,
"loss": 1.7015,
"step": 199
},
{
"epoch": 0.31446540880503143,
"grad_norm": 2.1241406637075047,
"learning_rate": 6.709621355316306e-07,
"loss": 1.913,
"step": 200
},
{
"epoch": 0.3160377358490566,
"grad_norm": 2.1582457787965343,
"learning_rate": 6.706054479131726e-07,
"loss": 1.9263,
"step": 201
},
{
"epoch": 0.31761006289308175,
"grad_norm": 2.3223231854168134,
"learning_rate": 6.702466789488468e-07,
"loss": 1.7648,
"step": 202
},
{
"epoch": 0.3191823899371069,
"grad_norm": 2.0439680852310778,
"learning_rate": 6.698858309677537e-07,
"loss": 1.8036,
"step": 203
},
{
"epoch": 0.32075471698113206,
"grad_norm": 2.134474063516094,
"learning_rate": 6.695229063124907e-07,
"loss": 1.8974,
"step": 204
},
{
"epoch": 0.3223270440251572,
"grad_norm": 2.1079010765204083,
"learning_rate": 6.691579073391366e-07,
"loss": 1.8955,
"step": 205
},
{
"epoch": 0.3238993710691824,
"grad_norm": 2.027683856704508,
"learning_rate": 6.687908364172367e-07,
"loss": 1.9092,
"step": 206
},
{
"epoch": 0.32547169811320753,
"grad_norm": 2.246810336956572,
"learning_rate": 6.684216959297871e-07,
"loss": 1.679,
"step": 207
},
{
"epoch": 0.3270440251572327,
"grad_norm": 2.413641384514312,
"learning_rate": 6.680504882732195e-07,
"loss": 1.7324,
"step": 208
},
{
"epoch": 0.32861635220125784,
"grad_norm": 2.183679584470248,
"learning_rate": 6.676772158573852e-07,
"loss": 1.7184,
"step": 209
},
{
"epoch": 0.330188679245283,
"grad_norm": 1.953756725302565,
"learning_rate": 6.673018811055401e-07,
"loss": 1.8537,
"step": 210
},
{
"epoch": 0.33176100628930816,
"grad_norm": 2.7587554774622616,
"learning_rate": 6.669244864543286e-07,
"loss": 1.7957,
"step": 211
},
{
"epoch": 0.3333333333333333,
"grad_norm": 2.0690029827560874,
"learning_rate": 6.665450343537673e-07,
"loss": 1.5945,
"step": 212
},
{
"epoch": 0.33490566037735847,
"grad_norm": 1.9370210759038746,
"learning_rate": 6.661635272672305e-07,
"loss": 1.8212,
"step": 213
},
{
"epoch": 0.33647798742138363,
"grad_norm": 2.3157909721510634,
"learning_rate": 6.657799676714325e-07,
"loss": 1.8253,
"step": 214
},
{
"epoch": 0.3380503144654088,
"grad_norm": 2.408718498079195,
"learning_rate": 6.653943580564128e-07,
"loss": 1.9183,
"step": 215
},
{
"epoch": 0.33962264150943394,
"grad_norm": 2.2428481931189115,
"learning_rate": 6.650067009255193e-07,
"loss": 1.9667,
"step": 216
},
{
"epoch": 0.3411949685534591,
"grad_norm": 2.3060581158229754,
"learning_rate": 6.646169987953921e-07,
"loss": 1.7651,
"step": 217
},
{
"epoch": 0.34276729559748426,
"grad_norm": 2.272099574457665,
"learning_rate": 6.642252541959475e-07,
"loss": 1.9895,
"step": 218
},
{
"epoch": 0.3443396226415094,
"grad_norm": 2.2813406057736016,
"learning_rate": 6.638314696703613e-07,
"loss": 2.1897,
"step": 219
},
{
"epoch": 0.34591194968553457,
"grad_norm": 2.18496346075307,
"learning_rate": 6.634356477750522e-07,
"loss": 1.8511,
"step": 220
},
{
"epoch": 0.3474842767295597,
"grad_norm": 1.9232426664702658,
"learning_rate": 6.630377910796655e-07,
"loss": 1.8494,
"step": 221
},
{
"epoch": 0.3490566037735849,
"grad_norm": 1.9818215355449529,
"learning_rate": 6.626379021670561e-07,
"loss": 1.9395,
"step": 222
},
{
"epoch": 0.35062893081761004,
"grad_norm": 2.244779710254365,
"learning_rate": 6.622359836332723e-07,
"loss": 1.9374,
"step": 223
},
{
"epoch": 0.3522012578616352,
"grad_norm": 2.1262049775384333,
"learning_rate": 6.618320380875379e-07,
"loss": 1.879,
"step": 224
},
{
"epoch": 0.35377358490566035,
"grad_norm": 1.9601702919734405,
"learning_rate": 6.614260681522365e-07,
"loss": 1.7618,
"step": 225
},
{
"epoch": 0.3553459119496855,
"grad_norm": 2.177142901528055,
"learning_rate": 6.610180764628937e-07,
"loss": 1.7007,
"step": 226
},
{
"epoch": 0.35691823899371067,
"grad_norm": 2.207301297000178,
"learning_rate": 6.606080656681599e-07,
"loss": 2.0777,
"step": 227
},
{
"epoch": 0.3584905660377358,
"grad_norm": 2.325333907285728,
"learning_rate": 6.601960384297937e-07,
"loss": 1.8572,
"step": 228
},
{
"epoch": 0.360062893081761,
"grad_norm": 2.176590475529821,
"learning_rate": 6.597819974226442e-07,
"loss": 1.911,
"step": 229
},
{
"epoch": 0.36163522012578614,
"grad_norm": 2.231029983251665,
"learning_rate": 6.593659453346336e-07,
"loss": 1.8546,
"step": 230
},
{
"epoch": 0.3632075471698113,
"grad_norm": 2.09941453798109,
"learning_rate": 6.589478848667402e-07,
"loss": 1.7259,
"step": 231
},
{
"epoch": 0.36477987421383645,
"grad_norm": 2.207729891460013,
"learning_rate": 6.585278187329803e-07,
"loss": 2.0615,
"step": 232
},
{
"epoch": 0.3663522012578616,
"grad_norm": 2.160194675811721,
"learning_rate": 6.581057496603907e-07,
"loss": 1.5577,
"step": 233
},
{
"epoch": 0.36792452830188677,
"grad_norm": 2.1933688787719117,
"learning_rate": 6.576816803890115e-07,
"loss": 1.8231,
"step": 234
},
{
"epoch": 0.3694968553459119,
"grad_norm": 2.0304475960191275,
"learning_rate": 6.572556136718678e-07,
"loss": 1.7399,
"step": 235
},
{
"epoch": 0.3710691823899371,
"grad_norm": 2.0372413629068973,
"learning_rate": 6.568275522749514e-07,
"loss": 1.7089,
"step": 236
},
{
"epoch": 0.37264150943396224,
"grad_norm": 2.2572731149937297,
"learning_rate": 6.563974989772047e-07,
"loss": 2.0235,
"step": 237
},
{
"epoch": 0.3742138364779874,
"grad_norm": 2.0112586745579155,
"learning_rate": 6.559654565704999e-07,
"loss": 1.6913,
"step": 238
},
{
"epoch": 0.3757861635220126,
"grad_norm": 2.020887524725217,
"learning_rate": 6.555314278596232e-07,
"loss": 1.9021,
"step": 239
},
{
"epoch": 0.37735849056603776,
"grad_norm": 2.062052722629033,
"learning_rate": 6.550954156622559e-07,
"loss": 1.8555,
"step": 240
},
{
"epoch": 0.3789308176100629,
"grad_norm": 2.146665256772467,
"learning_rate": 6.546574228089551e-07,
"loss": 1.9002,
"step": 241
},
{
"epoch": 0.3805031446540881,
"grad_norm": 2.238196549782288,
"learning_rate": 6.542174521431369e-07,
"loss": 1.9807,
"step": 242
},
{
"epoch": 0.38207547169811323,
"grad_norm": 2.111526491095184,
"learning_rate": 6.537755065210571e-07,
"loss": 1.9268,
"step": 243
},
{
"epoch": 0.3836477987421384,
"grad_norm": 1.9805520877642875,
"learning_rate": 6.533315888117923e-07,
"loss": 1.8807,
"step": 244
},
{
"epoch": 0.38522012578616355,
"grad_norm": 2.1505066292288313,
"learning_rate": 6.528857018972223e-07,
"loss": 1.8132,
"step": 245
},
{
"epoch": 0.3867924528301887,
"grad_norm": 2.1682916632827913,
"learning_rate": 6.524378486720107e-07,
"loss": 1.8423,
"step": 246
},
{
"epoch": 0.38836477987421386,
"grad_norm": 2.131056377037645,
"learning_rate": 6.519880320435858e-07,
"loss": 1.9015,
"step": 247
},
{
"epoch": 0.389937106918239,
"grad_norm": 2.3204970045437094,
"learning_rate": 6.515362549321227e-07,
"loss": 1.9385,
"step": 248
},
{
"epoch": 0.3915094339622642,
"grad_norm": 2.051436724730336,
"learning_rate": 6.510825202705237e-07,
"loss": 1.7837,
"step": 249
},
{
"epoch": 0.39308176100628933,
"grad_norm": 2.1183882128382785,
"learning_rate": 6.506268310043991e-07,
"loss": 1.9752,
"step": 250
},
{
"epoch": 0.39308176100628933,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.7381622791290283,
"eval_sat2_MCTS_chains_SFT_val_runtime": 91.8844,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.188,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.404,
"step": 250
},
{
"epoch": 0.3946540880503145,
"grad_norm": 2.162981802199106,
"learning_rate": 6.501691900920484e-07,
"loss": 1.9521,
"step": 251
},
{
"epoch": 0.39622641509433965,
"grad_norm": 1.9032699491782559,
"learning_rate": 6.497096005044415e-07,
"loss": 1.6293,
"step": 252
},
{
"epoch": 0.3977987421383648,
"grad_norm": 2.160690442124648,
"learning_rate": 6.492480652251983e-07,
"loss": 1.6815,
"step": 253
},
{
"epoch": 0.39937106918238996,
"grad_norm": 2.200251276319585,
"learning_rate": 6.487845872505703e-07,
"loss": 1.6191,
"step": 254
},
{
"epoch": 0.4009433962264151,
"grad_norm": 2.1153136111562834,
"learning_rate": 6.483191695894209e-07,
"loss": 1.7317,
"step": 255
},
{
"epoch": 0.4025157232704403,
"grad_norm": 2.136580109576303,
"learning_rate": 6.478518152632057e-07,
"loss": 1.6782,
"step": 256
},
{
"epoch": 0.40408805031446543,
"grad_norm": 1.993099001469681,
"learning_rate": 6.473825273059529e-07,
"loss": 1.9167,
"step": 257
},
{
"epoch": 0.4056603773584906,
"grad_norm": 2.348061223274077,
"learning_rate": 6.469113087642439e-07,
"loss": 1.8473,
"step": 258
},
{
"epoch": 0.40723270440251574,
"grad_norm": 1.9562289450639117,
"learning_rate": 6.46438162697193e-07,
"loss": 1.8061,
"step": 259
},
{
"epoch": 0.4088050314465409,
"grad_norm": 2.340728555526634,
"learning_rate": 6.459630921764282e-07,
"loss": 1.5497,
"step": 260
},
{
"epoch": 0.41037735849056606,
"grad_norm": 2.2351691072279096,
"learning_rate": 6.454861002860705e-07,
"loss": 2.1132,
"step": 261
},
{
"epoch": 0.4119496855345912,
"grad_norm": 2.1290065996236613,
"learning_rate": 6.450071901227147e-07,
"loss": 1.7072,
"step": 262
},
{
"epoch": 0.41352201257861637,
"grad_norm": 2.3103776756030054,
"learning_rate": 6.445263647954086e-07,
"loss": 2.0191,
"step": 263
},
{
"epoch": 0.41509433962264153,
"grad_norm": 2.1783834129569137,
"learning_rate": 6.440436274256333e-07,
"loss": 1.6563,
"step": 264
},
{
"epoch": 0.4166666666666667,
"grad_norm": 2.3188145114913747,
"learning_rate": 6.435589811472823e-07,
"loss": 2.1497,
"step": 265
},
{
"epoch": 0.41823899371069184,
"grad_norm": 2.2490188310531303,
"learning_rate": 6.430724291066422e-07,
"loss": 1.6207,
"step": 266
},
{
"epoch": 0.419811320754717,
"grad_norm": 2.0324238162321877,
"learning_rate": 6.425839744623711e-07,
"loss": 1.8477,
"step": 267
},
{
"epoch": 0.42138364779874216,
"grad_norm": 2.1650874047734505,
"learning_rate": 6.420936203854793e-07,
"loss": 1.7936,
"step": 268
},
{
"epoch": 0.4229559748427673,
"grad_norm": 2.157646176152135,
"learning_rate": 6.416013700593074e-07,
"loss": 2.1013,
"step": 269
},
{
"epoch": 0.42452830188679247,
"grad_norm": 2.1369610613705863,
"learning_rate": 6.411072266795066e-07,
"loss": 1.6905,
"step": 270
},
{
"epoch": 0.4261006289308176,
"grad_norm": 2.108532284547873,
"learning_rate": 6.406111934540178e-07,
"loss": 1.78,
"step": 271
},
{
"epoch": 0.4276729559748428,
"grad_norm": 2.142315122232749,
"learning_rate": 6.401132736030504e-07,
"loss": 1.738,
"step": 272
},
{
"epoch": 0.42924528301886794,
"grad_norm": 2.2549541806802296,
"learning_rate": 6.396134703590617e-07,
"loss": 1.7392,
"step": 273
},
{
"epoch": 0.4308176100628931,
"grad_norm": 2.0286792453580573,
"learning_rate": 6.391117869667358e-07,
"loss": 1.8195,
"step": 274
},
{
"epoch": 0.43238993710691825,
"grad_norm": 2.0337872779297017,
"learning_rate": 6.386082266829629e-07,
"loss": 1.8084,
"step": 275
},
{
"epoch": 0.4339622641509434,
"grad_norm": 2.095428278939121,
"learning_rate": 6.381027927768171e-07,
"loss": 1.943,
"step": 276
},
{
"epoch": 0.43553459119496857,
"grad_norm": 2.013844627037566,
"learning_rate": 6.375954885295369e-07,
"loss": 1.6934,
"step": 277
},
{
"epoch": 0.4371069182389937,
"grad_norm": 2.2579768491043497,
"learning_rate": 6.37086317234502e-07,
"loss": 1.8158,
"step": 278
},
{
"epoch": 0.4386792452830189,
"grad_norm": 2.3262366087078354,
"learning_rate": 6.365752821972134e-07,
"loss": 1.83,
"step": 279
},
{
"epoch": 0.44025157232704404,
"grad_norm": 2.0303047010962376,
"learning_rate": 6.36062386735271e-07,
"loss": 1.7752,
"step": 280
},
{
"epoch": 0.4418238993710692,
"grad_norm": 2.1138830916579425,
"learning_rate": 6.355476341783529e-07,
"loss": 1.9532,
"step": 281
},
{
"epoch": 0.44339622641509435,
"grad_norm": 2.100950656642803,
"learning_rate": 6.35031027868193e-07,
"loss": 1.7841,
"step": 282
},
{
"epoch": 0.4449685534591195,
"grad_norm": 2.175666972234393,
"learning_rate": 6.345125711585594e-07,
"loss": 1.7535,
"step": 283
},
{
"epoch": 0.44654088050314467,
"grad_norm": 2.090692135911591,
"learning_rate": 6.339922674152333e-07,
"loss": 1.9545,
"step": 284
},
{
"epoch": 0.4481132075471698,
"grad_norm": 2.2150355818462875,
"learning_rate": 6.334701200159867e-07,
"loss": 1.7806,
"step": 285
},
{
"epoch": 0.449685534591195,
"grad_norm": 2.1792703239922218,
"learning_rate": 6.3294613235056e-07,
"loss": 1.8228,
"step": 286
},
{
"epoch": 0.45125786163522014,
"grad_norm": 2.3237367502125794,
"learning_rate": 6.324203078206408e-07,
"loss": 1.7555,
"step": 287
},
{
"epoch": 0.4528301886792453,
"grad_norm": 2.119235492747239,
"learning_rate": 6.318926498398415e-07,
"loss": 2.1297,
"step": 288
},
{
"epoch": 0.45440251572327045,
"grad_norm": 2.066839274608312,
"learning_rate": 6.31363161833677e-07,
"loss": 1.802,
"step": 289
},
{
"epoch": 0.4559748427672956,
"grad_norm": 2.1311539498302716,
"learning_rate": 6.308318472395429e-07,
"loss": 1.8139,
"step": 290
},
{
"epoch": 0.45754716981132076,
"grad_norm": 2.191273523825869,
"learning_rate": 6.302987095066923e-07,
"loss": 1.8633,
"step": 291
},
{
"epoch": 0.4591194968553459,
"grad_norm": 2.2656122854422462,
"learning_rate": 6.297637520962143e-07,
"loss": 1.7441,
"step": 292
},
{
"epoch": 0.4606918238993711,
"grad_norm": 1.93728883044019,
"learning_rate": 6.292269784810113e-07,
"loss": 1.9895,
"step": 293
},
{
"epoch": 0.46226415094339623,
"grad_norm": 2.3187477308368343,
"learning_rate": 6.286883921457763e-07,
"loss": 1.83,
"step": 294
},
{
"epoch": 0.4638364779874214,
"grad_norm": 2.202481261207392,
"learning_rate": 6.281479965869702e-07,
"loss": 1.8797,
"step": 295
},
{
"epoch": 0.46540880503144655,
"grad_norm": 2.0842105854347657,
"learning_rate": 6.276057953127994e-07,
"loss": 1.7532,
"step": 296
},
{
"epoch": 0.4669811320754717,
"grad_norm": 2.207130976094581,
"learning_rate": 6.270617918431928e-07,
"loss": 1.7917,
"step": 297
},
{
"epoch": 0.46855345911949686,
"grad_norm": 2.032959806862764,
"learning_rate": 6.26515989709779e-07,
"loss": 1.8665,
"step": 298
},
{
"epoch": 0.470125786163522,
"grad_norm": 1.9531386309076453,
"learning_rate": 6.259683924558633e-07,
"loss": 1.8778,
"step": 299
},
{
"epoch": 0.4716981132075472,
"grad_norm": 2.0714051206159896,
"learning_rate": 6.254190036364051e-07,
"loss": 1.7126,
"step": 300
},
{
"epoch": 0.47327044025157233,
"grad_norm": 2.2881658756108907,
"learning_rate": 6.24867826817994e-07,
"loss": 1.8592,
"step": 301
},
{
"epoch": 0.4748427672955975,
"grad_norm": 2.0634196311190345,
"learning_rate": 6.243148655788276e-07,
"loss": 2.3116,
"step": 302
},
{
"epoch": 0.47641509433962265,
"grad_norm": 2.1436128024329024,
"learning_rate": 6.237601235086879e-07,
"loss": 1.7473,
"step": 303
},
{
"epoch": 0.4779874213836478,
"grad_norm": 2.2670611315983793,
"learning_rate": 6.232036042089171e-07,
"loss": 1.9504,
"step": 304
},
{
"epoch": 0.47955974842767296,
"grad_norm": 2.138063229224171,
"learning_rate": 6.226453112923962e-07,
"loss": 1.8807,
"step": 305
},
{
"epoch": 0.4811320754716981,
"grad_norm": 2.169257324016606,
"learning_rate": 6.220852483835196e-07,
"loss": 1.9444,
"step": 306
},
{
"epoch": 0.4827044025157233,
"grad_norm": 2.230348996182296,
"learning_rate": 6.215234191181725e-07,
"loss": 1.8807,
"step": 307
},
{
"epoch": 0.48427672955974843,
"grad_norm": 2.2607928616037967,
"learning_rate": 6.20959827143707e-07,
"loss": 1.9083,
"step": 308
},
{
"epoch": 0.4858490566037736,
"grad_norm": 2.3264562054360756,
"learning_rate": 6.203944761189192e-07,
"loss": 1.7524,
"step": 309
},
{
"epoch": 0.48742138364779874,
"grad_norm": 1.9976309357346456,
"learning_rate": 6.198273697140243e-07,
"loss": 1.9204,
"step": 310
},
{
"epoch": 0.4889937106918239,
"grad_norm": 2.24371014902543,
"learning_rate": 6.19258511610633e-07,
"loss": 1.7535,
"step": 311
},
{
"epoch": 0.49056603773584906,
"grad_norm": 2.2058395033512554,
"learning_rate": 6.186879055017288e-07,
"loss": 1.7674,
"step": 312
},
{
"epoch": 0.4921383647798742,
"grad_norm": 2.024400868779697,
"learning_rate": 6.181155550916422e-07,
"loss": 1.6954,
"step": 313
},
{
"epoch": 0.4937106918238994,
"grad_norm": 2.269535233545749,
"learning_rate": 6.175414640960283e-07,
"loss": 1.7337,
"step": 314
},
{
"epoch": 0.49528301886792453,
"grad_norm": 1.9745297525246102,
"learning_rate": 6.169656362418414e-07,
"loss": 1.8684,
"step": 315
},
{
"epoch": 0.4968553459119497,
"grad_norm": 1.976330025219871,
"learning_rate": 6.163880752673117e-07,
"loss": 2.1739,
"step": 316
},
{
"epoch": 0.49842767295597484,
"grad_norm": 2.1760376472956025,
"learning_rate": 6.158087849219204e-07,
"loss": 1.9162,
"step": 317
},
{
"epoch": 0.5,
"grad_norm": 2.143628134207709,
"learning_rate": 6.152277689663759e-07,
"loss": 1.7411,
"step": 318
},
{
"epoch": 0.5015723270440252,
"grad_norm": 1.9712614846347265,
"learning_rate": 6.146450311725888e-07,
"loss": 1.7378,
"step": 319
},
{
"epoch": 0.5031446540880503,
"grad_norm": 2.1616472374659357,
"learning_rate": 6.140605753236483e-07,
"loss": 1.741,
"step": 320
},
{
"epoch": 0.5047169811320755,
"grad_norm": 2.2014372084510074,
"learning_rate": 6.134744052137967e-07,
"loss": 1.7833,
"step": 321
},
{
"epoch": 0.5062893081761006,
"grad_norm": 1.982079271247313,
"learning_rate": 6.128865246484048e-07,
"loss": 1.8626,
"step": 322
},
{
"epoch": 0.5078616352201258,
"grad_norm": 2.254872366939666,
"learning_rate": 6.122969374439483e-07,
"loss": 1.8039,
"step": 323
},
{
"epoch": 0.5094339622641509,
"grad_norm": 2.016915503448893,
"learning_rate": 6.11705647427982e-07,
"loss": 1.5489,
"step": 324
},
{
"epoch": 0.5110062893081762,
"grad_norm": 2.1164070023469788,
"learning_rate": 6.111126584391148e-07,
"loss": 1.7177,
"step": 325
},
{
"epoch": 0.5125786163522013,
"grad_norm": 2.1940717974335846,
"learning_rate": 6.105179743269858e-07,
"loss": 1.844,
"step": 326
},
{
"epoch": 0.5141509433962265,
"grad_norm": 2.2525569223241186,
"learning_rate": 6.099215989522382e-07,
"loss": 2.4854,
"step": 327
},
{
"epoch": 0.5157232704402516,
"grad_norm": 2.0259464742758784,
"learning_rate": 6.093235361864952e-07,
"loss": 2.1479,
"step": 328
},
{
"epoch": 0.5172955974842768,
"grad_norm": 2.3113158826769347,
"learning_rate": 6.087237899123342e-07,
"loss": 1.6807,
"step": 329
},
{
"epoch": 0.5188679245283019,
"grad_norm": 2.032892966587123,
"learning_rate": 6.081223640232616e-07,
"loss": 2.0318,
"step": 330
},
{
"epoch": 0.5204402515723271,
"grad_norm": 2.3650596675106472,
"learning_rate": 6.075192624236881e-07,
"loss": 1.9068,
"step": 331
},
{
"epoch": 0.5220125786163522,
"grad_norm": 2.2695747251563225,
"learning_rate": 6.069144890289028e-07,
"loss": 1.6508,
"step": 332
},
{
"epoch": 0.5235849056603774,
"grad_norm": 2.13682292483943,
"learning_rate": 6.063080477650479e-07,
"loss": 1.9008,
"step": 333
},
{
"epoch": 0.5251572327044025,
"grad_norm": 2.108983919386453,
"learning_rate": 6.056999425690935e-07,
"loss": 1.7042,
"step": 334
},
{
"epoch": 0.5267295597484277,
"grad_norm": 2.1512302698424075,
"learning_rate": 6.050901773888115e-07,
"loss": 1.664,
"step": 335
},
{
"epoch": 0.5283018867924528,
"grad_norm": 2.039353542540568,
"learning_rate": 6.044787561827507e-07,
"loss": 1.7934,
"step": 336
},
{
"epoch": 0.529874213836478,
"grad_norm": 2.0719588798830655,
"learning_rate": 6.038656829202103e-07,
"loss": 1.9121,
"step": 337
},
{
"epoch": 0.5314465408805031,
"grad_norm": 1.9491996764419244,
"learning_rate": 6.032509615812147e-07,
"loss": 1.9204,
"step": 338
},
{
"epoch": 0.5330188679245284,
"grad_norm": 2.1113652450511897,
"learning_rate": 6.026345961564875e-07,
"loss": 1.8414,
"step": 339
},
{
"epoch": 0.5345911949685535,
"grad_norm": 2.2032131432104767,
"learning_rate": 6.020165906474257e-07,
"loss": 1.7114,
"step": 340
},
{
"epoch": 0.5361635220125787,
"grad_norm": 2.1791413791352565,
"learning_rate": 6.013969490660731e-07,
"loss": 2.2435,
"step": 341
},
{
"epoch": 0.5377358490566038,
"grad_norm": 2.0891510363928614,
"learning_rate": 6.007756754350954e-07,
"loss": 1.7701,
"step": 342
},
{
"epoch": 0.539308176100629,
"grad_norm": 2.0697597215600196,
"learning_rate": 6.001527737877532e-07,
"loss": 1.8208,
"step": 343
},
{
"epoch": 0.5408805031446541,
"grad_norm": 2.186994903601224,
"learning_rate": 5.995282481678758e-07,
"loss": 1.8373,
"step": 344
},
{
"epoch": 0.5424528301886793,
"grad_norm": 2.2604938659796545,
"learning_rate": 5.989021026298354e-07,
"loss": 1.7995,
"step": 345
},
{
"epoch": 0.5440251572327044,
"grad_norm": 2.265552317615986,
"learning_rate": 5.982743412385207e-07,
"loss": 1.7546,
"step": 346
},
{
"epoch": 0.5455974842767296,
"grad_norm": 2.15564901479827,
"learning_rate": 5.976449680693104e-07,
"loss": 1.8596,
"step": 347
},
{
"epoch": 0.5471698113207547,
"grad_norm": 2.0408737176330707,
"learning_rate": 5.970139872080463e-07,
"loss": 1.7603,
"step": 348
},
{
"epoch": 0.5487421383647799,
"grad_norm": 2.0704318452346153,
"learning_rate": 5.963814027510079e-07,
"loss": 1.9277,
"step": 349
},
{
"epoch": 0.550314465408805,
"grad_norm": 2.2298784446231243,
"learning_rate": 5.957472188048845e-07,
"loss": 1.7991,
"step": 350
},
{
"epoch": 0.5518867924528302,
"grad_norm": 2.0268792215372455,
"learning_rate": 5.951114394867494e-07,
"loss": 1.7636,
"step": 351
},
{
"epoch": 0.5534591194968553,
"grad_norm": 2.0724518345088527,
"learning_rate": 5.944740689240333e-07,
"loss": 1.8476,
"step": 352
},
{
"epoch": 0.5550314465408805,
"grad_norm": 2.2252899856685375,
"learning_rate": 5.938351112544964e-07,
"loss": 1.9069,
"step": 353
},
{
"epoch": 0.5566037735849056,
"grad_norm": 2.0991045116850784,
"learning_rate": 5.931945706262028e-07,
"loss": 1.8448,
"step": 354
},
{
"epoch": 0.5581761006289309,
"grad_norm": 2.049481500083905,
"learning_rate": 5.92552451197493e-07,
"loss": 1.8783,
"step": 355
},
{
"epoch": 0.559748427672956,
"grad_norm": 2.1757357613660893,
"learning_rate": 5.919087571369567e-07,
"loss": 1.8135,
"step": 356
},
{
"epoch": 0.5613207547169812,
"grad_norm": 2.261669343627697,
"learning_rate": 5.912634926234063e-07,
"loss": 1.9118,
"step": 357
},
{
"epoch": 0.5628930817610063,
"grad_norm": 2.3346020516981976,
"learning_rate": 5.906166618458491e-07,
"loss": 1.9321,
"step": 358
},
{
"epoch": 0.5644654088050315,
"grad_norm": 1.9754635648300218,
"learning_rate": 5.89968269003461e-07,
"loss": 1.7947,
"step": 359
},
{
"epoch": 0.5660377358490566,
"grad_norm": 2.1082779863127232,
"learning_rate": 5.893183183055581e-07,
"loss": 2.1433,
"step": 360
},
{
"epoch": 0.5676100628930818,
"grad_norm": 2.2719804428628474,
"learning_rate": 5.886668139715704e-07,
"loss": 1.7544,
"step": 361
},
{
"epoch": 0.5691823899371069,
"grad_norm": 2.063124532930253,
"learning_rate": 5.880137602310138e-07,
"loss": 1.8496,
"step": 362
},
{
"epoch": 0.5707547169811321,
"grad_norm": 2.178454726609103,
"learning_rate": 5.873591613234628e-07,
"loss": 1.8731,
"step": 363
},
{
"epoch": 0.5723270440251572,
"grad_norm": 2.2292005546265092,
"learning_rate": 5.867030214985232e-07,
"loss": 1.6673,
"step": 364
},
{
"epoch": 0.5738993710691824,
"grad_norm": 2.1915150883178467,
"learning_rate": 5.860453450158042e-07,
"loss": 1.8147,
"step": 365
},
{
"epoch": 0.5754716981132075,
"grad_norm": 2.068884145719646,
"learning_rate": 5.853861361448906e-07,
"loss": 1.8357,
"step": 366
},
{
"epoch": 0.5770440251572327,
"grad_norm": 2.2335775263207656,
"learning_rate": 5.847253991653161e-07,
"loss": 1.7674,
"step": 367
},
{
"epoch": 0.5786163522012578,
"grad_norm": 2.028099342557415,
"learning_rate": 5.840631383665337e-07,
"loss": 1.9504,
"step": 368
},
{
"epoch": 0.5801886792452831,
"grad_norm": 2.171456280936142,
"learning_rate": 5.833993580478899e-07,
"loss": 1.7799,
"step": 369
},
{
"epoch": 0.5817610062893082,
"grad_norm": 2.188440801596699,
"learning_rate": 5.827340625185951e-07,
"loss": 1.7664,
"step": 370
},
{
"epoch": 0.5833333333333334,
"grad_norm": 2.061808710093214,
"learning_rate": 5.820672560976968e-07,
"loss": 1.8846,
"step": 371
},
{
"epoch": 0.5849056603773585,
"grad_norm": 2.080838024759326,
"learning_rate": 5.813989431140509e-07,
"loss": 1.8208,
"step": 372
},
{
"epoch": 0.5864779874213837,
"grad_norm": 2.1013196605531266,
"learning_rate": 5.807291279062938e-07,
"loss": 1.7698,
"step": 373
},
{
"epoch": 0.5880503144654088,
"grad_norm": 2.1083157749408405,
"learning_rate": 5.800578148228141e-07,
"loss": 2.0087,
"step": 374
},
{
"epoch": 0.589622641509434,
"grad_norm": 2.327452355557212,
"learning_rate": 5.793850082217248e-07,
"loss": 2.2253,
"step": 375
},
{
"epoch": 0.5911949685534591,
"grad_norm": 2.2687807974688656,
"learning_rate": 5.787107124708343e-07,
"loss": 1.7986,
"step": 376
},
{
"epoch": 0.5927672955974843,
"grad_norm": 2.0752387638298835,
"learning_rate": 5.780349319476189e-07,
"loss": 1.6402,
"step": 377
},
{
"epoch": 0.5943396226415094,
"grad_norm": 2.2870254808353954,
"learning_rate": 5.773576710391937e-07,
"loss": 1.8418,
"step": 378
},
{
"epoch": 0.5959119496855346,
"grad_norm": 2.146779959111506,
"learning_rate": 5.766789341422841e-07,
"loss": 1.8692,
"step": 379
},
{
"epoch": 0.5974842767295597,
"grad_norm": 2.2844314903310585,
"learning_rate": 5.75998725663198e-07,
"loss": 1.713,
"step": 380
},
{
"epoch": 0.5990566037735849,
"grad_norm": 2.08256027270516,
"learning_rate": 5.753170500177962e-07,
"loss": 2.0656,
"step": 381
},
{
"epoch": 0.60062893081761,
"grad_norm": 2.1961855730334676,
"learning_rate": 5.746339116314646e-07,
"loss": 1.6188,
"step": 382
},
{
"epoch": 0.6022012578616353,
"grad_norm": 2.0468929231050748,
"learning_rate": 5.739493149390851e-07,
"loss": 1.8984,
"step": 383
},
{
"epoch": 0.6037735849056604,
"grad_norm": 2.0935907404848746,
"learning_rate": 5.732632643850064e-07,
"loss": 1.6483,
"step": 384
},
{
"epoch": 0.6053459119496856,
"grad_norm": 2.0252018235406575,
"learning_rate": 5.725757644230159e-07,
"loss": 1.9005,
"step": 385
},
{
"epoch": 0.6069182389937107,
"grad_norm": 2.001163026327367,
"learning_rate": 5.718868195163106e-07,
"loss": 1.829,
"step": 386
},
{
"epoch": 0.6084905660377359,
"grad_norm": 2.1924828073774476,
"learning_rate": 5.711964341374676e-07,
"loss": 1.6139,
"step": 387
},
{
"epoch": 0.610062893081761,
"grad_norm": 2.228468039969882,
"learning_rate": 5.705046127684158e-07,
"loss": 1.8581,
"step": 388
},
{
"epoch": 0.6116352201257862,
"grad_norm": 2.1871856796652906,
"learning_rate": 5.698113599004058e-07,
"loss": 1.8884,
"step": 389
},
{
"epoch": 0.6132075471698113,
"grad_norm": 2.119012481165798,
"learning_rate": 5.691166800339823e-07,
"loss": 1.7771,
"step": 390
},
{
"epoch": 0.6147798742138365,
"grad_norm": 2.1752727454942677,
"learning_rate": 5.684205776789531e-07,
"loss": 1.7475,
"step": 391
},
{
"epoch": 0.6163522012578616,
"grad_norm": 1.9703566361856617,
"learning_rate": 5.677230573543612e-07,
"loss": 1.995,
"step": 392
},
{
"epoch": 0.6179245283018868,
"grad_norm": 1.9783136401244312,
"learning_rate": 5.670241235884547e-07,
"loss": 1.6578,
"step": 393
},
{
"epoch": 0.6194968553459119,
"grad_norm": 2.1003352712507084,
"learning_rate": 5.663237809186581e-07,
"loss": 1.7801,
"step": 394
},
{
"epoch": 0.6210691823899371,
"grad_norm": 2.2674486565881344,
"learning_rate": 5.656220338915417e-07,
"loss": 1.7259,
"step": 395
},
{
"epoch": 0.6226415094339622,
"grad_norm": 2.1909929939479196,
"learning_rate": 5.649188870627932e-07,
"loss": 1.791,
"step": 396
},
{
"epoch": 0.6242138364779874,
"grad_norm": 2.1847378593727855,
"learning_rate": 5.642143449971877e-07,
"loss": 1.6481,
"step": 397
},
{
"epoch": 0.6257861635220126,
"grad_norm": 2.296413505588846,
"learning_rate": 5.635084122685582e-07,
"loss": 1.6404,
"step": 398
},
{
"epoch": 0.6273584905660378,
"grad_norm": 2.3710100879322815,
"learning_rate": 5.628010934597652e-07,
"loss": 1.8934,
"step": 399
},
{
"epoch": 0.6289308176100629,
"grad_norm": 2.1015013913914884,
"learning_rate": 5.620923931626681e-07,
"loss": 1.7094,
"step": 400
},
{
"epoch": 0.6305031446540881,
"grad_norm": 2.2084477731115095,
"learning_rate": 5.613823159780947e-07,
"loss": 1.775,
"step": 401
},
{
"epoch": 0.6320754716981132,
"grad_norm": 1.9632035650078112,
"learning_rate": 5.606708665158114e-07,
"loss": 1.8468,
"step": 402
},
{
"epoch": 0.6336477987421384,
"grad_norm": 2.3789325797797045,
"learning_rate": 5.59958049394493e-07,
"loss": 1.9285,
"step": 403
},
{
"epoch": 0.6352201257861635,
"grad_norm": 2.155496601332782,
"learning_rate": 5.592438692416937e-07,
"loss": 2.0406,
"step": 404
},
{
"epoch": 0.6367924528301887,
"grad_norm": 2.237193083014261,
"learning_rate": 5.585283306938159e-07,
"loss": 1.7627,
"step": 405
},
{
"epoch": 0.6383647798742138,
"grad_norm": 2.093924681792703,
"learning_rate": 5.578114383960806e-07,
"loss": 1.5612,
"step": 406
},
{
"epoch": 0.639937106918239,
"grad_norm": 1.9118398198078217,
"learning_rate": 5.570931970024976e-07,
"loss": 1.7692,
"step": 407
},
{
"epoch": 0.6415094339622641,
"grad_norm": 2.065885602117071,
"learning_rate": 5.563736111758344e-07,
"loss": 2.1028,
"step": 408
},
{
"epoch": 0.6430817610062893,
"grad_norm": 2.0965149313714035,
"learning_rate": 5.55652685587587e-07,
"loss": 2.112,
"step": 409
},
{
"epoch": 0.6446540880503144,
"grad_norm": 2.19620885008707,
"learning_rate": 5.549304249179487e-07,
"loss": 1.8514,
"step": 410
},
{
"epoch": 0.6462264150943396,
"grad_norm": 2.157564335495825,
"learning_rate": 5.542068338557801e-07,
"loss": 1.9672,
"step": 411
},
{
"epoch": 0.6477987421383647,
"grad_norm": 2.339822370018209,
"learning_rate": 5.534819170985786e-07,
"loss": 1.9801,
"step": 412
},
{
"epoch": 0.64937106918239,
"grad_norm": 2.53967666106126,
"learning_rate": 5.527556793524481e-07,
"loss": 2.0856,
"step": 413
},
{
"epoch": 0.6509433962264151,
"grad_norm": 2.027714420058619,
"learning_rate": 5.520281253320678e-07,
"loss": 1.92,
"step": 414
},
{
"epoch": 0.6525157232704403,
"grad_norm": 2.2931114820343717,
"learning_rate": 5.512992597606626e-07,
"loss": 1.7635,
"step": 415
},
{
"epoch": 0.6540880503144654,
"grad_norm": 2.103312242134212,
"learning_rate": 5.505690873699716e-07,
"loss": 1.8141,
"step": 416
},
{
"epoch": 0.6556603773584906,
"grad_norm": 2.0874926904070032,
"learning_rate": 5.498376129002176e-07,
"loss": 1.6943,
"step": 417
},
{
"epoch": 0.6572327044025157,
"grad_norm": 2.064459999298948,
"learning_rate": 5.491048411000766e-07,
"loss": 1.8251,
"step": 418
},
{
"epoch": 0.6588050314465409,
"grad_norm": 2.043867970270658,
"learning_rate": 5.483707767266467e-07,
"loss": 1.9847,
"step": 419
},
{
"epoch": 0.660377358490566,
"grad_norm": 2.0798717919218332,
"learning_rate": 5.47635424545417e-07,
"loss": 1.7778,
"step": 420
},
{
"epoch": 0.6619496855345912,
"grad_norm": 2.2400310365423897,
"learning_rate": 5.468987893302375e-07,
"loss": 1.9559,
"step": 421
},
{
"epoch": 0.6635220125786163,
"grad_norm": 2.099625244938322,
"learning_rate": 5.461608758632872e-07,
"loss": 1.736,
"step": 422
},
{
"epoch": 0.6650943396226415,
"grad_norm": 2.1409980099649455,
"learning_rate": 5.454216889350435e-07,
"loss": 1.7748,
"step": 423
},
{
"epoch": 0.6666666666666666,
"grad_norm": 2.0325650663365105,
"learning_rate": 5.44681233344251e-07,
"loss": 1.7192,
"step": 424
},
{
"epoch": 0.6682389937106918,
"grad_norm": 2.0307579619789213,
"learning_rate": 5.4393951389789e-07,
"loss": 1.8139,
"step": 425
},
{
"epoch": 0.6698113207547169,
"grad_norm": 1.9960345481446478,
"learning_rate": 5.431965354111465e-07,
"loss": 1.64,
"step": 426
},
{
"epoch": 0.6713836477987422,
"grad_norm": 2.2621592250213154,
"learning_rate": 5.424523027073794e-07,
"loss": 1.8895,
"step": 427
},
{
"epoch": 0.6729559748427673,
"grad_norm": 1.9753545804161157,
"learning_rate": 5.417068206180899e-07,
"loss": 1.7637,
"step": 428
},
{
"epoch": 0.6745283018867925,
"grad_norm": 2.2178958295189606,
"learning_rate": 5.409600939828906e-07,
"loss": 2.0521,
"step": 429
},
{
"epoch": 0.6761006289308176,
"grad_norm": 1.8923979616131885,
"learning_rate": 5.402121276494731e-07,
"loss": 1.7683,
"step": 430
},
{
"epoch": 0.6776729559748428,
"grad_norm": 2.0677112128365893,
"learning_rate": 5.394629264735772e-07,
"loss": 1.8395,
"step": 431
},
{
"epoch": 0.6792452830188679,
"grad_norm": 2.0153045160857603,
"learning_rate": 5.387124953189594e-07,
"loss": 1.8411,
"step": 432
},
{
"epoch": 0.6808176100628931,
"grad_norm": 2.18275253208093,
"learning_rate": 5.379608390573607e-07,
"loss": 1.8287,
"step": 433
},
{
"epoch": 0.6823899371069182,
"grad_norm": 2.091662651103415,
"learning_rate": 5.372079625684757e-07,
"loss": 1.8308,
"step": 434
},
{
"epoch": 0.6839622641509434,
"grad_norm": 2.1060091304508495,
"learning_rate": 5.364538707399207e-07,
"loss": 1.7317,
"step": 435
},
{
"epoch": 0.6855345911949685,
"grad_norm": 2.2183309272293545,
"learning_rate": 5.356985684672016e-07,
"loss": 1.7259,
"step": 436
},
{
"epoch": 0.6871069182389937,
"grad_norm": 2.076351332712323,
"learning_rate": 5.349420606536826e-07,
"loss": 1.8949,
"step": 437
},
{
"epoch": 0.6886792452830188,
"grad_norm": 2.133834879680037,
"learning_rate": 5.341843522105541e-07,
"loss": 1.881,
"step": 438
},
{
"epoch": 0.690251572327044,
"grad_norm": 1.866508697696801,
"learning_rate": 5.334254480568012e-07,
"loss": 1.6093,
"step": 439
},
{
"epoch": 0.6918238993710691,
"grad_norm": 2.026325564739705,
"learning_rate": 5.326653531191709e-07,
"loss": 1.7457,
"step": 440
},
{
"epoch": 0.6933962264150944,
"grad_norm": 2.030947206901835,
"learning_rate": 5.319040723321411e-07,
"loss": 1.8016,
"step": 441
},
{
"epoch": 0.6949685534591195,
"grad_norm": 2.2635231796009165,
"learning_rate": 5.31141610637888e-07,
"loss": 1.8274,
"step": 442
},
{
"epoch": 0.6965408805031447,
"grad_norm": 2.160129609647997,
"learning_rate": 5.303779729862541e-07,
"loss": 1.841,
"step": 443
},
{
"epoch": 0.6981132075471698,
"grad_norm": 2.3857343889060085,
"learning_rate": 5.296131643347164e-07,
"loss": 2.0521,
"step": 444
},
{
"epoch": 0.699685534591195,
"grad_norm": 2.2373548879864,
"learning_rate": 5.288471896483535e-07,
"loss": 1.644,
"step": 445
},
{
"epoch": 0.7012578616352201,
"grad_norm": 2.16265297512191,
"learning_rate": 5.280800538998141e-07,
"loss": 1.8585,
"step": 446
},
{
"epoch": 0.7028301886792453,
"grad_norm": 2.0560711051498277,
"learning_rate": 5.273117620692847e-07,
"loss": 1.6482,
"step": 447
},
{
"epoch": 0.7044025157232704,
"grad_norm": 1.9542683421441711,
"learning_rate": 5.265423191444563e-07,
"loss": 2.0183,
"step": 448
},
{
"epoch": 0.7059748427672956,
"grad_norm": 2.1353978544627044,
"learning_rate": 5.257717301204932e-07,
"loss": 1.8363,
"step": 449
},
{
"epoch": 0.7075471698113207,
"grad_norm": 2.1447188396187995,
"learning_rate": 5.25e-07,
"loss": 1.6944,
"step": 450
},
{
"epoch": 0.7091194968553459,
"grad_norm": 2.3547091263057056,
"learning_rate": 5.242271337929891e-07,
"loss": 1.7432,
"step": 451
},
{
"epoch": 0.710691823899371,
"grad_norm": 2.4270796532966337,
"learning_rate": 5.234531365168486e-07,
"loss": 1.6861,
"step": 452
},
{
"epoch": 0.7122641509433962,
"grad_norm": 2.3217701732544302,
"learning_rate": 5.22678013196309e-07,
"loss": 1.6177,
"step": 453
},
{
"epoch": 0.7138364779874213,
"grad_norm": 2.1886144838681982,
"learning_rate": 5.219017688634111e-07,
"loss": 1.6984,
"step": 454
},
{
"epoch": 0.7154088050314465,
"grad_norm": 1.9865673398125758,
"learning_rate": 5.211244085574735e-07,
"loss": 1.7418,
"step": 455
},
{
"epoch": 0.7169811320754716,
"grad_norm": 2.1224471005243677,
"learning_rate": 5.203459373250593e-07,
"loss": 1.7137,
"step": 456
},
{
"epoch": 0.7185534591194969,
"grad_norm": 2.010328916041691,
"learning_rate": 5.195663602199438e-07,
"loss": 1.5998,
"step": 457
},
{
"epoch": 0.720125786163522,
"grad_norm": 2.1617887359262964,
"learning_rate": 5.187856823030815e-07,
"loss": 1.7791,
"step": 458
},
{
"epoch": 0.7216981132075472,
"grad_norm": 2.088298143518636,
"learning_rate": 5.180039086425733e-07,
"loss": 1.717,
"step": 459
},
{
"epoch": 0.7232704402515723,
"grad_norm": 2.182318441419754,
"learning_rate": 5.172210443136335e-07,
"loss": 1.7386,
"step": 460
},
{
"epoch": 0.7248427672955975,
"grad_norm": 2.24576929264229,
"learning_rate": 5.164370943985573e-07,
"loss": 1.6459,
"step": 461
},
{
"epoch": 0.7264150943396226,
"grad_norm": 2.04417918075731,
"learning_rate": 5.156520639866867e-07,
"loss": 1.8024,
"step": 462
},
{
"epoch": 0.7279874213836478,
"grad_norm": 2.112329388375878,
"learning_rate": 5.14865958174379e-07,
"loss": 2.0854,
"step": 463
},
{
"epoch": 0.7295597484276729,
"grad_norm": 2.016676563210969,
"learning_rate": 5.140787820649725e-07,
"loss": 1.7524,
"step": 464
},
{
"epoch": 0.7311320754716981,
"grad_norm": 2.60378450644644,
"learning_rate": 5.132905407687537e-07,
"loss": 2.0874,
"step": 465
},
{
"epoch": 0.7327044025157232,
"grad_norm": 2.0550298802585427,
"learning_rate": 5.125012394029245e-07,
"loss": 1.7928,
"step": 466
},
{
"epoch": 0.7342767295597484,
"grad_norm": 2.109873492408979,
"learning_rate": 5.117108830915686e-07,
"loss": 1.7396,
"step": 467
},
{
"epoch": 0.7358490566037735,
"grad_norm": 2.075833532198868,
"learning_rate": 5.109194769656182e-07,
"loss": 1.8012,
"step": 468
},
{
"epoch": 0.7374213836477987,
"grad_norm": 2.2968922617399143,
"learning_rate": 5.10127026162821e-07,
"loss": 1.677,
"step": 469
},
{
"epoch": 0.7389937106918238,
"grad_norm": 2.1646580219404767,
"learning_rate": 5.093335358277063e-07,
"loss": 1.8778,
"step": 470
},
{
"epoch": 0.7405660377358491,
"grad_norm": 2.077818575976857,
"learning_rate": 5.085390111115525e-07,
"loss": 1.7151,
"step": 471
},
{
"epoch": 0.7421383647798742,
"grad_norm": 2.1792667307239117,
"learning_rate": 5.077434571723527e-07,
"loss": 1.7331,
"step": 472
},
{
"epoch": 0.7437106918238994,
"grad_norm": 2.007840890946543,
"learning_rate": 5.069468791747818e-07,
"loss": 1.6337,
"step": 473
},
{
"epoch": 0.7452830188679245,
"grad_norm": 2.0523492740975886,
"learning_rate": 5.061492822901629e-07,
"loss": 1.7339,
"step": 474
},
{
"epoch": 0.7468553459119497,
"grad_norm": 2.1645747688452492,
"learning_rate": 5.053506716964335e-07,
"loss": 1.9892,
"step": 475
},
{
"epoch": 0.7484276729559748,
"grad_norm": 2.30254590731602,
"learning_rate": 5.04551052578112e-07,
"loss": 1.8834,
"step": 476
},
{
"epoch": 0.75,
"grad_norm": 2.3644242948478915,
"learning_rate": 5.03750430126264e-07,
"loss": 1.8034,
"step": 477
},
{
"epoch": 0.7515723270440252,
"grad_norm": 2.1460399637911096,
"learning_rate": 5.029488095384689e-07,
"loss": 1.8283,
"step": 478
},
{
"epoch": 0.7531446540880503,
"grad_norm": 2.178578800063943,
"learning_rate": 5.021461960187858e-07,
"loss": 1.6951,
"step": 479
},
{
"epoch": 0.7547169811320755,
"grad_norm": 2.0849853089545154,
"learning_rate": 5.013425947777198e-07,
"loss": 1.8294,
"step": 480
},
{
"epoch": 0.7562893081761006,
"grad_norm": 2.0454648568366327,
"learning_rate": 5.005380110321882e-07,
"loss": 1.8758,
"step": 481
},
{
"epoch": 0.7578616352201258,
"grad_norm": 2.016873990373029,
"learning_rate": 4.997324500054869e-07,
"loss": 1.8153,
"step": 482
},
{
"epoch": 0.7594339622641509,
"grad_norm": 2.3068593437311353,
"learning_rate": 4.989259169272557e-07,
"loss": 2.0439,
"step": 483
},
{
"epoch": 0.7610062893081762,
"grad_norm": 2.23324104378393,
"learning_rate": 4.981184170334456e-07,
"loss": 1.6537,
"step": 484
},
{
"epoch": 0.7625786163522013,
"grad_norm": 2.194801843258506,
"learning_rate": 4.973099555662832e-07,
"loss": 1.7096,
"step": 485
},
{
"epoch": 0.7641509433962265,
"grad_norm": 2.031449083549838,
"learning_rate": 4.965005377742386e-07,
"loss": 1.774,
"step": 486
},
{
"epoch": 0.7657232704402516,
"grad_norm": 2.1043645294914577,
"learning_rate": 4.956901689119894e-07,
"loss": 1.8458,
"step": 487
},
{
"epoch": 0.7672955974842768,
"grad_norm": 1.953254357792575,
"learning_rate": 4.948788542403877e-07,
"loss": 1.8636,
"step": 488
},
{
"epoch": 0.7688679245283019,
"grad_norm": 2.146268467480728,
"learning_rate": 4.940665990264263e-07,
"loss": 1.9093,
"step": 489
},
{
"epoch": 0.7704402515723271,
"grad_norm": 1.9666125611299663,
"learning_rate": 4.932534085432032e-07,
"loss": 1.7225,
"step": 490
},
{
"epoch": 0.7720125786163522,
"grad_norm": 2.318583475321128,
"learning_rate": 4.924392880698882e-07,
"loss": 2.1748,
"step": 491
},
{
"epoch": 0.7735849056603774,
"grad_norm": 2.0092675503475004,
"learning_rate": 4.91624242891689e-07,
"loss": 1.9759,
"step": 492
},
{
"epoch": 0.7751572327044025,
"grad_norm": 2.1097488696357076,
"learning_rate": 4.90808278299816e-07,
"loss": 1.9701,
"step": 493
},
{
"epoch": 0.7767295597484277,
"grad_norm": 2.142041500023684,
"learning_rate": 4.899913995914485e-07,
"loss": 1.6193,
"step": 494
},
{
"epoch": 0.7783018867924528,
"grad_norm": 2.2559396370443427,
"learning_rate": 4.891736120696999e-07,
"loss": 1.7115,
"step": 495
},
{
"epoch": 0.779874213836478,
"grad_norm": 2.1488219924141596,
"learning_rate": 4.883549210435841e-07,
"loss": 1.9703,
"step": 496
},
{
"epoch": 0.7814465408805031,
"grad_norm": 2.1397162032458232,
"learning_rate": 4.8753533182798e-07,
"loss": 1.7644,
"step": 497
},
{
"epoch": 0.7830188679245284,
"grad_norm": 2.191504611692712,
"learning_rate": 4.867148497435977e-07,
"loss": 1.6881,
"step": 498
},
{
"epoch": 0.7845911949685535,
"grad_norm": 2.000204406082109,
"learning_rate": 4.858934801169436e-07,
"loss": 1.6802,
"step": 499
},
{
"epoch": 0.7861635220125787,
"grad_norm": 2.100996106775111,
"learning_rate": 4.850712282802863e-07,
"loss": 1.7683,
"step": 500
},
{
"epoch": 0.7861635220125787,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.7106590270996094,
"eval_sat2_MCTS_chains_SFT_val_runtime": 92.0806,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.164,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.401,
"step": 500
},
{
"epoch": 0.7877358490566038,
"grad_norm": 2.1566781241616413,
"learning_rate": 4.842480995716212e-07,
"loss": 1.7176,
"step": 501
},
{
"epoch": 0.789308176100629,
"grad_norm": 2.1925936773589934,
"learning_rate": 4.834240993346361e-07,
"loss": 1.8595,
"step": 502
},
{
"epoch": 0.7908805031446541,
"grad_norm": 2.249072634890456,
"learning_rate": 4.825992329186777e-07,
"loss": 1.7456,
"step": 503
},
{
"epoch": 0.7924528301886793,
"grad_norm": 2.1147929947398025,
"learning_rate": 4.817735056787149e-07,
"loss": 1.6531,
"step": 504
},
{
"epoch": 0.7940251572327044,
"grad_norm": 2.0853013840737935,
"learning_rate": 4.809469229753053e-07,
"loss": 1.707,
"step": 505
},
{
"epoch": 0.7955974842767296,
"grad_norm": 1.9814812102879638,
"learning_rate": 4.8011949017456e-07,
"loss": 1.8545,
"step": 506
},
{
"epoch": 0.7971698113207547,
"grad_norm": 2.242933290063341,
"learning_rate": 4.792912126481094e-07,
"loss": 1.7509,
"step": 507
},
{
"epoch": 0.7987421383647799,
"grad_norm": 2.057417183985654,
"learning_rate": 4.784620957730669e-07,
"loss": 1.8915,
"step": 508
},
{
"epoch": 0.800314465408805,
"grad_norm": 2.1269232658429,
"learning_rate": 4.776321449319959e-07,
"loss": 1.6437,
"step": 509
},
{
"epoch": 0.8018867924528302,
"grad_norm": 2.0386799297363574,
"learning_rate": 4.76801365512873e-07,
"loss": 1.7157,
"step": 510
},
{
"epoch": 0.8034591194968553,
"grad_norm": 2.1312494355819007,
"learning_rate": 4.759697629090542e-07,
"loss": 1.8856,
"step": 511
},
{
"epoch": 0.8050314465408805,
"grad_norm": 2.036024812845296,
"learning_rate": 4.751373425192395e-07,
"loss": 1.6698,
"step": 512
},
{
"epoch": 0.8066037735849056,
"grad_norm": 2.3974518115445242,
"learning_rate": 4.743041097474381e-07,
"loss": 1.9648,
"step": 513
},
{
"epoch": 0.8081761006289309,
"grad_norm": 2.1241520800303846,
"learning_rate": 4.734700700029329e-07,
"loss": 1.7318,
"step": 514
},
{
"epoch": 0.809748427672956,
"grad_norm": 2.1431917628718162,
"learning_rate": 4.7263522870024566e-07,
"loss": 1.6973,
"step": 515
},
{
"epoch": 0.8113207547169812,
"grad_norm": 2.234624716136248,
"learning_rate": 4.7179959125910164e-07,
"loss": 1.7548,
"step": 516
},
{
"epoch": 0.8128930817610063,
"grad_norm": 2.161266418044944,
"learning_rate": 4.709631631043949e-07,
"loss": 1.605,
"step": 517
},
{
"epoch": 0.8144654088050315,
"grad_norm": 1.9836121455186402,
"learning_rate": 4.701259496661527e-07,
"loss": 1.8285,
"step": 518
},
{
"epoch": 0.8160377358490566,
"grad_norm": 2.173970653905368,
"learning_rate": 4.6928795637949986e-07,
"loss": 1.7383,
"step": 519
},
{
"epoch": 0.8176100628930818,
"grad_norm": 2.161721549869913,
"learning_rate": 4.6844918868462445e-07,
"loss": 1.5783,
"step": 520
},
{
"epoch": 0.8191823899371069,
"grad_norm": 2.159956411342309,
"learning_rate": 4.676096520267417e-07,
"loss": 1.8492,
"step": 521
},
{
"epoch": 0.8207547169811321,
"grad_norm": 2.0881573745933553,
"learning_rate": 4.6676935185605884e-07,
"loss": 1.7813,
"step": 522
},
{
"epoch": 0.8223270440251572,
"grad_norm": 2.1031987642542935,
"learning_rate": 4.659282936277399e-07,
"loss": 2.0953,
"step": 523
},
{
"epoch": 0.8238993710691824,
"grad_norm": 2.205744158912345,
"learning_rate": 4.6508648280187014e-07,
"loss": 1.7749,
"step": 524
},
{
"epoch": 0.8254716981132075,
"grad_norm": 2.084552384171812,
"learning_rate": 4.642439248434205e-07,
"loss": 1.8273,
"step": 525
},
{
"epoch": 0.8270440251572327,
"grad_norm": 2.061048329367607,
"learning_rate": 4.6340062522221245e-07,
"loss": 1.759,
"step": 526
},
{
"epoch": 0.8286163522012578,
"grad_norm": 2.069167397462438,
"learning_rate": 4.6255658941288197e-07,
"loss": 1.8557,
"step": 527
},
{
"epoch": 0.8301886792452831,
"grad_norm": 2.144369600536378,
"learning_rate": 4.6171182289484464e-07,
"loss": 1.7656,
"step": 528
},
{
"epoch": 0.8317610062893082,
"grad_norm": 2.1514775909224375,
"learning_rate": 4.608663311522597e-07,
"loss": 1.8321,
"step": 529
},
{
"epoch": 0.8333333333333334,
"grad_norm": 2.1170537274734507,
"learning_rate": 4.6002011967399414e-07,
"loss": 1.6458,
"step": 530
},
{
"epoch": 0.8349056603773585,
"grad_norm": 2.357949753261125,
"learning_rate": 4.591731939535879e-07,
"loss": 1.6988,
"step": 531
},
{
"epoch": 0.8364779874213837,
"grad_norm": 2.010756013038571,
"learning_rate": 4.583255594892175e-07,
"loss": 1.6346,
"step": 532
},
{
"epoch": 0.8380503144654088,
"grad_norm": 2.0259740544494553,
"learning_rate": 4.574772217836605e-07,
"loss": 1.667,
"step": 533
},
{
"epoch": 0.839622641509434,
"grad_norm": 2.3278850594778793,
"learning_rate": 4.5662818634426e-07,
"loss": 1.867,
"step": 534
},
{
"epoch": 0.8411949685534591,
"grad_norm": 2.127352798058412,
"learning_rate": 4.557784586828886e-07,
"loss": 1.7475,
"step": 535
},
{
"epoch": 0.8427672955974843,
"grad_norm": 2.0508704544909104,
"learning_rate": 4.5492804431591275e-07,
"loss": 1.9647,
"step": 536
},
{
"epoch": 0.8443396226415094,
"grad_norm": 1.9847000051548063,
"learning_rate": 4.5407694876415707e-07,
"loss": 1.7637,
"step": 537
},
{
"epoch": 0.8459119496855346,
"grad_norm": 2.146458433753693,
"learning_rate": 4.532251775528685e-07,
"loss": 1.9054,
"step": 538
},
{
"epoch": 0.8474842767295597,
"grad_norm": 2.037086599135811,
"learning_rate": 4.5237273621167977e-07,
"loss": 1.9655,
"step": 539
},
{
"epoch": 0.8490566037735849,
"grad_norm": 2.338765254967049,
"learning_rate": 4.5151963027457467e-07,
"loss": 1.9605,
"step": 540
},
{
"epoch": 0.85062893081761,
"grad_norm": 2.2366764493368443,
"learning_rate": 4.5066586527985104e-07,
"loss": 1.8306,
"step": 541
},
{
"epoch": 0.8522012578616353,
"grad_norm": 1.9395938337987975,
"learning_rate": 4.498114467700855e-07,
"loss": 1.8506,
"step": 542
},
{
"epoch": 0.8537735849056604,
"grad_norm": 2.11126192377906,
"learning_rate": 4.4895638029209737e-07,
"loss": 2.0131,
"step": 543
},
{
"epoch": 0.8553459119496856,
"grad_norm": 1.9448520113288943,
"learning_rate": 4.48100671396912e-07,
"loss": 1.6417,
"step": 544
},
{
"epoch": 0.8569182389937107,
"grad_norm": 2.17917918305954,
"learning_rate": 4.4724432563972586e-07,
"loss": 1.596,
"step": 545
},
{
"epoch": 0.8584905660377359,
"grad_norm": 2.2139144251618204,
"learning_rate": 4.463873485798697e-07,
"loss": 1.6793,
"step": 546
},
{
"epoch": 0.860062893081761,
"grad_norm": 2.1238330080137815,
"learning_rate": 4.455297457807724e-07,
"loss": 2.0614,
"step": 547
},
{
"epoch": 0.8616352201257862,
"grad_norm": 2.3023567382067873,
"learning_rate": 4.4467152280992516e-07,
"loss": 1.7894,
"step": 548
},
{
"epoch": 0.8632075471698113,
"grad_norm": 2.041517573942539,
"learning_rate": 4.4381268523884577e-07,
"loss": 1.9789,
"step": 549
},
{
"epoch": 0.8647798742138365,
"grad_norm": 2.295893853414247,
"learning_rate": 4.4295323864304123e-07,
"loss": 1.6548,
"step": 550
},
{
"epoch": 0.8663522012578616,
"grad_norm": 2.1107449778410152,
"learning_rate": 4.420931886019727e-07,
"loss": 1.8173,
"step": 551
},
{
"epoch": 0.8679245283018868,
"grad_norm": 1.9372269175313137,
"learning_rate": 4.412325406990188e-07,
"loss": 1.7247,
"step": 552
},
{
"epoch": 0.8694968553459119,
"grad_norm": 1.8111270901761425,
"learning_rate": 4.4037130052143943e-07,
"loss": 1.7852,
"step": 553
},
{
"epoch": 0.8710691823899371,
"grad_norm": 2.0430528853223997,
"learning_rate": 4.3950947366033957e-07,
"loss": 1.8733,
"step": 554
},
{
"epoch": 0.8726415094339622,
"grad_norm": 2.1440645400252416,
"learning_rate": 4.386470657106326e-07,
"loss": 1.7904,
"step": 555
},
{
"epoch": 0.8742138364779874,
"grad_norm": 2.1458981176838026,
"learning_rate": 4.377840822710047e-07,
"loss": 1.8855,
"step": 556
},
{
"epoch": 0.8757861635220126,
"grad_norm": 2.986445993481532,
"learning_rate": 4.3692052894387775e-07,
"loss": 1.7439,
"step": 557
},
{
"epoch": 0.8773584905660378,
"grad_norm": 1.9740082789953175,
"learning_rate": 4.360564113353734e-07,
"loss": 1.7215,
"step": 558
},
{
"epoch": 0.8789308176100629,
"grad_norm": 2.0624678582939984,
"learning_rate": 4.3519173505527654e-07,
"loss": 1.7609,
"step": 559
},
{
"epoch": 0.8805031446540881,
"grad_norm": 2.1742856227918868,
"learning_rate": 4.3432650571699923e-07,
"loss": 1.7963,
"step": 560
},
{
"epoch": 0.8820754716981132,
"grad_norm": 2.328267217087019,
"learning_rate": 4.334607289375434e-07,
"loss": 1.8014,
"step": 561
},
{
"epoch": 0.8836477987421384,
"grad_norm": 2.2240480876262256,
"learning_rate": 4.325944103374652e-07,
"loss": 1.9335,
"step": 562
},
{
"epoch": 0.8852201257861635,
"grad_norm": 2.0812109009351483,
"learning_rate": 4.3172755554083857e-07,
"loss": 1.6568,
"step": 563
},
{
"epoch": 0.8867924528301887,
"grad_norm": 2.419224994100306,
"learning_rate": 4.308601701752178e-07,
"loss": 1.8096,
"step": 564
},
{
"epoch": 0.8883647798742138,
"grad_norm": 2.291019726545779,
"learning_rate": 4.299922598716017e-07,
"loss": 1.6433,
"step": 565
},
{
"epoch": 0.889937106918239,
"grad_norm": 2.1682347425403634,
"learning_rate": 4.2912383026439746e-07,
"loss": 1.7372,
"step": 566
},
{
"epoch": 0.8915094339622641,
"grad_norm": 2.2978680672025713,
"learning_rate": 4.2825488699138295e-07,
"loss": 1.8202,
"step": 567
},
{
"epoch": 0.8930817610062893,
"grad_norm": 2.094968531097701,
"learning_rate": 4.2738543569367104e-07,
"loss": 1.6626,
"step": 568
},
{
"epoch": 0.8946540880503144,
"grad_norm": 2.052297146112,
"learning_rate": 4.2651548201567244e-07,
"loss": 1.7142,
"step": 569
},
{
"epoch": 0.8962264150943396,
"grad_norm": 2.318005287614226,
"learning_rate": 4.2564503160505965e-07,
"loss": 1.7043,
"step": 570
},
{
"epoch": 0.8977987421383647,
"grad_norm": 2.2399641315697876,
"learning_rate": 4.2477409011272947e-07,
"loss": 1.7997,
"step": 571
},
{
"epoch": 0.89937106918239,
"grad_norm": 2.2279042572355157,
"learning_rate": 4.239026631927671e-07,
"loss": 1.8412,
"step": 572
},
{
"epoch": 0.9009433962264151,
"grad_norm": 2.2532345047567843,
"learning_rate": 4.2303075650240874e-07,
"loss": 1.7016,
"step": 573
},
{
"epoch": 0.9025157232704403,
"grad_norm": 2.0723779334034917,
"learning_rate": 4.221583757020058e-07,
"loss": 1.7483,
"step": 574
},
{
"epoch": 0.9040880503144654,
"grad_norm": 2.1869209764537527,
"learning_rate": 4.21285526454987e-07,
"loss": 1.7919,
"step": 575
},
{
"epoch": 0.9056603773584906,
"grad_norm": 2.2496647478161544,
"learning_rate": 4.2041221442782234e-07,
"loss": 1.714,
"step": 576
},
{
"epoch": 0.9072327044025157,
"grad_norm": 2.068857000999971,
"learning_rate": 4.195384452899863e-07,
"loss": 1.8463,
"step": 577
},
{
"epoch": 0.9088050314465409,
"grad_norm": 2.0359248854430034,
"learning_rate": 4.1866422471392097e-07,
"loss": 1.7426,
"step": 578
},
{
"epoch": 0.910377358490566,
"grad_norm": 2.228425159773193,
"learning_rate": 4.1778955837499877e-07,
"loss": 2.0427,
"step": 579
},
{
"epoch": 0.9119496855345912,
"grad_norm": 2.284187655683882,
"learning_rate": 4.1691445195148624e-07,
"loss": 1.847,
"step": 580
},
{
"epoch": 0.9135220125786163,
"grad_norm": 2.1771730650916448,
"learning_rate": 4.1603891112450694e-07,
"loss": 1.9315,
"step": 581
},
{
"epoch": 0.9150943396226415,
"grad_norm": 2.1626615626582293,
"learning_rate": 4.151629415780043e-07,
"loss": 1.6456,
"step": 582
},
{
"epoch": 0.9166666666666666,
"grad_norm": 2.305840039876449,
"learning_rate": 4.142865489987052e-07,
"loss": 1.6958,
"step": 583
},
{
"epoch": 0.9182389937106918,
"grad_norm": 2.154256346851341,
"learning_rate": 4.1340973907608293e-07,
"loss": 2.0085,
"step": 584
},
{
"epoch": 0.9198113207547169,
"grad_norm": 2.2041638948692537,
"learning_rate": 4.125325175023197e-07,
"loss": 2.1236,
"step": 585
},
{
"epoch": 0.9213836477987422,
"grad_norm": 2.103411483270433,
"learning_rate": 4.116548899722706e-07,
"loss": 1.7799,
"step": 586
},
{
"epoch": 0.9229559748427673,
"grad_norm": 2.0063388038701904,
"learning_rate": 4.1077686218342565e-07,
"loss": 1.6889,
"step": 587
},
{
"epoch": 0.9245283018867925,
"grad_norm": 2.175245741274736,
"learning_rate": 4.098984398358738e-07,
"loss": 1.6925,
"step": 588
},
{
"epoch": 0.9261006289308176,
"grad_norm": 2.0336428356844634,
"learning_rate": 4.090196286322654e-07,
"loss": 1.8607,
"step": 589
},
{
"epoch": 0.9276729559748428,
"grad_norm": 2.037205174603859,
"learning_rate": 4.0814043427777475e-07,
"loss": 1.9117,
"step": 590
},
{
"epoch": 0.9292452830188679,
"grad_norm": 2.30170118004087,
"learning_rate": 4.072608624800641e-07,
"loss": 1.9196,
"step": 591
},
{
"epoch": 0.9308176100628931,
"grad_norm": 2.1986833773630816,
"learning_rate": 4.063809189492457e-07,
"loss": 2.1265,
"step": 592
},
{
"epoch": 0.9323899371069182,
"grad_norm": 2.1016581569566033,
"learning_rate": 4.055006093978452e-07,
"loss": 1.8248,
"step": 593
},
{
"epoch": 0.9339622641509434,
"grad_norm": 2.318998947765389,
"learning_rate": 4.0461993954076433e-07,
"loss": 1.9183,
"step": 594
},
{
"epoch": 0.9355345911949685,
"grad_norm": 2.1280276120226396,
"learning_rate": 4.037389150952441e-07,
"loss": 1.5676,
"step": 595
},
{
"epoch": 0.9371069182389937,
"grad_norm": 2.074586892066852,
"learning_rate": 4.028575417808272e-07,
"loss": 1.5338,
"step": 596
},
{
"epoch": 0.9386792452830188,
"grad_norm": 2.0027829031033106,
"learning_rate": 4.019758253193213e-07,
"loss": 2.0435,
"step": 597
},
{
"epoch": 0.940251572327044,
"grad_norm": 1.9483550382264585,
"learning_rate": 4.010937714347617e-07,
"loss": 1.8436,
"step": 598
},
{
"epoch": 0.9418238993710691,
"grad_norm": 2.1386021253495495,
"learning_rate": 4.002113858533744e-07,
"loss": 1.8209,
"step": 599
},
{
"epoch": 0.9433962264150944,
"grad_norm": 2.2603117565548883,
"learning_rate": 3.9932867430353857e-07,
"loss": 1.6182,
"step": 600
},
{
"epoch": 0.9449685534591195,
"grad_norm": 2.1949498745608036,
"learning_rate": 3.9844564251574946e-07,
"loss": 1.7724,
"step": 601
},
{
"epoch": 0.9465408805031447,
"grad_norm": 2.0756341905016047,
"learning_rate": 3.9756229622258145e-07,
"loss": 1.9824,
"step": 602
},
{
"epoch": 0.9481132075471698,
"grad_norm": 2.211419527471263,
"learning_rate": 3.9667864115865083e-07,
"loss": 1.764,
"step": 603
},
{
"epoch": 0.949685534591195,
"grad_norm": 1.9396778709562796,
"learning_rate": 3.957946830605779e-07,
"loss": 1.6183,
"step": 604
},
{
"epoch": 0.9512578616352201,
"grad_norm": 2.036577606226067,
"learning_rate": 3.949104276669506e-07,
"loss": 1.9739,
"step": 605
},
{
"epoch": 0.9528301886792453,
"grad_norm": 2.1523021934849496,
"learning_rate": 3.940258807182871e-07,
"loss": 1.8051,
"step": 606
},
{
"epoch": 0.9544025157232704,
"grad_norm": 2.128010666946978,
"learning_rate": 3.931410479569977e-07,
"loss": 1.9472,
"step": 607
},
{
"epoch": 0.9559748427672956,
"grad_norm": 2.101658911780381,
"learning_rate": 3.9225593512734856e-07,
"loss": 1.7936,
"step": 608
},
{
"epoch": 0.9575471698113207,
"grad_norm": 2.0113616534057583,
"learning_rate": 3.913705479754242e-07,
"loss": 1.9101,
"step": 609
},
{
"epoch": 0.9591194968553459,
"grad_norm": 2.047513346746328,
"learning_rate": 3.9048489224908957e-07,
"loss": 1.7896,
"step": 610
},
{
"epoch": 0.960691823899371,
"grad_norm": 2.174472546503524,
"learning_rate": 3.8959897369795354e-07,
"loss": 1.8692,
"step": 611
},
{
"epoch": 0.9622641509433962,
"grad_norm": 2.1246505746426827,
"learning_rate": 3.8871279807333106e-07,
"loss": 1.6824,
"step": 612
},
{
"epoch": 0.9638364779874213,
"grad_norm": 2.1762242058390773,
"learning_rate": 3.8782637112820597e-07,
"loss": 1.9873,
"step": 613
},
{
"epoch": 0.9654088050314465,
"grad_norm": 1.9373168302806898,
"learning_rate": 3.8693969861719375e-07,
"loss": 1.936,
"step": 614
},
{
"epoch": 0.9669811320754716,
"grad_norm": 2.255844762606453,
"learning_rate": 3.8605278629650403e-07,
"loss": 1.7877,
"step": 615
},
{
"epoch": 0.9685534591194969,
"grad_norm": 2.108356173918864,
"learning_rate": 3.851656399239032e-07,
"loss": 1.8975,
"step": 616
},
{
"epoch": 0.970125786163522,
"grad_norm": 2.2742019781049225,
"learning_rate": 3.8427826525867735e-07,
"loss": 1.6503,
"step": 617
},
{
"epoch": 0.9716981132075472,
"grad_norm": 2.2147090179932873,
"learning_rate": 3.8339066806159426e-07,
"loss": 1.8588,
"step": 618
},
{
"epoch": 0.9732704402515723,
"grad_norm": 2.2344073830055646,
"learning_rate": 3.825028540948665e-07,
"loss": 2.0376,
"step": 619
},
{
"epoch": 0.9748427672955975,
"grad_norm": 2.068870035023368,
"learning_rate": 3.8161482912211413e-07,
"loss": 1.8621,
"step": 620
},
{
"epoch": 0.9764150943396226,
"grad_norm": 2.0716828751213687,
"learning_rate": 3.807265989083269e-07,
"loss": 1.9676,
"step": 621
},
{
"epoch": 0.9779874213836478,
"grad_norm": 2.173084402473959,
"learning_rate": 3.798381692198266e-07,
"loss": 1.5456,
"step": 622
},
{
"epoch": 0.9795597484276729,
"grad_norm": 2.123085754997638,
"learning_rate": 3.7894954582423053e-07,
"loss": 1.693,
"step": 623
},
{
"epoch": 0.9811320754716981,
"grad_norm": 1.9313674111763137,
"learning_rate": 3.780607344904134e-07,
"loss": 1.8215,
"step": 624
},
{
"epoch": 0.9827044025157232,
"grad_norm": 1.9446815492696663,
"learning_rate": 3.7717174098846976e-07,
"loss": 1.6901,
"step": 625
},
{
"epoch": 0.9842767295597484,
"grad_norm": 2.1738358620885614,
"learning_rate": 3.76282571089677e-07,
"loss": 1.7188,
"step": 626
},
{
"epoch": 0.9858490566037735,
"grad_norm": 1.9579450319413363,
"learning_rate": 3.753932305664576e-07,
"loss": 1.8272,
"step": 627
},
{
"epoch": 0.9874213836477987,
"grad_norm": 2.12572045051787,
"learning_rate": 3.7450372519234174e-07,
"loss": 1.742,
"step": 628
},
{
"epoch": 0.9889937106918238,
"grad_norm": 2.191799672435851,
"learning_rate": 3.736140607419297e-07,
"loss": 1.5517,
"step": 629
},
{
"epoch": 0.9905660377358491,
"grad_norm": 1.8922363202506955,
"learning_rate": 3.7272424299085454e-07,
"loss": 1.8464,
"step": 630
},
{
"epoch": 0.9921383647798742,
"grad_norm": 2.179698661804858,
"learning_rate": 3.7183427771574476e-07,
"loss": 1.6617,
"step": 631
},
{
"epoch": 0.9937106918238994,
"grad_norm": 2.2771710047319074,
"learning_rate": 3.7094417069418617e-07,
"loss": 2.0336,
"step": 632
},
{
"epoch": 0.9952830188679245,
"grad_norm": 2.006071135340339,
"learning_rate": 3.7005392770468494e-07,
"loss": 1.5935,
"step": 633
},
{
"epoch": 0.9968553459119497,
"grad_norm": 2.1112816678317468,
"learning_rate": 3.691635545266301e-07,
"loss": 1.9101,
"step": 634
},
{
"epoch": 0.9984276729559748,
"grad_norm": 2.2992568121548644,
"learning_rate": 3.682730569402557e-07,
"loss": 1.7981,
"step": 635
},
{
"epoch": 1.0,
"grad_norm": 2.0674766445976664,
"learning_rate": 3.6738244072660335e-07,
"loss": 1.8885,
"step": 636
},
{
"epoch": 1.001572327044025,
"grad_norm": 2.0323458623421025,
"learning_rate": 3.6649171166748505e-07,
"loss": 1.7354,
"step": 637
},
{
"epoch": 1.0031446540880504,
"grad_norm": 2.3263404881491803,
"learning_rate": 3.656008755454452e-07,
"loss": 1.8682,
"step": 638
},
{
"epoch": 1.0047169811320755,
"grad_norm": 2.0374703234555405,
"learning_rate": 3.647099381437232e-07,
"loss": 1.8107,
"step": 639
},
{
"epoch": 1.0062893081761006,
"grad_norm": 2.1124967616004855,
"learning_rate": 3.6381890524621594e-07,
"loss": 1.8595,
"step": 640
},
{
"epoch": 1.0078616352201257,
"grad_norm": 1.9775535226958436,
"learning_rate": 3.629277826374406e-07,
"loss": 1.6904,
"step": 641
},
{
"epoch": 1.009433962264151,
"grad_norm": 2.0203547455163844,
"learning_rate": 3.6203657610249633e-07,
"loss": 1.825,
"step": 642
},
{
"epoch": 1.0110062893081762,
"grad_norm": 2.0758681388805025,
"learning_rate": 3.6114529142702736e-07,
"loss": 1.7352,
"step": 643
},
{
"epoch": 1.0125786163522013,
"grad_norm": 2.0224811786868906,
"learning_rate": 3.602539343971851e-07,
"loss": 1.6538,
"step": 644
},
{
"epoch": 1.0141509433962264,
"grad_norm": 2.2197437087910217,
"learning_rate": 3.593625107995906e-07,
"loss": 1.83,
"step": 645
},
{
"epoch": 1.0157232704402517,
"grad_norm": 1.9889586059387878,
"learning_rate": 3.5847102642129743e-07,
"loss": 1.7737,
"step": 646
},
{
"epoch": 1.0172955974842768,
"grad_norm": 2.19853180662544,
"learning_rate": 3.575794870497533e-07,
"loss": 1.8649,
"step": 647
},
{
"epoch": 1.0188679245283019,
"grad_norm": 2.0550115124535187,
"learning_rate": 3.566878984727632e-07,
"loss": 1.6925,
"step": 648
},
{
"epoch": 1.020440251572327,
"grad_norm": 2.2183115138318015,
"learning_rate": 3.557962664784515e-07,
"loss": 1.6351,
"step": 649
},
{
"epoch": 1.0220125786163523,
"grad_norm": 1.9968891207031567,
"learning_rate": 3.549045968552242e-07,
"loss": 1.8444,
"step": 650
},
{
"epoch": 1.0235849056603774,
"grad_norm": 2.0649732438966435,
"learning_rate": 3.540128953917321e-07,
"loss": 1.7249,
"step": 651
},
{
"epoch": 1.0251572327044025,
"grad_norm": 2.137990641187568,
"learning_rate": 3.531211678768324e-07,
"loss": 1.8187,
"step": 652
},
{
"epoch": 1.0267295597484276,
"grad_norm": 2.228866151054972,
"learning_rate": 3.5222942009955113e-07,
"loss": 1.6852,
"step": 653
},
{
"epoch": 1.028301886792453,
"grad_norm": 2.0107405877299294,
"learning_rate": 3.513376578490464e-07,
"loss": 1.8454,
"step": 654
},
{
"epoch": 1.029874213836478,
"grad_norm": 2.0486309547186834,
"learning_rate": 3.5044588691457e-07,
"loss": 1.9823,
"step": 655
},
{
"epoch": 1.0314465408805031,
"grad_norm": 2.1373140135446995,
"learning_rate": 3.4955411308543004e-07,
"loss": 1.5547,
"step": 656
},
{
"epoch": 1.0330188679245282,
"grad_norm": 1.9722852672562703,
"learning_rate": 3.486623421509536e-07,
"loss": 1.6565,
"step": 657
},
{
"epoch": 1.0345911949685536,
"grad_norm": 2.071392217463609,
"learning_rate": 3.477705799004489e-07,
"loss": 1.6954,
"step": 658
},
{
"epoch": 1.0361635220125787,
"grad_norm": 2.1424736101079196,
"learning_rate": 3.4687883212316763e-07,
"loss": 1.6984,
"step": 659
},
{
"epoch": 1.0377358490566038,
"grad_norm": 2.17329780648581,
"learning_rate": 3.459871046082678e-07,
"loss": 1.733,
"step": 660
},
{
"epoch": 1.0393081761006289,
"grad_norm": 2.0114875931787335,
"learning_rate": 3.4509540314477576e-07,
"loss": 1.7247,
"step": 661
},
{
"epoch": 1.0408805031446542,
"grad_norm": 2.1612750227336828,
"learning_rate": 3.442037335215486e-07,
"loss": 1.8257,
"step": 662
},
{
"epoch": 1.0424528301886793,
"grad_norm": 2.09654475278469,
"learning_rate": 3.433121015272368e-07,
"loss": 1.6016,
"step": 663
},
{
"epoch": 1.0440251572327044,
"grad_norm": 2.112814527327424,
"learning_rate": 3.4242051295024674e-07,
"loss": 1.8498,
"step": 664
},
{
"epoch": 1.0455974842767295,
"grad_norm": 2.137137873151086,
"learning_rate": 3.4152897357870254e-07,
"loss": 1.9413,
"step": 665
},
{
"epoch": 1.0471698113207548,
"grad_norm": 1.9781507699983039,
"learning_rate": 3.406374892004093e-07,
"loss": 2.0358,
"step": 666
},
{
"epoch": 1.04874213836478,
"grad_norm": 2.1445450846607037,
"learning_rate": 3.39746065602815e-07,
"loss": 1.5967,
"step": 667
},
{
"epoch": 1.050314465408805,
"grad_norm": 2.2106487525905156,
"learning_rate": 3.388547085729726e-07,
"loss": 1.6929,
"step": 668
},
{
"epoch": 1.0518867924528301,
"grad_norm": 2.1632446260742157,
"learning_rate": 3.379634238975036e-07,
"loss": 1.9754,
"step": 669
},
{
"epoch": 1.0534591194968554,
"grad_norm": 2.2423152444572385,
"learning_rate": 3.370722173625594e-07,
"loss": 1.8086,
"step": 670
},
{
"epoch": 1.0550314465408805,
"grad_norm": 2.086454515712129,
"learning_rate": 3.36181094753784e-07,
"loss": 1.7976,
"step": 671
},
{
"epoch": 1.0566037735849056,
"grad_norm": 2.338596181941415,
"learning_rate": 3.3529006185627677e-07,
"loss": 1.8634,
"step": 672
},
{
"epoch": 1.0581761006289307,
"grad_norm": 2.2541670873895265,
"learning_rate": 3.343991244545549e-07,
"loss": 1.6529,
"step": 673
},
{
"epoch": 1.059748427672956,
"grad_norm": 2.2022765709519017,
"learning_rate": 3.3350828833251497e-07,
"loss": 1.9213,
"step": 674
},
{
"epoch": 1.0613207547169812,
"grad_norm": 2.033074601173319,
"learning_rate": 3.326175592733966e-07,
"loss": 1.8121,
"step": 675
},
{
"epoch": 1.0628930817610063,
"grad_norm": 2.0295160070279077,
"learning_rate": 3.3172694305974435e-07,
"loss": 1.8344,
"step": 676
},
{
"epoch": 1.0644654088050314,
"grad_norm": 2.3057324232200007,
"learning_rate": 3.308364454733699e-07,
"loss": 1.6693,
"step": 677
},
{
"epoch": 1.0660377358490567,
"grad_norm": 2.078695505755079,
"learning_rate": 3.29946072295315e-07,
"loss": 1.6955,
"step": 678
},
{
"epoch": 1.0676100628930818,
"grad_norm": 2.2250857375132016,
"learning_rate": 3.290558293058139e-07,
"loss": 1.775,
"step": 679
},
{
"epoch": 1.069182389937107,
"grad_norm": 1.9629837170183118,
"learning_rate": 3.2816572228425526e-07,
"loss": 1.5546,
"step": 680
},
{
"epoch": 1.070754716981132,
"grad_norm": 2.124163961606303,
"learning_rate": 3.272757570091453e-07,
"loss": 1.6283,
"step": 681
},
{
"epoch": 1.0723270440251573,
"grad_norm": 1.9918668591962903,
"learning_rate": 3.2638593925807033e-07,
"loss": 1.72,
"step": 682
},
{
"epoch": 1.0738993710691824,
"grad_norm": 2.264977591391123,
"learning_rate": 3.2549627480765834e-07,
"loss": 1.8045,
"step": 683
},
{
"epoch": 1.0754716981132075,
"grad_norm": 2.02537032266525,
"learning_rate": 3.246067694335424e-07,
"loss": 1.8902,
"step": 684
},
{
"epoch": 1.0770440251572326,
"grad_norm": 2.4603365936273995,
"learning_rate": 3.23717428910323e-07,
"loss": 1.6676,
"step": 685
},
{
"epoch": 1.078616352201258,
"grad_norm": 2.2248140600403277,
"learning_rate": 3.2282825901153026e-07,
"loss": 1.7778,
"step": 686
},
{
"epoch": 1.080188679245283,
"grad_norm": 2.115407517557494,
"learning_rate": 3.219392655095866e-07,
"loss": 1.7059,
"step": 687
},
{
"epoch": 1.0817610062893082,
"grad_norm": 2.1771102044112136,
"learning_rate": 3.2105045417576954e-07,
"loss": 1.7673,
"step": 688
},
{
"epoch": 1.0833333333333333,
"grad_norm": 2.1464674623014672,
"learning_rate": 3.2016183078017346e-07,
"loss": 1.8022,
"step": 689
},
{
"epoch": 1.0849056603773586,
"grad_norm": 2.0997445957592307,
"learning_rate": 3.192734010916732e-07,
"loss": 1.8693,
"step": 690
},
{
"epoch": 1.0864779874213837,
"grad_norm": 2.094025022111356,
"learning_rate": 3.1838517087788595e-07,
"loss": 1.7098,
"step": 691
},
{
"epoch": 1.0880503144654088,
"grad_norm": 2.070698100046358,
"learning_rate": 3.1749714590513353e-07,
"loss": 1.8247,
"step": 692
},
{
"epoch": 1.0896226415094339,
"grad_norm": 2.062398740678803,
"learning_rate": 3.166093319384057e-07,
"loss": 1.5966,
"step": 693
},
{
"epoch": 1.0911949685534592,
"grad_norm": 2.087955408161737,
"learning_rate": 3.157217347413226e-07,
"loss": 1.7022,
"step": 694
},
{
"epoch": 1.0927672955974843,
"grad_norm": 2.1423902860765303,
"learning_rate": 3.1483436007609676e-07,
"loss": 1.847,
"step": 695
},
{
"epoch": 1.0943396226415094,
"grad_norm": 2.115585692766892,
"learning_rate": 3.13947213703496e-07,
"loss": 1.7229,
"step": 696
},
{
"epoch": 1.0959119496855345,
"grad_norm": 2.064012528421271,
"learning_rate": 3.1306030138280617e-07,
"loss": 1.7403,
"step": 697
},
{
"epoch": 1.0974842767295598,
"grad_norm": 2.2178418206871275,
"learning_rate": 3.1217362887179405e-07,
"loss": 1.7121,
"step": 698
},
{
"epoch": 1.099056603773585,
"grad_norm": 2.267368653411109,
"learning_rate": 3.1128720192666896e-07,
"loss": 1.6441,
"step": 699
},
{
"epoch": 1.10062893081761,
"grad_norm": 2.0531341543445674,
"learning_rate": 3.1040102630204643e-07,
"loss": 1.8517,
"step": 700
},
{
"epoch": 1.1022012578616351,
"grad_norm": 2.090184163075423,
"learning_rate": 3.0951510775091045e-07,
"loss": 1.7339,
"step": 701
},
{
"epoch": 1.1037735849056605,
"grad_norm": 2.271451855248717,
"learning_rate": 3.086294520245758e-07,
"loss": 1.6841,
"step": 702
},
{
"epoch": 1.1053459119496856,
"grad_norm": 2.1230777563905647,
"learning_rate": 3.0774406487265135e-07,
"loss": 1.6153,
"step": 703
},
{
"epoch": 1.1069182389937107,
"grad_norm": 2.129575845582385,
"learning_rate": 3.0685895204300237e-07,
"loss": 1.8306,
"step": 704
},
{
"epoch": 1.1084905660377358,
"grad_norm": 2.127822835214652,
"learning_rate": 3.0597411928171293e-07,
"loss": 1.8781,
"step": 705
},
{
"epoch": 1.110062893081761,
"grad_norm": 2.0801378059307445,
"learning_rate": 3.0508957233304925e-07,
"loss": 1.7194,
"step": 706
},
{
"epoch": 1.1116352201257862,
"grad_norm": 2.0975124821955724,
"learning_rate": 3.042053169394221e-07,
"loss": 1.713,
"step": 707
},
{
"epoch": 1.1132075471698113,
"grad_norm": 1.9540390287342302,
"learning_rate": 3.033213588413492e-07,
"loss": 1.8067,
"step": 708
},
{
"epoch": 1.1147798742138364,
"grad_norm": 2.1885065328152775,
"learning_rate": 3.0243770377741847e-07,
"loss": 1.6779,
"step": 709
},
{
"epoch": 1.1163522012578617,
"grad_norm": 2.016792783037323,
"learning_rate": 3.0155435748425056e-07,
"loss": 1.8584,
"step": 710
},
{
"epoch": 1.1179245283018868,
"grad_norm": 2.0532056644149304,
"learning_rate": 3.006713256964614e-07,
"loss": 1.7892,
"step": 711
},
{
"epoch": 1.119496855345912,
"grad_norm": 2.1717833645402522,
"learning_rate": 2.9978861414662555e-07,
"loss": 1.7875,
"step": 712
},
{
"epoch": 1.121069182389937,
"grad_norm": 2.329982527967294,
"learning_rate": 2.989062285652383e-07,
"loss": 1.716,
"step": 713
},
{
"epoch": 1.1226415094339623,
"grad_norm": 2.102564151366302,
"learning_rate": 2.9802417468067866e-07,
"loss": 1.725,
"step": 714
},
{
"epoch": 1.1242138364779874,
"grad_norm": 2.005243042670392,
"learning_rate": 2.9714245821917273e-07,
"loss": 1.694,
"step": 715
},
{
"epoch": 1.1257861635220126,
"grad_norm": 2.209440331648405,
"learning_rate": 2.9626108490475596e-07,
"loss": 1.7861,
"step": 716
},
{
"epoch": 1.1273584905660377,
"grad_norm": 2.161682497833542,
"learning_rate": 2.9538006045923564e-07,
"loss": 1.9542,
"step": 717
},
{
"epoch": 1.128930817610063,
"grad_norm": 2.0410927204511093,
"learning_rate": 2.944993906021547e-07,
"loss": 2.0458,
"step": 718
},
{
"epoch": 1.130503144654088,
"grad_norm": 2.2980810899615585,
"learning_rate": 2.936190810507544e-07,
"loss": 1.6057,
"step": 719
},
{
"epoch": 1.1320754716981132,
"grad_norm": 2.1147420056792545,
"learning_rate": 2.92739137519936e-07,
"loss": 1.7409,
"step": 720
},
{
"epoch": 1.1336477987421383,
"grad_norm": 2.2130777267174326,
"learning_rate": 2.9185956572222527e-07,
"loss": 1.7651,
"step": 721
},
{
"epoch": 1.1352201257861636,
"grad_norm": 2.2140788506158184,
"learning_rate": 2.9098037136773475e-07,
"loss": 1.7143,
"step": 722
},
{
"epoch": 1.1367924528301887,
"grad_norm": 2.1122237127505117,
"learning_rate": 2.9010156016412616e-07,
"loss": 1.8633,
"step": 723
},
{
"epoch": 1.1383647798742138,
"grad_norm": 1.8051995194002572,
"learning_rate": 2.8922313781657437e-07,
"loss": 1.7253,
"step": 724
},
{
"epoch": 1.139937106918239,
"grad_norm": 2.078502586781223,
"learning_rate": 2.8834511002772954e-07,
"loss": 1.7383,
"step": 725
},
{
"epoch": 1.1415094339622642,
"grad_norm": 2.0343412709026163,
"learning_rate": 2.8746748249768034e-07,
"loss": 1.6715,
"step": 726
},
{
"epoch": 1.1430817610062893,
"grad_norm": 2.3354289699172415,
"learning_rate": 2.865902609239171e-07,
"loss": 1.5695,
"step": 727
},
{
"epoch": 1.1446540880503144,
"grad_norm": 2.012287905864566,
"learning_rate": 2.8571345100129475e-07,
"loss": 1.5644,
"step": 728
},
{
"epoch": 1.1462264150943395,
"grad_norm": 2.1335991982813343,
"learning_rate": 2.848370584219957e-07,
"loss": 1.7191,
"step": 729
},
{
"epoch": 1.1477987421383649,
"grad_norm": 2.042910279341824,
"learning_rate": 2.839610888754931e-07,
"loss": 1.8231,
"step": 730
},
{
"epoch": 1.14937106918239,
"grad_norm": 2.104058986286464,
"learning_rate": 2.8308554804851373e-07,
"loss": 1.9903,
"step": 731
},
{
"epoch": 1.150943396226415,
"grad_norm": 2.2528134865633334,
"learning_rate": 2.8221044162500126e-07,
"loss": 1.618,
"step": 732
},
{
"epoch": 1.1525157232704402,
"grad_norm": 2.169090932499384,
"learning_rate": 2.81335775286079e-07,
"loss": 1.8708,
"step": 733
},
{
"epoch": 1.1540880503144655,
"grad_norm": 2.129509499660428,
"learning_rate": 2.804615547100136e-07,
"loss": 1.8978,
"step": 734
},
{
"epoch": 1.1556603773584906,
"grad_norm": 2.1659278567954,
"learning_rate": 2.795877855721777e-07,
"loss": 1.5827,
"step": 735
},
{
"epoch": 1.1572327044025157,
"grad_norm": 2.1781926760780324,
"learning_rate": 2.7871447354501304e-07,
"loss": 2.1652,
"step": 736
},
{
"epoch": 1.1588050314465408,
"grad_norm": 2.281659140467624,
"learning_rate": 2.7784162429799415e-07,
"loss": 1.68,
"step": 737
},
{
"epoch": 1.1603773584905661,
"grad_norm": 2.379387037873376,
"learning_rate": 2.7696924349759123e-07,
"loss": 1.6087,
"step": 738
},
{
"epoch": 1.1619496855345912,
"grad_norm": 2.0112044543536616,
"learning_rate": 2.7609733680723295e-07,
"loss": 1.5596,
"step": 739
},
{
"epoch": 1.1635220125786163,
"grad_norm": 2.053191724834195,
"learning_rate": 2.7522590988727045e-07,
"loss": 1.8064,
"step": 740
},
{
"epoch": 1.1650943396226414,
"grad_norm": 2.0861593681907804,
"learning_rate": 2.743549683949404e-07,
"loss": 1.7197,
"step": 741
},
{
"epoch": 1.1666666666666667,
"grad_norm": 2.139575160657706,
"learning_rate": 2.734845179843275e-07,
"loss": 1.7399,
"step": 742
},
{
"epoch": 1.1682389937106918,
"grad_norm": 2.020388423393523,
"learning_rate": 2.726145643063289e-07,
"loss": 1.9182,
"step": 743
},
{
"epoch": 1.169811320754717,
"grad_norm": 2.0021432569973623,
"learning_rate": 2.717451130086171e-07,
"loss": 1.8025,
"step": 744
},
{
"epoch": 1.171383647798742,
"grad_norm": 2.166154517705087,
"learning_rate": 2.7087616973560256e-07,
"loss": 1.76,
"step": 745
},
{
"epoch": 1.1729559748427674,
"grad_norm": 2.172540185241632,
"learning_rate": 2.7000774012839826e-07,
"loss": 1.739,
"step": 746
},
{
"epoch": 1.1745283018867925,
"grad_norm": 2.1123768460041568,
"learning_rate": 2.6913982982478235e-07,
"loss": 1.9205,
"step": 747
},
{
"epoch": 1.1761006289308176,
"grad_norm": 2.064019777362976,
"learning_rate": 2.6827244445916145e-07,
"loss": 1.6711,
"step": 748
},
{
"epoch": 1.1776729559748427,
"grad_norm": 2.4454300470632706,
"learning_rate": 2.6740558966253464e-07,
"loss": 1.9951,
"step": 749
},
{
"epoch": 1.179245283018868,
"grad_norm": 2.202888650923341,
"learning_rate": 2.665392710624566e-07,
"loss": 1.8166,
"step": 750
},
{
"epoch": 1.179245283018868,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.7027679681777954,
"eval_sat2_MCTS_chains_SFT_val_runtime": 91.7741,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.201,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406,
"step": 750
},
{
"epoch": 1.180817610062893,
"grad_norm": 2.1197579207922765,
"learning_rate": 2.656734942830008e-07,
"loss": 1.789,
"step": 751
},
{
"epoch": 1.1823899371069182,
"grad_norm": 2.0955019518228646,
"learning_rate": 2.648082649447234e-07,
"loss": 1.8041,
"step": 752
},
{
"epoch": 1.1839622641509433,
"grad_norm": 2.286056725260083,
"learning_rate": 2.639435886646267e-07,
"loss": 1.9274,
"step": 753
},
{
"epoch": 1.1855345911949686,
"grad_norm": 2.078238808064587,
"learning_rate": 2.630794710561223e-07,
"loss": 1.841,
"step": 754
},
{
"epoch": 1.1871069182389937,
"grad_norm": 2.0466496074566414,
"learning_rate": 2.622159177289953e-07,
"loss": 1.8408,
"step": 755
},
{
"epoch": 1.1886792452830188,
"grad_norm": 2.0829604905191603,
"learning_rate": 2.6135293428936736e-07,
"loss": 1.7203,
"step": 756
},
{
"epoch": 1.190251572327044,
"grad_norm": 1.9719112678002333,
"learning_rate": 2.604905263396604e-07,
"loss": 1.8256,
"step": 757
},
{
"epoch": 1.1918238993710693,
"grad_norm": 2.212861952610994,
"learning_rate": 2.596286994785605e-07,
"loss": 1.7089,
"step": 758
},
{
"epoch": 1.1933962264150944,
"grad_norm": 2.1479617774562176,
"learning_rate": 2.5876745930098133e-07,
"loss": 1.788,
"step": 759
},
{
"epoch": 1.1949685534591195,
"grad_norm": 2.180325040687691,
"learning_rate": 2.5790681139802733e-07,
"loss": 1.8424,
"step": 760
},
{
"epoch": 1.1965408805031448,
"grad_norm": 2.0959805506163347,
"learning_rate": 2.570467613569588e-07,
"loss": 1.9297,
"step": 761
},
{
"epoch": 1.1981132075471699,
"grad_norm": 2.2360792245611174,
"learning_rate": 2.5618731476115436e-07,
"loss": 1.7221,
"step": 762
},
{
"epoch": 1.199685534591195,
"grad_norm": 2.1402022569964885,
"learning_rate": 2.553284771900748e-07,
"loss": 1.7152,
"step": 763
},
{
"epoch": 1.20125786163522,
"grad_norm": 2.347076356695893,
"learning_rate": 2.544702542192276e-07,
"loss": 1.9218,
"step": 764
},
{
"epoch": 1.2028301886792452,
"grad_norm": 2.1235602631273194,
"learning_rate": 2.5361265142013034e-07,
"loss": 1.7872,
"step": 765
},
{
"epoch": 1.2044025157232705,
"grad_norm": 2.178942765064468,
"learning_rate": 2.527556743602741e-07,
"loss": 1.7043,
"step": 766
},
{
"epoch": 1.2059748427672956,
"grad_norm": 2.180716726403436,
"learning_rate": 2.5189932860308796e-07,
"loss": 1.7261,
"step": 767
},
{
"epoch": 1.2075471698113207,
"grad_norm": 2.1659838220822016,
"learning_rate": 2.510436197079026e-07,
"loss": 1.5518,
"step": 768
},
{
"epoch": 1.209119496855346,
"grad_norm": 2.0823803015545144,
"learning_rate": 2.501885532299145e-07,
"loss": 1.6306,
"step": 769
},
{
"epoch": 1.2106918238993711,
"grad_norm": 2.0975682440855237,
"learning_rate": 2.4933413472014904e-07,
"loss": 1.7477,
"step": 770
},
{
"epoch": 1.2122641509433962,
"grad_norm": 2.3304268703171895,
"learning_rate": 2.4848036972542535e-07,
"loss": 1.9249,
"step": 771
},
{
"epoch": 1.2138364779874213,
"grad_norm": 2.158526522953037,
"learning_rate": 2.476272637883203e-07,
"loss": 1.9818,
"step": 772
},
{
"epoch": 1.2154088050314464,
"grad_norm": 2.4996434803308887,
"learning_rate": 2.467748224471316e-07,
"loss": 1.8969,
"step": 773
},
{
"epoch": 1.2169811320754718,
"grad_norm": 2.124488199922616,
"learning_rate": 2.4592305123584285e-07,
"loss": 1.6033,
"step": 774
},
{
"epoch": 1.2185534591194969,
"grad_norm": 2.3781806613712124,
"learning_rate": 2.4507195568408727e-07,
"loss": 1.8226,
"step": 775
},
{
"epoch": 1.220125786163522,
"grad_norm": 2.021004570791569,
"learning_rate": 2.4422154131711143e-07,
"loss": 1.7341,
"step": 776
},
{
"epoch": 1.2216981132075473,
"grad_norm": 2.1594846096039277,
"learning_rate": 2.4337181365574e-07,
"loss": 1.9486,
"step": 777
},
{
"epoch": 1.2232704402515724,
"grad_norm": 2.1845057975898987,
"learning_rate": 2.4252277821633946e-07,
"loss": 1.543,
"step": 778
},
{
"epoch": 1.2248427672955975,
"grad_norm": 2.344507158866424,
"learning_rate": 2.4167444051078245e-07,
"loss": 1.8528,
"step": 779
},
{
"epoch": 1.2264150943396226,
"grad_norm": 2.0236986373716004,
"learning_rate": 2.40826806046412e-07,
"loss": 1.7242,
"step": 780
},
{
"epoch": 1.2279874213836477,
"grad_norm": 2.078201977490899,
"learning_rate": 2.399798803260058e-07,
"loss": 1.9902,
"step": 781
},
{
"epoch": 1.229559748427673,
"grad_norm": 2.0183020857677727,
"learning_rate": 2.3913366884774034e-07,
"loss": 1.7074,
"step": 782
},
{
"epoch": 1.2311320754716981,
"grad_norm": 2.1592329303327453,
"learning_rate": 2.382881771051553e-07,
"loss": 1.8272,
"step": 783
},
{
"epoch": 1.2327044025157232,
"grad_norm": 1.920219447662245,
"learning_rate": 2.3744341058711808e-07,
"loss": 1.9844,
"step": 784
},
{
"epoch": 1.2342767295597485,
"grad_norm": 2.210906315130372,
"learning_rate": 2.3659937477778755e-07,
"loss": 1.7823,
"step": 785
},
{
"epoch": 1.2358490566037736,
"grad_norm": 2.1278759914686547,
"learning_rate": 2.3575607515657943e-07,
"loss": 1.9001,
"step": 786
},
{
"epoch": 1.2374213836477987,
"grad_norm": 2.1057924683593314,
"learning_rate": 2.3491351719812993e-07,
"loss": 1.6949,
"step": 787
},
{
"epoch": 1.2389937106918238,
"grad_norm": 2.0673174239605117,
"learning_rate": 2.3407170637226013e-07,
"loss": 1.8988,
"step": 788
},
{
"epoch": 1.240566037735849,
"grad_norm": 1.9808848675265713,
"learning_rate": 2.332306481439411e-07,
"loss": 2.0551,
"step": 789
},
{
"epoch": 1.2421383647798743,
"grad_norm": 2.1946700318779704,
"learning_rate": 2.3239034797325837e-07,
"loss": 1.8481,
"step": 790
},
{
"epoch": 1.2437106918238994,
"grad_norm": 2.145296214864516,
"learning_rate": 2.3155081131537557e-07,
"loss": 1.601,
"step": 791
},
{
"epoch": 1.2452830188679245,
"grad_norm": 2.21099564914462,
"learning_rate": 2.3071204362050016e-07,
"loss": 1.6916,
"step": 792
},
{
"epoch": 1.2468553459119498,
"grad_norm": 2.149817455185584,
"learning_rate": 2.298740503338474e-07,
"loss": 1.7328,
"step": 793
},
{
"epoch": 1.248427672955975,
"grad_norm": 2.083870636996782,
"learning_rate": 2.290368368956051e-07,
"loss": 1.7316,
"step": 794
},
{
"epoch": 1.25,
"grad_norm": 2.2190350027051884,
"learning_rate": 2.2820040874089833e-07,
"loss": 1.7335,
"step": 795
},
{
"epoch": 1.251572327044025,
"grad_norm": 2.0847463644872732,
"learning_rate": 2.2736477129975447e-07,
"loss": 1.862,
"step": 796
},
{
"epoch": 1.2531446540880502,
"grad_norm": 2.1940464195440397,
"learning_rate": 2.2652992999706714e-07,
"loss": 1.909,
"step": 797
},
{
"epoch": 1.2547169811320755,
"grad_norm": 2.0762267573536723,
"learning_rate": 2.2569589025256186e-07,
"loss": 1.7823,
"step": 798
},
{
"epoch": 1.2562893081761006,
"grad_norm": 2.2023094376465124,
"learning_rate": 2.248626574807605e-07,
"loss": 1.7458,
"step": 799
},
{
"epoch": 1.2578616352201257,
"grad_norm": 2.093984864757531,
"learning_rate": 2.2403023709094586e-07,
"loss": 1.7313,
"step": 800
},
{
"epoch": 1.259433962264151,
"grad_norm": 2.092496952956971,
"learning_rate": 2.2319863448712701e-07,
"loss": 1.6546,
"step": 801
},
{
"epoch": 1.2610062893081762,
"grad_norm": 2.000639985772567,
"learning_rate": 2.2236785506800412e-07,
"loss": 1.7073,
"step": 802
},
{
"epoch": 1.2625786163522013,
"grad_norm": 1.948987350292626,
"learning_rate": 2.21537904226933e-07,
"loss": 2.023,
"step": 803
},
{
"epoch": 1.2641509433962264,
"grad_norm": 2.112242215195991,
"learning_rate": 2.2070878735189064e-07,
"loss": 1.7933,
"step": 804
},
{
"epoch": 1.2657232704402515,
"grad_norm": 2.055178325234487,
"learning_rate": 2.1988050982543993e-07,
"loss": 1.8308,
"step": 805
},
{
"epoch": 1.2672955974842768,
"grad_norm": 2.169844090849057,
"learning_rate": 2.190530770246948e-07,
"loss": 1.7516,
"step": 806
},
{
"epoch": 1.2688679245283019,
"grad_norm": 2.0219060313319077,
"learning_rate": 2.1822649432128516e-07,
"loss": 1.5981,
"step": 807
},
{
"epoch": 1.270440251572327,
"grad_norm": 2.201206530110713,
"learning_rate": 2.1740076708132223e-07,
"loss": 1.8104,
"step": 808
},
{
"epoch": 1.2720125786163523,
"grad_norm": 2.1652601552896447,
"learning_rate": 2.165759006653639e-07,
"loss": 1.7986,
"step": 809
},
{
"epoch": 1.2735849056603774,
"grad_norm": 2.0415201426832197,
"learning_rate": 2.1575190042837886e-07,
"loss": 1.7244,
"step": 810
},
{
"epoch": 1.2751572327044025,
"grad_norm": 2.1143245858070174,
"learning_rate": 2.1492877171971362e-07,
"loss": 1.4419,
"step": 811
},
{
"epoch": 1.2767295597484276,
"grad_norm": 2.0485144393529806,
"learning_rate": 2.141065198830563e-07,
"loss": 1.7491,
"step": 812
},
{
"epoch": 1.2783018867924527,
"grad_norm": 1.8955436328860988,
"learning_rate": 2.1328515025640226e-07,
"loss": 1.8095,
"step": 813
},
{
"epoch": 1.279874213836478,
"grad_norm": 2.2901583680374036,
"learning_rate": 2.124646681720199e-07,
"loss": 1.6575,
"step": 814
},
{
"epoch": 1.2814465408805031,
"grad_norm": 2.275548594033002,
"learning_rate": 2.116450789564159e-07,
"loss": 1.6958,
"step": 815
},
{
"epoch": 1.2830188679245282,
"grad_norm": 2.3449017025523737,
"learning_rate": 2.1082638793030008e-07,
"loss": 1.5706,
"step": 816
},
{
"epoch": 1.2845911949685536,
"grad_norm": 2.317328281003811,
"learning_rate": 2.100086004085516e-07,
"loss": 1.7318,
"step": 817
},
{
"epoch": 1.2861635220125787,
"grad_norm": 2.226957187409235,
"learning_rate": 2.0919172170018401e-07,
"loss": 1.7858,
"step": 818
},
{
"epoch": 1.2877358490566038,
"grad_norm": 2.117343302028277,
"learning_rate": 2.0837575710831098e-07,
"loss": 1.6472,
"step": 819
},
{
"epoch": 1.2893081761006289,
"grad_norm": 2.2741664788753795,
"learning_rate": 2.0756071193011176e-07,
"loss": 2.1258,
"step": 820
},
{
"epoch": 1.290880503144654,
"grad_norm": 1.9084045733401307,
"learning_rate": 2.0674659145679692e-07,
"loss": 1.7781,
"step": 821
},
{
"epoch": 1.2924528301886793,
"grad_norm": 1.995611639444785,
"learning_rate": 2.0593340097357373e-07,
"loss": 1.8206,
"step": 822
},
{
"epoch": 1.2940251572327044,
"grad_norm": 2.244777578569415,
"learning_rate": 2.051211457596122e-07,
"loss": 1.6349,
"step": 823
},
{
"epoch": 1.2955974842767295,
"grad_norm": 2.167997702023013,
"learning_rate": 2.043098310880107e-07,
"loss": 1.8128,
"step": 824
},
{
"epoch": 1.2971698113207548,
"grad_norm": 2.0751468176787387,
"learning_rate": 2.034994622257615e-07,
"loss": 1.7242,
"step": 825
},
{
"epoch": 1.29874213836478,
"grad_norm": 2.0555412382564757,
"learning_rate": 2.0269004443371673e-07,
"loss": 1.6657,
"step": 826
},
{
"epoch": 1.300314465408805,
"grad_norm": 2.161584510716646,
"learning_rate": 2.018815829665546e-07,
"loss": 1.6663,
"step": 827
},
{
"epoch": 1.3018867924528301,
"grad_norm": 2.201251049412695,
"learning_rate": 2.0107408307274428e-07,
"loss": 1.6481,
"step": 828
},
{
"epoch": 1.3034591194968552,
"grad_norm": 2.0149941921725962,
"learning_rate": 2.0026754999451317e-07,
"loss": 1.6969,
"step": 829
},
{
"epoch": 1.3050314465408805,
"grad_norm": 2.242230337987245,
"learning_rate": 1.9946198896781174e-07,
"loss": 1.9959,
"step": 830
},
{
"epoch": 1.3066037735849056,
"grad_norm": 2.15895547407779,
"learning_rate": 1.986574052222802e-07,
"loss": 1.7761,
"step": 831
},
{
"epoch": 1.3081761006289307,
"grad_norm": 2.181310101596831,
"learning_rate": 1.9785380398121416e-07,
"loss": 1.6648,
"step": 832
},
{
"epoch": 1.309748427672956,
"grad_norm": 1.9416146895135635,
"learning_rate": 1.9705119046153114e-07,
"loss": 1.7318,
"step": 833
},
{
"epoch": 1.3113207547169812,
"grad_norm": 2.0663038260287467,
"learning_rate": 1.9624956987373606e-07,
"loss": 1.9148,
"step": 834
},
{
"epoch": 1.3128930817610063,
"grad_norm": 2.293091774940269,
"learning_rate": 1.9544894742188804e-07,
"loss": 1.8369,
"step": 835
},
{
"epoch": 1.3144654088050314,
"grad_norm": 2.3609086075602206,
"learning_rate": 1.9464932830356648e-07,
"loss": 1.7337,
"step": 836
},
{
"epoch": 1.3160377358490565,
"grad_norm": 2.060684097800172,
"learning_rate": 1.9385071770983697e-07,
"loss": 1.5396,
"step": 837
},
{
"epoch": 1.3176100628930818,
"grad_norm": 1.995153844021168,
"learning_rate": 1.93053120825218e-07,
"loss": 1.6491,
"step": 838
},
{
"epoch": 1.319182389937107,
"grad_norm": 2.305659266839101,
"learning_rate": 1.9225654282764733e-07,
"loss": 1.8602,
"step": 839
},
{
"epoch": 1.320754716981132,
"grad_norm": 1.9550425262609439,
"learning_rate": 1.9146098888844752e-07,
"loss": 1.7687,
"step": 840
},
{
"epoch": 1.3223270440251573,
"grad_norm": 2.1752564296945143,
"learning_rate": 1.9066646417229369e-07,
"loss": 1.954,
"step": 841
},
{
"epoch": 1.3238993710691824,
"grad_norm": 2.2819270079625387,
"learning_rate": 1.8987297383717918e-07,
"loss": 1.6462,
"step": 842
},
{
"epoch": 1.3254716981132075,
"grad_norm": 2.160953368673478,
"learning_rate": 1.8908052303438188e-07,
"loss": 1.6413,
"step": 843
},
{
"epoch": 1.3270440251572326,
"grad_norm": 2.0789012158742803,
"learning_rate": 1.882891169084313e-07,
"loss": 1.827,
"step": 844
},
{
"epoch": 1.3286163522012577,
"grad_norm": 2.0716992608690448,
"learning_rate": 1.8749876059707536e-07,
"loss": 1.7414,
"step": 845
},
{
"epoch": 1.330188679245283,
"grad_norm": 2.103884856783866,
"learning_rate": 1.867094592312463e-07,
"loss": 2.0534,
"step": 846
},
{
"epoch": 1.3317610062893082,
"grad_norm": 2.4629953048647324,
"learning_rate": 1.8592121793502755e-07,
"loss": 1.7296,
"step": 847
},
{
"epoch": 1.3333333333333333,
"grad_norm": 1.9458332362620918,
"learning_rate": 1.8513404182562097e-07,
"loss": 1.8213,
"step": 848
},
{
"epoch": 1.3349056603773586,
"grad_norm": 2.1510489748789583,
"learning_rate": 1.8434793601331336e-07,
"loss": 1.8064,
"step": 849
},
{
"epoch": 1.3364779874213837,
"grad_norm": 2.2594694003824567,
"learning_rate": 1.8356290560144285e-07,
"loss": 1.763,
"step": 850
},
{
"epoch": 1.3380503144654088,
"grad_norm": 2.2650014550444606,
"learning_rate": 1.8277895568636646e-07,
"loss": 1.6519,
"step": 851
},
{
"epoch": 1.3396226415094339,
"grad_norm": 2.279277485826785,
"learning_rate": 1.8199609135742672e-07,
"loss": 1.7316,
"step": 852
},
{
"epoch": 1.341194968553459,
"grad_norm": 2.113275805679697,
"learning_rate": 1.812143176969185e-07,
"loss": 1.7905,
"step": 853
},
{
"epoch": 1.3427672955974843,
"grad_norm": 1.9917783424168132,
"learning_rate": 1.8043363978005617e-07,
"loss": 1.7985,
"step": 854
},
{
"epoch": 1.3443396226415094,
"grad_norm": 1.8965972111176195,
"learning_rate": 1.7965406267494078e-07,
"loss": 1.8407,
"step": 855
},
{
"epoch": 1.3459119496855345,
"grad_norm": 2.2071694500169174,
"learning_rate": 1.7887559144252658e-07,
"loss": 1.7755,
"step": 856
},
{
"epoch": 1.3474842767295598,
"grad_norm": 2.2472976512202,
"learning_rate": 1.7809823113658896e-07,
"loss": 1.8221,
"step": 857
},
{
"epoch": 1.349056603773585,
"grad_norm": 2.0844792244919876,
"learning_rate": 1.7732198680369107e-07,
"loss": 1.8871,
"step": 858
},
{
"epoch": 1.35062893081761,
"grad_norm": 2.027073391928856,
"learning_rate": 1.765468634831514e-07,
"loss": 1.7576,
"step": 859
},
{
"epoch": 1.3522012578616351,
"grad_norm": 2.2937069673682906,
"learning_rate": 1.757728662070108e-07,
"loss": 1.6525,
"step": 860
},
{
"epoch": 1.3537735849056602,
"grad_norm": 2.1305400050893972,
"learning_rate": 1.7500000000000007e-07,
"loss": 1.6292,
"step": 861
},
{
"epoch": 1.3553459119496856,
"grad_norm": 2.093411160599093,
"learning_rate": 1.7422826987950683e-07,
"loss": 2.0316,
"step": 862
},
{
"epoch": 1.3569182389937107,
"grad_norm": 2.053081781406045,
"learning_rate": 1.7345768085554372e-07,
"loss": 1.6936,
"step": 863
},
{
"epoch": 1.3584905660377358,
"grad_norm": 2.1226077695084555,
"learning_rate": 1.726882379307153e-07,
"loss": 1.7328,
"step": 864
},
{
"epoch": 1.360062893081761,
"grad_norm": 2.162295283463786,
"learning_rate": 1.7191994610018574e-07,
"loss": 1.75,
"step": 865
},
{
"epoch": 1.3616352201257862,
"grad_norm": 2.2966135587303946,
"learning_rate": 1.711528103516464e-07,
"loss": 1.7858,
"step": 866
},
{
"epoch": 1.3632075471698113,
"grad_norm": 2.13860142749209,
"learning_rate": 1.703868356652837e-07,
"loss": 1.9188,
"step": 867
},
{
"epoch": 1.3647798742138364,
"grad_norm": 2.1291702490610054,
"learning_rate": 1.6962202701374592e-07,
"loss": 1.6769,
"step": 868
},
{
"epoch": 1.3663522012578615,
"grad_norm": 2.0330576038157524,
"learning_rate": 1.6885838936211206e-07,
"loss": 1.7028,
"step": 869
},
{
"epoch": 1.3679245283018868,
"grad_norm": 2.174657226535088,
"learning_rate": 1.6809592766785903e-07,
"loss": 1.7184,
"step": 870
},
{
"epoch": 1.369496855345912,
"grad_norm": 2.1336668424018463,
"learning_rate": 1.673346468808292e-07,
"loss": 1.5666,
"step": 871
},
{
"epoch": 1.371069182389937,
"grad_norm": 2.0664898739681403,
"learning_rate": 1.6657455194319875e-07,
"loss": 1.6633,
"step": 872
},
{
"epoch": 1.3726415094339623,
"grad_norm": 2.07966395343747,
"learning_rate": 1.6581564778944585e-07,
"loss": 1.9728,
"step": 873
},
{
"epoch": 1.3742138364779874,
"grad_norm": 2.0802230341676493,
"learning_rate": 1.6505793934631743e-07,
"loss": 1.806,
"step": 874
},
{
"epoch": 1.3757861635220126,
"grad_norm": 2.0585446511098855,
"learning_rate": 1.6430143153279843e-07,
"loss": 1.7467,
"step": 875
},
{
"epoch": 1.3773584905660377,
"grad_norm": 2.0670814111420457,
"learning_rate": 1.6354612926007947e-07,
"loss": 1.8074,
"step": 876
},
{
"epoch": 1.378930817610063,
"grad_norm": 2.2448398015661977,
"learning_rate": 1.6279203743152437e-07,
"loss": 1.792,
"step": 877
},
{
"epoch": 1.380503144654088,
"grad_norm": 2.1936825808205476,
"learning_rate": 1.620391609426394e-07,
"loss": 1.7086,
"step": 878
},
{
"epoch": 1.3820754716981132,
"grad_norm": 2.0481925632804896,
"learning_rate": 1.6128750468104068e-07,
"loss": 1.6359,
"step": 879
},
{
"epoch": 1.3836477987421385,
"grad_norm": 2.307548887007293,
"learning_rate": 1.6053707352642275e-07,
"loss": 1.8802,
"step": 880
},
{
"epoch": 1.3852201257861636,
"grad_norm": 2.212178445396311,
"learning_rate": 1.5978787235052684e-07,
"loss": 1.6298,
"step": 881
},
{
"epoch": 1.3867924528301887,
"grad_norm": 2.4275019833470357,
"learning_rate": 1.5903990601710933e-07,
"loss": 1.6078,
"step": 882
},
{
"epoch": 1.3883647798742138,
"grad_norm": 2.097258459403789,
"learning_rate": 1.5829317938191007e-07,
"loss": 1.7955,
"step": 883
},
{
"epoch": 1.389937106918239,
"grad_norm": 2.2433716966343074,
"learning_rate": 1.5754769729262068e-07,
"loss": 1.841,
"step": 884
},
{
"epoch": 1.3915094339622642,
"grad_norm": 1.9611279725874884,
"learning_rate": 1.5680346458885351e-07,
"loss": 1.8903,
"step": 885
},
{
"epoch": 1.3930817610062893,
"grad_norm": 2.1857460166910703,
"learning_rate": 1.560604861021099e-07,
"loss": 1.8461,
"step": 886
},
{
"epoch": 1.3946540880503144,
"grad_norm": 1.9802850293847218,
"learning_rate": 1.5531876665574905e-07,
"loss": 1.6594,
"step": 887
},
{
"epoch": 1.3962264150943398,
"grad_norm": 2.044592124136331,
"learning_rate": 1.5457831106495645e-07,
"loss": 1.8477,
"step": 888
},
{
"epoch": 1.3977987421383649,
"grad_norm": 2.1730178709565027,
"learning_rate": 1.538391241367128e-07,
"loss": 1.8571,
"step": 889
},
{
"epoch": 1.39937106918239,
"grad_norm": 2.226125612813974,
"learning_rate": 1.5310121066976246e-07,
"loss": 1.8246,
"step": 890
},
{
"epoch": 1.400943396226415,
"grad_norm": 2.110513299587775,
"learning_rate": 1.52364575454583e-07,
"loss": 1.5938,
"step": 891
},
{
"epoch": 1.4025157232704402,
"grad_norm": 2.0945323670932483,
"learning_rate": 1.5162922327335352e-07,
"loss": 1.9624,
"step": 892
},
{
"epoch": 1.4040880503144655,
"grad_norm": 2.031350721397557,
"learning_rate": 1.5089515889992337e-07,
"loss": 1.5905,
"step": 893
},
{
"epoch": 1.4056603773584906,
"grad_norm": 2.5190983077756,
"learning_rate": 1.5016238709978235e-07,
"loss": 1.6355,
"step": 894
},
{
"epoch": 1.4072327044025157,
"grad_norm": 2.0786833230760946,
"learning_rate": 1.4943091263002846e-07,
"loss": 2.4022,
"step": 895
},
{
"epoch": 1.408805031446541,
"grad_norm": 2.2624991107175076,
"learning_rate": 1.487007402393374e-07,
"loss": 2.0047,
"step": 896
},
{
"epoch": 1.4103773584905661,
"grad_norm": 2.13658012100152,
"learning_rate": 1.4797187466793216e-07,
"loss": 2.0784,
"step": 897
},
{
"epoch": 1.4119496855345912,
"grad_norm": 2.154584941501371,
"learning_rate": 1.4724432064755204e-07,
"loss": 1.9006,
"step": 898
},
{
"epoch": 1.4135220125786163,
"grad_norm": 2.2513090599810224,
"learning_rate": 1.4651808290142143e-07,
"loss": 1.87,
"step": 899
},
{
"epoch": 1.4150943396226414,
"grad_norm": 2.1884972541614336,
"learning_rate": 1.457931661442199e-07,
"loss": 1.8312,
"step": 900
},
{
"epoch": 1.4166666666666667,
"grad_norm": 2.165721743117902,
"learning_rate": 1.450695750820513e-07,
"loss": 1.5696,
"step": 901
},
{
"epoch": 1.4182389937106918,
"grad_norm": 2.0366037954150964,
"learning_rate": 1.4434731441241295e-07,
"loss": 1.864,
"step": 902
},
{
"epoch": 1.419811320754717,
"grad_norm": 2.0359358262558436,
"learning_rate": 1.4362638882416552e-07,
"loss": 1.9211,
"step": 903
},
{
"epoch": 1.4213836477987423,
"grad_norm": 2.2277853781005144,
"learning_rate": 1.429068029975025e-07,
"loss": 1.6469,
"step": 904
},
{
"epoch": 1.4229559748427674,
"grad_norm": 2.1703686899029937,
"learning_rate": 1.421885616039194e-07,
"loss": 1.7961,
"step": 905
},
{
"epoch": 1.4245283018867925,
"grad_norm": 2.2098194543382097,
"learning_rate": 1.4147166930618412e-07,
"loss": 1.7475,
"step": 906
},
{
"epoch": 1.4261006289308176,
"grad_norm": 2.0638951952196156,
"learning_rate": 1.4075613075830626e-07,
"loss": 1.5421,
"step": 907
},
{
"epoch": 1.4276729559748427,
"grad_norm": 2.2638501605807786,
"learning_rate": 1.400419506055069e-07,
"loss": 2.0258,
"step": 908
},
{
"epoch": 1.429245283018868,
"grad_norm": 2.026762327168469,
"learning_rate": 1.393291334841886e-07,
"loss": 1.7273,
"step": 909
},
{
"epoch": 1.430817610062893,
"grad_norm": 2.0850924373321655,
"learning_rate": 1.3861768402190533e-07,
"loss": 1.6415,
"step": 910
},
{
"epoch": 1.4323899371069182,
"grad_norm": 2.105162873756192,
"learning_rate": 1.379076068373319e-07,
"loss": 1.7626,
"step": 911
},
{
"epoch": 1.4339622641509435,
"grad_norm": 2.2125942477293474,
"learning_rate": 1.3719890654023485e-07,
"loss": 1.6857,
"step": 912
},
{
"epoch": 1.4355345911949686,
"grad_norm": 2.1153484222332173,
"learning_rate": 1.36491587731442e-07,
"loss": 1.7955,
"step": 913
},
{
"epoch": 1.4371069182389937,
"grad_norm": 2.2997509014276742,
"learning_rate": 1.3578565500281222e-07,
"loss": 2.0574,
"step": 914
},
{
"epoch": 1.4386792452830188,
"grad_norm": 2.078107647008167,
"learning_rate": 1.3508111293720675e-07,
"loss": 2.0042,
"step": 915
},
{
"epoch": 1.440251572327044,
"grad_norm": 2.0399439337966787,
"learning_rate": 1.343779661084584e-07,
"loss": 1.574,
"step": 916
},
{
"epoch": 1.4418238993710693,
"grad_norm": 2.2889011438788063,
"learning_rate": 1.33676219081342e-07,
"loss": 1.7344,
"step": 917
},
{
"epoch": 1.4433962264150944,
"grad_norm": 2.333332062321592,
"learning_rate": 1.329758764115452e-07,
"loss": 1.5233,
"step": 918
},
{
"epoch": 1.4449685534591195,
"grad_norm": 2.1331182619855857,
"learning_rate": 1.322769426456388e-07,
"loss": 1.6755,
"step": 919
},
{
"epoch": 1.4465408805031448,
"grad_norm": 2.1531518116333745,
"learning_rate": 1.3157942232104702e-07,
"loss": 2.0816,
"step": 920
},
{
"epoch": 1.4481132075471699,
"grad_norm": 2.088795076446534,
"learning_rate": 1.308833199660178e-07,
"loss": 2.0221,
"step": 921
},
{
"epoch": 1.449685534591195,
"grad_norm": 2.00003816402544,
"learning_rate": 1.3018864009959402e-07,
"loss": 1.8191,
"step": 922
},
{
"epoch": 1.45125786163522,
"grad_norm": 2.0551941845228403,
"learning_rate": 1.2949538723158427e-07,
"loss": 1.779,
"step": 923
},
{
"epoch": 1.4528301886792452,
"grad_norm": 2.2468743977572547,
"learning_rate": 1.288035658625323e-07,
"loss": 1.7847,
"step": 924
},
{
"epoch": 1.4544025157232705,
"grad_norm": 2.2719489416819254,
"learning_rate": 1.2811318048368927e-07,
"loss": 1.6929,
"step": 925
},
{
"epoch": 1.4559748427672956,
"grad_norm": 2.0990742193527434,
"learning_rate": 1.2742423557698407e-07,
"loss": 1.6888,
"step": 926
},
{
"epoch": 1.4575471698113207,
"grad_norm": 2.21584285821969,
"learning_rate": 1.2673673561499367e-07,
"loss": 1.8427,
"step": 927
},
{
"epoch": 1.459119496855346,
"grad_norm": 2.145727226704997,
"learning_rate": 1.2605068506091503e-07,
"loss": 1.854,
"step": 928
},
{
"epoch": 1.4606918238993711,
"grad_norm": 2.2012601813217554,
"learning_rate": 1.2536608836853537e-07,
"loss": 1.6569,
"step": 929
},
{
"epoch": 1.4622641509433962,
"grad_norm": 2.1152395275847233,
"learning_rate": 1.2468294998220374e-07,
"loss": 1.8322,
"step": 930
},
{
"epoch": 1.4638364779874213,
"grad_norm": 2.1134882928957226,
"learning_rate": 1.2400127433680197e-07,
"loss": 1.54,
"step": 931
},
{
"epoch": 1.4654088050314464,
"grad_norm": 2.058732975760006,
"learning_rate": 1.2332106585771588e-07,
"loss": 1.8464,
"step": 932
},
{
"epoch": 1.4669811320754718,
"grad_norm": 1.9450661693276792,
"learning_rate": 1.226423289608063e-07,
"loss": 1.6493,
"step": 933
},
{
"epoch": 1.4685534591194969,
"grad_norm": 2.162745466557572,
"learning_rate": 1.2196506805238097e-07,
"loss": 1.7669,
"step": 934
},
{
"epoch": 1.470125786163522,
"grad_norm": 1.9865409540246532,
"learning_rate": 1.2128928752916557e-07,
"loss": 1.9032,
"step": 935
},
{
"epoch": 1.4716981132075473,
"grad_norm": 2.1486068004112098,
"learning_rate": 1.2061499177827517e-07,
"loss": 1.7487,
"step": 936
},
{
"epoch": 1.4732704402515724,
"grad_norm": 2.1353688348778244,
"learning_rate": 1.199421851771858e-07,
"loss": 1.9648,
"step": 937
},
{
"epoch": 1.4748427672955975,
"grad_norm": 2.203719714639102,
"learning_rate": 1.1927087209370627e-07,
"loss": 2.2411,
"step": 938
},
{
"epoch": 1.4764150943396226,
"grad_norm": 2.1007559098440707,
"learning_rate": 1.1860105688594913e-07,
"loss": 1.9012,
"step": 939
},
{
"epoch": 1.4779874213836477,
"grad_norm": 2.0112830813880724,
"learning_rate": 1.179327439023032e-07,
"loss": 1.7904,
"step": 940
},
{
"epoch": 1.479559748427673,
"grad_norm": 2.147891306593028,
"learning_rate": 1.1726593748140503e-07,
"loss": 1.849,
"step": 941
},
{
"epoch": 1.4811320754716981,
"grad_norm": 2.1951970364454167,
"learning_rate": 1.1660064195211026e-07,
"loss": 1.8406,
"step": 942
},
{
"epoch": 1.4827044025157232,
"grad_norm": 2.251395431799435,
"learning_rate": 1.1593686163346624e-07,
"loss": 1.8115,
"step": 943
},
{
"epoch": 1.4842767295597485,
"grad_norm": 2.2374065524605915,
"learning_rate": 1.1527460083468404e-07,
"loss": 1.7597,
"step": 944
},
{
"epoch": 1.4858490566037736,
"grad_norm": 2.2516356519637153,
"learning_rate": 1.1461386385510934e-07,
"loss": 1.7996,
"step": 945
},
{
"epoch": 1.4874213836477987,
"grad_norm": 1.9259732674286747,
"learning_rate": 1.1395465498419584e-07,
"loss": 1.7016,
"step": 946
},
{
"epoch": 1.4889937106918238,
"grad_norm": 2.0151236267786943,
"learning_rate": 1.1329697850147684e-07,
"loss": 1.8591,
"step": 947
},
{
"epoch": 1.490566037735849,
"grad_norm": 2.1157729525808,
"learning_rate": 1.1264083867653721e-07,
"loss": 1.7659,
"step": 948
},
{
"epoch": 1.4921383647798743,
"grad_norm": 2.2039873137986485,
"learning_rate": 1.1198623976898626e-07,
"loss": 1.8312,
"step": 949
},
{
"epoch": 1.4937106918238994,
"grad_norm": 2.139918541343564,
"learning_rate": 1.1133318602842961e-07,
"loss": 1.8547,
"step": 950
},
{
"epoch": 1.4952830188679245,
"grad_norm": 2.0927341451744716,
"learning_rate": 1.1068168169444187e-07,
"loss": 1.6786,
"step": 951
},
{
"epoch": 1.4968553459119498,
"grad_norm": 2.22996580434956,
"learning_rate": 1.1003173099653898e-07,
"loss": 1.9014,
"step": 952
},
{
"epoch": 1.498427672955975,
"grad_norm": 2.2224019206058707,
"learning_rate": 1.093833381541509e-07,
"loss": 1.9734,
"step": 953
},
{
"epoch": 1.5,
"grad_norm": 2.292687450843933,
"learning_rate": 1.087365073765938e-07,
"loss": 1.6376,
"step": 954
},
{
"epoch": 1.501572327044025,
"grad_norm": 1.933890837573844,
"learning_rate": 1.0809124286304334e-07,
"loss": 1.6966,
"step": 955
},
{
"epoch": 1.5031446540880502,
"grad_norm": 2.2347334629753557,
"learning_rate": 1.0744754880250704e-07,
"loss": 1.9026,
"step": 956
},
{
"epoch": 1.5047169811320755,
"grad_norm": 2.0708848593951084,
"learning_rate": 1.0680542937379719e-07,
"loss": 1.7771,
"step": 957
},
{
"epoch": 1.5062893081761006,
"grad_norm": 2.102938366432825,
"learning_rate": 1.061648887455036e-07,
"loss": 1.7984,
"step": 958
},
{
"epoch": 1.507861635220126,
"grad_norm": 2.2787001713384467,
"learning_rate": 1.0552593107596671e-07,
"loss": 1.7934,
"step": 959
},
{
"epoch": 1.509433962264151,
"grad_norm": 2.1532789357045794,
"learning_rate": 1.0488856051325056e-07,
"loss": 1.6814,
"step": 960
},
{
"epoch": 1.5110062893081762,
"grad_norm": 2.253404213973793,
"learning_rate": 1.0425278119511557e-07,
"loss": 1.5369,
"step": 961
},
{
"epoch": 1.5125786163522013,
"grad_norm": 2.1276754628326904,
"learning_rate": 1.0361859724899213e-07,
"loss": 1.6983,
"step": 962
},
{
"epoch": 1.5141509433962264,
"grad_norm": 2.04685261141141,
"learning_rate": 1.0298601279195375e-07,
"loss": 1.9189,
"step": 963
},
{
"epoch": 1.5157232704402515,
"grad_norm": 2.334536087166455,
"learning_rate": 1.0235503193068961e-07,
"loss": 1.9152,
"step": 964
},
{
"epoch": 1.5172955974842768,
"grad_norm": 2.1800867522294975,
"learning_rate": 1.0172565876147919e-07,
"loss": 1.8854,
"step": 965
},
{
"epoch": 1.5188679245283019,
"grad_norm": 2.235966102337266,
"learning_rate": 1.0109789737016459e-07,
"loss": 1.8736,
"step": 966
},
{
"epoch": 1.5204402515723272,
"grad_norm": 2.3031366168949385,
"learning_rate": 1.0047175183212424e-07,
"loss": 1.8837,
"step": 967
},
{
"epoch": 1.5220125786163523,
"grad_norm": 1.9731203822876688,
"learning_rate": 9.984722621224678e-08,
"loss": 1.6989,
"step": 968
},
{
"epoch": 1.5235849056603774,
"grad_norm": 2.0967189530567043,
"learning_rate": 9.922432456490459e-08,
"loss": 1.7385,
"step": 969
},
{
"epoch": 1.5251572327044025,
"grad_norm": 2.2284300025544,
"learning_rate": 9.86030509339269e-08,
"loss": 1.5122,
"step": 970
},
{
"epoch": 1.5267295597484276,
"grad_norm": 2.0075774050458017,
"learning_rate": 9.798340935257439e-08,
"loss": 1.7742,
"step": 971
},
{
"epoch": 1.5283018867924527,
"grad_norm": 2.253336294321935,
"learning_rate": 9.736540384351247e-08,
"loss": 1.8329,
"step": 972
},
{
"epoch": 1.529874213836478,
"grad_norm": 2.0229026010397795,
"learning_rate": 9.674903841878527e-08,
"loss": 1.8612,
"step": 973
},
{
"epoch": 1.5314465408805031,
"grad_norm": 2.1682039050875384,
"learning_rate": 9.613431707978969e-08,
"loss": 1.6209,
"step": 974
},
{
"epoch": 1.5330188679245285,
"grad_norm": 2.17108676879446,
"learning_rate": 9.55212438172494e-08,
"loss": 1.7289,
"step": 975
},
{
"epoch": 1.5345911949685536,
"grad_norm": 2.0449199036226466,
"learning_rate": 9.49098226111885e-08,
"loss": 1.7313,
"step": 976
},
{
"epoch": 1.5361635220125787,
"grad_norm": 2.0509938124397658,
"learning_rate": 9.430005743090654e-08,
"loss": 1.859,
"step": 977
},
{
"epoch": 1.5377358490566038,
"grad_norm": 2.1533522799374016,
"learning_rate": 9.369195223495212e-08,
"loss": 1.5909,
"step": 978
},
{
"epoch": 1.5393081761006289,
"grad_norm": 2.4410463221304957,
"learning_rate": 9.308551097109723e-08,
"loss": 1.9236,
"step": 979
},
{
"epoch": 1.540880503144654,
"grad_norm": 2.1289270496387256,
"learning_rate": 9.248073757631187e-08,
"loss": 1.6905,
"step": 980
},
{
"epoch": 1.5424528301886793,
"grad_norm": 2.160163073483539,
"learning_rate": 9.187763597673842e-08,
"loss": 1.572,
"step": 981
},
{
"epoch": 1.5440251572327044,
"grad_norm": 2.1864157152356247,
"learning_rate": 9.127621008766583e-08,
"loss": 1.486,
"step": 982
},
{
"epoch": 1.5455974842767297,
"grad_norm": 2.228493447513863,
"learning_rate": 9.067646381350473e-08,
"loss": 1.8109,
"step": 983
},
{
"epoch": 1.5471698113207548,
"grad_norm": 2.21672107568335,
"learning_rate": 9.007840104776179e-08,
"loss": 1.7224,
"step": 984
},
{
"epoch": 1.54874213836478,
"grad_norm": 2.0906671637683383,
"learning_rate": 8.948202567301416e-08,
"loss": 1.7993,
"step": 985
},
{
"epoch": 1.550314465408805,
"grad_norm": 2.103005760268071,
"learning_rate": 8.888734156088509e-08,
"loss": 1.7734,
"step": 986
},
{
"epoch": 1.5518867924528301,
"grad_norm": 2.1048717139473534,
"learning_rate": 8.829435257201803e-08,
"loss": 1.6411,
"step": 987
},
{
"epoch": 1.5534591194968552,
"grad_norm": 2.244151500892785,
"learning_rate": 8.77030625560516e-08,
"loss": 1.9157,
"step": 988
},
{
"epoch": 1.5550314465408805,
"grad_norm": 2.3422640414896407,
"learning_rate": 8.711347535159517e-08,
"loss": 1.446,
"step": 989
},
{
"epoch": 1.5566037735849056,
"grad_norm": 2.21179768023497,
"learning_rate": 8.652559478620349e-08,
"loss": 1.7682,
"step": 990
},
{
"epoch": 1.558176100628931,
"grad_norm": 2.130587628281344,
"learning_rate": 8.593942467635173e-08,
"loss": 1.9265,
"step": 991
},
{
"epoch": 1.559748427672956,
"grad_norm": 2.2427742208191384,
"learning_rate": 8.535496882741118e-08,
"loss": 1.8189,
"step": 992
},
{
"epoch": 1.5613207547169812,
"grad_norm": 2.319870729623824,
"learning_rate": 8.47722310336241e-08,
"loss": 1.7268,
"step": 993
},
{
"epoch": 1.5628930817610063,
"grad_norm": 2.0786710848008214,
"learning_rate": 8.419121507807966e-08,
"loss": 1.6414,
"step": 994
},
{
"epoch": 1.5644654088050314,
"grad_norm": 2.1766614925100805,
"learning_rate": 8.361192473268831e-08,
"loss": 1.7614,
"step": 995
},
{
"epoch": 1.5660377358490565,
"grad_norm": 2.0644913614642784,
"learning_rate": 8.30343637581585e-08,
"loss": 1.7658,
"step": 996
},
{
"epoch": 1.5676100628930818,
"grad_norm": 1.9942721896362847,
"learning_rate": 8.245853590397171e-08,
"loss": 1.5864,
"step": 997
},
{
"epoch": 1.569182389937107,
"grad_norm": 2.107035516490154,
"learning_rate": 8.188444490835773e-08,
"loss": 1.5109,
"step": 998
},
{
"epoch": 1.5707547169811322,
"grad_norm": 2.157536925590625,
"learning_rate": 8.131209449827121e-08,
"loss": 1.8098,
"step": 999
},
{
"epoch": 1.5723270440251573,
"grad_norm": 2.3169442855875535,
"learning_rate": 8.074148838936693e-08,
"loss": 1.718,
"step": 1000
},
{
"epoch": 1.5723270440251573,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.7004035711288452,
"eval_sat2_MCTS_chains_SFT_val_runtime": 92.1151,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.16,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.4,
"step": 1000
},
{
"epoch": 1.5738993710691824,
"grad_norm": 1.9653337976940755,
"learning_rate": 8.017263028597577e-08,
"loss": 1.6755,
"step": 1001
},
{
"epoch": 1.5754716981132075,
"grad_norm": 2.259220567901995,
"learning_rate": 7.960552388108074e-08,
"loss": 1.6192,
"step": 1002
},
{
"epoch": 1.5770440251572326,
"grad_norm": 2.172629330750012,
"learning_rate": 7.9040172856293e-08,
"loss": 1.7591,
"step": 1003
},
{
"epoch": 1.5786163522012577,
"grad_norm": 2.235449144986895,
"learning_rate": 7.847658088182764e-08,
"loss": 1.6464,
"step": 1004
},
{
"epoch": 1.580188679245283,
"grad_norm": 2.1331939859889766,
"learning_rate": 7.791475161648044e-08,
"loss": 1.7274,
"step": 1005
},
{
"epoch": 1.5817610062893082,
"grad_norm": 2.11798299275123,
"learning_rate": 7.735468870760373e-08,
"loss": 1.9111,
"step": 1006
},
{
"epoch": 1.5833333333333335,
"grad_norm": 2.1205150272186266,
"learning_rate": 7.679639579108278e-08,
"loss": 1.8506,
"step": 1007
},
{
"epoch": 1.5849056603773586,
"grad_norm": 2.0605259082135428,
"learning_rate": 7.623987649131212e-08,
"loss": 1.5979,
"step": 1008
},
{
"epoch": 1.5864779874213837,
"grad_norm": 2.366698058592538,
"learning_rate": 7.568513442117235e-08,
"loss": 1.6993,
"step": 1009
},
{
"epoch": 1.5880503144654088,
"grad_norm": 2.218013241692686,
"learning_rate": 7.513217318200599e-08,
"loss": 1.7854,
"step": 1010
},
{
"epoch": 1.5896226415094339,
"grad_norm": 1.9909585694013383,
"learning_rate": 7.458099636359496e-08,
"loss": 1.6368,
"step": 1011
},
{
"epoch": 1.591194968553459,
"grad_norm": 2.290605549139274,
"learning_rate": 7.403160754413676e-08,
"loss": 1.7737,
"step": 1012
},
{
"epoch": 1.5927672955974843,
"grad_norm": 2.1367293795429947,
"learning_rate": 7.348401029022108e-08,
"loss": 1.6134,
"step": 1013
},
{
"epoch": 1.5943396226415094,
"grad_norm": 2.01820143740589,
"learning_rate": 7.293820815680712e-08,
"loss": 1.7256,
"step": 1014
},
{
"epoch": 1.5959119496855347,
"grad_norm": 2.276755773858234,
"learning_rate": 7.239420468720059e-08,
"loss": 1.9804,
"step": 1015
},
{
"epoch": 1.5974842767295598,
"grad_norm": 2.330331102976199,
"learning_rate": 7.185200341302975e-08,
"loss": 1.7043,
"step": 1016
},
{
"epoch": 1.599056603773585,
"grad_norm": 2.1276439937862306,
"learning_rate": 7.131160785422365e-08,
"loss": 1.9397,
"step": 1017
},
{
"epoch": 1.60062893081761,
"grad_norm": 2.172507453951336,
"learning_rate": 7.077302151898875e-08,
"loss": 1.9139,
"step": 1018
},
{
"epoch": 1.6022012578616351,
"grad_norm": 2.1478223685971356,
"learning_rate": 7.023624790378576e-08,
"loss": 1.6555,
"step": 1019
},
{
"epoch": 1.6037735849056602,
"grad_norm": 2.3969459707002256,
"learning_rate": 6.97012904933078e-08,
"loss": 1.9195,
"step": 1020
},
{
"epoch": 1.6053459119496856,
"grad_norm": 1.9248844302089616,
"learning_rate": 6.916815276045719e-08,
"loss": 1.8894,
"step": 1021
},
{
"epoch": 1.6069182389937107,
"grad_norm": 2.0212176784028717,
"learning_rate": 6.863683816632293e-08,
"loss": 1.7218,
"step": 1022
},
{
"epoch": 1.608490566037736,
"grad_norm": 2.094474217163886,
"learning_rate": 6.810735016015846e-08,
"loss": 1.6663,
"step": 1023
},
{
"epoch": 1.610062893081761,
"grad_norm": 2.0244431225363697,
"learning_rate": 6.757969217935929e-08,
"loss": 1.6878,
"step": 1024
},
{
"epoch": 1.6116352201257862,
"grad_norm": 2.1281168867505014,
"learning_rate": 6.705386764944006e-08,
"loss": 1.8226,
"step": 1025
},
{
"epoch": 1.6132075471698113,
"grad_norm": 2.0886881066721625,
"learning_rate": 6.652987998401334e-08,
"loss": 1.655,
"step": 1026
},
{
"epoch": 1.6147798742138364,
"grad_norm": 2.0367504844576247,
"learning_rate": 6.60077325847666e-08,
"loss": 1.7722,
"step": 1027
},
{
"epoch": 1.6163522012578615,
"grad_norm": 1.9930526077088684,
"learning_rate": 6.548742884144054e-08,
"loss": 1.7073,
"step": 1028
},
{
"epoch": 1.6179245283018868,
"grad_norm": 2.159190917587176,
"learning_rate": 6.4968972131807e-08,
"loss": 1.8479,
"step": 1029
},
{
"epoch": 1.619496855345912,
"grad_norm": 2.2749098562974903,
"learning_rate": 6.445236582164699e-08,
"loss": 1.9923,
"step": 1030
},
{
"epoch": 1.6210691823899372,
"grad_norm": 2.2025296280904225,
"learning_rate": 6.393761326472898e-08,
"loss": 1.6454,
"step": 1031
},
{
"epoch": 1.6226415094339623,
"grad_norm": 2.1334919488245414,
"learning_rate": 6.342471780278667e-08,
"loss": 1.6965,
"step": 1032
},
{
"epoch": 1.6242138364779874,
"grad_norm": 2.322316370670461,
"learning_rate": 6.291368276549802e-08,
"loss": 1.7228,
"step": 1033
},
{
"epoch": 1.6257861635220126,
"grad_norm": 1.995542145637434,
"learning_rate": 6.240451147046318e-08,
"loss": 1.595,
"step": 1034
},
{
"epoch": 1.6273584905660377,
"grad_norm": 2.196444636496991,
"learning_rate": 6.189720722318278e-08,
"loss": 1.8758,
"step": 1035
},
{
"epoch": 1.6289308176100628,
"grad_norm": 2.0927387390257657,
"learning_rate": 6.139177331703707e-08,
"loss": 2.1127,
"step": 1036
},
{
"epoch": 1.630503144654088,
"grad_norm": 1.9307021857625342,
"learning_rate": 6.088821303326411e-08,
"loss": 1.803,
"step": 1037
},
{
"epoch": 1.6320754716981132,
"grad_norm": 2.082620089850429,
"learning_rate": 6.038652964093827e-08,
"loss": 1.6595,
"step": 1038
},
{
"epoch": 1.6336477987421385,
"grad_norm": 2.067695395804015,
"learning_rate": 5.988672639694953e-08,
"loss": 1.8777,
"step": 1039
},
{
"epoch": 1.6352201257861636,
"grad_norm": 2.042833868758026,
"learning_rate": 5.938880654598219e-08,
"loss": 1.7071,
"step": 1040
},
{
"epoch": 1.6367924528301887,
"grad_norm": 2.117785030809429,
"learning_rate": 5.889277332049334e-08,
"loss": 1.7538,
"step": 1041
},
{
"epoch": 1.6383647798742138,
"grad_norm": 2.0095241417925265,
"learning_rate": 5.839862994069262e-08,
"loss": 1.8899,
"step": 1042
},
{
"epoch": 1.639937106918239,
"grad_norm": 2.2478546507042405,
"learning_rate": 5.79063796145207e-08,
"loss": 1.8472,
"step": 1043
},
{
"epoch": 1.641509433962264,
"grad_norm": 2.305709446534207,
"learning_rate": 5.74160255376288e-08,
"loss": 1.7277,
"step": 1044
},
{
"epoch": 1.6430817610062893,
"grad_norm": 2.18183263977078,
"learning_rate": 5.692757089335781e-08,
"loss": 1.9153,
"step": 1045
},
{
"epoch": 1.6446540880503144,
"grad_norm": 2.0254378536986386,
"learning_rate": 5.644101885271778e-08,
"loss": 1.8602,
"step": 1046
},
{
"epoch": 1.6462264150943398,
"grad_norm": 2.2161096727855085,
"learning_rate": 5.5956372574366835e-08,
"loss": 1.6629,
"step": 1047
},
{
"epoch": 1.6477987421383649,
"grad_norm": 2.301864500140701,
"learning_rate": 5.547363520459137e-08,
"loss": 1.7943,
"step": 1048
},
{
"epoch": 1.64937106918239,
"grad_norm": 2.092542135751507,
"learning_rate": 5.4992809877285235e-08,
"loss": 1.7474,
"step": 1049
},
{
"epoch": 1.650943396226415,
"grad_norm": 2.148759273760875,
"learning_rate": 5.4513899713929394e-08,
"loss": 1.9493,
"step": 1050
},
{
"epoch": 1.6525157232704402,
"grad_norm": 2.103715691929771,
"learning_rate": 5.403690782357175e-08,
"loss": 1.8557,
"step": 1051
},
{
"epoch": 1.6540880503144653,
"grad_norm": 2.099977822828033,
"learning_rate": 5.3561837302806944e-08,
"loss": 1.7166,
"step": 1052
},
{
"epoch": 1.6556603773584906,
"grad_norm": 1.9796989676905823,
"learning_rate": 5.3088691235756094e-08,
"loss": 1.9712,
"step": 1053
},
{
"epoch": 1.6572327044025157,
"grad_norm": 2.119765004058249,
"learning_rate": 5.2617472694047037e-08,
"loss": 1.8249,
"step": 1054
},
{
"epoch": 1.658805031446541,
"grad_norm": 2.0993720478618387,
"learning_rate": 5.2148184736794346e-08,
"loss": 1.8525,
"step": 1055
},
{
"epoch": 1.6603773584905661,
"grad_norm": 2.023540938803579,
"learning_rate": 5.1680830410579055e-08,
"loss": 2.0546,
"step": 1056
},
{
"epoch": 1.6619496855345912,
"grad_norm": 2.0699562130101015,
"learning_rate": 5.121541274942966e-08,
"loss": 1.9134,
"step": 1057
},
{
"epoch": 1.6635220125786163,
"grad_norm": 2.144350196893977,
"learning_rate": 5.07519347748018e-08,
"loss": 1.7238,
"step": 1058
},
{
"epoch": 1.6650943396226414,
"grad_norm": 2.0183983742920075,
"learning_rate": 5.029039949555856e-08,
"loss": 1.8309,
"step": 1059
},
{
"epoch": 1.6666666666666665,
"grad_norm": 2.1227831992403043,
"learning_rate": 4.983080990795154e-08,
"loss": 1.7035,
"step": 1060
},
{
"epoch": 1.6682389937106918,
"grad_norm": 2.2858740994961506,
"learning_rate": 4.937316899560099e-08,
"loss": 1.5596,
"step": 1061
},
{
"epoch": 1.669811320754717,
"grad_norm": 2.1280194157468344,
"learning_rate": 4.891747972947634e-08,
"loss": 1.6423,
"step": 1062
},
{
"epoch": 1.6713836477987423,
"grad_norm": 2.2102497033701543,
"learning_rate": 4.846374506787724e-08,
"loss": 1.6832,
"step": 1063
},
{
"epoch": 1.6729559748427674,
"grad_norm": 2.238490260804657,
"learning_rate": 4.8011967956414156e-08,
"loss": 1.6306,
"step": 1064
},
{
"epoch": 1.6745283018867925,
"grad_norm": 2.200290791301945,
"learning_rate": 4.756215132798929e-08,
"loss": 1.778,
"step": 1065
},
{
"epoch": 1.6761006289308176,
"grad_norm": 2.0985204995078024,
"learning_rate": 4.7114298102777545e-08,
"loss": 1.7058,
"step": 1066
},
{
"epoch": 1.6776729559748427,
"grad_norm": 2.087398311229735,
"learning_rate": 4.666841118820755e-08,
"loss": 1.7865,
"step": 1067
},
{
"epoch": 1.6792452830188678,
"grad_norm": 2.1078417553705817,
"learning_rate": 4.622449347894291e-08,
"loss": 1.7158,
"step": 1068
},
{
"epoch": 1.680817610062893,
"grad_norm": 2.116464602679099,
"learning_rate": 4.578254785686302e-08,
"loss": 1.8466,
"step": 1069
},
{
"epoch": 1.6823899371069182,
"grad_norm": 2.089237238472885,
"learning_rate": 4.5342577191044845e-08,
"loss": 1.6295,
"step": 1070
},
{
"epoch": 1.6839622641509435,
"grad_norm": 2.0547646711843988,
"learning_rate": 4.4904584337744134e-08,
"loss": 1.7459,
"step": 1071
},
{
"epoch": 1.6855345911949686,
"grad_norm": 2.0054803327980335,
"learning_rate": 4.4468572140376675e-08,
"loss": 1.6197,
"step": 1072
},
{
"epoch": 1.6871069182389937,
"grad_norm": 2.0697707743408293,
"learning_rate": 4.403454342950009e-08,
"loss": 1.829,
"step": 1073
},
{
"epoch": 1.6886792452830188,
"grad_norm": 2.3284003484454416,
"learning_rate": 4.360250102279542e-08,
"loss": 1.8744,
"step": 1074
},
{
"epoch": 1.690251572327044,
"grad_norm": 2.242850396768619,
"learning_rate": 4.317244772504851e-08,
"loss": 1.7455,
"step": 1075
},
{
"epoch": 1.691823899371069,
"grad_norm": 2.1997671073539204,
"learning_rate": 4.274438632813232e-08,
"loss": 2.0059,
"step": 1076
},
{
"epoch": 1.6933962264150944,
"grad_norm": 2.1566093126536643,
"learning_rate": 4.2318319610988444e-08,
"loss": 1.5531,
"step": 1077
},
{
"epoch": 1.6949685534591195,
"grad_norm": 2.13597178016107,
"learning_rate": 4.1894250339609196e-08,
"loss": 1.8328,
"step": 1078
},
{
"epoch": 1.6965408805031448,
"grad_norm": 2.07918842349438,
"learning_rate": 4.1472181267019636e-08,
"loss": 1.7407,
"step": 1079
},
{
"epoch": 1.6981132075471699,
"grad_norm": 2.028781264262438,
"learning_rate": 4.1052115133259726e-08,
"loss": 1.8737,
"step": 1080
},
{
"epoch": 1.699685534591195,
"grad_norm": 2.0355607974120016,
"learning_rate": 4.063405466536631e-08,
"loss": 1.5415,
"step": 1081
},
{
"epoch": 1.70125786163522,
"grad_norm": 1.9226221416432272,
"learning_rate": 4.021800257735578e-08,
"loss": 1.9198,
"step": 1082
},
{
"epoch": 1.7028301886792452,
"grad_norm": 1.8732770572866655,
"learning_rate": 3.9803961570206315e-08,
"loss": 1.8087,
"step": 1083
},
{
"epoch": 1.7044025157232703,
"grad_norm": 2.346364883253458,
"learning_rate": 3.9391934331840104e-08,
"loss": 1.9382,
"step": 1084
},
{
"epoch": 1.7059748427672956,
"grad_norm": 2.1529023946779433,
"learning_rate": 3.898192353710623e-08,
"loss": 1.8482,
"step": 1085
},
{
"epoch": 1.7075471698113207,
"grad_norm": 2.2018108847525646,
"learning_rate": 3.857393184776341e-08,
"loss": 1.672,
"step": 1086
},
{
"epoch": 1.709119496855346,
"grad_norm": 2.2290874113828902,
"learning_rate": 3.8167961912462046e-08,
"loss": 1.9239,
"step": 1087
},
{
"epoch": 1.7106918238993711,
"grad_norm": 2.100849188665365,
"learning_rate": 3.7764016366727704e-08,
"loss": 1.8664,
"step": 1088
},
{
"epoch": 1.7122641509433962,
"grad_norm": 2.1510675871347975,
"learning_rate": 3.73620978329439e-08,
"loss": 1.7952,
"step": 1089
},
{
"epoch": 1.7138364779874213,
"grad_norm": 2.109222961162139,
"learning_rate": 3.6962208920334554e-08,
"loss": 1.7452,
"step": 1090
},
{
"epoch": 1.7154088050314464,
"grad_norm": 2.121301963046306,
"learning_rate": 3.656435222494782e-08,
"loss": 1.8288,
"step": 1091
},
{
"epoch": 1.7169811320754715,
"grad_norm": 2.0337417924569,
"learning_rate": 3.61685303296387e-08,
"loss": 1.8223,
"step": 1092
},
{
"epoch": 1.7185534591194969,
"grad_norm": 2.1339663352283913,
"learning_rate": 3.577474580405245e-08,
"loss": 1.5421,
"step": 1093
},
{
"epoch": 1.720125786163522,
"grad_norm": 2.049959891378422,
"learning_rate": 3.5383001204607826e-08,
"loss": 1.8102,
"step": 1094
},
{
"epoch": 1.7216981132075473,
"grad_norm": 2.1813938906584847,
"learning_rate": 3.499329907448072e-08,
"loss": 2.1207,
"step": 1095
},
{
"epoch": 1.7232704402515724,
"grad_norm": 2.0705000827955558,
"learning_rate": 3.4605641943587113e-08,
"loss": 1.8636,
"step": 1096
},
{
"epoch": 1.7248427672955975,
"grad_norm": 2.238011733888544,
"learning_rate": 3.4220032328567384e-08,
"loss": 1.5974,
"step": 1097
},
{
"epoch": 1.7264150943396226,
"grad_norm": 2.2247891478941857,
"learning_rate": 3.383647273276945e-08,
"loss": 1.8494,
"step": 1098
},
{
"epoch": 1.7279874213836477,
"grad_norm": 2.312396508453329,
"learning_rate": 3.345496564623257e-08,
"loss": 1.885,
"step": 1099
},
{
"epoch": 1.7295597484276728,
"grad_norm": 2.066339746833989,
"learning_rate": 3.3075513545671434e-08,
"loss": 1.7994,
"step": 1100
},
{
"epoch": 1.7311320754716981,
"grad_norm": 2.0879972636931314,
"learning_rate": 3.269811889445988e-08,
"loss": 1.6402,
"step": 1101
},
{
"epoch": 1.7327044025157232,
"grad_norm": 2.175814596182231,
"learning_rate": 3.232278414261481e-08,
"loss": 1.5661,
"step": 1102
},
{
"epoch": 1.7342767295597485,
"grad_norm": 2.1194787082555018,
"learning_rate": 3.194951172678054e-08,
"loss": 1.7645,
"step": 1103
},
{
"epoch": 1.7358490566037736,
"grad_norm": 1.8763626024150262,
"learning_rate": 3.157830407021283e-08,
"loss": 1.596,
"step": 1104
},
{
"epoch": 1.7374213836477987,
"grad_norm": 1.9594370469295614,
"learning_rate": 3.120916358276331e-08,
"loss": 1.6861,
"step": 1105
},
{
"epoch": 1.7389937106918238,
"grad_norm": 2.3713769253326453,
"learning_rate": 3.084209266086331e-08,
"loss": 1.5862,
"step": 1106
},
{
"epoch": 1.740566037735849,
"grad_norm": 2.1938199292645937,
"learning_rate": 3.047709368750924e-08,
"loss": 1.6771,
"step": 1107
},
{
"epoch": 1.742138364779874,
"grad_norm": 2.3215493957832267,
"learning_rate": 3.01141690322463e-08,
"loss": 1.7517,
"step": 1108
},
{
"epoch": 1.7437106918238994,
"grad_norm": 2.221134746509961,
"learning_rate": 2.9753321051153258e-08,
"loss": 1.7712,
"step": 1109
},
{
"epoch": 1.7452830188679245,
"grad_norm": 2.1030740049013517,
"learning_rate": 2.9394552086827434e-08,
"loss": 1.7837,
"step": 1110
},
{
"epoch": 1.7468553459119498,
"grad_norm": 2.178087587225378,
"learning_rate": 2.9037864468369417e-08,
"loss": 1.7186,
"step": 1111
},
{
"epoch": 1.748427672955975,
"grad_norm": 2.1574371301384434,
"learning_rate": 2.8683260511367614e-08,
"loss": 1.5805,
"step": 1112
},
{
"epoch": 1.75,
"grad_norm": 2.042041045810803,
"learning_rate": 2.8330742517883645e-08,
"loss": 1.7781,
"step": 1113
},
{
"epoch": 1.751572327044025,
"grad_norm": 1.996501467635451,
"learning_rate": 2.7980312776437142e-08,
"loss": 1.8566,
"step": 1114
},
{
"epoch": 1.7531446540880502,
"grad_norm": 1.9640009019290934,
"learning_rate": 2.7631973561990995e-08,
"loss": 2.0415,
"step": 1115
},
{
"epoch": 1.7547169811320755,
"grad_norm": 2.181588054812827,
"learning_rate": 2.7285727135936608e-08,
"loss": 1.6838,
"step": 1116
},
{
"epoch": 1.7562893081761006,
"grad_norm": 2.1470396103954705,
"learning_rate": 2.6941575746079108e-08,
"loss": 1.9552,
"step": 1117
},
{
"epoch": 1.757861635220126,
"grad_norm": 2.064818182873414,
"learning_rate": 2.659952162662269e-08,
"loss": 1.7339,
"step": 1118
},
{
"epoch": 1.759433962264151,
"grad_norm": 2.39097255770087,
"learning_rate": 2.625956699815639e-08,
"loss": 1.6014,
"step": 1119
},
{
"epoch": 1.7610062893081762,
"grad_norm": 2.08931618388101,
"learning_rate": 2.592171406763949e-08,
"loss": 1.6226,
"step": 1120
},
{
"epoch": 1.7625786163522013,
"grad_norm": 2.085612329162341,
"learning_rate": 2.5585965028387198e-08,
"loss": 1.6741,
"step": 1121
},
{
"epoch": 1.7641509433962264,
"grad_norm": 2.0859266300459067,
"learning_rate": 2.5252322060056403e-08,
"loss": 1.8141,
"step": 1122
},
{
"epoch": 1.7657232704402515,
"grad_norm": 2.063805491342782,
"learning_rate": 2.4920787328631565e-08,
"loss": 1.6166,
"step": 1123
},
{
"epoch": 1.7672955974842768,
"grad_norm": 2.1187493282183016,
"learning_rate": 2.459136298641057e-08,
"loss": 1.6022,
"step": 1124
},
{
"epoch": 1.7688679245283019,
"grad_norm": 2.1979153283650414,
"learning_rate": 2.426405117199089e-08,
"loss": 1.6834,
"step": 1125
},
{
"epoch": 1.7704402515723272,
"grad_norm": 2.20082255588697,
"learning_rate": 2.393885401025565e-08,
"loss": 1.9188,
"step": 1126
},
{
"epoch": 1.7720125786163523,
"grad_norm": 2.1540464614977504,
"learning_rate": 2.361577361235962e-08,
"loss": 1.5527,
"step": 1127
},
{
"epoch": 1.7735849056603774,
"grad_norm": 2.0788864004074923,
"learning_rate": 2.3294812075716015e-08,
"loss": 1.9392,
"step": 1128
},
{
"epoch": 1.7751572327044025,
"grad_norm": 2.04615578653692,
"learning_rate": 2.2975971483982428e-08,
"loss": 1.8391,
"step": 1129
},
{
"epoch": 1.7767295597484276,
"grad_norm": 2.034007984738505,
"learning_rate": 2.265925390704726e-08,
"loss": 1.8705,
"step": 1130
},
{
"epoch": 1.7783018867924527,
"grad_norm": 2.146991438580307,
"learning_rate": 2.2344661401016678e-08,
"loss": 1.9585,
"step": 1131
},
{
"epoch": 1.779874213836478,
"grad_norm": 2.279053736154095,
"learning_rate": 2.203219600820112e-08,
"loss": 2.1532,
"step": 1132
},
{
"epoch": 1.7814465408805031,
"grad_norm": 2.0190752770101548,
"learning_rate": 2.1721859757101658e-08,
"loss": 1.6968,
"step": 1133
},
{
"epoch": 1.7830188679245285,
"grad_norm": 2.28515721518105,
"learning_rate": 2.1413654662397408e-08,
"loss": 1.626,
"step": 1134
},
{
"epoch": 1.7845911949685536,
"grad_norm": 2.135849516978255,
"learning_rate": 2.1107582724932088e-08,
"loss": 1.7029,
"step": 1135
},
{
"epoch": 1.7861635220125787,
"grad_norm": 2.418793471259512,
"learning_rate": 2.0803645931701158e-08,
"loss": 1.759,
"step": 1136
},
{
"epoch": 1.7877358490566038,
"grad_norm": 2.322445509938412,
"learning_rate": 2.0501846255838835e-08,
"loss": 1.6907,
"step": 1137
},
{
"epoch": 1.7893081761006289,
"grad_norm": 2.115867407487902,
"learning_rate": 2.0202185656605426e-08,
"loss": 1.8523,
"step": 1138
},
{
"epoch": 1.790880503144654,
"grad_norm": 2.2369615207847096,
"learning_rate": 1.9904666079374393e-08,
"loss": 1.8127,
"step": 1139
},
{
"epoch": 1.7924528301886793,
"grad_norm": 2.0960658556245133,
"learning_rate": 1.9609289455619883e-08,
"loss": 1.5551,
"step": 1140
},
{
"epoch": 1.7940251572327044,
"grad_norm": 2.085123820184512,
"learning_rate": 1.9316057702904277e-08,
"loss": 1.8461,
"step": 1141
},
{
"epoch": 1.7955974842767297,
"grad_norm": 2.082494020984376,
"learning_rate": 1.9024972724865423e-08,
"loss": 1.9352,
"step": 1142
},
{
"epoch": 1.7971698113207548,
"grad_norm": 2.093869902799492,
"learning_rate": 1.8736036411204626e-08,
"loss": 1.7042,
"step": 1143
},
{
"epoch": 1.79874213836478,
"grad_norm": 2.3137806461183845,
"learning_rate": 1.8449250637674162e-08,
"loss": 1.7895,
"step": 1144
},
{
"epoch": 1.800314465408805,
"grad_norm": 2.1172529387064563,
"learning_rate": 1.8164617266065252e-08,
"loss": 1.8503,
"step": 1145
},
{
"epoch": 1.8018867924528301,
"grad_norm": 2.1273968333527282,
"learning_rate": 1.7882138144195685e-08,
"loss": 1.6297,
"step": 1146
},
{
"epoch": 1.8034591194968552,
"grad_norm": 2.268804643140494,
"learning_rate": 1.7601815105898215e-08,
"loss": 1.73,
"step": 1147
},
{
"epoch": 1.8050314465408805,
"grad_norm": 2.113208754031722,
"learning_rate": 1.7323649971008393e-08,
"loss": 1.6516,
"step": 1148
},
{
"epoch": 1.8066037735849056,
"grad_norm": 2.3541202063911695,
"learning_rate": 1.7047644545352903e-08,
"loss": 1.681,
"step": 1149
},
{
"epoch": 1.808176100628931,
"grad_norm": 2.248100742975873,
"learning_rate": 1.6773800620737644e-08,
"loss": 1.8295,
"step": 1150
},
{
"epoch": 1.809748427672956,
"grad_norm": 1.9422747223256194,
"learning_rate": 1.650211997493634e-08,
"loss": 1.6425,
"step": 1151
},
{
"epoch": 1.8113207547169812,
"grad_norm": 2.1573082083805577,
"learning_rate": 1.6232604371678726e-08,
"loss": 1.923,
"step": 1152
},
{
"epoch": 1.8128930817610063,
"grad_norm": 2.1325777796205254,
"learning_rate": 1.5965255560639394e-08,
"loss": 1.7978,
"step": 1153
},
{
"epoch": 1.8144654088050314,
"grad_norm": 2.002442725649601,
"learning_rate": 1.5700075277426262e-08,
"loss": 1.7531,
"step": 1154
},
{
"epoch": 1.8160377358490565,
"grad_norm": 1.9280913979981322,
"learning_rate": 1.543706524356917e-08,
"loss": 1.8257,
"step": 1155
},
{
"epoch": 1.8176100628930818,
"grad_norm": 2.1344422655015496,
"learning_rate": 1.5176227166509058e-08,
"loss": 1.7187,
"step": 1156
},
{
"epoch": 1.819182389937107,
"grad_norm": 2.2004037593468224,
"learning_rate": 1.491756273958673e-08,
"loss": 1.9901,
"step": 1157
},
{
"epoch": 1.8207547169811322,
"grad_norm": 2.0033561732048955,
"learning_rate": 1.466107364203158e-08,
"loss": 1.679,
"step": 1158
},
{
"epoch": 1.8223270440251573,
"grad_norm": 2.060413883738862,
"learning_rate": 1.440676153895114e-08,
"loss": 1.7332,
"step": 1159
},
{
"epoch": 1.8238993710691824,
"grad_norm": 2.2111387133657314,
"learning_rate": 1.4154628081320014e-08,
"loss": 1.7536,
"step": 1160
},
{
"epoch": 1.8254716981132075,
"grad_norm": 2.131048492611576,
"learning_rate": 1.3904674905969066e-08,
"loss": 1.8556,
"step": 1161
},
{
"epoch": 1.8270440251572326,
"grad_norm": 2.0610846483559664,
"learning_rate": 1.3656903635575167e-08,
"loss": 1.8077,
"step": 1162
},
{
"epoch": 1.8286163522012577,
"grad_norm": 2.1736177189318733,
"learning_rate": 1.3411315878650237e-08,
"loss": 1.6993,
"step": 1163
},
{
"epoch": 1.830188679245283,
"grad_norm": 2.2099683077755583,
"learning_rate": 1.3167913229531135e-08,
"loss": 1.7427,
"step": 1164
},
{
"epoch": 1.8317610062893082,
"grad_norm": 2.222331633508216,
"learning_rate": 1.2926697268369101e-08,
"loss": 1.6154,
"step": 1165
},
{
"epoch": 1.8333333333333335,
"grad_norm": 2.056907346969074,
"learning_rate": 1.2687669561119568e-08,
"loss": 1.5743,
"step": 1166
},
{
"epoch": 1.8349056603773586,
"grad_norm": 2.2254294416157134,
"learning_rate": 1.245083165953194e-08,
"loss": 1.6526,
"step": 1167
},
{
"epoch": 1.8364779874213837,
"grad_norm": 1.975892358409506,
"learning_rate": 1.2216185101139692e-08,
"loss": 1.6919,
"step": 1168
},
{
"epoch": 1.8380503144654088,
"grad_norm": 1.9887040123280215,
"learning_rate": 1.1983731409250181e-08,
"loss": 1.9421,
"step": 1169
},
{
"epoch": 1.8396226415094339,
"grad_norm": 2.065623115774402,
"learning_rate": 1.1753472092934858e-08,
"loss": 1.664,
"step": 1170
},
{
"epoch": 1.841194968553459,
"grad_norm": 2.0212401073533495,
"learning_rate": 1.1525408647019474e-08,
"loss": 1.9716,
"step": 1171
},
{
"epoch": 1.8427672955974843,
"grad_norm": 2.0410818459050075,
"learning_rate": 1.129954255207441e-08,
"loss": 1.647,
"step": 1172
},
{
"epoch": 1.8443396226415094,
"grad_norm": 2.2637429629509964,
"learning_rate": 1.1075875274404834e-08,
"loss": 1.9561,
"step": 1173
},
{
"epoch": 1.8459119496855347,
"grad_norm": 2.1445378180592356,
"learning_rate": 1.0854408266041543e-08,
"loss": 1.8834,
"step": 1174
},
{
"epoch": 1.8474842767295598,
"grad_norm": 2.171320431386727,
"learning_rate": 1.063514296473132e-08,
"loss": 1.8161,
"step": 1175
},
{
"epoch": 1.849056603773585,
"grad_norm": 2.0483569709125966,
"learning_rate": 1.041808079392753e-08,
"loss": 1.811,
"step": 1176
},
{
"epoch": 1.85062893081761,
"grad_norm": 2.1123320054916697,
"learning_rate": 1.020322316278111e-08,
"loss": 1.8621,
"step": 1177
},
{
"epoch": 1.8522012578616351,
"grad_norm": 1.9907743276040535,
"learning_rate": 9.990571466131276e-09,
"loss": 1.8181,
"step": 1178
},
{
"epoch": 1.8537735849056602,
"grad_norm": 2.2022266952576572,
"learning_rate": 9.780127084496431e-09,
"loss": 1.5795,
"step": 1179
},
{
"epoch": 1.8553459119496856,
"grad_norm": 2.1497279507664535,
"learning_rate": 9.571891384065272e-09,
"loss": 1.6334,
"step": 1180
},
{
"epoch": 1.8569182389937107,
"grad_norm": 2.2214098590343028,
"learning_rate": 9.365865716687965e-09,
"loss": 2.0922,
"step": 1181
},
{
"epoch": 1.858490566037736,
"grad_norm": 2.064033723821905,
"learning_rate": 9.162051419867245e-09,
"loss": 1.6108,
"step": 1182
},
{
"epoch": 1.860062893081761,
"grad_norm": 2.426092174363886,
"learning_rate": 8.960449816749832e-09,
"loss": 1.5644,
"step": 1183
},
{
"epoch": 1.8616352201257862,
"grad_norm": 2.074214131520031,
"learning_rate": 8.761062216117765e-09,
"loss": 1.5808,
"step": 1184
},
{
"epoch": 1.8632075471698113,
"grad_norm": 2.1958655328363146,
"learning_rate": 8.563889912380046e-09,
"loss": 1.8186,
"step": 1185
},
{
"epoch": 1.8647798742138364,
"grad_norm": 2.2651556186496626,
"learning_rate": 8.368934185564013e-09,
"loss": 1.6952,
"step": 1186
},
{
"epoch": 1.8663522012578615,
"grad_norm": 2.1256908151950915,
"learning_rate": 8.176196301307264e-09,
"loss": 1.7424,
"step": 1187
},
{
"epoch": 1.8679245283018868,
"grad_norm": 2.0538768988218536,
"learning_rate": 7.985677510849332e-09,
"loss": 1.923,
"step": 1188
},
{
"epoch": 1.869496855345912,
"grad_norm": 2.061086071767639,
"learning_rate": 7.79737905102349e-09,
"loss": 2.0386,
"step": 1189
},
{
"epoch": 1.8710691823899372,
"grad_norm": 2.170281435880277,
"learning_rate": 7.611302144248788e-09,
"loss": 1.979,
"step": 1190
},
{
"epoch": 1.8726415094339623,
"grad_norm": 2.0702664036289944,
"learning_rate": 7.427447998522241e-09,
"loss": 1.9203,
"step": 1191
},
{
"epoch": 1.8742138364779874,
"grad_norm": 2.186437061351241,
"learning_rate": 7.245817807410742e-09,
"loss": 2.0204,
"step": 1192
},
{
"epoch": 1.8757861635220126,
"grad_norm": 2.403033320304919,
"learning_rate": 7.066412750043532e-09,
"loss": 1.8169,
"step": 1193
},
{
"epoch": 1.8773584905660377,
"grad_norm": 2.113165661681811,
"learning_rate": 6.889233991104421e-09,
"loss": 1.8014,
"step": 1194
},
{
"epoch": 1.8789308176100628,
"grad_norm": 2.1364558866941907,
"learning_rate": 6.714282680824252e-09,
"loss": 1.8172,
"step": 1195
},
{
"epoch": 1.880503144654088,
"grad_norm": 2.085369574271752,
"learning_rate": 6.54155995497348e-09,
"loss": 1.9062,
"step": 1196
},
{
"epoch": 1.8820754716981132,
"grad_norm": 2.158776809363429,
"learning_rate": 6.371066934854713e-09,
"loss": 1.7571,
"step": 1197
},
{
"epoch": 1.8836477987421385,
"grad_norm": 2.1204855812030936,
"learning_rate": 6.202804727295441e-09,
"loss": 1.4898,
"step": 1198
},
{
"epoch": 1.8852201257861636,
"grad_norm": 1.9588917914820057,
"learning_rate": 6.036774424641044e-09,
"loss": 1.9212,
"step": 1199
},
{
"epoch": 1.8867924528301887,
"grad_norm": 2.289926267202458,
"learning_rate": 5.872977104747451e-09,
"loss": 1.8261,
"step": 1200
},
{
"epoch": 1.8883647798742138,
"grad_norm": 2.248912074374657,
"learning_rate": 5.711413830974177e-09,
"loss": 1.5361,
"step": 1201
},
{
"epoch": 1.889937106918239,
"grad_norm": 2.1042256295937656,
"learning_rate": 5.5520856521775685e-09,
"loss": 1.8066,
"step": 1202
},
{
"epoch": 1.891509433962264,
"grad_norm": 2.180029267811429,
"learning_rate": 5.3949936027039625e-09,
"loss": 2.1126,
"step": 1203
},
{
"epoch": 1.8930817610062893,
"grad_norm": 2.1625082758181744,
"learning_rate": 5.240138702382729e-09,
"loss": 1.7901,
"step": 1204
},
{
"epoch": 1.8946540880503144,
"grad_norm": 2.1665119765928,
"learning_rate": 5.087521956520058e-09,
"loss": 1.8569,
"step": 1205
},
{
"epoch": 1.8962264150943398,
"grad_norm": 2.0777861736774725,
"learning_rate": 4.937144355891998e-09,
"loss": 1.793,
"step": 1206
},
{
"epoch": 1.8977987421383649,
"grad_norm": 2.066304356371116,
"learning_rate": 4.789006876738438e-09,
"loss": 1.7053,
"step": 1207
},
{
"epoch": 1.89937106918239,
"grad_norm": 2.0957159756988006,
"learning_rate": 4.643110480756423e-09,
"loss": 1.7215,
"step": 1208
},
{
"epoch": 1.900943396226415,
"grad_norm": 2.111256856709086,
"learning_rate": 4.499456115094169e-09,
"loss": 1.8245,
"step": 1209
},
{
"epoch": 1.9025157232704402,
"grad_norm": 1.8782104151978112,
"learning_rate": 4.358044712344688e-09,
"loss": 1.6741,
"step": 1210
},
{
"epoch": 1.9040880503144653,
"grad_norm": 2.141467865299451,
"learning_rate": 4.218877190539927e-09,
"loss": 1.7832,
"step": 1211
},
{
"epoch": 1.9056603773584906,
"grad_norm": 2.0679767608171584,
"learning_rate": 4.081954453144737e-09,
"loss": 1.8032,
"step": 1212
},
{
"epoch": 1.9072327044025157,
"grad_norm": 2.044401061483079,
"learning_rate": 3.947277389051013e-09,
"loss": 1.5991,
"step": 1213
},
{
"epoch": 1.908805031446541,
"grad_norm": 2.3204022598290424,
"learning_rate": 3.814846872571781e-09,
"loss": 2.1206,
"step": 1214
},
{
"epoch": 1.9103773584905661,
"grad_norm": 2.194698485486922,
"learning_rate": 3.68466376343588e-09,
"loss": 1.8517,
"step": 1215
},
{
"epoch": 1.9119496855345912,
"grad_norm": 2.116787406844949,
"learning_rate": 3.556728906781897e-09,
"loss": 1.751,
"step": 1216
},
{
"epoch": 1.9135220125786163,
"grad_norm": 2.105786362411949,
"learning_rate": 3.4310431331531553e-09,
"loss": 1.6647,
"step": 1217
},
{
"epoch": 1.9150943396226414,
"grad_norm": 2.23499471842287,
"learning_rate": 3.307607258491962e-09,
"loss": 1.7705,
"step": 1218
},
{
"epoch": 1.9166666666666665,
"grad_norm": 2.131569173737192,
"learning_rate": 3.18642208413456e-09,
"loss": 1.889,
"step": 1219
},
{
"epoch": 1.9182389937106918,
"grad_norm": 2.0221410280515,
"learning_rate": 3.067488396805684e-09,
"loss": 1.5895,
"step": 1220
},
{
"epoch": 1.919811320754717,
"grad_norm": 2.000235641315321,
"learning_rate": 2.950806968613745e-09,
"loss": 1.8866,
"step": 1221
},
{
"epoch": 1.9213836477987423,
"grad_norm": 2.6217655796510377,
"learning_rate": 2.8363785570455436e-09,
"loss": 1.7802,
"step": 1222
},
{
"epoch": 1.9229559748427674,
"grad_norm": 2.3713251232928996,
"learning_rate": 2.724203904961531e-09,
"loss": 1.7429,
"step": 1223
},
{
"epoch": 1.9245283018867925,
"grad_norm": 2.0162450187498515,
"learning_rate": 2.6142837405909113e-09,
"loss": 1.6852,
"step": 1224
},
{
"epoch": 1.9261006289308176,
"grad_norm": 1.9184165558043198,
"learning_rate": 2.5066187775269034e-09,
"loss": 2.0859,
"step": 1225
},
{
"epoch": 1.9276729559748427,
"grad_norm": 2.0980898585416403,
"learning_rate": 2.401209714722152e-09,
"loss": 1.776,
"step": 1226
},
{
"epoch": 1.9292452830188678,
"grad_norm": 2.102737428792809,
"learning_rate": 2.2980572364841854e-09,
"loss": 1.8705,
"step": 1227
},
{
"epoch": 1.930817610062893,
"grad_norm": 2.235581223829696,
"learning_rate": 2.1971620124709435e-09,
"loss": 1.8633,
"step": 1228
},
{
"epoch": 1.9323899371069182,
"grad_norm": 2.1003684288920006,
"learning_rate": 2.098524697686427e-09,
"loss": 1.8356,
"step": 1229
},
{
"epoch": 1.9339622641509435,
"grad_norm": 2.085896244612752,
"learning_rate": 2.002145932476501e-09,
"loss": 1.6605,
"step": 1230
},
{
"epoch": 1.9355345911949686,
"grad_norm": 2.249282131879236,
"learning_rate": 1.908026342524738e-09,
"loss": 1.572,
"step": 1231
},
{
"epoch": 1.9371069182389937,
"grad_norm": 2.133881200071366,
"learning_rate": 1.8161665388481796e-09,
"loss": 1.7188,
"step": 1232
},
{
"epoch": 1.9386792452830188,
"grad_norm": 2.0523111952597124,
"learning_rate": 1.7265671177936092e-09,
"loss": 2.0153,
"step": 1233
},
{
"epoch": 1.940251572327044,
"grad_norm": 2.095543253062672,
"learning_rate": 1.639228661033587e-09,
"loss": 1.5568,
"step": 1234
},
{
"epoch": 1.941823899371069,
"grad_norm": 2.2647929929993764,
"learning_rate": 1.554151735562642e-09,
"loss": 2.0448,
"step": 1235
},
{
"epoch": 1.9433962264150944,
"grad_norm": 2.1607959179153404,
"learning_rate": 1.47133689369362e-09,
"loss": 1.6183,
"step": 1236
},
{
"epoch": 1.9449685534591195,
"grad_norm": 2.069314519936326,
"learning_rate": 1.3907846730541073e-09,
"loss": 1.7316,
"step": 1237
},
{
"epoch": 1.9465408805031448,
"grad_norm": 2.402914241865446,
"learning_rate": 1.3124955965828966e-09,
"loss": 1.6961,
"step": 1238
},
{
"epoch": 1.9481132075471699,
"grad_norm": 2.418179213383202,
"learning_rate": 1.2364701725266436e-09,
"loss": 1.8602,
"step": 1239
},
{
"epoch": 1.949685534591195,
"grad_norm": 2.0182001003309122,
"learning_rate": 1.162708894436526e-09,
"loss": 1.8426,
"step": 1240
},
{
"epoch": 1.95125786163522,
"grad_norm": 1.9544224426157475,
"learning_rate": 1.0912122411651348e-09,
"loss": 1.912,
"step": 1241
},
{
"epoch": 1.9528301886792452,
"grad_norm": 2.1284678150486975,
"learning_rate": 1.0219806768631712e-09,
"loss": 1.7659,
"step": 1242
},
{
"epoch": 1.9544025157232703,
"grad_norm": 2.103380584288797,
"learning_rate": 9.550146509766489e-10,
"loss": 1.6285,
"step": 1243
},
{
"epoch": 1.9559748427672956,
"grad_norm": 2.318883943366702,
"learning_rate": 8.903145982438242e-10,
"loss": 1.7361,
"step": 1244
},
{
"epoch": 1.9575471698113207,
"grad_norm": 2.0210439137006007,
"learning_rate": 8.278809386924767e-10,
"loss": 1.8383,
"step": 1245
},
{
"epoch": 1.959119496855346,
"grad_norm": 2.122214078228037,
"learning_rate": 7.677140776371494e-10,
"loss": 1.6549,
"step": 1246
},
{
"epoch": 1.9606918238993711,
"grad_norm": 2.148904360940334,
"learning_rate": 7.0981440567639e-10,
"loss": 1.777,
"step": 1247
},
{
"epoch": 1.9622641509433962,
"grad_norm": 2.157751893993133,
"learning_rate": 6.541822986904589e-10,
"loss": 1.7056,
"step": 1248
},
{
"epoch": 1.9638364779874213,
"grad_norm": 1.9449801123342445,
"learning_rate": 6.00818117838725e-10,
"loss": 1.7219,
"step": 1249
},
{
"epoch": 1.9654088050314464,
"grad_norm": 2.054951250857022,
"learning_rate": 5.497222095572962e-10,
"loss": 1.6673,
"step": 1250
},
{
"epoch": 1.9654088050314464,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.6998926401138306,
"eval_sat2_MCTS_chains_SFT_val_runtime": 91.7731,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 11.202,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.406,
"step": 1250
},
{
"epoch": 1.9669811320754715,
"grad_norm": 2.05069047537442,
"learning_rate": 5.008949055568812e-10,
"loss": 1.7608,
"step": 1251
},
{
"epoch": 1.9685534591194969,
"grad_norm": 2.154702108749229,
"learning_rate": 4.543365228205753e-10,
"loss": 1.6858,
"step": 1252
},
{
"epoch": 1.970125786163522,
"grad_norm": 2.075168647544607,
"learning_rate": 4.1004736360183976e-10,
"loss": 1.6641,
"step": 1253
},
{
"epoch": 1.9716981132075473,
"grad_norm": 2.2047735011240226,
"learning_rate": 3.6802771542244204e-10,
"loss": 1.7977,
"step": 1254
},
{
"epoch": 1.9732704402515724,
"grad_norm": 2.128359624030826,
"learning_rate": 3.2827785107074623e-10,
"loss": 1.5849,
"step": 1255
},
{
"epoch": 1.9748427672955975,
"grad_norm": 2.1524898340309595,
"learning_rate": 2.907980285997702e-10,
"loss": 1.6826,
"step": 1256
},
{
"epoch": 1.9764150943396226,
"grad_norm": 2.0578150120619005,
"learning_rate": 2.555884913256312e-10,
"loss": 1.9806,
"step": 1257
},
{
"epoch": 1.9779874213836477,
"grad_norm": 2.0645335423876263,
"learning_rate": 2.2264946782599158e-10,
"loss": 1.7219,
"step": 1258
},
{
"epoch": 1.9795597484276728,
"grad_norm": 2.1474076116693337,
"learning_rate": 1.9198117193838791e-10,
"loss": 1.853,
"step": 1259
},
{
"epoch": 1.9811320754716981,
"grad_norm": 2.0521508824628105,
"learning_rate": 1.6358380275906524e-10,
"loss": 1.6023,
"step": 1260
},
{
"epoch": 1.9827044025157232,
"grad_norm": 2.3303244823585625,
"learning_rate": 1.3745754464157823e-10,
"loss": 1.7856,
"step": 1261
},
{
"epoch": 1.9842767295597485,
"grad_norm": 2.112760564566335,
"learning_rate": 1.1360256719554762e-10,
"loss": 2.052,
"step": 1262
},
{
"epoch": 1.9858490566037736,
"grad_norm": 2.0973660824743323,
"learning_rate": 9.201902528561123e-11,
"loss": 1.8666,
"step": 1263
},
{
"epoch": 1.9874213836477987,
"grad_norm": 2.1164187723139043,
"learning_rate": 7.270705903056895e-11,
"loss": 1.7619,
"step": 1264
},
{
"epoch": 1.9889937106918238,
"grad_norm": 2.184592687671629,
"learning_rate": 5.566679380210049e-11,
"loss": 1.8967,
"step": 1265
},
{
"epoch": 1.990566037735849,
"grad_norm": 2.0610164571083756,
"learning_rate": 4.089834022437677e-11,
"loss": 1.8686,
"step": 1266
},
{
"epoch": 1.992138364779874,
"grad_norm": 2.176062181352369,
"learning_rate": 2.8401794173049666e-11,
"loss": 1.5427,
"step": 1267
},
{
"epoch": 1.9937106918238994,
"grad_norm": 2.0675804076152313,
"learning_rate": 1.8177236774707948e-11,
"loss": 1.6378,
"step": 1268
},
{
"epoch": 1.9952830188679245,
"grad_norm": 2.290052278929668,
"learning_rate": 1.022473440637217e-11,
"loss": 1.656,
"step": 1269
},
{
"epoch": 1.9968553459119498,
"grad_norm": 2.287180872342125,
"learning_rate": 4.544338695106064e-12,
"loss": 1.8058,
"step": 1270
},
{
"epoch": 1.998427672955975,
"grad_norm": 2.0048323595450275,
"learning_rate": 1.1360865176279766e-12,
"loss": 1.6803,
"step": 1271
},
{
"epoch": 2.0,
"grad_norm": 2.049630174327001,
"learning_rate": 0.0,
"loss": 1.7973,
"step": 1272
}
],
"logging_steps": 1,
"max_steps": 1272,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 214395832958976.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}