{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.10062893081761,
"eval_steps": 250,
"global_step": 700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015723270440251573,
"grad_norm": 3.035508934265075,
"learning_rate": 3.3333333333333334e-08,
"loss": 1.6777,
"step": 1
},
{
"epoch": 0.0031446540880503146,
"grad_norm": 3.0057695605079546,
"learning_rate": 6.666666666666667e-08,
"loss": 1.6331,
"step": 2
},
{
"epoch": 0.0047169811320754715,
"grad_norm": 2.9814044526374595,
"learning_rate": 1e-07,
"loss": 1.4808,
"step": 3
},
{
"epoch": 0.006289308176100629,
"grad_norm": 3.0308381749187743,
"learning_rate": 1.3333333333333334e-07,
"loss": 1.6287,
"step": 4
},
{
"epoch": 0.007861635220125786,
"grad_norm": 2.9090567988102314,
"learning_rate": 1.6666666666666665e-07,
"loss": 1.6219,
"step": 5
},
{
"epoch": 0.009433962264150943,
"grad_norm": 3.0513140041110858,
"learning_rate": 2e-07,
"loss": 1.6202,
"step": 6
},
{
"epoch": 0.0110062893081761,
"grad_norm": 2.8644498623931534,
"learning_rate": 2.3333333333333333e-07,
"loss": 1.7804,
"step": 7
},
{
"epoch": 0.012578616352201259,
"grad_norm": 2.830057997617726,
"learning_rate": 2.6666666666666667e-07,
"loss": 1.757,
"step": 8
},
{
"epoch": 0.014150943396226415,
"grad_norm": 2.8388015134887135,
"learning_rate": 3e-07,
"loss": 1.6526,
"step": 9
},
{
"epoch": 0.015723270440251572,
"grad_norm": 2.880110978968438,
"learning_rate": 3.333333333333333e-07,
"loss": 1.5365,
"step": 10
},
{
"epoch": 0.01729559748427673,
"grad_norm": 2.8055557623152865,
"learning_rate": 3.666666666666666e-07,
"loss": 1.7431,
"step": 11
},
{
"epoch": 0.018867924528301886,
"grad_norm": 2.9896802933245774,
"learning_rate": 4e-07,
"loss": 1.4656,
"step": 12
},
{
"epoch": 0.020440251572327043,
"grad_norm": 2.8810736182794288,
"learning_rate": 4.3333333333333335e-07,
"loss": 1.6771,
"step": 13
},
{
"epoch": 0.0220125786163522,
"grad_norm": 2.947703957244773,
"learning_rate": 4.6666666666666666e-07,
"loss": 1.6045,
"step": 14
},
{
"epoch": 0.02358490566037736,
"grad_norm": 2.8455733324410803,
"learning_rate": 5e-07,
"loss": 1.6552,
"step": 15
},
{
"epoch": 0.025157232704402517,
"grad_norm": 2.778002396514386,
"learning_rate": 5.333333333333333e-07,
"loss": 1.7711,
"step": 16
},
{
"epoch": 0.026729559748427674,
"grad_norm": 2.9492084900651876,
"learning_rate": 5.666666666666666e-07,
"loss": 2.0138,
"step": 17
},
{
"epoch": 0.02830188679245283,
"grad_norm": 2.7955099115508673,
"learning_rate": 6e-07,
"loss": 1.5838,
"step": 18
},
{
"epoch": 0.029874213836477988,
"grad_norm": 2.6565096089163025,
"learning_rate": 6.333333333333332e-07,
"loss": 1.5531,
"step": 19
},
{
"epoch": 0.031446540880503145,
"grad_norm": 2.9560437361213654,
"learning_rate": 6.666666666666666e-07,
"loss": 1.6762,
"step": 20
},
{
"epoch": 0.0330188679245283,
"grad_norm": 2.8608313422183844,
"learning_rate": 7e-07,
"loss": 1.6094,
"step": 21
},
{
"epoch": 0.03459119496855346,
"grad_norm": 2.798168174146352,
"learning_rate": 7.333333333333332e-07,
"loss": 1.4948,
"step": 22
},
{
"epoch": 0.036163522012578615,
"grad_norm": 2.951819566765442,
"learning_rate": 7.666666666666667e-07,
"loss": 1.7308,
"step": 23
},
{
"epoch": 0.03773584905660377,
"grad_norm": 2.7022575669246827,
"learning_rate": 8e-07,
"loss": 1.6431,
"step": 24
},
{
"epoch": 0.03930817610062893,
"grad_norm": 2.9734048507595046,
"learning_rate": 8.333333333333333e-07,
"loss": 1.4427,
"step": 25
},
{
"epoch": 0.040880503144654086,
"grad_norm": 2.8578492497460215,
"learning_rate": 8.666666666666667e-07,
"loss": 1.652,
"step": 26
},
{
"epoch": 0.04245283018867924,
"grad_norm": 2.688378160170607,
"learning_rate": 9e-07,
"loss": 1.548,
"step": 27
},
{
"epoch": 0.0440251572327044,
"grad_norm": 2.720907333149357,
"learning_rate": 9.333333333333333e-07,
"loss": 1.4457,
"step": 28
},
{
"epoch": 0.04559748427672956,
"grad_norm": 2.7657315566885403,
"learning_rate": 9.666666666666666e-07,
"loss": 1.5847,
"step": 29
},
{
"epoch": 0.04716981132075472,
"grad_norm": 2.883002593514775,
"learning_rate": 1e-06,
"loss": 1.7482,
"step": 30
},
{
"epoch": 0.04874213836477988,
"grad_norm": 2.794154766097177,
"learning_rate": 9.99997377618298e-07,
"loss": 1.8203,
"step": 31
},
{
"epoch": 0.050314465408805034,
"grad_norm": 3.088898639473824,
"learning_rate": 9.999895105006994e-07,
"loss": 1.7194,
"step": 32
},
{
"epoch": 0.05188679245283019,
"grad_norm": 2.6537048553933595,
"learning_rate": 9.999763987297264e-07,
"loss": 1.5857,
"step": 33
},
{
"epoch": 0.05345911949685535,
"grad_norm": 2.8821124249415058,
"learning_rate": 9.999580424429159e-07,
"loss": 1.8331,
"step": 34
},
{
"epoch": 0.055031446540880505,
"grad_norm": 2.7140815674763528,
"learning_rate": 9.99934441832816e-07,
"loss": 1.7067,
"step": 35
},
{
"epoch": 0.05660377358490566,
"grad_norm": 4.038361733121943,
"learning_rate": 9.999055971469863e-07,
"loss": 1.6599,
"step": 36
},
{
"epoch": 0.05817610062893082,
"grad_norm": 2.818848742384438,
"learning_rate": 9.998715086879935e-07,
"loss": 1.6759,
"step": 37
},
{
"epoch": 0.059748427672955975,
"grad_norm": 2.6841802401523087,
"learning_rate": 9.9983217681341e-07,
"loss": 1.4583,
"step": 38
},
{
"epoch": 0.06132075471698113,
"grad_norm": 2.9473060866002427,
"learning_rate": 9.997876019358083e-07,
"loss": 1.7774,
"step": 39
},
{
"epoch": 0.06289308176100629,
"grad_norm": 2.8471406767354868,
"learning_rate": 9.997377845227574e-07,
"loss": 1.721,
"step": 40
},
{
"epoch": 0.06446540880503145,
"grad_norm": 2.9451168374380705,
"learning_rate": 9.996827250968189e-07,
"loss": 1.6511,
"step": 41
},
{
"epoch": 0.0660377358490566,
"grad_norm": 2.6981678791938464,
"learning_rate": 9.996224242355397e-07,
"loss": 1.6338,
"step": 42
},
{
"epoch": 0.06761006289308176,
"grad_norm": 2.8294757698285555,
"learning_rate": 9.995568825714478e-07,
"loss": 1.6758,
"step": 43
},
{
"epoch": 0.06918238993710692,
"grad_norm": 2.8990349133049262,
"learning_rate": 9.994861007920439e-07,
"loss": 1.6104,
"step": 44
},
{
"epoch": 0.07075471698113207,
"grad_norm": 3.223974104333564,
"learning_rate": 9.994100796397953e-07,
"loss": 1.6934,
"step": 45
},
{
"epoch": 0.07232704402515723,
"grad_norm": 2.5987506786490826,
"learning_rate": 9.993288199121282e-07,
"loss": 1.8592,
"step": 46
},
{
"epoch": 0.07389937106918239,
"grad_norm": 2.900601376300913,
"learning_rate": 9.992423224614183e-07,
"loss": 1.6923,
"step": 47
},
{
"epoch": 0.07547169811320754,
"grad_norm": 2.960175869720827,
"learning_rate": 9.991505881949836e-07,
"loss": 1.6595,
"step": 48
},
{
"epoch": 0.0770440251572327,
"grad_norm": 2.7854484480315596,
"learning_rate": 9.990536180750723e-07,
"loss": 1.6249,
"step": 49
},
{
"epoch": 0.07861635220125786,
"grad_norm": 2.887167509757426,
"learning_rate": 9.989514131188558e-07,
"loss": 1.6919,
"step": 50
},
{
"epoch": 0.08018867924528301,
"grad_norm": 2.685247678792898,
"learning_rate": 9.988439743984152e-07,
"loss": 1.5878,
"step": 51
},
{
"epoch": 0.08176100628930817,
"grad_norm": 3.0741107440461795,
"learning_rate": 9.987313030407323e-07,
"loss": 1.8271,
"step": 52
},
{
"epoch": 0.08333333333333333,
"grad_norm": 2.966822892375055,
"learning_rate": 9.986134002276759e-07,
"loss": 1.8816,
"step": 53
},
{
"epoch": 0.08490566037735849,
"grad_norm": 2.9759914869433772,
"learning_rate": 9.98490267195991e-07,
"loss": 1.6398,
"step": 54
},
{
"epoch": 0.08647798742138364,
"grad_norm": 2.7657894370450373,
"learning_rate": 9.983619052372847e-07,
"loss": 1.5922,
"step": 55
},
{
"epoch": 0.0880503144654088,
"grad_norm": 2.9896944252013355,
"learning_rate": 9.98228315698013e-07,
"loss": 1.7046,
"step": 56
},
{
"epoch": 0.08962264150943396,
"grad_norm": 2.934360797135303,
"learning_rate": 9.980894999794678e-07,
"loss": 1.6305,
"step": 57
},
{
"epoch": 0.09119496855345911,
"grad_norm": 2.766331961112693,
"learning_rate": 9.979454595377593e-07,
"loss": 1.5623,
"step": 58
},
{
"epoch": 0.09276729559748427,
"grad_norm": 2.7288840403410095,
"learning_rate": 9.97796195883804e-07,
"loss": 1.7267,
"step": 59
},
{
"epoch": 0.09433962264150944,
"grad_norm": 2.8579967121162118,
"learning_rate": 9.97641710583307e-07,
"loss": 1.7857,
"step": 60
},
{
"epoch": 0.0959119496855346,
"grad_norm": 2.875962566949244,
"learning_rate": 9.974820052567459e-07,
"loss": 1.7046,
"step": 61
},
{
"epoch": 0.09748427672955975,
"grad_norm": 2.795328958545491,
"learning_rate": 9.973170815793542e-07,
"loss": 1.5145,
"step": 62
},
{
"epoch": 0.09905660377358491,
"grad_norm": 2.7813800259967802,
"learning_rate": 9.971469412811032e-07,
"loss": 1.4644,
"step": 63
},
{
"epoch": 0.10062893081761007,
"grad_norm": 2.884776745864663,
"learning_rate": 9.969715861466839e-07,
"loss": 1.6132,
"step": 64
},
{
"epoch": 0.10220125786163523,
"grad_norm": 2.9656746421376883,
"learning_rate": 9.967910180154888e-07,
"loss": 1.5777,
"step": 65
},
{
"epoch": 0.10377358490566038,
"grad_norm": 2.9036874607841523,
"learning_rate": 9.96605238781592e-07,
"loss": 1.5308,
"step": 66
},
{
"epoch": 0.10534591194968554,
"grad_norm": 2.7956792718016876,
"learning_rate": 9.964142503937305e-07,
"loss": 1.5853,
"step": 67
},
{
"epoch": 0.1069182389937107,
"grad_norm": 2.897862336708564,
"learning_rate": 9.96218054855281e-07,
"loss": 1.5228,
"step": 68
},
{
"epoch": 0.10849056603773585,
"grad_norm": 2.8838404866317164,
"learning_rate": 9.960166542242428e-07,
"loss": 1.5635,
"step": 69
},
{
"epoch": 0.11006289308176101,
"grad_norm": 2.7639581085690574,
"learning_rate": 9.958100506132126e-07,
"loss": 1.67,
"step": 70
},
{
"epoch": 0.11163522012578617,
"grad_norm": 2.9848350207143586,
"learning_rate": 9.955982461893646e-07,
"loss": 1.7932,
"step": 71
},
{
"epoch": 0.11320754716981132,
"grad_norm": 2.748422015742939,
"learning_rate": 9.953812431744274e-07,
"loss": 1.4805,
"step": 72
},
{
"epoch": 0.11477987421383648,
"grad_norm": 2.769235366918167,
"learning_rate": 9.951590438446596e-07,
"loss": 1.5452,
"step": 73
},
{
"epoch": 0.11635220125786164,
"grad_norm": 3.0140385674610073,
"learning_rate": 9.94931650530827e-07,
"loss": 1.6898,
"step": 74
},
{
"epoch": 0.1179245283018868,
"grad_norm": 2.887633044905872,
"learning_rate": 9.946990656181779e-07,
"loss": 1.7871,
"step": 75
},
{
"epoch": 0.11949685534591195,
"grad_norm": 2.9129477208043038,
"learning_rate": 9.94461291546418e-07,
"loss": 1.6111,
"step": 76
},
{
"epoch": 0.12106918238993711,
"grad_norm": 3.01831163886919,
"learning_rate": 9.942183308096853e-07,
"loss": 1.6132,
"step": 77
},
{
"epoch": 0.12264150943396226,
"grad_norm": 2.997495878078337,
"learning_rate": 9.93970185956522e-07,
"loss": 1.4617,
"step": 78
},
{
"epoch": 0.12421383647798742,
"grad_norm": 2.678939753603714,
"learning_rate": 9.937168595898508e-07,
"loss": 1.8479,
"step": 79
},
{
"epoch": 0.12578616352201258,
"grad_norm": 2.860552053547966,
"learning_rate": 9.934583543669453e-07,
"loss": 1.6458,
"step": 80
},
{
"epoch": 0.12735849056603774,
"grad_norm": 3.226580813213542,
"learning_rate": 9.93194672999403e-07,
"loss": 1.8994,
"step": 81
},
{
"epoch": 0.1289308176100629,
"grad_norm": 2.9327109905443445,
"learning_rate": 9.929258182531166e-07,
"loss": 1.5082,
"step": 82
},
{
"epoch": 0.13050314465408805,
"grad_norm": 2.745588457904038,
"learning_rate": 9.926517929482452e-07,
"loss": 1.4991,
"step": 83
},
{
"epoch": 0.1320754716981132,
"grad_norm": 2.7892084882752903,
"learning_rate": 9.923725999591846e-07,
"loss": 1.7913,
"step": 84
},
{
"epoch": 0.13364779874213836,
"grad_norm": 2.661813722362997,
"learning_rate": 9.92088242214537e-07,
"loss": 1.7167,
"step": 85
},
{
"epoch": 0.13522012578616352,
"grad_norm": 2.6050356717221548,
"learning_rate": 9.91798722697081e-07,
"loss": 1.5446,
"step": 86
},
{
"epoch": 0.13679245283018868,
"grad_norm": 2.907740454108008,
"learning_rate": 9.915040444437388e-07,
"loss": 1.6019,
"step": 87
},
{
"epoch": 0.13836477987421383,
"grad_norm": 2.9635527119113982,
"learning_rate": 9.912042105455461e-07,
"loss": 1.7168,
"step": 88
},
{
"epoch": 0.139937106918239,
"grad_norm": 2.9813810825397966,
"learning_rate": 9.908992241476186e-07,
"loss": 1.8573,
"step": 89
},
{
"epoch": 0.14150943396226415,
"grad_norm": 2.925962029784791,
"learning_rate": 9.905890884491194e-07,
"loss": 1.5332,
"step": 90
},
{
"epoch": 0.1430817610062893,
"grad_norm": 2.8712379530519474,
"learning_rate": 9.902738067032253e-07,
"loss": 1.757,
"step": 91
},
{
"epoch": 0.14465408805031446,
"grad_norm": 2.6784420543405205,
"learning_rate": 9.899533822170921e-07,
"loss": 1.5947,
"step": 92
},
{
"epoch": 0.14622641509433962,
"grad_norm": 2.6480973364337683,
"learning_rate": 9.896278183518216e-07,
"loss": 1.5718,
"step": 93
},
{
"epoch": 0.14779874213836477,
"grad_norm": 2.703204128264305,
"learning_rate": 9.892971185224244e-07,
"loss": 1.7105,
"step": 94
},
{
"epoch": 0.14937106918238993,
"grad_norm": 3.4041744211779057,
"learning_rate": 9.889612861977853e-07,
"loss": 1.6717,
"step": 95
},
{
"epoch": 0.1509433962264151,
"grad_norm": 3.067864390619053,
"learning_rate": 9.886203249006264e-07,
"loss": 1.6355,
"step": 96
},
{
"epoch": 0.15251572327044025,
"grad_norm": 2.8468635252020205,
"learning_rate": 9.882742382074706e-07,
"loss": 1.5463,
"step": 97
},
{
"epoch": 0.1540880503144654,
"grad_norm": 3.2598365988029827,
"learning_rate": 9.879230297486034e-07,
"loss": 1.5718,
"step": 98
},
{
"epoch": 0.15566037735849056,
"grad_norm": 2.7916727929412763,
"learning_rate": 9.875667032080352e-07,
"loss": 1.6524,
"step": 99
},
{
"epoch": 0.15723270440251572,
"grad_norm": 2.907285248234792,
"learning_rate": 9.872052623234631e-07,
"loss": 1.634,
"step": 100
},
{
"epoch": 0.15880503144654087,
"grad_norm": 2.7962618088809132,
"learning_rate": 9.868387108862305e-07,
"loss": 1.7726,
"step": 101
},
{
"epoch": 0.16037735849056603,
"grad_norm": 2.8049200840595616,
"learning_rate": 9.86467052741289e-07,
"loss": 1.7011,
"step": 102
},
{
"epoch": 0.1619496855345912,
"grad_norm": 3.0342654424151236,
"learning_rate": 9.860902917871566e-07,
"loss": 1.5985,
"step": 103
},
{
"epoch": 0.16352201257861634,
"grad_norm": 3.133992010155292,
"learning_rate": 9.85708431975877e-07,
"loss": 1.6142,
"step": 104
},
{
"epoch": 0.1650943396226415,
"grad_norm": 2.765316038304173,
"learning_rate": 9.853214773129795e-07,
"loss": 1.6856,
"step": 105
},
{
"epoch": 0.16666666666666666,
"grad_norm": 2.783202148615839,
"learning_rate": 9.84929431857435e-07,
"loss": 1.7386,
"step": 106
},
{
"epoch": 0.16823899371069181,
"grad_norm": 2.865985855515824,
"learning_rate": 9.845322997216151e-07,
"loss": 1.5695,
"step": 107
},
{
"epoch": 0.16981132075471697,
"grad_norm": 2.910313902240843,
"learning_rate": 9.841300850712478e-07,
"loss": 1.7493,
"step": 108
},
{
"epoch": 0.17138364779874213,
"grad_norm": 2.978282642772813,
"learning_rate": 9.837227921253745e-07,
"loss": 1.4316,
"step": 109
},
{
"epoch": 0.17295597484276728,
"grad_norm": 2.829785642110051,
"learning_rate": 9.833104251563055e-07,
"loss": 1.8296,
"step": 110
},
{
"epoch": 0.17452830188679244,
"grad_norm": 2.858578282056512,
"learning_rate": 9.828929884895752e-07,
"loss": 1.5925,
"step": 111
},
{
"epoch": 0.1761006289308176,
"grad_norm": 2.5984089623254985,
"learning_rate": 9.824704865038967e-07,
"loss": 1.7858,
"step": 112
},
{
"epoch": 0.17767295597484276,
"grad_norm": 2.8160266518499113,
"learning_rate": 9.820429236311158e-07,
"loss": 1.6244,
"step": 113
},
{
"epoch": 0.1792452830188679,
"grad_norm": 2.9596119800105627,
"learning_rate": 9.816103043561648e-07,
"loss": 1.7678,
"step": 114
},
{
"epoch": 0.18081761006289307,
"grad_norm": 2.8939811442478267,
"learning_rate": 9.81172633217015e-07,
"loss": 1.47,
"step": 115
},
{
"epoch": 0.18238993710691823,
"grad_norm": 3.045485057646403,
"learning_rate": 9.8072991480463e-07,
"loss": 1.6268,
"step": 116
},
{
"epoch": 0.18396226415094338,
"grad_norm": 2.857537343336608,
"learning_rate": 9.80282153762916e-07,
"loss": 1.8207,
"step": 117
},
{
"epoch": 0.18553459119496854,
"grad_norm": 3.0151265251520902,
"learning_rate": 9.798293547886746e-07,
"loss": 1.6861,
"step": 118
},
{
"epoch": 0.1871069182389937,
"grad_norm": 2.8731688993411058,
"learning_rate": 9.793715226315528e-07,
"loss": 1.7075,
"step": 119
},
{
"epoch": 0.18867924528301888,
"grad_norm": 3.036031941810449,
"learning_rate": 9.789086620939935e-07,
"loss": 1.7492,
"step": 120
},
{
"epoch": 0.19025157232704404,
"grad_norm": 2.525691377987968,
"learning_rate": 9.784407780311845e-07,
"loss": 1.624,
"step": 121
},
{
"epoch": 0.1918238993710692,
"grad_norm": 2.831970564433951,
"learning_rate": 9.77967875351008e-07,
"loss": 1.7192,
"step": 122
},
{
"epoch": 0.19339622641509435,
"grad_norm": 2.9942913777146702,
"learning_rate": 9.774899590139897e-07,
"loss": 1.6851,
"step": 123
},
{
"epoch": 0.1949685534591195,
"grad_norm": 2.8618269576046416,
"learning_rate": 9.770070340332456e-07,
"loss": 1.6114,
"step": 124
},
{
"epoch": 0.19654088050314467,
"grad_norm": 2.883119377274619,
"learning_rate": 9.765191054744304e-07,
"loss": 1.6136,
"step": 125
},
{
"epoch": 0.19811320754716982,
"grad_norm": 3.044922518807826,
"learning_rate": 9.760261784556838e-07,
"loss": 1.5851,
"step": 126
},
{
"epoch": 0.19968553459119498,
"grad_norm": 3.0028920393371963,
"learning_rate": 9.755282581475767e-07,
"loss": 1.4865,
"step": 127
},
{
"epoch": 0.20125786163522014,
"grad_norm": 2.8941881562896117,
"learning_rate": 9.750253497730579e-07,
"loss": 1.6744,
"step": 128
},
{
"epoch": 0.2028301886792453,
"grad_norm": 2.8528564542903574,
"learning_rate": 9.745174586073982e-07,
"loss": 1.7564,
"step": 129
},
{
"epoch": 0.20440251572327045,
"grad_norm": 2.906920298063749,
"learning_rate": 9.740045899781352e-07,
"loss": 1.6233,
"step": 130
},
{
"epoch": 0.2059748427672956,
"grad_norm": 2.9381321768165365,
"learning_rate": 9.734867492650186e-07,
"loss": 1.5676,
"step": 131
},
{
"epoch": 0.20754716981132076,
"grad_norm": 2.525924813073986,
"learning_rate": 9.729639418999522e-07,
"loss": 1.6379,
"step": 132
},
{
"epoch": 0.20911949685534592,
"grad_norm": 2.874791690228388,
"learning_rate": 9.72436173366938e-07,
"loss": 1.6909,
"step": 133
},
{
"epoch": 0.21069182389937108,
"grad_norm": 2.8783530324233575,
"learning_rate": 9.71903449202018e-07,
"loss": 1.6119,
"step": 134
},
{
"epoch": 0.21226415094339623,
"grad_norm": 3.01107861687442,
"learning_rate": 9.713657749932171e-07,
"loss": 1.5956,
"step": 135
},
{
"epoch": 0.2138364779874214,
"grad_norm": 2.6494766790190396,
"learning_rate": 9.708231563804828e-07,
"loss": 1.7058,
"step": 136
},
{
"epoch": 0.21540880503144655,
"grad_norm": 2.85527919179103,
"learning_rate": 9.702755990556276e-07,
"loss": 1.4486,
"step": 137
},
{
"epoch": 0.2169811320754717,
"grad_norm": 2.8898725200578457,
"learning_rate": 9.697231087622689e-07,
"loss": 1.6514,
"step": 138
},
{
"epoch": 0.21855345911949686,
"grad_norm": 2.7800528070460193,
"learning_rate": 9.691656912957684e-07,
"loss": 1.5338,
"step": 139
},
{
"epoch": 0.22012578616352202,
"grad_norm": 2.9948230844512986,
"learning_rate": 9.686033525031719e-07,
"loss": 1.6575,
"step": 140
},
{
"epoch": 0.22169811320754718,
"grad_norm": 3.0719024693904156,
"learning_rate": 9.680360982831466e-07,
"loss": 1.8096,
"step": 141
},
{
"epoch": 0.22327044025157233,
"grad_norm": 2.965190903432847,
"learning_rate": 9.674639345859212e-07,
"loss": 1.6506,
"step": 142
},
{
"epoch": 0.2248427672955975,
"grad_norm": 2.7336724355420974,
"learning_rate": 9.668868674132222e-07,
"loss": 1.5536,
"step": 143
},
{
"epoch": 0.22641509433962265,
"grad_norm": 2.936229195084225,
"learning_rate": 9.663049028182111e-07,
"loss": 1.6658,
"step": 144
},
{
"epoch": 0.2279874213836478,
"grad_norm": 2.9200650900522938,
"learning_rate": 9.657180469054212e-07,
"loss": 1.775,
"step": 145
},
{
"epoch": 0.22955974842767296,
"grad_norm": 2.9510267288582313,
"learning_rate": 9.651263058306932e-07,
"loss": 1.8345,
"step": 146
},
{
"epoch": 0.23113207547169812,
"grad_norm": 2.911711013545184,
"learning_rate": 9.645296858011107e-07,
"loss": 1.5686,
"step": 147
},
{
"epoch": 0.23270440251572327,
"grad_norm": 2.825920400262173,
"learning_rate": 9.63928193074936e-07,
"loss": 1.8736,
"step": 148
},
{
"epoch": 0.23427672955974843,
"grad_norm": 2.8916020775900777,
"learning_rate": 9.633218339615432e-07,
"loss": 1.6468,
"step": 149
},
{
"epoch": 0.2358490566037736,
"grad_norm": 2.7416340763711933,
"learning_rate": 9.62710614821352e-07,
"loss": 1.6178,
"step": 150
},
{
"epoch": 0.23742138364779874,
"grad_norm": 2.9834851184926645,
"learning_rate": 9.620945420657623e-07,
"loss": 1.6483,
"step": 151
},
{
"epoch": 0.2389937106918239,
"grad_norm": 2.7340688138076614,
"learning_rate": 9.61473622157086e-07,
"loss": 1.6169,
"step": 152
},
{
"epoch": 0.24056603773584906,
"grad_norm": 3.0147829611770507,
"learning_rate": 9.608478616084782e-07,
"loss": 1.5423,
"step": 153
},
{
"epoch": 0.24213836477987422,
"grad_norm": 2.7769273819581795,
"learning_rate": 9.60217266983872e-07,
"loss": 1.7204,
"step": 154
},
{
"epoch": 0.24371069182389937,
"grad_norm": 3.1113626251292983,
"learning_rate": 9.59581844897906e-07,
"loss": 1.5996,
"step": 155
},
{
"epoch": 0.24528301886792453,
"grad_norm": 3.096327063422857,
"learning_rate": 9.589416020158577e-07,
"loss": 1.6628,
"step": 156
},
{
"epoch": 0.2468553459119497,
"grad_norm": 2.7450104707143894,
"learning_rate": 9.582965450535713e-07,
"loss": 1.6721,
"step": 157
},
{
"epoch": 0.24842767295597484,
"grad_norm": 2.8568927176653847,
"learning_rate": 9.576466807773898e-07,
"loss": 1.7649,
"step": 158
},
{
"epoch": 0.25,
"grad_norm": 2.9012234468955453,
"learning_rate": 9.569920160040814e-07,
"loss": 1.6029,
"step": 159
},
{
"epoch": 0.25157232704402516,
"grad_norm": 2.9377323423033648,
"learning_rate": 9.5633255760077e-07,
"loss": 1.5913,
"step": 160
},
{
"epoch": 0.2531446540880503,
"grad_norm": 3.058065738854532,
"learning_rate": 9.556683124848623e-07,
"loss": 1.7773,
"step": 161
},
{
"epoch": 0.25471698113207547,
"grad_norm": 2.722307155009549,
"learning_rate": 9.54999287623975e-07,
"loss": 1.7939,
"step": 162
},
{
"epoch": 0.2562893081761006,
"grad_norm": 2.728987715004337,
"learning_rate": 9.543254900358628e-07,
"loss": 1.5814,
"step": 163
},
{
"epoch": 0.2578616352201258,
"grad_norm": 2.7633955658565417,
"learning_rate": 9.536469267883431e-07,
"loss": 1.7616,
"step": 164
},
{
"epoch": 0.25943396226415094,
"grad_norm": 2.930495074512941,
"learning_rate": 9.529636049992233e-07,
"loss": 1.5567,
"step": 165
},
{
"epoch": 0.2610062893081761,
"grad_norm": 2.9643728371296185,
"learning_rate": 9.522755318362259e-07,
"loss": 1.8551,
"step": 166
},
{
"epoch": 0.26257861635220126,
"grad_norm": 2.573314810237526,
"learning_rate": 9.515827145169127e-07,
"loss": 1.6787,
"step": 167
},
{
"epoch": 0.2641509433962264,
"grad_norm": 2.9470989186968333,
"learning_rate": 9.508851603086092e-07,
"loss": 1.7099,
"step": 168
},
{
"epoch": 0.26572327044025157,
"grad_norm": 2.8828467331827228,
"learning_rate": 9.501828765283294e-07,
"loss": 1.6403,
"step": 169
},
{
"epoch": 0.2672955974842767,
"grad_norm": 2.9869324639180967,
"learning_rate": 9.494758705426976e-07,
"loss": 1.6434,
"step": 170
},
{
"epoch": 0.2688679245283019,
"grad_norm": 2.8105412464843087,
"learning_rate": 9.487641497678722e-07,
"loss": 1.6361,
"step": 171
},
{
"epoch": 0.27044025157232704,
"grad_norm": 3.0942364695061486,
"learning_rate": 9.480477216694673e-07,
"loss": 1.6451,
"step": 172
},
{
"epoch": 0.2720125786163522,
"grad_norm": 3.0314242945023637,
"learning_rate": 9.473265937624746e-07,
"loss": 1.6221,
"step": 173
},
{
"epoch": 0.27358490566037735,
"grad_norm": 2.7961141658929964,
"learning_rate": 9.466007736111845e-07,
"loss": 1.5805,
"step": 174
},
{
"epoch": 0.2751572327044025,
"grad_norm": 3.0044488577576836,
"learning_rate": 9.458702688291071e-07,
"loss": 1.6451,
"step": 175
},
{
"epoch": 0.27672955974842767,
"grad_norm": 2.9175212019013017,
"learning_rate": 9.45135087078892e-07,
"loss": 1.6767,
"step": 176
},
{
"epoch": 0.2783018867924528,
"grad_norm": 2.852020241073256,
"learning_rate": 9.443952360722476e-07,
"loss": 1.527,
"step": 177
},
{
"epoch": 0.279874213836478,
"grad_norm": 3.1705793252501775,
"learning_rate": 9.43650723569861e-07,
"loss": 1.5246,
"step": 178
},
{
"epoch": 0.28144654088050314,
"grad_norm": 2.862701451741416,
"learning_rate": 9.429015573813162e-07,
"loss": 1.5528,
"step": 179
},
{
"epoch": 0.2830188679245283,
"grad_norm": 2.8470290608359825,
"learning_rate": 9.421477453650117e-07,
"loss": 1.6671,
"step": 180
},
{
"epoch": 0.28459119496855345,
"grad_norm": 2.9393982731492487,
"learning_rate": 9.413892954280791e-07,
"loss": 1.6337,
"step": 181
},
{
"epoch": 0.2861635220125786,
"grad_norm": 3.065777945428912,
"learning_rate": 9.406262155262994e-07,
"loss": 1.6864,
"step": 182
},
{
"epoch": 0.28773584905660377,
"grad_norm": 2.894668346030678,
"learning_rate": 9.398585136640194e-07,
"loss": 1.4675,
"step": 183
},
{
"epoch": 0.2893081761006289,
"grad_norm": 3.014856784865118,
"learning_rate": 9.390861978940685e-07,
"loss": 1.7052,
"step": 184
},
{
"epoch": 0.2908805031446541,
"grad_norm": 2.8120139974233638,
"learning_rate": 9.383092763176738e-07,
"loss": 1.4707,
"step": 185
},
{
"epoch": 0.29245283018867924,
"grad_norm": 2.6767355416672762,
"learning_rate": 9.375277570843749e-07,
"loss": 1.7074,
"step": 186
},
{
"epoch": 0.2940251572327044,
"grad_norm": 3.0632750130950113,
"learning_rate": 9.367416483919387e-07,
"loss": 1.7085,
"step": 187
},
{
"epoch": 0.29559748427672955,
"grad_norm": 2.698141986224555,
"learning_rate": 9.359509584862735e-07,
"loss": 1.6085,
"step": 188
},
{
"epoch": 0.2971698113207547,
"grad_norm": 2.8483611453583997,
"learning_rate": 9.351556956613422e-07,
"loss": 1.5957,
"step": 189
},
{
"epoch": 0.29874213836477986,
"grad_norm": 3.0115523041706482,
"learning_rate": 9.343558682590755e-07,
"loss": 1.4812,
"step": 190
},
{
"epoch": 0.300314465408805,
"grad_norm": 2.864790703828733,
"learning_rate": 9.335514846692845e-07,
"loss": 1.8253,
"step": 191
},
{
"epoch": 0.3018867924528302,
"grad_norm": 2.7185712819247723,
"learning_rate": 9.327425533295723e-07,
"loss": 1.7479,
"step": 192
},
{
"epoch": 0.30345911949685533,
"grad_norm": 2.849610670824873,
"learning_rate": 9.319290827252459e-07,
"loss": 1.4834,
"step": 193
},
{
"epoch": 0.3050314465408805,
"grad_norm": 2.8962850323516713,
"learning_rate": 9.311110813892269e-07,
"loss": 2.0937,
"step": 194
},
{
"epoch": 0.30660377358490565,
"grad_norm": 2.801049995905829,
"learning_rate": 9.302885579019626e-07,
"loss": 1.6817,
"step": 195
},
{
"epoch": 0.3081761006289308,
"grad_norm": 2.539543086891977,
"learning_rate": 9.294615208913348e-07,
"loss": 1.6195,
"step": 196
},
{
"epoch": 0.30974842767295596,
"grad_norm": 3.14726092255164,
"learning_rate": 9.286299790325706e-07,
"loss": 1.495,
"step": 197
},
{
"epoch": 0.3113207547169811,
"grad_norm": 2.838713337721914,
"learning_rate": 9.277939410481505e-07,
"loss": 1.7412,
"step": 198
},
{
"epoch": 0.3128930817610063,
"grad_norm": 2.870003649666304,
"learning_rate": 9.269534157077176e-07,
"loss": 1.5115,
"step": 199
},
{
"epoch": 0.31446540880503143,
"grad_norm": 2.8165531483143362,
"learning_rate": 9.261084118279846e-07,
"loss": 1.7478,
"step": 200
},
{
"epoch": 0.3160377358490566,
"grad_norm": 2.825444545833805,
"learning_rate": 9.252589382726425e-07,
"loss": 1.7086,
"step": 201
},
{
"epoch": 0.31761006289308175,
"grad_norm": 3.171370944860818,
"learning_rate": 9.244050039522672e-07,
"loss": 1.5309,
"step": 202
},
{
"epoch": 0.3191823899371069,
"grad_norm": 2.757671298779396,
"learning_rate": 9.235466178242253e-07,
"loss": 1.6026,
"step": 203
},
{
"epoch": 0.32075471698113206,
"grad_norm": 2.790965429905353,
"learning_rate": 9.226837888925812e-07,
"loss": 1.7138,
"step": 204
},
{
"epoch": 0.3223270440251572,
"grad_norm": 2.93045091970252,
"learning_rate": 9.218165262080022e-07,
"loss": 1.6591,
"step": 205
},
{
"epoch": 0.3238993710691824,
"grad_norm": 2.82026195028126,
"learning_rate": 9.209448388676635e-07,
"loss": 1.6508,
"step": 206
},
{
"epoch": 0.32547169811320753,
"grad_norm": 3.126262802082267,
"learning_rate": 9.200687360151527e-07,
"loss": 1.4957,
"step": 207
},
{
"epoch": 0.3270440251572327,
"grad_norm": 2.895265690199012,
"learning_rate": 9.191882268403741e-07,
"loss": 1.5272,
"step": 208
},
{
"epoch": 0.32861635220125784,
"grad_norm": 2.877287755389602,
"learning_rate": 9.183033205794524e-07,
"loss": 1.54,
"step": 209
},
{
"epoch": 0.330188679245283,
"grad_norm": 2.734817869548982,
"learning_rate": 9.174140265146355e-07,
"loss": 1.6412,
"step": 210
},
{
"epoch": 0.33176100628930816,
"grad_norm": 2.867112233501488,
"learning_rate": 9.165203539741974e-07,
"loss": 1.6154,
"step": 211
},
{
"epoch": 0.3333333333333333,
"grad_norm": 2.8418280829564813,
"learning_rate": 9.156223123323404e-07,
"loss": 1.4353,
"step": 212
},
{
"epoch": 0.33490566037735847,
"grad_norm": 2.727537043457243,
"learning_rate": 9.147199110090958e-07,
"loss": 1.5871,
"step": 213
},
{
"epoch": 0.33647798742138363,
"grad_norm": 2.944426021386228,
"learning_rate": 9.13813159470227e-07,
"loss": 1.5932,
"step": 214
},
{
"epoch": 0.3380503144654088,
"grad_norm": 3.2505023109601447,
"learning_rate": 9.129020672271281e-07,
"loss": 1.6692,
"step": 215
},
{
"epoch": 0.33962264150943394,
"grad_norm": 2.961550017326567,
"learning_rate": 9.119866438367262e-07,
"loss": 1.732,
"step": 216
},
{
"epoch": 0.3411949685534591,
"grad_norm": 2.900334576192079,
"learning_rate": 9.11066898901379e-07,
"loss": 1.5478,
"step": 217
},
{
"epoch": 0.34276729559748426,
"grad_norm": 3.1405096823470173,
"learning_rate": 9.101428420687757e-07,
"loss": 1.7267,
"step": 218
},
{
"epoch": 0.3443396226415094,
"grad_norm": 3.0460348289304817,
"learning_rate": 9.092144830318357e-07,
"loss": 1.9256,
"step": 219
},
{
"epoch": 0.34591194968553457,
"grad_norm": 2.960266857426143,
"learning_rate": 9.082818315286054e-07,
"loss": 1.6707,
"step": 220
},
{
"epoch": 0.3474842767295597,
"grad_norm": 2.571519228148803,
"learning_rate": 9.07344897342158e-07,
"loss": 1.6558,
"step": 221
},
{
"epoch": 0.3490566037735849,
"grad_norm": 2.758050865261492,
"learning_rate": 9.064036903004899e-07,
"loss": 1.6994,
"step": 222
},
{
"epoch": 0.35062893081761004,
"grad_norm": 2.9660590146600354,
"learning_rate": 9.054582202764174e-07,
"loss": 1.696,
"step": 223
},
{
"epoch": 0.3522012578616352,
"grad_norm": 2.752028378445374,
"learning_rate": 9.045084971874737e-07,
"loss": 1.6747,
"step": 224
},
{
"epoch": 0.35377358490566035,
"grad_norm": 2.6744551200672793,
"learning_rate": 9.035545309958046e-07,
"loss": 1.5971,
"step": 225
},
{
"epoch": 0.3553459119496855,
"grad_norm": 3.016663128171545,
"learning_rate": 9.02596331708064e-07,
"loss": 1.4696,
"step": 226
},
{
"epoch": 0.35691823899371067,
"grad_norm": 2.901009716820562,
"learning_rate": 9.016339093753092e-07,
"loss": 1.8178,
"step": 227
},
{
"epoch": 0.3584905660377358,
"grad_norm": 3.1343446993717325,
"learning_rate": 9.00667274092895e-07,
"loss": 1.5964,
"step": 228
},
{
"epoch": 0.360062893081761,
"grad_norm": 3.1424828372852613,
"learning_rate": 8.99696436000368e-07,
"loss": 1.6684,
"step": 229
},
{
"epoch": 0.36163522012578614,
"grad_norm": 2.882452137937917,
"learning_rate": 8.987214052813603e-07,
"loss": 1.6412,
"step": 230
},
{
"epoch": 0.3632075471698113,
"grad_norm": 2.9373977120677246,
"learning_rate": 8.977421921634831e-07,
"loss": 1.5271,
"step": 231
},
{
"epoch": 0.36477987421383645,
"grad_norm": 3.0198708919947164,
"learning_rate": 8.967588069182183e-07,
"loss": 1.8023,
"step": 232
},
{
"epoch": 0.3663522012578616,
"grad_norm": 2.960141817420143,
"learning_rate": 8.957712598608122e-07,
"loss": 1.3358,
"step": 233
},
{
"epoch": 0.36792452830188677,
"grad_norm": 2.8903797021829103,
"learning_rate": 8.947795613501656e-07,
"loss": 1.5905,
"step": 234
},
{
"epoch": 0.3694968553459119,
"grad_norm": 2.77391341345864,
"learning_rate": 8.937837217887272e-07,
"loss": 1.5112,
"step": 235
},
{
"epoch": 0.3710691823899371,
"grad_norm": 2.842050023433647,
"learning_rate": 8.927837516223823e-07,
"loss": 1.4979,
"step": 236
},
{
"epoch": 0.37264150943396224,
"grad_norm": 3.000728789759411,
"learning_rate": 8.91779661340345e-07,
"loss": 1.8295,
"step": 237
},
{
"epoch": 0.3742138364779874,
"grad_norm": 2.9583627313435,
"learning_rate": 8.907714614750472e-07,
"loss": 1.43,
"step": 238
},
{
"epoch": 0.3757861635220126,
"grad_norm": 2.7918669538945253,
"learning_rate": 8.897591626020284e-07,
"loss": 1.6926,
"step": 239
},
{
"epoch": 0.37735849056603776,
"grad_norm": 2.7781083647108717,
"learning_rate": 8.887427753398247e-07,
"loss": 1.5972,
"step": 240
},
{
"epoch": 0.3789308176100629,
"grad_norm": 3.0160818815953254,
"learning_rate": 8.877223103498575e-07,
"loss": 1.6652,
"step": 241
},
{
"epoch": 0.3805031446540881,
"grad_norm": 3.013147280783508,
"learning_rate": 8.866977783363218e-07,
"loss": 1.8097,
"step": 242
},
{
"epoch": 0.38207547169811323,
"grad_norm": 2.9939209838493515,
"learning_rate": 8.856691900460738e-07,
"loss": 1.6367,
"step": 243
},
{
"epoch": 0.3836477987421384,
"grad_norm": 2.733039232088572,
"learning_rate": 8.846365562685176e-07,
"loss": 1.7351,
"step": 244
},
{
"epoch": 0.38522012578616355,
"grad_norm": 3.001144973341077,
"learning_rate": 8.83599887835493e-07,
"loss": 1.572,
"step": 245
},
{
"epoch": 0.3867924528301887,
"grad_norm": 2.9948930326234597,
"learning_rate": 8.825591956211614e-07,
"loss": 1.6102,
"step": 246
},
{
"epoch": 0.38836477987421386,
"grad_norm": 2.8653519222859063,
"learning_rate": 8.815144905418916e-07,
"loss": 1.6772,
"step": 247
},
{
"epoch": 0.389937106918239,
"grad_norm": 3.1826396332295706,
"learning_rate": 8.804657835561456e-07,
"loss": 1.682,
"step": 248
},
{
"epoch": 0.3915094339622642,
"grad_norm": 2.8216807328948486,
"learning_rate": 8.794130856643633e-07,
"loss": 1.5604,
"step": 249
},
{
"epoch": 0.39308176100628933,
"grad_norm": 2.9678136425542467,
"learning_rate": 8.783564079088476e-07,
"loss": 1.6586,
"step": 250
},
{
"epoch": 0.39308176100628933,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.6584604978561401,
"eval_sat2_MCTS_chains_SFT_val_runtime": 103.8596,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 9.898,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.242,
"step": 250
},
{
"epoch": 0.3946540880503145,
"grad_norm": 2.9586113804706953,
"learning_rate": 8.772957613736482e-07,
"loss": 1.6791,
"step": 251
},
{
"epoch": 0.39622641509433965,
"grad_norm": 2.6413415294984914,
"learning_rate": 8.76231157184445e-07,
"loss": 1.4647,
"step": 252
},
{
"epoch": 0.3977987421383648,
"grad_norm": 2.9283074873964288,
"learning_rate": 8.751626065084328e-07,
"loss": 1.4933,
"step": 253
},
{
"epoch": 0.39937106918238996,
"grad_norm": 3.0602312017167628,
"learning_rate": 8.74090120554202e-07,
"loss": 1.4125,
"step": 254
},
{
"epoch": 0.4009433962264151,
"grad_norm": 2.7986235291458037,
"learning_rate": 8.73013710571623e-07,
"loss": 1.5496,
"step": 255
},
{
"epoch": 0.4025157232704403,
"grad_norm": 2.948976102323842,
"learning_rate": 8.719333878517273e-07,
"loss": 1.4968,
"step": 256
},
{
"epoch": 0.40408805031446543,
"grad_norm": 2.760683793395907,
"learning_rate": 8.708491637265887e-07,
"loss": 1.6747,
"step": 257
},
{
"epoch": 0.4056603773584906,
"grad_norm": 3.1671580545141675,
"learning_rate": 8.697610495692054e-07,
"loss": 1.658,
"step": 258
},
{
"epoch": 0.40723270440251574,
"grad_norm": 2.674259937449716,
"learning_rate": 8.686690567933801e-07,
"loss": 1.6129,
"step": 259
},
{
"epoch": 0.4088050314465409,
"grad_norm": 3.1018574143157527,
"learning_rate": 8.675731968536002e-07,
"loss": 1.3611,
"step": 260
},
{
"epoch": 0.41037735849056606,
"grad_norm": 3.0423161539909356,
"learning_rate": 8.664734812449179e-07,
"loss": 1.8075,
"step": 261
},
{
"epoch": 0.4119496855345912,
"grad_norm": 2.9517263449475606,
"learning_rate": 8.653699215028296e-07,
"loss": 1.5475,
"step": 262
},
{
"epoch": 0.41352201257861637,
"grad_norm": 3.0424454760034725,
"learning_rate": 8.642625292031549e-07,
"loss": 1.7245,
"step": 263
},
{
"epoch": 0.41509433962264153,
"grad_norm": 2.9120793531370723,
"learning_rate": 8.631513159619149e-07,
"loss": 1.4242,
"step": 264
},
{
"epoch": 0.4166666666666667,
"grad_norm": 3.225432662896855,
"learning_rate": 8.620362934352108e-07,
"loss": 1.8377,
"step": 265
},
{
"epoch": 0.41823899371069184,
"grad_norm": 3.0087973641995616,
"learning_rate": 8.60917473319101e-07,
"loss": 1.4507,
"step": 266
},
{
"epoch": 0.419811320754717,
"grad_norm": 3.7250085221059495,
"learning_rate": 8.597948673494794e-07,
"loss": 1.5964,
"step": 267
},
{
"epoch": 0.42138364779874216,
"grad_norm": 3.0757842367996306,
"learning_rate": 8.586684873019512e-07,
"loss": 1.574,
"step": 268
},
{
"epoch": 0.4229559748427673,
"grad_norm": 3.041887462503267,
"learning_rate": 8.575383449917102e-07,
"loss": 1.8146,
"step": 269
},
{
"epoch": 0.42452830188679247,
"grad_norm": 4.268588227080502,
"learning_rate": 8.564044522734146e-07,
"loss": 1.464,
"step": 270
},
{
"epoch": 0.4261006289308176,
"grad_norm": 3.029271230795498,
"learning_rate": 8.552668210410623e-07,
"loss": 1.598,
"step": 271
},
{
"epoch": 0.4276729559748428,
"grad_norm": 2.973518798384465,
"learning_rate": 8.541254632278665e-07,
"loss": 1.5129,
"step": 272
},
{
"epoch": 0.42924528301886794,
"grad_norm": 3.3044387064367,
"learning_rate": 8.529803908061308e-07,
"loss": 1.4694,
"step": 273
},
{
"epoch": 0.4308176100628931,
"grad_norm": 2.9571919167741614,
"learning_rate": 8.51831615787123e-07,
"loss": 1.6167,
"step": 274
},
{
"epoch": 0.43238993710691825,
"grad_norm": 2.7276928178815036,
"learning_rate": 8.506791502209496e-07,
"loss": 1.6561,
"step": 275
},
{
"epoch": 0.4339622641509434,
"grad_norm": 2.8324926682096043,
"learning_rate": 8.495230061964287e-07,
"loss": 1.6792,
"step": 276
},
{
"epoch": 0.43553459119496857,
"grad_norm": 3.0015707635143327,
"learning_rate": 8.483631958409643e-07,
"loss": 1.5538,
"step": 277
},
{
"epoch": 0.4371069182389937,
"grad_norm": 3.3059643260815914,
"learning_rate": 8.471997313204182e-07,
"loss": 1.6007,
"step": 278
},
{
"epoch": 0.4386792452830189,
"grad_norm": 3.014759127631608,
"learning_rate": 8.460326248389824e-07,
"loss": 1.6257,
"step": 279
},
{
"epoch": 0.44025157232704404,
"grad_norm": 2.7778802261201343,
"learning_rate": 8.448618886390521e-07,
"loss": 1.6052,
"step": 280
},
{
"epoch": 0.4418238993710692,
"grad_norm": 3.0502038930705626,
"learning_rate": 8.436875350010957e-07,
"loss": 1.7498,
"step": 281
},
{
"epoch": 0.44339622641509435,
"grad_norm": 3.827056849787235,
"learning_rate": 8.425095762435273e-07,
"loss": 1.5904,
"step": 282
},
{
"epoch": 0.4449685534591195,
"grad_norm": 2.9342766381705747,
"learning_rate": 8.413280247225768e-07,
"loss": 1.5282,
"step": 283
},
{
"epoch": 0.44654088050314467,
"grad_norm": 4.64307318858484,
"learning_rate": 8.401428928321607e-07,
"loss": 1.774,
"step": 284
},
{
"epoch": 0.4481132075471698,
"grad_norm": 3.0422727524827144,
"learning_rate": 8.389541930037516e-07,
"loss": 1.5489,
"step": 285
},
{
"epoch": 0.449685534591195,
"grad_norm": 2.989589728066482,
"learning_rate": 8.377619377062482e-07,
"loss": 1.621,
"step": 286
},
{
"epoch": 0.45125786163522014,
"grad_norm": 3.174962884017163,
"learning_rate": 8.365661394458445e-07,
"loss": 1.5202,
"step": 287
},
{
"epoch": 0.4528301886792453,
"grad_norm": 2.809772090337394,
"learning_rate": 8.353668107658983e-07,
"loss": 1.9324,
"step": 288
},
{
"epoch": 0.45440251572327045,
"grad_norm": 3.0560314015959897,
"learning_rate": 8.341639642468001e-07,
"loss": 1.5088,
"step": 289
},
{
"epoch": 0.4559748427672956,
"grad_norm": 2.959872733329314,
"learning_rate": 8.329576125058405e-07,
"loss": 1.6243,
"step": 290
},
{
"epoch": 0.45754716981132076,
"grad_norm": 2.9842620788957737,
"learning_rate": 8.317477681970786e-07,
"loss": 1.6596,
"step": 291
},
{
"epoch": 0.4591194968553459,
"grad_norm": 3.217709997180059,
"learning_rate": 8.305344440112087e-07,
"loss": 1.5782,
"step": 292
},
{
"epoch": 0.4606918238993711,
"grad_norm": 2.8625881670603746,
"learning_rate": 8.293176526754273e-07,
"loss": 1.732,
"step": 293
},
{
"epoch": 0.46226415094339623,
"grad_norm": 3.8491482316260655,
"learning_rate": 8.280974069532998e-07,
"loss": 1.6535,
"step": 294
},
{
"epoch": 0.4638364779874214,
"grad_norm": 3.1350366958145495,
"learning_rate": 8.268737196446263e-07,
"loss": 1.6854,
"step": 295
},
{
"epoch": 0.46540880503144655,
"grad_norm": 2.9549762795233536,
"learning_rate": 8.256466035853075e-07,
"loss": 1.5536,
"step": 296
},
{
"epoch": 0.4669811320754717,
"grad_norm": 3.036679274641372,
"learning_rate": 8.244160716472108e-07,
"loss": 1.5475,
"step": 297
},
{
"epoch": 0.46855345911949686,
"grad_norm": 2.8915060348851873,
"learning_rate": 8.231821367380334e-07,
"loss": 1.6829,
"step": 298
},
{
"epoch": 0.470125786163522,
"grad_norm": 2.7293513305001116,
"learning_rate": 8.219448118011687e-07,
"loss": 1.6559,
"step": 299
},
{
"epoch": 0.4716981132075472,
"grad_norm": 2.9948451050860854,
"learning_rate": 8.207041098155699e-07,
"loss": 1.5436,
"step": 300
},
{
"epoch": 0.47327044025157233,
"grad_norm": 3.2031506579963946,
"learning_rate": 8.194600437956139e-07,
"loss": 1.6503,
"step": 301
},
{
"epoch": 0.4748427672955975,
"grad_norm": 2.807136246279641,
"learning_rate": 8.18212626790964e-07,
"loss": 2.0248,
"step": 302
},
{
"epoch": 0.47641509433962265,
"grad_norm": 2.9001657979865194,
"learning_rate": 8.16961871886435e-07,
"loss": 1.6034,
"step": 303
},
{
"epoch": 0.4779874213836478,
"grad_norm": 3.0792715017671446,
"learning_rate": 8.157077922018536e-07,
"loss": 1.7428,
"step": 304
},
{
"epoch": 0.47955974842767296,
"grad_norm": 2.9975961967554663,
"learning_rate": 8.144504008919222e-07,
"loss": 1.689,
"step": 305
},
{
"epoch": 0.4811320754716981,
"grad_norm": 3.095878589609334,
"learning_rate": 8.131897111460809e-07,
"loss": 1.7788,
"step": 306
},
{
"epoch": 0.4827044025157233,
"grad_norm": 3.1315862097170384,
"learning_rate": 8.119257361883686e-07,
"loss": 1.6655,
"step": 307
},
{
"epoch": 0.48427672955974843,
"grad_norm": 3.0428927144491484,
"learning_rate": 8.106584892772843e-07,
"loss": 1.6418,
"step": 308
},
{
"epoch": 0.4858490566037736,
"grad_norm": 3.8550756798387593,
"learning_rate": 8.093879837056485e-07,
"loss": 1.5158,
"step": 309
},
{
"epoch": 0.48742138364779874,
"grad_norm": 2.8698562798909197,
"learning_rate": 8.081142328004636e-07,
"loss": 1.7003,
"step": 310
},
{
"epoch": 0.4889937106918239,
"grad_norm": 2.8474789377828755,
"learning_rate": 8.068372499227736e-07,
"loss": 1.5878,
"step": 311
},
{
"epoch": 0.49056603773584906,
"grad_norm": 2.944852309492029,
"learning_rate": 8.05557048467525e-07,
"loss": 1.5342,
"step": 312
},
{
"epoch": 0.4921383647798742,
"grad_norm": 2.6824854754227134,
"learning_rate": 8.04273641863425e-07,
"loss": 1.5598,
"step": 313
},
{
"epoch": 0.4937106918238994,
"grad_norm": 3.1516894861498486,
"learning_rate": 8.029870435728017e-07,
"loss": 1.5505,
"step": 314
},
{
"epoch": 0.49528301886792453,
"grad_norm": 2.801213556135511,
"learning_rate": 8.016972670914623e-07,
"loss": 1.7167,
"step": 315
},
{
"epoch": 0.4968553459119497,
"grad_norm": 2.890665548023088,
"learning_rate": 8.004043259485518e-07,
"loss": 1.8476,
"step": 316
},
{
"epoch": 0.49842767295597484,
"grad_norm": 3.0315629963242556,
"learning_rate": 7.991082337064109e-07,
"loss": 1.6837,
"step": 317
},
{
"epoch": 0.5,
"grad_norm": 2.829540637779091,
"learning_rate": 7.978090039604341e-07,
"loss": 1.59,
"step": 318
},
{
"epoch": 0.5015723270440252,
"grad_norm": 2.8207683491833797,
"learning_rate": 7.965066503389264e-07,
"loss": 1.5492,
"step": 319
},
{
"epoch": 0.5031446540880503,
"grad_norm": 3.157170747378039,
"learning_rate": 7.952011865029613e-07,
"loss": 1.6466,
"step": 320
},
{
"epoch": 0.5047169811320755,
"grad_norm": 2.9337171526662345,
"learning_rate": 7.938926261462365e-07,
"loss": 1.5796,
"step": 321
},
{
"epoch": 0.5062893081761006,
"grad_norm": 2.9017291227111137,
"learning_rate": 7.925809829949311e-07,
"loss": 1.5726,
"step": 322
},
{
"epoch": 0.5078616352201258,
"grad_norm": 3.0869804482037653,
"learning_rate": 7.91266270807561e-07,
"loss": 1.5899,
"step": 323
},
{
"epoch": 0.5094339622641509,
"grad_norm": 2.7999552261033402,
"learning_rate": 7.89948503374835e-07,
"loss": 1.4062,
"step": 324
},
{
"epoch": 0.5110062893081762,
"grad_norm": 3.285221317869385,
"learning_rate": 7.886276945195097e-07,
"loss": 1.5396,
"step": 325
},
{
"epoch": 0.5125786163522013,
"grad_norm": 3.0268929248373984,
"learning_rate": 7.873038580962453e-07,
"loss": 1.5924,
"step": 326
},
{
"epoch": 0.5141509433962265,
"grad_norm": 2.913072841616009,
"learning_rate": 7.859770079914592e-07,
"loss": 2.1225,
"step": 327
},
{
"epoch": 0.5157232704402516,
"grad_norm": 2.8622294907562975,
"learning_rate": 7.846471581231813e-07,
"loss": 1.9179,
"step": 328
},
{
"epoch": 0.5172955974842768,
"grad_norm": 3.3746719664000255,
"learning_rate": 7.833143224409075e-07,
"loss": 1.5467,
"step": 329
},
{
"epoch": 0.5188679245283019,
"grad_norm": 2.755221725795704,
"learning_rate": 7.819785149254532e-07,
"loss": 1.777,
"step": 330
},
{
"epoch": 0.5204402515723271,
"grad_norm": 2.9788828173349278,
"learning_rate": 7.806397495888073e-07,
"loss": 1.7245,
"step": 331
},
{
"epoch": 0.5220125786163522,
"grad_norm": 3.123230924072451,
"learning_rate": 7.792980404739847e-07,
"loss": 1.4695,
"step": 332
},
{
"epoch": 0.5235849056603774,
"grad_norm": 2.951880279336297,
"learning_rate": 7.77953401654879e-07,
"loss": 1.6968,
"step": 333
},
{
"epoch": 0.5251572327044025,
"grad_norm": 3.0894908078161487,
"learning_rate": 7.766058472361153e-07,
"loss": 1.546,
"step": 334
},
{
"epoch": 0.5267295597484277,
"grad_norm": 3.0707624349548337,
"learning_rate": 7.752553913529018e-07,
"loss": 1.5085,
"step": 335
},
{
"epoch": 0.5283018867924528,
"grad_norm": 2.9477557662065506,
"learning_rate": 7.739020481708814e-07,
"loss": 1.6163,
"step": 336
},
{
"epoch": 0.529874213836478,
"grad_norm": 2.914337832721276,
"learning_rate": 7.725458318859841e-07,
"loss": 1.7411,
"step": 337
},
{
"epoch": 0.5314465408805031,
"grad_norm": 2.7398214376825534,
"learning_rate": 7.711867567242766e-07,
"loss": 1.7266,
"step": 338
},
{
"epoch": 0.5330188679245284,
"grad_norm": 2.9634022885907685,
"learning_rate": 7.698248369418146e-07,
"loss": 1.6571,
"step": 339
},
{
"epoch": 0.5345911949685535,
"grad_norm": 2.896055421421961,
"learning_rate": 7.684600868244919e-07,
"loss": 1.5207,
"step": 340
},
{
"epoch": 0.5361635220125787,
"grad_norm": 3.069581678589251,
"learning_rate": 7.670925206878916e-07,
"loss": 2.0008,
"step": 341
},
{
"epoch": 0.5377358490566038,
"grad_norm": 2.822860925833609,
"learning_rate": 7.657221528771351e-07,
"loss": 1.5865,
"step": 342
},
{
"epoch": 0.539308176100629,
"grad_norm": 2.9532018181594655,
"learning_rate": 7.643489977667325e-07,
"loss": 1.652,
"step": 343
},
{
"epoch": 0.5408805031446541,
"grad_norm": 3.1536405676814563,
"learning_rate": 7.629730697604313e-07,
"loss": 1.6205,
"step": 344
},
{
"epoch": 0.5424528301886793,
"grad_norm": 2.891482043321394,
"learning_rate": 7.61594383291065e-07,
"loss": 1.5776,
"step": 345
},
{
"epoch": 0.5440251572327044,
"grad_norm": 3.003689771378288,
"learning_rate": 7.602129528204022e-07,
"loss": 1.5402,
"step": 346
},
{
"epoch": 0.5455974842767296,
"grad_norm": 3.067089408704145,
"learning_rate": 7.588287928389951e-07,
"loss": 1.6742,
"step": 347
},
{
"epoch": 0.5471698113207547,
"grad_norm": 2.7753239765920488,
"learning_rate": 7.574419178660268e-07,
"loss": 1.5732,
"step": 348
},
{
"epoch": 0.5487421383647799,
"grad_norm": 2.856804882953154,
"learning_rate": 7.560523424491594e-07,
"loss": 1.6788,
"step": 349
},
{
"epoch": 0.550314465408805,
"grad_norm": 2.995113850576118,
"learning_rate": 7.546600811643816e-07,
"loss": 1.6173,
"step": 350
},
{
"epoch": 0.5518867924528302,
"grad_norm": 2.834168921469406,
"learning_rate": 7.532651486158554e-07,
"loss": 1.5904,
"step": 351
},
{
"epoch": 0.5534591194968553,
"grad_norm": 2.899270630246711,
"learning_rate": 7.518675594357632e-07,
"loss": 1.6489,
"step": 352
},
{
"epoch": 0.5550314465408805,
"grad_norm": 3.5659878099391795,
"learning_rate": 7.504673282841543e-07,
"loss": 1.7493,
"step": 353
},
{
"epoch": 0.5566037735849056,
"grad_norm": 3.01362773663695,
"learning_rate": 7.490644698487908e-07,
"loss": 1.6529,
"step": 354
},
{
"epoch": 0.5581761006289309,
"grad_norm": 2.829582474483786,
"learning_rate": 7.476589988449938e-07,
"loss": 1.739,
"step": 355
},
{
"epoch": 0.559748427672956,
"grad_norm": 2.8637416112700063,
"learning_rate": 7.462509300154891e-07,
"loss": 1.6167,
"step": 356
},
{
"epoch": 0.5613207547169812,
"grad_norm": 3.320226806079709,
"learning_rate": 7.448402781302525e-07,
"loss": 1.7115,
"step": 357
},
{
"epoch": 0.5628930817610063,
"grad_norm": 3.2848631934338703,
"learning_rate": 7.434270579863548e-07,
"loss": 1.7411,
"step": 358
},
{
"epoch": 0.5644654088050315,
"grad_norm": 2.8557557567154532,
"learning_rate": 7.420112844078065e-07,
"loss": 1.6507,
"step": 359
},
{
"epoch": 0.5660377358490566,
"grad_norm": 2.813687805019359,
"learning_rate": 7.405929722454025e-07,
"loss": 1.8697,
"step": 360
},
{
"epoch": 0.5676100628930818,
"grad_norm": 3.1367104120043017,
"learning_rate": 7.391721363765663e-07,
"loss": 1.546,
"step": 361
},
{
"epoch": 0.5691823899371069,
"grad_norm": 2.916992253603825,
"learning_rate": 7.377487917051938e-07,
"loss": 1.6718,
"step": 362
},
{
"epoch": 0.5707547169811321,
"grad_norm": 2.8435389119301773,
"learning_rate": 7.363229531614972e-07,
"loss": 1.7208,
"step": 363
},
{
"epoch": 0.5723270440251572,
"grad_norm": 3.128997657710974,
"learning_rate": 7.348946357018479e-07,
"loss": 1.5151,
"step": 364
},
{
"epoch": 0.5738993710691824,
"grad_norm": 3.140464711506973,
"learning_rate": 7.334638543086203e-07,
"loss": 1.6267,
"step": 365
},
{
"epoch": 0.5754716981132075,
"grad_norm": 2.8634668901359377,
"learning_rate": 7.320306239900342e-07,
"loss": 1.6124,
"step": 366
},
{
"epoch": 0.5770440251572327,
"grad_norm": 3.1226311552428294,
"learning_rate": 7.305949597799976e-07,
"loss": 1.5731,
"step": 367
},
{
"epoch": 0.5786163522012578,
"grad_norm": 2.9737828234739014,
"learning_rate": 7.291568767379483e-07,
"loss": 1.7076,
"step": 368
},
{
"epoch": 0.5801886792452831,
"grad_norm": 3.5758350484358017,
"learning_rate": 7.277163899486974e-07,
"loss": 1.6001,
"step": 369
},
{
"epoch": 0.5817610062893082,
"grad_norm": 3.179740779225836,
"learning_rate": 7.262735145222695e-07,
"loss": 1.5627,
"step": 370
},
{
"epoch": 0.5833333333333334,
"grad_norm": 2.8903320572773334,
"learning_rate": 7.24828265593745e-07,
"loss": 1.6912,
"step": 371
},
{
"epoch": 0.5849056603773585,
"grad_norm": 2.8931419439995523,
"learning_rate": 7.233806583231011e-07,
"loss": 1.6214,
"step": 372
},
{
"epoch": 0.5864779874213837,
"grad_norm": 2.919579831018194,
"learning_rate": 7.219307078950535e-07,
"loss": 1.5862,
"step": 373
},
{
"epoch": 0.5880503144654088,
"grad_norm": 3.0045761782474703,
"learning_rate": 7.204784295188958e-07,
"loss": 1.8617,
"step": 374
},
{
"epoch": 0.589622641509434,
"grad_norm": 3.1244686625095484,
"learning_rate": 7.190238384283412e-07,
"loss": 1.9887,
"step": 375
},
{
"epoch": 0.5911949685534591,
"grad_norm": 3.2453253002987106,
"learning_rate": 7.175669498813616e-07,
"loss": 1.592,
"step": 376
},
{
"epoch": 0.5927672955974843,
"grad_norm": 3.0877719472322367,
"learning_rate": 7.161077791600287e-07,
"loss": 1.4491,
"step": 377
},
{
"epoch": 0.5943396226415094,
"grad_norm": 3.141247199862426,
"learning_rate": 7.14646341570353e-07,
"loss": 1.6298,
"step": 378
},
{
"epoch": 0.5959119496855346,
"grad_norm": 3.0631856404614117,
"learning_rate": 7.131826524421229e-07,
"loss": 1.6426,
"step": 379
},
{
"epoch": 0.5974842767295597,
"grad_norm": 3.270854655641661,
"learning_rate": 7.117167271287452e-07,
"loss": 1.5782,
"step": 380
},
{
"epoch": 0.5990566037735849,
"grad_norm": 2.797638399208883,
"learning_rate": 7.102485810070823e-07,
"loss": 1.832,
"step": 381
},
{
"epoch": 0.60062893081761,
"grad_norm": 3.0354014098510484,
"learning_rate": 7.087782294772926e-07,
"loss": 1.4384,
"step": 382
},
{
"epoch": 0.6022012578616353,
"grad_norm": 2.9846921337198933,
"learning_rate": 7.07305687962668e-07,
"loss": 1.7531,
"step": 383
},
{
"epoch": 0.6037735849056604,
"grad_norm": 3.106084720294311,
"learning_rate": 7.05830971909472e-07,
"loss": 1.4538,
"step": 384
},
{
"epoch": 0.6053459119496856,
"grad_norm": 2.871555190950071,
"learning_rate": 7.043540967867781e-07,
"loss": 1.6914,
"step": 385
},
{
"epoch": 0.6069182389937107,
"grad_norm": 2.840880923886873,
"learning_rate": 7.028750780863078e-07,
"loss": 1.6388,
"step": 386
},
{
"epoch": 0.6084905660377359,
"grad_norm": 3.1454444170824316,
"learning_rate": 7.013939313222669e-07,
"loss": 1.4082,
"step": 387
},
{
"epoch": 0.610062893081761,
"grad_norm": 3.1511066609699503,
"learning_rate": 6.999106720311845e-07,
"loss": 1.5937,
"step": 388
},
{
"epoch": 0.6116352201257862,
"grad_norm": 2.9608662423607512,
"learning_rate": 6.984253157717485e-07,
"loss": 1.6905,
"step": 389
},
{
"epoch": 0.6132075471698113,
"grad_norm": 2.971295349160843,
"learning_rate": 6.969378781246436e-07,
"loss": 1.6028,
"step": 390
},
{
"epoch": 0.6147798742138365,
"grad_norm": 2.887441093132589,
"learning_rate": 6.954483746923864e-07,
"loss": 1.5795,
"step": 391
},
{
"epoch": 0.6163522012578616,
"grad_norm": 2.765921351694156,
"learning_rate": 6.939568210991632e-07,
"loss": 1.77,
"step": 392
},
{
"epoch": 0.6179245283018868,
"grad_norm": 2.8917529035479514,
"learning_rate": 6.924632329906656e-07,
"loss": 1.4817,
"step": 393
},
{
"epoch": 0.6194968553459119,
"grad_norm": 3.0703990060541497,
"learning_rate": 6.909676260339259e-07,
"loss": 1.5371,
"step": 394
},
{
"epoch": 0.6210691823899371,
"grad_norm": 3.140318941056899,
"learning_rate": 6.894700159171534e-07,
"loss": 1.5016,
"step": 395
},
{
"epoch": 0.6226415094339622,
"grad_norm": 3.1648818923914925,
"learning_rate": 6.879704183495695e-07,
"loss": 1.5751,
"step": 396
},
{
"epoch": 0.6242138364779874,
"grad_norm": 3.0902741266005793,
"learning_rate": 6.864688490612433e-07,
"loss": 1.4564,
"step": 397
},
{
"epoch": 0.6257861635220126,
"grad_norm": 3.571095235370477,
"learning_rate": 6.84965323802926e-07,
"loss": 1.4466,
"step": 398
},
{
"epoch": 0.6273584905660378,
"grad_norm": 3.246655504553822,
"learning_rate": 6.834598583458861e-07,
"loss": 1.6618,
"step": 399
},
{
"epoch": 0.6289308176100629,
"grad_norm": 3.0343685055658307,
"learning_rate": 6.819524684817438e-07,
"loss": 1.5849,
"step": 400
},
{
"epoch": 0.6305031446540881,
"grad_norm": 3.4147960516478557,
"learning_rate": 6.804431700223055e-07,
"loss": 1.5878,
"step": 401
},
{
"epoch": 0.6320754716981132,
"grad_norm": 2.9908883955283407,
"learning_rate": 6.789319787993979e-07,
"loss": 1.6317,
"step": 402
},
{
"epoch": 0.6336477987421384,
"grad_norm": 3.196286199019289,
"learning_rate": 6.774189106647021e-07,
"loss": 1.7304,
"step": 403
},
{
"epoch": 0.6352201257861635,
"grad_norm": 3.079020576367776,
"learning_rate": 6.759039814895862e-07,
"loss": 1.8023,
"step": 404
},
{
"epoch": 0.6367924528301887,
"grad_norm": 2.8088266541190694,
"learning_rate": 6.743872071649411e-07,
"loss": 1.606,
"step": 405
},
{
"epoch": 0.6383647798742138,
"grad_norm": 2.878246806108592,
"learning_rate": 6.728686036010114e-07,
"loss": 1.3972,
"step": 406
},
{
"epoch": 0.639937106918239,
"grad_norm": 2.681608870043916,
"learning_rate": 6.713481867272299e-07,
"loss": 1.5793,
"step": 407
},
{
"epoch": 0.6415094339622641,
"grad_norm": 2.9759341078654717,
"learning_rate": 6.698259724920502e-07,
"loss": 1.8096,
"step": 408
},
{
"epoch": 0.6430817610062893,
"grad_norm": 2.930107243897169,
"learning_rate": 6.683019768627794e-07,
"loss": 1.9373,
"step": 409
},
{
"epoch": 0.6446540880503144,
"grad_norm": 3.3019087122259614,
"learning_rate": 6.667762158254103e-07,
"loss": 1.6784,
"step": 410
},
{
"epoch": 0.6462264150943396,
"grad_norm": 3.046122411688699,
"learning_rate": 6.652487053844544e-07,
"loss": 1.7923,
"step": 411
},
{
"epoch": 0.6477987421383647,
"grad_norm": 3.226966880220217,
"learning_rate": 6.637194615627732e-07,
"loss": 1.767,
"step": 412
},
{
"epoch": 0.64937106918239,
"grad_norm": 3.5839568234533625,
"learning_rate": 6.621885004014111e-07,
"loss": 1.7144,
"step": 413
},
{
"epoch": 0.6509433962264151,
"grad_norm": 3.2662623680583867,
"learning_rate": 6.606558379594261e-07,
"loss": 1.6909,
"step": 414
},
{
"epoch": 0.6525157232704403,
"grad_norm": 3.293320476598104,
"learning_rate": 6.59121490313722e-07,
"loss": 1.5467,
"step": 415
},
{
"epoch": 0.6540880503144654,
"grad_norm": 2.8253698384570622,
"learning_rate": 6.575854735588794e-07,
"loss": 1.612,
"step": 416
},
{
"epoch": 0.6556603773584906,
"grad_norm": 3.3489939495652914,
"learning_rate": 6.560478038069872e-07,
"loss": 1.5001,
"step": 417
},
{
"epoch": 0.6572327044025157,
"grad_norm": 3.0875383018689373,
"learning_rate": 6.545084971874736e-07,
"loss": 1.6029,
"step": 418
},
{
"epoch": 0.6588050314465409,
"grad_norm": 2.7373051620760394,
"learning_rate": 6.529675698469369e-07,
"loss": 1.75,
"step": 419
},
{
"epoch": 0.660377358490566,
"grad_norm": 3.0779640368460743,
"learning_rate": 6.514250379489753e-07,
"loss": 1.5913,
"step": 420
},
{
"epoch": 0.6619496855345912,
"grad_norm": 3.337822319490678,
"learning_rate": 6.498809176740189e-07,
"loss": 1.7076,
"step": 421
},
{
"epoch": 0.6635220125786163,
"grad_norm": 3.0702637505567476,
"learning_rate": 6.483352252191584e-07,
"loss": 1.5407,
"step": 422
},
{
"epoch": 0.6650943396226415,
"grad_norm": 3.157672741863842,
"learning_rate": 6.467879767979764e-07,
"loss": 1.5754,
"step": 423
},
{
"epoch": 0.6666666666666666,
"grad_norm": 4.29354289767503,
"learning_rate": 6.452391886403766e-07,
"loss": 1.4986,
"step": 424
},
{
"epoch": 0.6682389937106918,
"grad_norm": 2.9437863849920545,
"learning_rate": 6.436888769924141e-07,
"loss": 1.5725,
"step": 425
},
{
"epoch": 0.6698113207547169,
"grad_norm": 2.784911047159229,
"learning_rate": 6.421370581161243e-07,
"loss": 1.4859,
"step": 426
},
{
"epoch": 0.6713836477987422,
"grad_norm": 3.2167104786038316,
"learning_rate": 6.405837482893528e-07,
"loss": 1.7109,
"step": 427
},
{
"epoch": 0.6729559748427673,
"grad_norm": 2.9822678864067,
"learning_rate": 6.390289638055851e-07,
"loss": 1.5598,
"step": 428
},
{
"epoch": 0.6745283018867925,
"grad_norm": 2.8764956617307225,
"learning_rate": 6.374727209737742e-07,
"loss": 1.7887,
"step": 429
},
{
"epoch": 0.6761006289308176,
"grad_norm": 2.956840694387534,
"learning_rate": 6.359150361181714e-07,
"loss": 1.5556,
"step": 430
},
{
"epoch": 0.6776729559748428,
"grad_norm": 3.0343380447523423,
"learning_rate": 6.343559255781537e-07,
"loss": 1.6393,
"step": 431
},
{
"epoch": 0.6792452830188679,
"grad_norm": 2.7767252718165496,
"learning_rate": 6.327954057080526e-07,
"loss": 1.6502,
"step": 432
},
{
"epoch": 0.6808176100628931,
"grad_norm": 3.1882701427281788,
"learning_rate": 6.312334928769833e-07,
"loss": 1.5724,
"step": 433
},
{
"epoch": 0.6823899371069182,
"grad_norm": 3.097455620552655,
"learning_rate": 6.296702034686725e-07,
"loss": 1.6448,
"step": 434
},
{
"epoch": 0.6839622641509434,
"grad_norm": 2.967616326765885,
"learning_rate": 6.281055538812861e-07,
"loss": 1.5575,
"step": 435
},
{
"epoch": 0.6855345911949685,
"grad_norm": 2.9903290692872218,
"learning_rate": 6.265395605272581e-07,
"loss": 1.5267,
"step": 436
},
{
"epoch": 0.6871069182389937,
"grad_norm": 2.976142177695413,
"learning_rate": 6.249722398331176e-07,
"loss": 1.6737,
"step": 437
},
{
"epoch": 0.6886792452830188,
"grad_norm": 2.9630944851380123,
"learning_rate": 6.234036082393171e-07,
"loss": 1.6495,
"step": 438
},
{
"epoch": 0.690251572327044,
"grad_norm": 2.7088231883537817,
"learning_rate": 6.218336822000597e-07,
"loss": 1.4323,
"step": 439
},
{
"epoch": 0.6918238993710691,
"grad_norm": 2.937020551603309,
"learning_rate": 6.202624781831268e-07,
"loss": 1.5755,
"step": 440
},
{
"epoch": 0.6933962264150944,
"grad_norm": 3.0176348553682786,
"learning_rate": 6.18690012669705e-07,
"loss": 1.6015,
"step": 441
},
{
"epoch": 0.6949685534591195,
"grad_norm": 3.3315984504157945,
"learning_rate": 6.171163021542133e-07,
"loss": 1.6381,
"step": 442
},
{
"epoch": 0.6965408805031447,
"grad_norm": 3.1688246150461676,
"learning_rate": 6.155413631441306e-07,
"loss": 1.607,
"step": 443
},
{
"epoch": 0.6981132075471698,
"grad_norm": 3.09165082620452,
"learning_rate": 6.139652121598218e-07,
"loss": 1.7871,
"step": 444
},
{
"epoch": 0.699685534591195,
"grad_norm": 3.297756722716993,
"learning_rate": 6.123878657343647e-07,
"loss": 1.4711,
"step": 445
},
{
"epoch": 0.7012578616352201,
"grad_norm": 3.0883302587006,
"learning_rate": 6.108093404133772e-07,
"loss": 1.6702,
"step": 446
},
{
"epoch": 0.7028301886792453,
"grad_norm": 3.076701869873011,
"learning_rate": 6.092296527548426e-07,
"loss": 1.4897,
"step": 447
},
{
"epoch": 0.7044025157232704,
"grad_norm": 2.84568602644286,
"learning_rate": 6.076488193289374e-07,
"loss": 1.7678,
"step": 448
},
{
"epoch": 0.7059748427672956,
"grad_norm": 5.139935927987656,
"learning_rate": 6.060668567178559e-07,
"loss": 1.5979,
"step": 449
},
{
"epoch": 0.7075471698113207,
"grad_norm": 3.1264797262686477,
"learning_rate": 6.044837815156376e-07,
"loss": 1.5206,
"step": 450
},
{
"epoch": 0.7091194968553459,
"grad_norm": 3.3534156320240522,
"learning_rate": 6.028996103279917e-07,
"loss": 1.5362,
"step": 451
},
{
"epoch": 0.710691823899371,
"grad_norm": 3.165943529869633,
"learning_rate": 6.013143597721251e-07,
"loss": 1.4814,
"step": 452
},
{
"epoch": 0.7122641509433962,
"grad_norm": 3.3330090224325897,
"learning_rate": 5.997280464765653e-07,
"loss": 1.4184,
"step": 453
},
{
"epoch": 0.7138364779874213,
"grad_norm": 3.054343208237251,
"learning_rate": 5.981406870809888e-07,
"loss": 1.5341,
"step": 454
},
{
"epoch": 0.7154088050314465,
"grad_norm": 2.9985777856359586,
"learning_rate": 5.96552298236044e-07,
"loss": 1.5153,
"step": 455
},
{
"epoch": 0.7169811320754716,
"grad_norm": 3.0416404334656786,
"learning_rate": 5.949628966031784e-07,
"loss": 1.5342,
"step": 456
},
{
"epoch": 0.7185534591194969,
"grad_norm": 3.116507921003848,
"learning_rate": 5.933724988544632e-07,
"loss": 1.4448,
"step": 457
},
{
"epoch": 0.720125786163522,
"grad_norm": 3.0997481538278526,
"learning_rate": 5.91781121672418e-07,
"loss": 1.5867,
"step": 458
},
{
"epoch": 0.7216981132075472,
"grad_norm": 3.1876523216248165,
"learning_rate": 5.901887817498367e-07,
"loss": 1.5432,
"step": 459
},
{
"epoch": 0.7232704402515723,
"grad_norm": 3.1650167832638902,
"learning_rate": 5.885954957896115e-07,
"loss": 1.5536,
"step": 460
},
{
"epoch": 0.7248427672955975,
"grad_norm": 3.195279291197498,
"learning_rate": 5.870012805045579e-07,
"loss": 1.4512,
"step": 461
},
{
"epoch": 0.7264150943396226,
"grad_norm": 2.934331743213936,
"learning_rate": 5.854061526172401e-07,
"loss": 1.5895,
"step": 462
},
{
"epoch": 0.7279874213836478,
"grad_norm": 2.910990978962794,
"learning_rate": 5.83810128859795e-07,
"loss": 1.8213,
"step": 463
},
{
"epoch": 0.7295597484276729,
"grad_norm": 2.9419669708040477,
"learning_rate": 5.822132259737564e-07,
"loss": 1.5848,
"step": 464
},
{
"epoch": 0.7311320754716981,
"grad_norm": 3.417719384818703,
"learning_rate": 5.806154607098799e-07,
"loss": 1.8473,
"step": 465
},
{
"epoch": 0.7327044025157232,
"grad_norm": 2.9986530708779218,
"learning_rate": 5.790168498279671e-07,
"loss": 1.6022,
"step": 466
},
{
"epoch": 0.7342767295597484,
"grad_norm": 3.096387566914234,
"learning_rate": 5.774174100966899e-07,
"loss": 1.5598,
"step": 467
},
{
"epoch": 0.7358490566037735,
"grad_norm": 3.1688341508266817,
"learning_rate": 5.75817158293414e-07,
"loss": 1.5955,
"step": 468
},
{
"epoch": 0.7374213836477987,
"grad_norm": 3.589983459266237,
"learning_rate": 5.742161112040236e-07,
"loss": 1.4531,
"step": 469
},
{
"epoch": 0.7389937106918238,
"grad_norm": 3.0454871929245755,
"learning_rate": 5.726142856227452e-07,
"loss": 1.6809,
"step": 470
},
{
"epoch": 0.7405660377358491,
"grad_norm": 3.187136777050043,
"learning_rate": 5.710116983519711e-07,
"loss": 1.5083,
"step": 471
},
{
"epoch": 0.7421383647798742,
"grad_norm": 3.4456677764552097,
"learning_rate": 5.694083662020834e-07,
"loss": 1.5031,
"step": 472
},
{
"epoch": 0.7437106918238994,
"grad_norm": 3.0711812431855328,
"learning_rate": 5.678043059912776e-07,
"loss": 1.4553,
"step": 473
},
{
"epoch": 0.7452830188679245,
"grad_norm": 3.3203836858651448,
"learning_rate": 5.661995345453866e-07,
"loss": 1.5409,
"step": 474
},
{
"epoch": 0.7468553459119497,
"grad_norm": 3.1365764386189405,
"learning_rate": 5.645940686977032e-07,
"loss": 1.8013,
"step": 475
},
{
"epoch": 0.7484276729559748,
"grad_norm": 3.1568448692061715,
"learning_rate": 5.629879252888045e-07,
"loss": 1.6867,
"step": 476
},
{
"epoch": 0.75,
"grad_norm": 3.2555349541265635,
"learning_rate": 5.61381121166375e-07,
"loss": 1.5651,
"step": 477
},
{
"epoch": 0.7515723270440252,
"grad_norm": 3.2156977140753296,
"learning_rate": 5.597736731850294e-07,
"loss": 1.6762,
"step": 478
},
{
"epoch": 0.7531446540880503,
"grad_norm": 3.3231239637752448,
"learning_rate": 5.581655982061366e-07,
"loss": 1.4839,
"step": 479
},
{
"epoch": 0.7547169811320755,
"grad_norm": 2.9815593831758203,
"learning_rate": 5.565569130976422e-07,
"loss": 1.5976,
"step": 480
},
{
"epoch": 0.7562893081761006,
"grad_norm": 3.049414168390122,
"learning_rate": 5.549476347338913e-07,
"loss": 1.6503,
"step": 481
},
{
"epoch": 0.7578616352201258,
"grad_norm": 3.115613499312693,
"learning_rate": 5.533377799954531e-07,
"loss": 1.6372,
"step": 482
},
{
"epoch": 0.7594339622641509,
"grad_norm": 3.316587691013018,
"learning_rate": 5.517273657689418e-07,
"loss": 1.8254,
"step": 483
},
{
"epoch": 0.7610062893081762,
"grad_norm": 3.230503066134413,
"learning_rate": 5.501164089468405e-07,
"loss": 1.4632,
"step": 484
},
{
"epoch": 0.7625786163522013,
"grad_norm": 3.1674466671644472,
"learning_rate": 5.485049264273241e-07,
"loss": 1.5059,
"step": 485
},
{
"epoch": 0.7641509433962265,
"grad_norm": 3.087706666603333,
"learning_rate": 5.468929351140815e-07,
"loss": 1.5832,
"step": 486
},
{
"epoch": 0.7657232704402516,
"grad_norm": 2.992373824489146,
"learning_rate": 5.452804519161389e-07,
"loss": 1.6634,
"step": 487
},
{
"epoch": 0.7672955974842768,
"grad_norm": 2.8390916500019983,
"learning_rate": 5.436674937476819e-07,
"loss": 1.6625,
"step": 488
},
{
"epoch": 0.7688679245283019,
"grad_norm": 3.185527589207637,
"learning_rate": 5.420540775278788e-07,
"loss": 1.7379,
"step": 489
},
{
"epoch": 0.7704402515723271,
"grad_norm": 2.8909583251962645,
"learning_rate": 5.404402201807021e-07,
"loss": 1.5009,
"step": 490
},
{
"epoch": 0.7720125786163522,
"grad_norm": 3.3764568950079337,
"learning_rate": 5.388259386347517e-07,
"loss": 1.8461,
"step": 491
},
{
"epoch": 0.7735849056603774,
"grad_norm": 2.9907675034645154,
"learning_rate": 5.37211249823077e-07,
"loss": 1.6859,
"step": 492
},
{
"epoch": 0.7751572327044025,
"grad_norm": 3.0081342090492815,
"learning_rate": 5.355961706829997e-07,
"loss": 1.8076,
"step": 493
},
{
"epoch": 0.7767295597484277,
"grad_norm": 3.206744621623335,
"learning_rate": 5.339807181559358e-07,
"loss": 1.4324,
"step": 494
},
{
"epoch": 0.7783018867924528,
"grad_norm": 3.0827592144848737,
"learning_rate": 5.323649091872178e-07,
"loss": 1.4862,
"step": 495
},
{
"epoch": 0.779874213836478,
"grad_norm": 3.0432766929055104,
"learning_rate": 5.307487607259174e-07,
"loss": 1.7757,
"step": 496
},
{
"epoch": 0.7814465408805031,
"grad_norm": 3.0681103462894725,
"learning_rate": 5.291322897246668e-07,
"loss": 1.5578,
"step": 497
},
{
"epoch": 0.7830188679245284,
"grad_norm": 3.3579149440322795,
"learning_rate": 5.275155131394824e-07,
"loss": 1.486,
"step": 498
},
{
"epoch": 0.7845911949685535,
"grad_norm": 3.024186532283961,
"learning_rate": 5.258984479295852e-07,
"loss": 1.4805,
"step": 499
},
{
"epoch": 0.7861635220125787,
"grad_norm": 2.9881904115084854,
"learning_rate": 5.242811110572242e-07,
"loss": 1.5482,
"step": 500
},
{
"epoch": 0.7861635220125787,
"eval_sat2_MCTS_chains_SFT_val_loss": 1.6792867183685303,
"eval_sat2_MCTS_chains_SFT_val_runtime": 103.382,
"eval_sat2_MCTS_chains_SFT_val_samples_per_second": 9.944,
"eval_sat2_MCTS_chains_SFT_val_steps_per_second": 1.248,
"step": 500
},
{
"epoch": 0.7877358490566038,
"grad_norm": 3.112808846064443,
"learning_rate": 5.226635194874977e-07,
"loss": 1.5644,
"step": 501
},
{
"epoch": 0.789308176100629,
"grad_norm": 3.2640628638265454,
"learning_rate": 5.21045690188176e-07,
"loss": 1.6825,
"step": 502
},
{
"epoch": 0.7908805031446541,
"grad_norm": 3.219943846696886,
"learning_rate": 5.19427640129523e-07,
"loss": 1.5106,
"step": 503
},
{
"epoch": 0.7924528301886793,
"grad_norm": 3.0378707975393437,
"learning_rate": 5.178093862841178e-07,
"loss": 1.47,
"step": 504
},
{
"epoch": 0.7940251572327044,
"grad_norm": 2.8365886455988853,
"learning_rate": 5.16190945626678e-07,
"loss": 1.5678,
"step": 505
},
{
"epoch": 0.7955974842767296,
"grad_norm": 3.5805336192243016,
"learning_rate": 5.145723351338798e-07,
"loss": 1.683,
"step": 506
},
{
"epoch": 0.7971698113207547,
"grad_norm": 3.222694102591329,
"learning_rate": 5.129535717841818e-07,
"loss": 1.5866,
"step": 507
},
{
"epoch": 0.7987421383647799,
"grad_norm": 3.25235170280169,
"learning_rate": 5.11334672557645e-07,
"loss": 1.6389,
"step": 508
},
{
"epoch": 0.800314465408805,
"grad_norm": 2.973328614815833,
"learning_rate": 5.097156544357567e-07,
"loss": 1.4565,
"step": 509
},
{
"epoch": 0.8018867924528302,
"grad_norm": 3.4518265144515534,
"learning_rate": 5.080965344012508e-07,
"loss": 1.5516,
"step": 510
},
{
"epoch": 0.8034591194968553,
"grad_norm": 2.8522493610275945,
"learning_rate": 5.064773294379302e-07,
"loss": 1.665,
"step": 511
},
{
"epoch": 0.8050314465408805,
"grad_norm": 2.8746693476380827,
"learning_rate": 5.048580565304886e-07,
"loss": 1.4801,
"step": 512
},
{
"epoch": 0.8066037735849056,
"grad_norm": 3.7133447000927493,
"learning_rate": 5.03238732664333e-07,
"loss": 1.7017,
"step": 513
},
{
"epoch": 0.8081761006289309,
"grad_norm": 3.2252723169555275,
"learning_rate": 5.016193748254044e-07,
"loss": 1.5495,
"step": 514
},
{
"epoch": 0.809748427672956,
"grad_norm": 3.019919222840472,
"learning_rate": 5e-07,
"loss": 1.4837,
"step": 515
},
{
"epoch": 0.8113207547169812,
"grad_norm": 5.981459859212562,
"learning_rate": 4.983806251745957e-07,
"loss": 1.5122,
"step": 516
},
{
"epoch": 0.8128930817610063,
"grad_norm": 3.1675144304415217,
"learning_rate": 4.967612673356669e-07,
"loss": 1.357,
"step": 517
},
{
"epoch": 0.8144654088050315,
"grad_norm": 3.1550044652043767,
"learning_rate": 4.951419434695113e-07,
"loss": 1.6727,
"step": 518
},
{
"epoch": 0.8160377358490566,
"grad_norm": 3.2157731056157703,
"learning_rate": 4.935226705620699e-07,
"loss": 1.5374,
"step": 519
},
{
"epoch": 0.8176100628930818,
"grad_norm": 3.433295089818606,
"learning_rate": 4.919034655987492e-07,
"loss": 1.4016,
"step": 520
},
{
"epoch": 0.8191823899371069,
"grad_norm": 3.2174967579065417,
"learning_rate": 4.902843455642433e-07,
"loss": 1.6296,
"step": 521
},
{
"epoch": 0.8207547169811321,
"grad_norm": 3.2592113989158333,
"learning_rate": 4.88665327442355e-07,
"loss": 1.5081,
"step": 522
},
{
"epoch": 0.8223270440251572,
"grad_norm": 3.159751293392431,
"learning_rate": 4.870464282158184e-07,
"loss": 1.8609,
"step": 523
},
{
"epoch": 0.8238993710691824,
"grad_norm": 3.456855111565593,
"learning_rate": 4.854276648661202e-07,
"loss": 1.5157,
"step": 524
},
{
"epoch": 0.8254716981132075,
"grad_norm": 2.875021106322295,
"learning_rate": 4.838090543733221e-07,
"loss": 1.5928,
"step": 525
},
{
"epoch": 0.8270440251572327,
"grad_norm": 2.9255730050969793,
"learning_rate": 4.821906137158821e-07,
"loss": 1.6097,
"step": 526
},
{
"epoch": 0.8286163522012578,
"grad_norm": 3.0868845547911037,
"learning_rate": 4.805723598704771e-07,
"loss": 1.5752,
"step": 527
},
{
"epoch": 0.8301886792452831,
"grad_norm": 2.991273798342134,
"learning_rate": 4.789543098118241e-07,
"loss": 1.5966,
"step": 528
},
{
"epoch": 0.8317610062893082,
"grad_norm": 3.176550358572851,
"learning_rate": 4.773364805125024e-07,
"loss": 1.6147,
"step": 529
},
{
"epoch": 0.8333333333333334,
"grad_norm": 3.0393464625754874,
"learning_rate": 4.75718888942776e-07,
"loss": 1.4362,
"step": 530
},
{
"epoch": 0.8349056603773585,
"grad_norm": 3.382289038536882,
"learning_rate": 4.7410155207041476e-07,
"loss": 1.4906,
"step": 531
},
{
"epoch": 0.8364779874213837,
"grad_norm": 3.023144808660818,
"learning_rate": 4.7248448686051753e-07,
"loss": 1.4524,
"step": 532
},
{
"epoch": 0.8380503144654088,
"grad_norm": 3.272898596251666,
"learning_rate": 4.708677102753331e-07,
"loss": 1.4412,
"step": 533
},
{
"epoch": 0.839622641509434,
"grad_norm": 3.368636687037789,
"learning_rate": 4.692512392740826e-07,
"loss": 1.5923,
"step": 534
},
{
"epoch": 0.8411949685534591,
"grad_norm": 3.0269316359210703,
"learning_rate": 4.676350908127821e-07,
"loss": 1.5435,
"step": 535
},
{
"epoch": 0.8427672955974843,
"grad_norm": 3.104762410537656,
"learning_rate": 4.6601928184406407e-07,
"loss": 1.763,
"step": 536
},
{
"epoch": 0.8443396226415094,
"grad_norm": 3.07205733907163,
"learning_rate": 4.6440382931700025e-07,
"loss": 1.6383,
"step": 537
},
{
"epoch": 0.8459119496855346,
"grad_norm": 3.2208435755618514,
"learning_rate": 4.6278875017692305e-07,
"loss": 1.6919,
"step": 538
},
{
"epoch": 0.8474842767295597,
"grad_norm": 3.120167553691476,
"learning_rate": 4.611740613652484e-07,
"loss": 1.7508,
"step": 539
},
{
"epoch": 0.8490566037735849,
"grad_norm": 3.222111818087616,
"learning_rate": 4.595597798192979e-07,
"loss": 1.7055,
"step": 540
},
{
"epoch": 0.85062893081761,
"grad_norm": 3.0750638601202525,
"learning_rate": 4.5794592247212115e-07,
"loss": 1.5565,
"step": 541
},
{
"epoch": 0.8522012578616353,
"grad_norm": 3.140050284801352,
"learning_rate": 4.56332506252318e-07,
"loss": 1.5851,
"step": 542
},
{
"epoch": 0.8537735849056604,
"grad_norm": 3.0237503315775642,
"learning_rate": 4.547195480838611e-07,
"loss": 1.8113,
"step": 543
},
{
"epoch": 0.8553459119496856,
"grad_norm": 2.8795497225735187,
"learning_rate": 4.5310706488591854e-07,
"loss": 1.4624,
"step": 544
},
{
"epoch": 0.8569182389937107,
"grad_norm": 3.1652826858619596,
"learning_rate": 4.5149507357267597e-07,
"loss": 1.4178,
"step": 545
},
{
"epoch": 0.8584905660377359,
"grad_norm": 3.310546691513496,
"learning_rate": 4.498835910531595e-07,
"loss": 1.494,
"step": 546
},
{
"epoch": 0.860062893081761,
"grad_norm": 3.299127034963117,
"learning_rate": 4.4827263423105815e-07,
"loss": 1.7251,
"step": 547
},
{
"epoch": 0.8616352201257862,
"grad_norm": 3.495217186292534,
"learning_rate": 4.466622200045468e-07,
"loss": 1.5429,
"step": 548
},
{
"epoch": 0.8632075471698113,
"grad_norm": 3.2024593517565334,
"learning_rate": 4.4505236526610856e-07,
"loss": 1.7903,
"step": 549
},
{
"epoch": 0.8647798742138365,
"grad_norm": 3.6359390641047247,
"learning_rate": 4.434430869023579e-07,
"loss": 1.4595,
"step": 550
},
{
"epoch": 0.8663522012578616,
"grad_norm": 3.0315341288087216,
"learning_rate": 4.418344017938633e-07,
"loss": 1.5896,
"step": 551
},
{
"epoch": 0.8679245283018868,
"grad_norm": 2.9431226438488225,
"learning_rate": 4.4022632681497056e-07,
"loss": 1.5016,
"step": 552
},
{
"epoch": 0.8694968553459119,
"grad_norm": 2.76871961076697,
"learning_rate": 4.3861887883362505e-07,
"loss": 1.5735,
"step": 553
},
{
"epoch": 0.8710691823899371,
"grad_norm": 3.001757601924961,
"learning_rate": 4.370120747111955e-07,
"loss": 1.6215,
"step": 554
},
{
"epoch": 0.8726415094339622,
"grad_norm": 3.1989100749072974,
"learning_rate": 4.354059313022969e-07,
"loss": 1.5669,
"step": 555
},
{
"epoch": 0.8742138364779874,
"grad_norm": 3.2670176156220267,
"learning_rate": 4.3380046545461357e-07,
"loss": 1.6728,
"step": 556
},
{
"epoch": 0.8757861635220126,
"grad_norm": 3.251241086797364,
"learning_rate": 4.3219569400872234e-07,
"loss": 1.5385,
"step": 557
},
{
"epoch": 0.8773584905660378,
"grad_norm": 3.224015403978076,
"learning_rate": 4.305916337979167e-07,
"loss": 1.4749,
"step": 558
},
{
"epoch": 0.8789308176100629,
"grad_norm": 3.0290334656918145,
"learning_rate": 4.289883016480291e-07,
"loss": 1.5637,
"step": 559
},
{
"epoch": 0.8805031446540881,
"grad_norm": 3.5571187387226133,
"learning_rate": 4.2738571437725496e-07,
"loss": 1.6427,
"step": 560
},
{
"epoch": 0.8820754716981132,
"grad_norm": 3.1631834259489144,
"learning_rate": 4.257838887959763e-07,
"loss": 1.4748,
"step": 561
},
{
"epoch": 0.8836477987421384,
"grad_norm": 3.467969945927553,
"learning_rate": 4.2418284170658595e-07,
"loss": 1.5934,
"step": 562
},
{
"epoch": 0.8852201257861635,
"grad_norm": 3.165363685749207,
"learning_rate": 4.2258258990331007e-07,
"loss": 1.5096,
"step": 563
},
{
"epoch": 0.8867924528301887,
"grad_norm": 3.2224659620946885,
"learning_rate": 4.209831501720328e-07,
"loss": 1.5952,
"step": 564
},
{
"epoch": 0.8883647798742138,
"grad_norm": 3.3169481022705343,
"learning_rate": 4.193845392901201e-07,
"loss": 1.4145,
"step": 565
},
{
"epoch": 0.889937106918239,
"grad_norm": 3.02538270876987,
"learning_rate": 4.177867740262436e-07,
"loss": 1.5246,
"step": 566
},
{
"epoch": 0.8915094339622641,
"grad_norm": 3.94869374366118,
"learning_rate": 4.1618987114020495e-07,
"loss": 1.569,
"step": 567
},
{
"epoch": 0.8930817610062893,
"grad_norm": 2.991956599684122,
"learning_rate": 4.145938473827598e-07,
"loss": 1.5099,
"step": 568
},
{
"epoch": 0.8946540880503144,
"grad_norm": 3.2982588739570966,
"learning_rate": 4.129987194954421e-07,
"loss": 1.4944,
"step": 569
},
{
"epoch": 0.8962264150943396,
"grad_norm": 3.348577038705965,
"learning_rate": 4.1140450421038866e-07,
"loss": 1.5357,
"step": 570
},
{
"epoch": 0.8977987421383647,
"grad_norm": 3.515207694168226,
"learning_rate": 4.098112182501633e-07,
"loss": 1.6219,
"step": 571
},
{
"epoch": 0.89937106918239,
"grad_norm": 3.0410070114945222,
"learning_rate": 4.0821887832758194e-07,
"loss": 1.6244,
"step": 572
},
{
"epoch": 0.9009433962264151,
"grad_norm": 3.7444315598525333,
"learning_rate": 4.0662750114553685e-07,
"loss": 1.5238,
"step": 573
},
{
"epoch": 0.9025157232704403,
"grad_norm": 3.331368882227541,
"learning_rate": 4.050371033968215e-07,
"loss": 1.5481,
"step": 574
},
{
"epoch": 0.9040880503144654,
"grad_norm": 3.007868223160233,
"learning_rate": 4.0344770176395606e-07,
"loss": 1.563,
"step": 575
},
{
"epoch": 0.9056603773584906,
"grad_norm": 3.150820294437964,
"learning_rate": 4.018593129190113e-07,
"loss": 1.4964,
"step": 576
},
{
"epoch": 0.9072327044025157,
"grad_norm": 3.0198801950596,
"learning_rate": 4.0027195352343456e-07,
"loss": 1.6168,
"step": 577
},
{
"epoch": 0.9088050314465409,
"grad_norm": 3.0904507818273377,
"learning_rate": 3.98685640227875e-07,
"loss": 1.4994,
"step": 578
},
{
"epoch": 0.910377358490566,
"grad_norm": 3.2771071808209142,
"learning_rate": 3.971003896720082e-07,
"loss": 1.7361,
"step": 579
},
{
"epoch": 0.9119496855345912,
"grad_norm": 3.111995014392537,
"learning_rate": 3.955162184843624e-07,
"loss": 1.5881,
"step": 580
},
{
"epoch": 0.9135220125786163,
"grad_norm": 3.358309691767164,
"learning_rate": 3.93933143282144e-07,
"loss": 1.6736,
"step": 581
},
{
"epoch": 0.9150943396226415,
"grad_norm": 4.345173695966827,
"learning_rate": 3.923511806710625e-07,
"loss": 1.3978,
"step": 582
},
{
"epoch": 0.9166666666666666,
"grad_norm": 3.4592040777889284,
"learning_rate": 3.907703472451573e-07,
"loss": 1.496,
"step": 583
},
{
"epoch": 0.9182389937106918,
"grad_norm": 3.2765433187832147,
"learning_rate": 3.8919065958662295e-07,
"loss": 1.7576,
"step": 584
},
{
"epoch": 0.9198113207547169,
"grad_norm": 3.135962913002799,
"learning_rate": 3.8761213426563543e-07,
"loss": 1.8539,
"step": 585
},
{
"epoch": 0.9213836477987422,
"grad_norm": 3.0325653708421623,
"learning_rate": 3.860347878401784e-07,
"loss": 1.5986,
"step": 586
},
{
"epoch": 0.9229559748427673,
"grad_norm": 3.288278731016835,
"learning_rate": 3.844586368558694e-07,
"loss": 1.5314,
"step": 587
},
{
"epoch": 0.9245283018867925,
"grad_norm": 3.504682969050096,
"learning_rate": 3.828836978457867e-07,
"loss": 1.5027,
"step": 588
},
{
"epoch": 0.9261006289308176,
"grad_norm": 3.059351611957445,
"learning_rate": 3.813099873302951e-07,
"loss": 1.6774,
"step": 589
},
{
"epoch": 0.9276729559748428,
"grad_norm": 3.0459760979745023,
"learning_rate": 3.7973752181687327e-07,
"loss": 1.676,
"step": 590
},
{
"epoch": 0.9292452830188679,
"grad_norm": 3.247916912464039,
"learning_rate": 3.781663177999401e-07,
"loss": 1.7239,
"step": 591
},
{
"epoch": 0.9308176100628931,
"grad_norm": 3.7058658418998514,
"learning_rate": 3.765963917606828e-07,
"loss": 1.7732,
"step": 592
},
{
"epoch": 0.9323899371069182,
"grad_norm": 3.1817920794892363,
"learning_rate": 3.750277601668823e-07,
"loss": 1.5874,
"step": 593
},
{
"epoch": 0.9339622641509434,
"grad_norm": 3.426732542442707,
"learning_rate": 3.7346043947274186e-07,
"loss": 1.6734,
"step": 594
},
{
"epoch": 0.9355345911949685,
"grad_norm": 3.174283975354623,
"learning_rate": 3.718944461187138e-07,
"loss": 1.3842,
"step": 595
},
{
"epoch": 0.9371069182389937,
"grad_norm": 3.0801471315347513,
"learning_rate": 3.7032979653132747e-07,
"loss": 1.3734,
"step": 596
},
{
"epoch": 0.9386792452830188,
"grad_norm": 2.9253641065501714,
"learning_rate": 3.6876650712301647e-07,
"loss": 1.8524,
"step": 597
},
{
"epoch": 0.940251572327044,
"grad_norm": 3.5083452967032014,
"learning_rate": 3.6720459429194737e-07,
"loss": 1.6783,
"step": 598
},
{
"epoch": 0.9418238993710691,
"grad_norm": 3.098054523539633,
"learning_rate": 3.656440744218464e-07,
"loss": 1.5777,
"step": 599
},
{
"epoch": 0.9433962264150944,
"grad_norm": 3.17444031508909,
"learning_rate": 3.640849638818285e-07,
"loss": 1.4485,
"step": 600
},
{
"epoch": 0.9449685534591195,
"grad_norm": 3.38521379733728,
"learning_rate": 3.625272790262257e-07,
"loss": 1.5424,
"step": 601
},
{
"epoch": 0.9465408805031447,
"grad_norm": 3.2924344281098072,
"learning_rate": 3.60971036194415e-07,
"loss": 1.749,
"step": 602
},
{
"epoch": 0.9481132075471698,
"grad_norm": 3.289281005988172,
"learning_rate": 3.594162517106472e-07,
"loss": 1.5505,
"step": 603
},
{
"epoch": 0.949685534591195,
"grad_norm": 3.152800599797803,
"learning_rate": 3.578629418838757e-07,
"loss": 1.4122,
"step": 604
},
{
"epoch": 0.9512578616352201,
"grad_norm": 3.0655978527785495,
"learning_rate": 3.563111230075859e-07,
"loss": 1.7778,
"step": 605
},
{
"epoch": 0.9528301886792453,
"grad_norm": 3.2065107290681767,
"learning_rate": 3.547608113596233e-07,
"loss": 1.5953,
"step": 606
},
{
"epoch": 0.9544025157232704,
"grad_norm": 3.144578851012227,
"learning_rate": 3.532120232020236e-07,
"loss": 1.7357,
"step": 607
},
{
"epoch": 0.9559748427672956,
"grad_norm": 3.4100400563663813,
"learning_rate": 3.516647747808417e-07,
"loss": 1.6029,
"step": 608
},
{
"epoch": 0.9575471698113207,
"grad_norm": 2.941026820149774,
"learning_rate": 3.501190823259812e-07,
"loss": 1.696,
"step": 609
},
{
"epoch": 0.9591194968553459,
"grad_norm": 3.0809553995028622,
"learning_rate": 3.485749620510247e-07,
"loss": 1.5933,
"step": 610
},
{
"epoch": 0.960691823899371,
"grad_norm": 3.126026676001102,
"learning_rate": 3.470324301530631e-07,
"loss": 1.596,
"step": 611
},
{
"epoch": 0.9622641509433962,
"grad_norm": 2.939282311557233,
"learning_rate": 3.454915028125263e-07,
"loss": 1.5106,
"step": 612
},
{
"epoch": 0.9638364779874213,
"grad_norm": 4.618369006446248,
"learning_rate": 3.4395219619301285e-07,
"loss": 1.7572,
"step": 613
},
{
"epoch": 0.9654088050314465,
"grad_norm": 3.0249997103524837,
"learning_rate": 3.424145264411208e-07,
"loss": 1.6931,
"step": 614
},
{
"epoch": 0.9669811320754716,
"grad_norm": 3.121161609283383,
"learning_rate": 3.408785096862782e-07,
"loss": 1.5564,
"step": 615
},
{
"epoch": 0.9685534591194969,
"grad_norm": 2.98922657988821,
"learning_rate": 3.393441620405739e-07,
"loss": 1.7031,
"step": 616
},
{
"epoch": 0.970125786163522,
"grad_norm": 3.2545095978155043,
"learning_rate": 3.378114995985889e-07,
"loss": 1.4904,
"step": 617
},
{
"epoch": 0.9716981132075472,
"grad_norm": 3.1241849889746285,
"learning_rate": 3.362805384372267e-07,
"loss": 1.6119,
"step": 618
},
{
"epoch": 0.9732704402515723,
"grad_norm": 3.30354250616738,
"learning_rate": 3.3475129461554566e-07,
"loss": 1.8179,
"step": 619
},
{
"epoch": 0.9748427672955975,
"grad_norm": 3.1460767575471,
"learning_rate": 3.3322378417458977e-07,
"loss": 1.6612,
"step": 620
},
{
"epoch": 0.9764150943396226,
"grad_norm": 3.032989419999135,
"learning_rate": 3.3169802313722073e-07,
"loss": 1.7385,
"step": 621
},
{
"epoch": 0.9779874213836478,
"grad_norm": 3.1187973670716835,
"learning_rate": 3.301740275079497e-07,
"loss": 1.3352,
"step": 622
},
{
"epoch": 0.9795597484276729,
"grad_norm": 2.988051541266211,
"learning_rate": 3.2865181327277005e-07,
"loss": 1.4746,
"step": 623
},
{
"epoch": 0.9811320754716981,
"grad_norm": 2.829001916528421,
"learning_rate": 3.2713139639898854e-07,
"loss": 1.5913,
"step": 624
},
{
"epoch": 0.9827044025157232,
"grad_norm": 3.097727818757928,
"learning_rate": 3.2561279283505884e-07,
"loss": 1.5274,
"step": 625
},
{
"epoch": 0.9842767295597484,
"grad_norm": 3.280444035668285,
"learning_rate": 3.240960185104137e-07,
"loss": 1.5034,
"step": 626
},
{
"epoch": 0.9858490566037735,
"grad_norm": 2.984585170501972,
"learning_rate": 3.2258108933529805e-07,
"loss": 1.6207,
"step": 627
},
{
"epoch": 0.9874213836477987,
"grad_norm": 3.1514680873957337,
"learning_rate": 3.2106802120060194e-07,
"loss": 1.4952,
"step": 628
},
{
"epoch": 0.9889937106918238,
"grad_norm": 3.118313513827925,
"learning_rate": 3.1955682997769447e-07,
"loss": 1.3681,
"step": 629
},
{
"epoch": 0.9905660377358491,
"grad_norm": 2.892603332117668,
"learning_rate": 3.1804753151825627e-07,
"loss": 1.6935,
"step": 630
},
{
"epoch": 0.9921383647798742,
"grad_norm": 3.196944465600346,
"learning_rate": 3.16540141654114e-07,
"loss": 1.431,
"step": 631
},
{
"epoch": 0.9937106918238994,
"grad_norm": 3.626584174105943,
"learning_rate": 3.15034676197074e-07,
"loss": 1.8299,
"step": 632
},
{
"epoch": 0.9952830188679245,
"grad_norm": 3.095208053523766,
"learning_rate": 3.135311509387567e-07,
"loss": 1.4026,
"step": 633
},
{
"epoch": 0.9968553459119497,
"grad_norm": 3.174860623522535,
"learning_rate": 3.120295816504305e-07,
"loss": 1.6607,
"step": 634
},
{
"epoch": 0.9984276729559748,
"grad_norm": 3.2555221492639785,
"learning_rate": 3.105299840828466e-07,
"loss": 1.5827,
"step": 635
},
{
"epoch": 1.0,
"grad_norm": 3.0636918987199846,
"learning_rate": 3.090323739660742e-07,
"loss": 1.6189,
"step": 636
},
{
"epoch": 1.001572327044025,
"grad_norm": 2.925175741352935,
"learning_rate": 3.0753676700933445e-07,
"loss": 1.4471,
"step": 637
},
{
"epoch": 1.0031446540880504,
"grad_norm": 3.190232090409089,
"learning_rate": 3.0604317890083674e-07,
"loss": 1.5929,
"step": 638
},
{
"epoch": 1.0047169811320755,
"grad_norm": 2.9342322211564245,
"learning_rate": 3.045516253076137e-07,
"loss": 1.5289,
"step": 639
},
{
"epoch": 1.0062893081761006,
"grad_norm": 3.05977531743234,
"learning_rate": 3.030621218753565e-07,
"loss": 1.608,
"step": 640
},
{
"epoch": 1.0078616352201257,
"grad_norm": 2.9208361212348324,
"learning_rate": 3.0157468422825147e-07,
"loss": 1.4697,
"step": 641
},
{
"epoch": 1.009433962264151,
"grad_norm": 3.097347482656311,
"learning_rate": 3.0008932796881546e-07,
"loss": 1.5677,
"step": 642
},
{
"epoch": 1.0110062893081762,
"grad_norm": 3.0507165989593927,
"learning_rate": 2.9860606867773317e-07,
"loss": 1.4711,
"step": 643
},
{
"epoch": 1.0125786163522013,
"grad_norm": 2.9464488425337465,
"learning_rate": 2.9712492191369244e-07,
"loss": 1.4238,
"step": 644
},
{
"epoch": 1.0141509433962264,
"grad_norm": 3.3320478398426614,
"learning_rate": 2.95645903213222e-07,
"loss": 1.5607,
"step": 645
},
{
"epoch": 1.0157232704402517,
"grad_norm": 2.9788169571994834,
"learning_rate": 2.9416902809052814e-07,
"loss": 1.5556,
"step": 646
},
{
"epoch": 1.0172955974842768,
"grad_norm": 3.3312805191020796,
"learning_rate": 2.9269431203733206e-07,
"loss": 1.6243,
"step": 647
},
{
"epoch": 1.0188679245283019,
"grad_norm": 3.0115060001645455,
"learning_rate": 2.9122177052270747e-07,
"loss": 1.4988,
"step": 648
},
{
"epoch": 1.020440251572327,
"grad_norm": 3.3321250244143936,
"learning_rate": 2.897514189929177e-07,
"loss": 1.4174,
"step": 649
},
{
"epoch": 1.0220125786163523,
"grad_norm": 3.1753277857894915,
"learning_rate": 2.8828327287125507e-07,
"loss": 1.6069,
"step": 650
},
{
"epoch": 1.0235849056603774,
"grad_norm": 2.9899947184348328,
"learning_rate": 2.8681734755787716e-07,
"loss": 1.5228,
"step": 651
},
{
"epoch": 1.0251572327044025,
"grad_norm": 3.448011217347156,
"learning_rate": 2.853536584296471e-07,
"loss": 1.5431,
"step": 652
},
{
"epoch": 1.0267295597484276,
"grad_norm": 3.281542638072726,
"learning_rate": 2.8389222083997117e-07,
"loss": 1.4371,
"step": 653
},
{
"epoch": 1.028301886792453,
"grad_norm": 2.955949837511534,
"learning_rate": 2.8243305011863837e-07,
"loss": 1.6474,
"step": 654
},
{
"epoch": 1.029874213836478,
"grad_norm": 2.9895818915531986,
"learning_rate": 2.8097616157165885e-07,
"loss": 1.7398,
"step": 655
},
{
"epoch": 1.0314465408805031,
"grad_norm": 3.1813377026113834,
"learning_rate": 2.7952157048110406e-07,
"loss": 1.3422,
"step": 656
},
{
"epoch": 1.0330188679245282,
"grad_norm": 3.206575227698428,
"learning_rate": 2.7806929210494646e-07,
"loss": 1.4353,
"step": 657
},
{
"epoch": 1.0345911949685536,
"grad_norm": 2.8562478460137,
"learning_rate": 2.766193416768988e-07,
"loss": 1.5319,
"step": 658
},
{
"epoch": 1.0361635220125787,
"grad_norm": 3.192759899206982,
"learning_rate": 2.751717344062552e-07,
"loss": 1.3627,
"step": 659
},
{
"epoch": 1.0377358490566038,
"grad_norm": 3.0969602295505885,
"learning_rate": 2.7372648547773056e-07,
"loss": 1.4714,
"step": 660
},
{
"epoch": 1.0393081761006289,
"grad_norm": 3.005160598948565,
"learning_rate": 2.722836100513027e-07,
"loss": 1.4917,
"step": 661
},
{
"epoch": 1.0408805031446542,
"grad_norm": 3.194092221211697,
"learning_rate": 2.708431232620516e-07,
"loss": 1.6016,
"step": 662
},
{
"epoch": 1.0424528301886793,
"grad_norm": 3.2942434950589,
"learning_rate": 2.6940504022000244e-07,
"loss": 1.3387,
"step": 663
},
{
"epoch": 1.0440251572327044,
"grad_norm": 3.072420534138645,
"learning_rate": 2.679693760099658e-07,
"loss": 1.5528,
"step": 664
},
{
"epoch": 1.0455974842767295,
"grad_norm": 3.163961305087953,
"learning_rate": 2.665361456913797e-07,
"loss": 1.7042,
"step": 665
},
{
"epoch": 1.0471698113207548,
"grad_norm": 3.124340898841506,
"learning_rate": 2.651053642981522e-07,
"loss": 1.8125,
"step": 666
},
{
"epoch": 1.04874213836478,
"grad_norm": 3.266997346467644,
"learning_rate": 2.6367704683850287e-07,
"loss": 1.3475,
"step": 667
},
{
"epoch": 1.050314465408805,
"grad_norm": 4.0511615721483745,
"learning_rate": 2.6225120829480627e-07,
"loss": 1.3714,
"step": 668
},
{
"epoch": 1.0518867924528301,
"grad_norm": 3.2394052110431906,
"learning_rate": 2.6082786362343374e-07,
"loss": 1.6496,
"step": 669
},
{
"epoch": 1.0534591194968554,
"grad_norm": 3.2297916537019593,
"learning_rate": 2.5940702775459744e-07,
"loss": 1.5557,
"step": 670
},
{
"epoch": 1.0550314465408805,
"grad_norm": 3.1460312089986218,
"learning_rate": 2.579887155921936e-07,
"loss": 1.5001,
"step": 671
},
{
"epoch": 1.0566037735849056,
"grad_norm": 3.544561305322451,
"learning_rate": 2.5657294201364523e-07,
"loss": 1.6661,
"step": 672
},
{
"epoch": 1.0581761006289307,
"grad_norm": 3.3796494907434846,
"learning_rate": 2.551597218697475e-07,
"loss": 1.4262,
"step": 673
},
{
"epoch": 1.059748427672956,
"grad_norm": 3.1326980300376386,
"learning_rate": 2.537490699845109e-07,
"loss": 1.7046,
"step": 674
},
{
"epoch": 1.0613207547169812,
"grad_norm": 3.053494075129222,
"learning_rate": 2.523410011550064e-07,
"loss": 1.5256,
"step": 675
},
{
"epoch": 1.0628930817610063,
"grad_norm": 2.8913745597217546,
"learning_rate": 2.5093553015120934e-07,
"loss": 1.59,
"step": 676
},
{
"epoch": 1.0644654088050314,
"grad_norm": 3.5453147384528996,
"learning_rate": 2.495326717158457e-07,
"loss": 1.4309,
"step": 677
},
{
"epoch": 1.0660377358490567,
"grad_norm": 3.0039880470586513,
"learning_rate": 2.4813244056423686e-07,
"loss": 1.43,
"step": 678
},
{
"epoch": 1.0676100628930818,
"grad_norm": 3.2472760469814212,
"learning_rate": 2.467348513841447e-07,
"loss": 1.5304,
"step": 679
},
{
"epoch": 1.069182389937107,
"grad_norm": 2.863821975975358,
"learning_rate": 2.4533991883561867e-07,
"loss": 1.2874,
"step": 680
},
{
"epoch": 1.070754716981132,
"grad_norm": 5.7732998659847725,
"learning_rate": 2.439476575508408e-07,
"loss": 1.4263,
"step": 681
},
{
"epoch": 1.0723270440251573,
"grad_norm": 3.1433425411459193,
"learning_rate": 2.425580821339733e-07,
"loss": 1.501,
"step": 682
},
{
"epoch": 1.0738993710691824,
"grad_norm": 3.6371508420472614,
"learning_rate": 2.411712071610048e-07,
"loss": 1.5254,
"step": 683
},
{
"epoch": 1.0754716981132075,
"grad_norm": 2.962554371919303,
"learning_rate": 2.3978704717959776e-07,
"loss": 1.6529,
"step": 684
},
{
"epoch": 1.0770440251572326,
"grad_norm": 3.6781213038382483,
"learning_rate": 2.3840561670893495e-07,
"loss": 1.4484,
"step": 685
},
{
"epoch": 1.078616352201258,
"grad_norm": 3.1791495438402584,
"learning_rate": 2.3702693023956848e-07,
"loss": 1.5015,
"step": 686
},
{
"epoch": 1.080188679245283,
"grad_norm": 3.4330234166348617,
"learning_rate": 2.3565100223326735e-07,
"loss": 1.4895,
"step": 687
},
{
"epoch": 1.0817610062893082,
"grad_norm": 3.2582510503112556,
"learning_rate": 2.3427784712286475e-07,
"loss": 1.4913,
"step": 688
},
{
"epoch": 1.0833333333333333,
"grad_norm": 3.112247746272538,
"learning_rate": 2.3290747931210848e-07,
"loss": 1.5616,
"step": 689
},
{
"epoch": 1.0849056603773586,
"grad_norm": 3.1297461225513876,
"learning_rate": 2.3153991317550808e-07,
"loss": 1.6498,
"step": 690
},
{
"epoch": 1.0864779874213837,
"grad_norm": 3.1597599015515283,
"learning_rate": 2.3017516305818546e-07,
"loss": 1.5233,
"step": 691
},
{
"epoch": 1.0880503144654088,
"grad_norm": 3.155900443793459,
"learning_rate": 2.288132432757233e-07,
"loss": 1.5558,
"step": 692
},
{
"epoch": 1.0896226415094339,
"grad_norm": 2.9696101347619566,
"learning_rate": 2.2745416811401584e-07,
"loss": 1.3783,
"step": 693
},
{
"epoch": 1.0911949685534592,
"grad_norm": 3.053820395116348,
"learning_rate": 2.2609795182911857e-07,
"loss": 1.4285,
"step": 694
},
{
"epoch": 1.0927672955974843,
"grad_norm": 3.2826961481245265,
"learning_rate": 2.247446086470982e-07,
"loss": 1.608,
"step": 695
},
{
"epoch": 1.0943396226415094,
"grad_norm": 3.0770194090310503,
"learning_rate": 2.2339415276388474e-07,
"loss": 1.4713,
"step": 696
},
{
"epoch": 1.0959119496855345,
"grad_norm": 3.044905869572258,
"learning_rate": 2.220465983451209e-07,
"loss": 1.5122,
"step": 697
},
{
"epoch": 1.0974842767295598,
"grad_norm": 3.1453525312191313,
"learning_rate": 2.207019595260154e-07,
"loss": 1.4947,
"step": 698
},
{
"epoch": 1.099056603773585,
"grad_norm": 3.236671349430344,
"learning_rate": 2.1936025041119265e-07,
"loss": 1.4189,
"step": 699
},
{
"epoch": 1.10062893081761,
"grad_norm": 3.083772083487567,
"learning_rate": 2.180214850745467e-07,
"loss": 1.625,
"step": 700
}
],
"logging_steps": 1,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 120177106550784.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}