ViGoRL-MCTS-SFT-3b-Web-Grounding / trainer_state.json
gsarch's picture
Initial checkpoint upload
065478b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989743589743589,
"eval_steps": 250,
"global_step": 487,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020512820512820513,
"grad_norm": 6.810319140331888,
"learning_rate": 6.666666666666667e-08,
"loss": 1.7185,
"step": 1
},
{
"epoch": 0.0041025641025641026,
"grad_norm": 6.973912436199157,
"learning_rate": 1.3333333333333334e-07,
"loss": 1.7037,
"step": 2
},
{
"epoch": 0.006153846153846154,
"grad_norm": 6.7660565555022165,
"learning_rate": 2e-07,
"loss": 1.6,
"step": 3
},
{
"epoch": 0.008205128205128205,
"grad_norm": 6.771663318387663,
"learning_rate": 2.6666666666666667e-07,
"loss": 1.6726,
"step": 4
},
{
"epoch": 0.010256410256410256,
"grad_norm": 6.52871070669014,
"learning_rate": 3.333333333333333e-07,
"loss": 1.5925,
"step": 5
},
{
"epoch": 0.012307692307692308,
"grad_norm": 6.512202528712754,
"learning_rate": 4e-07,
"loss": 1.6276,
"step": 6
},
{
"epoch": 0.014358974358974359,
"grad_norm": 6.817275132656996,
"learning_rate": 4.6666666666666666e-07,
"loss": 1.6963,
"step": 7
},
{
"epoch": 0.01641025641025641,
"grad_norm": 6.628192486672898,
"learning_rate": 5.333333333333333e-07,
"loss": 1.6174,
"step": 8
},
{
"epoch": 0.018461538461538463,
"grad_norm": 6.619040148801032,
"learning_rate": 6e-07,
"loss": 1.6508,
"step": 9
},
{
"epoch": 0.020512820512820513,
"grad_norm": 6.406981751026222,
"learning_rate": 6.666666666666666e-07,
"loss": 1.6735,
"step": 10
},
{
"epoch": 0.022564102564102566,
"grad_norm": 6.604487207098839,
"learning_rate": 7.333333333333332e-07,
"loss": 1.6365,
"step": 11
},
{
"epoch": 0.024615384615384615,
"grad_norm": 6.631786935020852,
"learning_rate": 8e-07,
"loss": 1.6344,
"step": 12
},
{
"epoch": 0.02666666666666667,
"grad_norm": 5.9719505737206,
"learning_rate": 8.666666666666667e-07,
"loss": 1.5787,
"step": 13
},
{
"epoch": 0.028717948717948718,
"grad_norm": 6.2761075086977645,
"learning_rate": 9.333333333333333e-07,
"loss": 1.6528,
"step": 14
},
{
"epoch": 0.03076923076923077,
"grad_norm": 5.8862406004197965,
"learning_rate": 1e-06,
"loss": 1.6318,
"step": 15
},
{
"epoch": 0.03282051282051282,
"grad_norm": 5.538352647927954,
"learning_rate": 9.99988924734311e-07,
"loss": 1.5542,
"step": 16
},
{
"epoch": 0.03487179487179487,
"grad_norm": 4.459604234006354,
"learning_rate": 9.999556994278908e-07,
"loss": 1.6195,
"step": 17
},
{
"epoch": 0.036923076923076927,
"grad_norm": 4.173369165379434,
"learning_rate": 9.999003255526553e-07,
"loss": 1.5388,
"step": 18
},
{
"epoch": 0.038974358974358976,
"grad_norm": 3.8681669780200902,
"learning_rate": 9.998228055617262e-07,
"loss": 1.6043,
"step": 19
},
{
"epoch": 0.041025641025641026,
"grad_norm": 3.573458363070342,
"learning_rate": 9.997231428893215e-07,
"loss": 1.4993,
"step": 20
},
{
"epoch": 0.043076923076923075,
"grad_norm": 3.658332959872666,
"learning_rate": 9.996013419506033e-07,
"loss": 1.5278,
"step": 21
},
{
"epoch": 0.04512820512820513,
"grad_norm": 3.7157840687731105,
"learning_rate": 9.994574081414829e-07,
"loss": 1.5844,
"step": 22
},
{
"epoch": 0.04717948717948718,
"grad_norm": 3.629986722740094,
"learning_rate": 9.992913478383809e-07,
"loss": 1.6517,
"step": 23
},
{
"epoch": 0.04923076923076923,
"grad_norm": 3.0451360403292385,
"learning_rate": 9.991031683979451e-07,
"loss": 1.4926,
"step": 24
},
{
"epoch": 0.05128205128205128,
"grad_norm": 2.5837734286427563,
"learning_rate": 9.98892878156725e-07,
"loss": 1.5223,
"step": 25
},
{
"epoch": 0.05333333333333334,
"grad_norm": 2.873780501731463,
"learning_rate": 9.986604864308015e-07,
"loss": 1.4939,
"step": 26
},
{
"epoch": 0.055384615384615386,
"grad_norm": 3.1829655306698283,
"learning_rate": 9.98406003515375e-07,
"loss": 1.5643,
"step": 27
},
{
"epoch": 0.057435897435897436,
"grad_norm": 2.8311773740844983,
"learning_rate": 9.981294406843093e-07,
"loss": 1.5249,
"step": 28
},
{
"epoch": 0.059487179487179485,
"grad_norm": 2.6622418441691518,
"learning_rate": 9.978308101896316e-07,
"loss": 1.4994,
"step": 29
},
{
"epoch": 0.06153846153846154,
"grad_norm": 2.5130076146469666,
"learning_rate": 9.975101252609903e-07,
"loss": 1.5234,
"step": 30
},
{
"epoch": 0.06358974358974359,
"grad_norm": 2.4697277284762684,
"learning_rate": 9.971674001050686e-07,
"loss": 1.44,
"step": 31
},
{
"epoch": 0.06564102564102564,
"grad_norm": 2.3387108497072533,
"learning_rate": 9.968026499049549e-07,
"loss": 1.4284,
"step": 32
},
{
"epoch": 0.06769230769230769,
"grad_norm": 2.1694373048980866,
"learning_rate": 9.964158908194706e-07,
"loss": 1.4756,
"step": 33
},
{
"epoch": 0.06974358974358974,
"grad_norm": 2.0195715873177162,
"learning_rate": 9.960071399824547e-07,
"loss": 1.5196,
"step": 34
},
{
"epoch": 0.07179487179487179,
"grad_norm": 1.9300579544476943,
"learning_rate": 9.955764155020035e-07,
"loss": 1.487,
"step": 35
},
{
"epoch": 0.07384615384615385,
"grad_norm": 2.100972647844811,
"learning_rate": 9.951237364596692e-07,
"loss": 1.4524,
"step": 36
},
{
"epoch": 0.0758974358974359,
"grad_norm": 2.0943953912186823,
"learning_rate": 9.946491229096141e-07,
"loss": 1.46,
"step": 37
},
{
"epoch": 0.07794871794871795,
"grad_norm": 1.8819330420666514,
"learning_rate": 9.941525958777235e-07,
"loss": 1.4445,
"step": 38
},
{
"epoch": 0.08,
"grad_norm": 1.857247298082436,
"learning_rate": 9.936341773606722e-07,
"loss": 1.4701,
"step": 39
},
{
"epoch": 0.08205128205128205,
"grad_norm": 1.7834716663964,
"learning_rate": 9.930938903249516e-07,
"loss": 1.4925,
"step": 40
},
{
"epoch": 0.0841025641025641,
"grad_norm": 1.77948843911021,
"learning_rate": 9.925317587058514e-07,
"loss": 1.4404,
"step": 41
},
{
"epoch": 0.08615384615384615,
"grad_norm": 1.80399799518289,
"learning_rate": 9.919478074064001e-07,
"loss": 1.3905,
"step": 42
},
{
"epoch": 0.0882051282051282,
"grad_norm": 1.8928509170240126,
"learning_rate": 9.913420622962604e-07,
"loss": 1.4511,
"step": 43
},
{
"epoch": 0.09025641025641026,
"grad_norm": 1.989776786423599,
"learning_rate": 9.907145502105846e-07,
"loss": 1.431,
"step": 44
},
{
"epoch": 0.09230769230769231,
"grad_norm": 1.8409975760997632,
"learning_rate": 9.900652989488253e-07,
"loss": 1.4704,
"step": 45
},
{
"epoch": 0.09435897435897436,
"grad_norm": 1.9311013876868204,
"learning_rate": 9.893943372735032e-07,
"loss": 1.4376,
"step": 46
},
{
"epoch": 0.09641025641025641,
"grad_norm": 1.965914168449665,
"learning_rate": 9.887016949089332e-07,
"loss": 1.4216,
"step": 47
},
{
"epoch": 0.09846153846153846,
"grad_norm": 1.868447251521439,
"learning_rate": 9.879874025399087e-07,
"loss": 1.4665,
"step": 48
},
{
"epoch": 0.10051282051282051,
"grad_norm": 1.9154090530688537,
"learning_rate": 9.872514918103405e-07,
"loss": 1.4637,
"step": 49
},
{
"epoch": 0.10256410256410256,
"grad_norm": 1.6049154522002187,
"learning_rate": 9.864939953218561e-07,
"loss": 1.4262,
"step": 50
},
{
"epoch": 0.10461538461538461,
"grad_norm": 1.6938863631437229,
"learning_rate": 9.85714946632355e-07,
"loss": 1.4541,
"step": 51
},
{
"epoch": 0.10666666666666667,
"grad_norm": 1.7257827008232656,
"learning_rate": 9.84914380254522e-07,
"loss": 1.4412,
"step": 52
},
{
"epoch": 0.10871794871794872,
"grad_norm": 1.664955905322989,
"learning_rate": 9.840923316542983e-07,
"loss": 1.379,
"step": 53
},
{
"epoch": 0.11076923076923077,
"grad_norm": 1.6147134785008361,
"learning_rate": 9.832488372493108e-07,
"loss": 1.4204,
"step": 54
},
{
"epoch": 0.11282051282051282,
"grad_norm": 1.636416478063699,
"learning_rate": 9.82383934407258e-07,
"loss": 1.4208,
"step": 55
},
{
"epoch": 0.11487179487179487,
"grad_norm": 1.606350625791673,
"learning_rate": 9.814976614442547e-07,
"loss": 1.4269,
"step": 56
},
{
"epoch": 0.11692307692307692,
"grad_norm": 1.5879209023967076,
"learning_rate": 9.805900576231357e-07,
"loss": 1.4145,
"step": 57
},
{
"epoch": 0.11897435897435897,
"grad_norm": 1.6388251717824514,
"learning_rate": 9.796611631517141e-07,
"loss": 1.398,
"step": 58
},
{
"epoch": 0.12102564102564102,
"grad_norm": 1.5770065594768676,
"learning_rate": 9.787110191810026e-07,
"loss": 1.4293,
"step": 59
},
{
"epoch": 0.12307692307692308,
"grad_norm": 1.6534814499374035,
"learning_rate": 9.77739667803389e-07,
"loss": 1.4118,
"step": 60
},
{
"epoch": 0.12512820512820513,
"grad_norm": 1.5180093555981888,
"learning_rate": 9.76747152050771e-07,
"loss": 1.4125,
"step": 61
},
{
"epoch": 0.12717948717948718,
"grad_norm": 1.5377464933820018,
"learning_rate": 9.75733515892652e-07,
"loss": 1.3973,
"step": 62
},
{
"epoch": 0.12923076923076923,
"grad_norm": 1.546752794133953,
"learning_rate": 9.746988042341907e-07,
"loss": 1.3887,
"step": 63
},
{
"epoch": 0.13128205128205128,
"grad_norm": 1.5508521340777879,
"learning_rate": 9.736430629142128e-07,
"loss": 1.4109,
"step": 64
},
{
"epoch": 0.13333333333333333,
"grad_norm": 1.5007776923133969,
"learning_rate": 9.725663387031816e-07,
"loss": 1.4729,
"step": 65
},
{
"epoch": 0.13538461538461538,
"grad_norm": 1.4673639929870512,
"learning_rate": 9.714686793011235e-07,
"loss": 1.3129,
"step": 66
},
{
"epoch": 0.13743589743589743,
"grad_norm": 1.474577715216591,
"learning_rate": 9.703501333355166e-07,
"loss": 1.3637,
"step": 67
},
{
"epoch": 0.13948717948717948,
"grad_norm": 1.3551101779554455,
"learning_rate": 9.692107503591358e-07,
"loss": 1.3751,
"step": 68
},
{
"epoch": 0.14153846153846153,
"grad_norm": 1.4084255265110892,
"learning_rate": 9.680505808478581e-07,
"loss": 1.3955,
"step": 69
},
{
"epoch": 0.14358974358974358,
"grad_norm": 1.5650402584913055,
"learning_rate": 9.668696761984254e-07,
"loss": 1.4009,
"step": 70
},
{
"epoch": 0.14564102564102563,
"grad_norm": 1.4833112999624978,
"learning_rate": 9.656680887261692e-07,
"loss": 1.3421,
"step": 71
},
{
"epoch": 0.1476923076923077,
"grad_norm": 1.5883238829964639,
"learning_rate": 9.644458716626911e-07,
"loss": 1.3866,
"step": 72
},
{
"epoch": 0.14974358974358976,
"grad_norm": 1.4394317678417627,
"learning_rate": 9.63203079153506e-07,
"loss": 1.4153,
"step": 73
},
{
"epoch": 0.1517948717948718,
"grad_norm": 1.5179476858030934,
"learning_rate": 9.619397662556433e-07,
"loss": 1.3906,
"step": 74
},
{
"epoch": 0.15384615384615385,
"grad_norm": 1.4148990540769752,
"learning_rate": 9.606559889352063e-07,
"loss": 1.3855,
"step": 75
},
{
"epoch": 0.1558974358974359,
"grad_norm": 1.4321199734694527,
"learning_rate": 9.593518040648952e-07,
"loss": 1.4001,
"step": 76
},
{
"epoch": 0.15794871794871795,
"grad_norm": 1.4329827399289827,
"learning_rate": 9.580272694214854e-07,
"loss": 1.3603,
"step": 77
},
{
"epoch": 0.16,
"grad_norm": 1.5010775682420985,
"learning_rate": 9.566824436832695e-07,
"loss": 1.3655,
"step": 78
},
{
"epoch": 0.16205128205128205,
"grad_norm": 1.3530276824132195,
"learning_rate": 9.553173864274566e-07,
"loss": 1.4273,
"step": 79
},
{
"epoch": 0.1641025641025641,
"grad_norm": 1.454039165880218,
"learning_rate": 9.539321581275342e-07,
"loss": 1.428,
"step": 80
},
{
"epoch": 0.16615384615384615,
"grad_norm": 1.4908113453423757,
"learning_rate": 9.525268201505878e-07,
"loss": 1.4529,
"step": 81
},
{
"epoch": 0.1682051282051282,
"grad_norm": 1.4981477147110476,
"learning_rate": 9.511014347545837e-07,
"loss": 1.3925,
"step": 82
},
{
"epoch": 0.17025641025641025,
"grad_norm": 1.5127117023542747,
"learning_rate": 9.496560650856096e-07,
"loss": 1.4043,
"step": 83
},
{
"epoch": 0.1723076923076923,
"grad_norm": 1.3345405352227973,
"learning_rate": 9.481907751750779e-07,
"loss": 1.3832,
"step": 84
},
{
"epoch": 0.17435897435897435,
"grad_norm": 1.5108834135083573,
"learning_rate": 9.467056299368887e-07,
"loss": 1.3508,
"step": 85
},
{
"epoch": 0.1764102564102564,
"grad_norm": 1.5258123857029053,
"learning_rate": 9.452006951645548e-07,
"loss": 1.3265,
"step": 86
},
{
"epoch": 0.17846153846153845,
"grad_norm": 1.4934293257822084,
"learning_rate": 9.436760375282857e-07,
"loss": 1.3619,
"step": 87
},
{
"epoch": 0.18051282051282053,
"grad_norm": 1.607227037073415,
"learning_rate": 9.421317245720352e-07,
"loss": 1.4034,
"step": 88
},
{
"epoch": 0.18256410256410258,
"grad_norm": 1.4141545560354007,
"learning_rate": 9.405678247105082e-07,
"loss": 1.3655,
"step": 89
},
{
"epoch": 0.18461538461538463,
"grad_norm": 1.3743360655972685,
"learning_rate": 9.38984407226131e-07,
"loss": 1.3442,
"step": 90
},
{
"epoch": 0.18666666666666668,
"grad_norm": 1.440216218296637,
"learning_rate": 9.373815422659805e-07,
"loss": 1.3413,
"step": 91
},
{
"epoch": 0.18871794871794872,
"grad_norm": 1.723193290271358,
"learning_rate": 9.357593008386784e-07,
"loss": 1.3816,
"step": 92
},
{
"epoch": 0.19076923076923077,
"grad_norm": 1.3262804575095386,
"learning_rate": 9.341177548112436e-07,
"loss": 1.3464,
"step": 93
},
{
"epoch": 0.19282051282051282,
"grad_norm": 1.5488725881566392,
"learning_rate": 9.324569769059096e-07,
"loss": 1.3809,
"step": 94
},
{
"epoch": 0.19487179487179487,
"grad_norm": 1.3796152701528939,
"learning_rate": 9.30777040696903e-07,
"loss": 1.3366,
"step": 95
},
{
"epoch": 0.19692307692307692,
"grad_norm": 1.5098823183414498,
"learning_rate": 9.29078020607183e-07,
"loss": 1.3543,
"step": 96
},
{
"epoch": 0.19897435897435897,
"grad_norm": 1.4190386613608355,
"learning_rate": 9.273599919051452e-07,
"loss": 1.3981,
"step": 97
},
{
"epoch": 0.20102564102564102,
"grad_norm": 1.340509343495511,
"learning_rate": 9.256230307012869e-07,
"loss": 1.356,
"step": 98
},
{
"epoch": 0.20307692307692307,
"grad_norm": 1.4810146382672011,
"learning_rate": 9.238672139448353e-07,
"loss": 1.3745,
"step": 99
},
{
"epoch": 0.20512820512820512,
"grad_norm": 1.428418635076634,
"learning_rate": 9.220926194203392e-07,
"loss": 1.406,
"step": 100
},
{
"epoch": 0.20717948717948717,
"grad_norm": 1.4107901013454047,
"learning_rate": 9.202993257442216e-07,
"loss": 1.3739,
"step": 101
},
{
"epoch": 0.20923076923076922,
"grad_norm": 1.306309001032096,
"learning_rate": 9.184874123612981e-07,
"loss": 1.329,
"step": 102
},
{
"epoch": 0.21128205128205127,
"grad_norm": 1.4893758607520267,
"learning_rate": 9.166569595412574e-07,
"loss": 1.327,
"step": 103
},
{
"epoch": 0.21333333333333335,
"grad_norm": 1.4494483023674931,
"learning_rate": 9.148080483751048e-07,
"loss": 1.3855,
"step": 104
},
{
"epoch": 0.2153846153846154,
"grad_norm": 1.4170674386122224,
"learning_rate": 9.129407607715696e-07,
"loss": 1.3565,
"step": 105
},
{
"epoch": 0.21743589743589745,
"grad_norm": 1.304459135205338,
"learning_rate": 9.110551794534775e-07,
"loss": 1.3398,
"step": 106
},
{
"epoch": 0.2194871794871795,
"grad_norm": 1.4294708281101414,
"learning_rate": 9.091513879540844e-07,
"loss": 1.4091,
"step": 107
},
{
"epoch": 0.22153846153846155,
"grad_norm": 1.2746277106037083,
"learning_rate": 9.072294706133774e-07,
"loss": 1.2911,
"step": 108
},
{
"epoch": 0.2235897435897436,
"grad_norm": 1.353520157073593,
"learning_rate": 9.052895125743369e-07,
"loss": 1.3424,
"step": 109
},
{
"epoch": 0.22564102564102564,
"grad_norm": 1.323794347591316,
"learning_rate": 9.033315997791659e-07,
"loss": 1.3317,
"step": 110
},
{
"epoch": 0.2276923076923077,
"grad_norm": 1.398614118697795,
"learning_rate": 9.013558189654817e-07,
"loss": 1.3961,
"step": 111
},
{
"epoch": 0.22974358974358974,
"grad_norm": 1.2766969874630751,
"learning_rate": 8.993622576624746e-07,
"loss": 1.3269,
"step": 112
},
{
"epoch": 0.2317948717948718,
"grad_norm": 1.495310177937772,
"learning_rate": 8.973510041870287e-07,
"loss": 1.4208,
"step": 113
},
{
"epoch": 0.23384615384615384,
"grad_norm": 1.3088655411190178,
"learning_rate": 8.953221476398105e-07,
"loss": 1.3953,
"step": 114
},
{
"epoch": 0.2358974358974359,
"grad_norm": 1.5052199539599196,
"learning_rate": 8.932757779013213e-07,
"loss": 1.4416,
"step": 115
},
{
"epoch": 0.23794871794871794,
"grad_norm": 1.3026306985567253,
"learning_rate": 8.912119856279149e-07,
"loss": 1.2805,
"step": 116
},
{
"epoch": 0.24,
"grad_norm": 1.488343491577546,
"learning_rate": 8.891308622477829e-07,
"loss": 1.373,
"step": 117
},
{
"epoch": 0.24205128205128204,
"grad_norm": 1.369401311033249,
"learning_rate": 8.870324999569024e-07,
"loss": 1.3611,
"step": 118
},
{
"epoch": 0.2441025641025641,
"grad_norm": 1.3002048421979404,
"learning_rate": 8.849169917149531e-07,
"loss": 1.3939,
"step": 119
},
{
"epoch": 0.24615384615384617,
"grad_norm": 1.426958695759786,
"learning_rate": 8.827844312411982e-07,
"loss": 1.4275,
"step": 120
},
{
"epoch": 0.24820512820512822,
"grad_norm": 1.3820975115802594,
"learning_rate": 8.806349130103332e-07,
"loss": 1.2887,
"step": 121
},
{
"epoch": 0.25025641025641027,
"grad_norm": 1.389435389906626,
"learning_rate": 8.784685322483003e-07,
"loss": 1.3588,
"step": 122
},
{
"epoch": 0.2523076923076923,
"grad_norm": 1.5275652251917355,
"learning_rate": 8.762853849280691e-07,
"loss": 1.2914,
"step": 123
},
{
"epoch": 0.25435897435897437,
"grad_norm": 1.4263989780538462,
"learning_rate": 8.740855677653867e-07,
"loss": 1.4078,
"step": 124
},
{
"epoch": 0.2564102564102564,
"grad_norm": 1.4808394570173404,
"learning_rate": 8.718691782144907e-07,
"loss": 1.3716,
"step": 125
},
{
"epoch": 0.25846153846153846,
"grad_norm": 1.380297970298931,
"learning_rate": 8.69636314463794e-07,
"loss": 1.3086,
"step": 126
},
{
"epoch": 0.2605128205128205,
"grad_norm": 1.42784023805761,
"learning_rate": 8.673870754315336e-07,
"loss": 1.4023,
"step": 127
},
{
"epoch": 0.26256410256410256,
"grad_norm": 1.5340569739550813,
"learning_rate": 8.651215607613891e-07,
"loss": 1.322,
"step": 128
},
{
"epoch": 0.26461538461538464,
"grad_norm": 1.3976404822571311,
"learning_rate": 8.628398708180679e-07,
"loss": 1.3275,
"step": 129
},
{
"epoch": 0.26666666666666666,
"grad_norm": 1.4130220590772273,
"learning_rate": 8.605421066828598e-07,
"loss": 1.344,
"step": 130
},
{
"epoch": 0.26871794871794874,
"grad_norm": 1.3647943645755969,
"learning_rate": 8.582283701491575e-07,
"loss": 1.3595,
"step": 131
},
{
"epoch": 0.27076923076923076,
"grad_norm": 1.462715888488961,
"learning_rate": 8.558987637179487e-07,
"loss": 1.338,
"step": 132
},
{
"epoch": 0.27282051282051284,
"grad_norm": 1.3983473187198934,
"learning_rate": 8.535533905932737e-07,
"loss": 1.3913,
"step": 133
},
{
"epoch": 0.27487179487179486,
"grad_norm": 1.3130917948802023,
"learning_rate": 8.51192354677655e-07,
"loss": 1.2714,
"step": 134
},
{
"epoch": 0.27692307692307694,
"grad_norm": 1.4479220825321475,
"learning_rate": 8.488157605674924e-07,
"loss": 1.3719,
"step": 135
},
{
"epoch": 0.27897435897435896,
"grad_norm": 1.4036608256384233,
"learning_rate": 8.464237135484309e-07,
"loss": 1.3593,
"step": 136
},
{
"epoch": 0.28102564102564104,
"grad_norm": 1.405752858435705,
"learning_rate": 8.440163195906958e-07,
"loss": 1.3171,
"step": 137
},
{
"epoch": 0.28307692307692306,
"grad_norm": 1.3478889818215098,
"learning_rate": 8.415936853443974e-07,
"loss": 1.3703,
"step": 138
},
{
"epoch": 0.28512820512820514,
"grad_norm": 1.3507769885527494,
"learning_rate": 8.391559181348081e-07,
"loss": 1.3835,
"step": 139
},
{
"epoch": 0.28717948717948716,
"grad_norm": 1.4358538509697099,
"learning_rate": 8.367031259576056e-07,
"loss": 1.3472,
"step": 140
},
{
"epoch": 0.28923076923076924,
"grad_norm": 1.4256463951812464,
"learning_rate": 8.342354174740902e-07,
"loss": 1.3536,
"step": 141
},
{
"epoch": 0.29128205128205126,
"grad_norm": 1.37330874853555,
"learning_rate": 8.317529020063703e-07,
"loss": 1.3144,
"step": 142
},
{
"epoch": 0.29333333333333333,
"grad_norm": 1.424304273714467,
"learning_rate": 8.292556895325194e-07,
"loss": 1.3858,
"step": 143
},
{
"epoch": 0.2953846153846154,
"grad_norm": 1.5014570464124226,
"learning_rate": 8.267438906817039e-07,
"loss": 1.4179,
"step": 144
},
{
"epoch": 0.29743589743589743,
"grad_norm": 1.3885797736642898,
"learning_rate": 8.242176167292826e-07,
"loss": 1.3554,
"step": 145
},
{
"epoch": 0.2994871794871795,
"grad_norm": 1.399968517661764,
"learning_rate": 8.216769795918762e-07,
"loss": 1.2941,
"step": 146
},
{
"epoch": 0.30153846153846153,
"grad_norm": 1.3351303735116444,
"learning_rate": 8.1912209182241e-07,
"loss": 1.3682,
"step": 147
},
{
"epoch": 0.3035897435897436,
"grad_norm": 1.4849614268873412,
"learning_rate": 8.165530666051275e-07,
"loss": 1.3761,
"step": 148
},
{
"epoch": 0.30564102564102563,
"grad_norm": 1.4111664153752073,
"learning_rate": 8.139700177505759e-07,
"loss": 1.3164,
"step": 149
},
{
"epoch": 0.3076923076923077,
"grad_norm": 1.3001144500599642,
"learning_rate": 8.113730596905648e-07,
"loss": 1.3093,
"step": 150
},
{
"epoch": 0.30974358974358973,
"grad_norm": 1.4002532454809997,
"learning_rate": 8.087623074730959e-07,
"loss": 1.3857,
"step": 151
},
{
"epoch": 0.3117948717948718,
"grad_norm": 1.4470191142442426,
"learning_rate": 8.061378767572673e-07,
"loss": 1.3335,
"step": 152
},
{
"epoch": 0.31384615384615383,
"grad_norm": 1.3514370453203643,
"learning_rate": 8.034998838081489e-07,
"loss": 1.3756,
"step": 153
},
{
"epoch": 0.3158974358974359,
"grad_norm": 1.4287393724078254,
"learning_rate": 8.008484454916316e-07,
"loss": 1.3153,
"step": 154
},
{
"epoch": 0.31794871794871793,
"grad_norm": 1.3651421834133906,
"learning_rate": 7.981836792692507e-07,
"loss": 1.2833,
"step": 155
},
{
"epoch": 0.32,
"grad_norm": 1.386223426300147,
"learning_rate": 7.955057031929819e-07,
"loss": 1.3377,
"step": 156
},
{
"epoch": 0.32205128205128203,
"grad_norm": 1.3442671594803883,
"learning_rate": 7.928146359000117e-07,
"loss": 1.4253,
"step": 157
},
{
"epoch": 0.3241025641025641,
"grad_norm": 1.4640568604532251,
"learning_rate": 7.901105966074806e-07,
"loss": 1.4161,
"step": 158
},
{
"epoch": 0.3261538461538461,
"grad_norm": 1.41021093543057,
"learning_rate": 7.873937051072035e-07,
"loss": 1.3809,
"step": 159
},
{
"epoch": 0.3282051282051282,
"grad_norm": 1.394834384818255,
"learning_rate": 7.846640817603607e-07,
"loss": 1.4037,
"step": 160
},
{
"epoch": 0.3302564102564103,
"grad_norm": 1.4823904331092446,
"learning_rate": 7.819218474921679e-07,
"loss": 1.335,
"step": 161
},
{
"epoch": 0.3323076923076923,
"grad_norm": 1.4008041043128283,
"learning_rate": 7.791671237865174e-07,
"loss": 1.3413,
"step": 162
},
{
"epoch": 0.3343589743589744,
"grad_norm": 1.3105770564662627,
"learning_rate": 7.764000326805966e-07,
"loss": 1.3521,
"step": 163
},
{
"epoch": 0.3364102564102564,
"grad_norm": 1.4619762152088143,
"learning_rate": 7.736206967594827e-07,
"loss": 1.3035,
"step": 164
},
{
"epoch": 0.3384615384615385,
"grad_norm": 1.2996575385311386,
"learning_rate": 7.708292391507105e-07,
"loss": 1.3164,
"step": 165
},
{
"epoch": 0.3405128205128205,
"grad_norm": 1.27071752376492,
"learning_rate": 7.680257835188186e-07,
"loss": 1.2964,
"step": 166
},
{
"epoch": 0.3425641025641026,
"grad_norm": 1.327855217456687,
"learning_rate": 7.652104540598712e-07,
"loss": 1.3476,
"step": 167
},
{
"epoch": 0.3446153846153846,
"grad_norm": 1.4536458066818985,
"learning_rate": 7.623833754959551e-07,
"loss": 1.3434,
"step": 168
},
{
"epoch": 0.3466666666666667,
"grad_norm": 1.4105614477006825,
"learning_rate": 7.595446730696553e-07,
"loss": 1.364,
"step": 169
},
{
"epoch": 0.3487179487179487,
"grad_norm": 1.5356583052898265,
"learning_rate": 7.56694472538506e-07,
"loss": 1.3487,
"step": 170
},
{
"epoch": 0.3507692307692308,
"grad_norm": 1.4487335955760117,
"learning_rate": 7.538329001694199e-07,
"loss": 1.2782,
"step": 171
},
{
"epoch": 0.3528205128205128,
"grad_norm": 1.3795988167861302,
"learning_rate": 7.509600827330942e-07,
"loss": 1.4282,
"step": 172
},
{
"epoch": 0.3548717948717949,
"grad_norm": 1.3636686216275424,
"learning_rate": 7.480761474983943e-07,
"loss": 1.2897,
"step": 173
},
{
"epoch": 0.3569230769230769,
"grad_norm": 1.4273922371235048,
"learning_rate": 7.451812222267157e-07,
"loss": 1.3154,
"step": 174
},
{
"epoch": 0.358974358974359,
"grad_norm": 1.378247984135367,
"learning_rate": 7.422754351663251e-07,
"loss": 1.2701,
"step": 175
},
{
"epoch": 0.36102564102564105,
"grad_norm": 1.4547712487270863,
"learning_rate": 7.39358915046677e-07,
"loss": 1.356,
"step": 176
},
{
"epoch": 0.3630769230769231,
"grad_norm": 1.2921318934822192,
"learning_rate": 7.364317910727127e-07,
"loss": 1.3087,
"step": 177
},
{
"epoch": 0.36512820512820515,
"grad_norm": 1.3528572849054787,
"learning_rate": 7.334941929191343e-07,
"loss": 1.3213,
"step": 178
},
{
"epoch": 0.3671794871794872,
"grad_norm": 1.356101334022072,
"learning_rate": 7.305462507246629e-07,
"loss": 1.3622,
"step": 179
},
{
"epoch": 0.36923076923076925,
"grad_norm": 1.385787615622585,
"learning_rate": 7.2758809508627e-07,
"loss": 1.2812,
"step": 180
},
{
"epoch": 0.3712820512820513,
"grad_norm": 1.382621750590725,
"learning_rate": 7.246198570533944e-07,
"loss": 1.3158,
"step": 181
},
{
"epoch": 0.37333333333333335,
"grad_norm": 1.3556980794870819,
"learning_rate": 7.216416681221353e-07,
"loss": 1.3015,
"step": 182
},
{
"epoch": 0.37538461538461537,
"grad_norm": 1.3154150058043996,
"learning_rate": 7.186536602294278e-07,
"loss": 1.2819,
"step": 183
},
{
"epoch": 0.37743589743589745,
"grad_norm": 1.41202917935767,
"learning_rate": 7.156559657471966e-07,
"loss": 1.3517,
"step": 184
},
{
"epoch": 0.37948717948717947,
"grad_norm": 1.3968980770043466,
"learning_rate": 7.126487174764935e-07,
"loss": 1.2971,
"step": 185
},
{
"epoch": 0.38153846153846155,
"grad_norm": 1.4283842268074054,
"learning_rate": 7.096320486416124e-07,
"loss": 1.3319,
"step": 186
},
{
"epoch": 0.38358974358974357,
"grad_norm": 1.4389331160861967,
"learning_rate": 7.06606092884189e-07,
"loss": 1.3313,
"step": 187
},
{
"epoch": 0.38564102564102565,
"grad_norm": 1.366798910593162,
"learning_rate": 7.035709842572792e-07,
"loss": 1.315,
"step": 188
},
{
"epoch": 0.38769230769230767,
"grad_norm": 1.4801735124712503,
"learning_rate": 7.005268572194207e-07,
"loss": 1.368,
"step": 189
},
{
"epoch": 0.38974358974358975,
"grad_norm": 1.3323443914353508,
"learning_rate": 6.974738466286765e-07,
"loss": 1.3025,
"step": 190
},
{
"epoch": 0.39179487179487177,
"grad_norm": 1.3039032352168842,
"learning_rate": 6.944120877366604e-07,
"loss": 1.2744,
"step": 191
},
{
"epoch": 0.39384615384615385,
"grad_norm": 1.4073653745829573,
"learning_rate": 6.913417161825449e-07,
"loss": 1.344,
"step": 192
},
{
"epoch": 0.3958974358974359,
"grad_norm": 1.2825924865288278,
"learning_rate": 6.882628679870531e-07,
"loss": 1.3075,
"step": 193
},
{
"epoch": 0.39794871794871794,
"grad_norm": 1.4518075398628796,
"learning_rate": 6.851756795464323e-07,
"loss": 1.3981,
"step": 194
},
{
"epoch": 0.4,
"grad_norm": 1.393725949206135,
"learning_rate": 6.820802876264111e-07,
"loss": 1.2986,
"step": 195
},
{
"epoch": 0.40205128205128204,
"grad_norm": 1.3851879908737978,
"learning_rate": 6.789768293561413e-07,
"loss": 1.3757,
"step": 196
},
{
"epoch": 0.4041025641025641,
"grad_norm": 1.387897673235893,
"learning_rate": 6.758654422221224e-07,
"loss": 1.2985,
"step": 197
},
{
"epoch": 0.40615384615384614,
"grad_norm": 1.4919947840109538,
"learning_rate": 6.727462640621112e-07,
"loss": 1.3517,
"step": 198
},
{
"epoch": 0.4082051282051282,
"grad_norm": 1.3341481537871696,
"learning_rate": 6.69619433059015e-07,
"loss": 1.3302,
"step": 199
},
{
"epoch": 0.41025641025641024,
"grad_norm": 1.345662539893686,
"learning_rate": 6.664850877347705e-07,
"loss": 1.3182,
"step": 200
},
{
"epoch": 0.4123076923076923,
"grad_norm": 1.376354584890257,
"learning_rate": 6.633433669442064e-07,
"loss": 1.2953,
"step": 201
},
{
"epoch": 0.41435897435897434,
"grad_norm": 1.44510644298783,
"learning_rate": 6.601944098688927e-07,
"loss": 1.3001,
"step": 202
},
{
"epoch": 0.4164102564102564,
"grad_norm": 1.4431030025015483,
"learning_rate": 6.570383560109745e-07,
"loss": 1.2941,
"step": 203
},
{
"epoch": 0.41846153846153844,
"grad_norm": 1.4708503974375458,
"learning_rate": 6.538753451869913e-07,
"loss": 1.4086,
"step": 204
},
{
"epoch": 0.4205128205128205,
"grad_norm": 1.421296157525184,
"learning_rate": 6.507055175216849e-07,
"loss": 1.2755,
"step": 205
},
{
"epoch": 0.42256410256410254,
"grad_norm": 1.3273597087706324,
"learning_rate": 6.475290134417891e-07,
"loss": 1.369,
"step": 206
},
{
"epoch": 0.4246153846153846,
"grad_norm": 1.458296531875952,
"learning_rate": 6.443459736698105e-07,
"loss": 1.3266,
"step": 207
},
{
"epoch": 0.4266666666666667,
"grad_norm": 1.3887196370993375,
"learning_rate": 6.41156539217794e-07,
"loss": 1.3293,
"step": 208
},
{
"epoch": 0.4287179487179487,
"grad_norm": 1.3414834568362113,
"learning_rate": 6.379608513810753e-07,
"loss": 1.3066,
"step": 209
},
{
"epoch": 0.4307692307692308,
"grad_norm": 1.441297553114322,
"learning_rate": 6.347590517320217e-07,
"loss": 1.3329,
"step": 210
},
{
"epoch": 0.4328205128205128,
"grad_norm": 1.3531042410782805,
"learning_rate": 6.315512821137606e-07,
"loss": 1.293,
"step": 211
},
{
"epoch": 0.4348717948717949,
"grad_norm": 1.3554046113834761,
"learning_rate": 6.28337684633895e-07,
"loss": 1.2414,
"step": 212
},
{
"epoch": 0.4369230769230769,
"grad_norm": 1.394677662879496,
"learning_rate": 6.251184016582088e-07,
"loss": 1.3264,
"step": 213
},
{
"epoch": 0.438974358974359,
"grad_norm": 1.4851633778642261,
"learning_rate": 6.218935758043586e-07,
"loss": 1.2634,
"step": 214
},
{
"epoch": 0.441025641025641,
"grad_norm": 1.3371557479948093,
"learning_rate": 6.186633499355575e-07,
"loss": 1.3876,
"step": 215
},
{
"epoch": 0.4430769230769231,
"grad_norm": 1.4887491463790388,
"learning_rate": 6.15427867154244e-07,
"loss": 1.3122,
"step": 216
},
{
"epoch": 0.4451282051282051,
"grad_norm": 1.3232196760718127,
"learning_rate": 6.121872707957441e-07,
"loss": 1.3441,
"step": 217
},
{
"epoch": 0.4471794871794872,
"grad_norm": 1.3766112511648216,
"learning_rate": 6.089417044219201e-07,
"loss": 1.3255,
"step": 218
},
{
"epoch": 0.4492307692307692,
"grad_norm": 1.3049112726080363,
"learning_rate": 6.056913118148121e-07,
"loss": 1.3397,
"step": 219
},
{
"epoch": 0.4512820512820513,
"grad_norm": 1.3939219423691345,
"learning_rate": 6.024362369702668e-07,
"loss": 1.2519,
"step": 220
},
{
"epoch": 0.4533333333333333,
"grad_norm": 1.371353907416093,
"learning_rate": 5.991766240915589e-07,
"loss": 1.301,
"step": 221
},
{
"epoch": 0.4553846153846154,
"grad_norm": 1.4850791746392926,
"learning_rate": 5.959126175830033e-07,
"loss": 1.2983,
"step": 222
},
{
"epoch": 0.4574358974358974,
"grad_norm": 1.4663453627095475,
"learning_rate": 5.926443620435571e-07,
"loss": 1.283,
"step": 223
},
{
"epoch": 0.4594871794871795,
"grad_norm": 1.4492201774552442,
"learning_rate": 5.893720022604142e-07,
"loss": 1.3509,
"step": 224
},
{
"epoch": 0.46153846153846156,
"grad_norm": 1.4069307451775082,
"learning_rate": 5.860956832025906e-07,
"loss": 1.3087,
"step": 225
},
{
"epoch": 0.4635897435897436,
"grad_norm": 1.3370341068000464,
"learning_rate": 5.828155500145024e-07,
"loss": 1.3227,
"step": 226
},
{
"epoch": 0.46564102564102566,
"grad_norm": 1.3297533267380588,
"learning_rate": 5.79531748009536e-07,
"loss": 1.3174,
"step": 227
},
{
"epoch": 0.4676923076923077,
"grad_norm": 1.347608878869153,
"learning_rate": 5.7624442266361e-07,
"loss": 1.2451,
"step": 228
},
{
"epoch": 0.46974358974358976,
"grad_norm": 1.2409369335474423,
"learning_rate": 5.729537196087308e-07,
"loss": 1.2842,
"step": 229
},
{
"epoch": 0.4717948717948718,
"grad_norm": 1.3300849973007849,
"learning_rate": 5.696597846265411e-07,
"loss": 1.3136,
"step": 230
},
{
"epoch": 0.47384615384615386,
"grad_norm": 1.4479979686773294,
"learning_rate": 5.663627636418609e-07,
"loss": 1.3757,
"step": 231
},
{
"epoch": 0.4758974358974359,
"grad_norm": 1.3087492331617634,
"learning_rate": 5.630628027162243e-07,
"loss": 1.3633,
"step": 232
},
{
"epoch": 0.47794871794871796,
"grad_norm": 1.4490486681330532,
"learning_rate": 5.597600480414068e-07,
"loss": 1.3271,
"step": 233
},
{
"epoch": 0.48,
"grad_norm": 1.5347632065237542,
"learning_rate": 5.564546459329509e-07,
"loss": 1.3038,
"step": 234
},
{
"epoch": 0.48205128205128206,
"grad_norm": 1.3875201636263441,
"learning_rate": 5.531467428236827e-07,
"loss": 1.3906,
"step": 235
},
{
"epoch": 0.4841025641025641,
"grad_norm": 1.3525087883277989,
"learning_rate": 5.498364852572255e-07,
"loss": 1.3648,
"step": 236
},
{
"epoch": 0.48615384615384616,
"grad_norm": 1.2792944836481481,
"learning_rate": 5.465240198815072e-07,
"loss": 1.2822,
"step": 237
},
{
"epoch": 0.4882051282051282,
"grad_norm": 1.4555679204072403,
"learning_rate": 5.432094934422648e-07,
"loss": 1.3249,
"step": 238
},
{
"epoch": 0.49025641025641026,
"grad_norm": 1.3529453067601664,
"learning_rate": 5.398930527765415e-07,
"loss": 1.3209,
"step": 239
},
{
"epoch": 0.49230769230769234,
"grad_norm": 1.3313720449010154,
"learning_rate": 5.365748448061837e-07,
"loss": 1.2981,
"step": 240
},
{
"epoch": 0.49435897435897436,
"grad_norm": 1.3879386825445084,
"learning_rate": 5.332550165313312e-07,
"loss": 1.3005,
"step": 241
},
{
"epoch": 0.49641025641025643,
"grad_norm": 1.3914024176149524,
"learning_rate": 5.299337150239041e-07,
"loss": 1.296,
"step": 242
},
{
"epoch": 0.49846153846153846,
"grad_norm": 1.4576866533836497,
"learning_rate": 5.266110874210892e-07,
"loss": 1.3351,
"step": 243
},
{
"epoch": 0.5005128205128205,
"grad_norm": 1.4191392954223687,
"learning_rate": 5.232872809188208e-07,
"loss": 1.3313,
"step": 244
},
{
"epoch": 0.5025641025641026,
"grad_norm": 1.2857007376482181,
"learning_rate": 5.199624427652588e-07,
"loss": 1.2928,
"step": 245
},
{
"epoch": 0.5046153846153846,
"grad_norm": 1.3182105285446684,
"learning_rate": 5.166367202542671e-07,
"loss": 1.3421,
"step": 246
},
{
"epoch": 0.5066666666666667,
"grad_norm": 1.293734923156538,
"learning_rate": 5.133102607188874e-07,
"loss": 1.3405,
"step": 247
},
{
"epoch": 0.5087179487179487,
"grad_norm": 1.4051818888405565,
"learning_rate": 5.099832115248123e-07,
"loss": 1.2858,
"step": 248
},
{
"epoch": 0.5107692307692308,
"grad_norm": 1.419436972903703,
"learning_rate": 5.066557200638569e-07,
"loss": 1.3539,
"step": 249
},
{
"epoch": 0.5128205128205128,
"grad_norm": 1.3763121975287578,
"learning_rate": 5.033279337474294e-07,
"loss": 1.3814,
"step": 250
},
{
"epoch": 0.5128205128205128,
"eval_uground_MCTS_chains_SFT_val_loss": 1.338526725769043,
"eval_uground_MCTS_chains_SFT_val_runtime": 142.2738,
"eval_uground_MCTS_chains_SFT_val_samples_per_second": 12.785,
"eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.603,
"step": 250
},
{
"epoch": 0.5148717948717949,
"grad_norm": 1.3803064200700599,
"learning_rate": 5e-07,
"loss": 1.3431,
"step": 251
},
{
"epoch": 0.5169230769230769,
"grad_norm": 1.3364019814551773,
"learning_rate": 4.966720662525707e-07,
"loss": 1.3339,
"step": 252
},
{
"epoch": 0.518974358974359,
"grad_norm": 1.3814304512811713,
"learning_rate": 4.933442799361431e-07,
"loss": 1.3885,
"step": 253
},
{
"epoch": 0.521025641025641,
"grad_norm": 1.3302704766710616,
"learning_rate": 4.900167884751877e-07,
"loss": 1.2784,
"step": 254
},
{
"epoch": 0.5230769230769231,
"grad_norm": 1.3532645859025179,
"learning_rate": 4.866897392811126e-07,
"loss": 1.4133,
"step": 255
},
{
"epoch": 0.5251282051282051,
"grad_norm": 1.3326049138231024,
"learning_rate": 4.833632797457331e-07,
"loss": 1.2788,
"step": 256
},
{
"epoch": 0.5271794871794871,
"grad_norm": 1.3680818424670418,
"learning_rate": 4.800375572347413e-07,
"loss": 1.3483,
"step": 257
},
{
"epoch": 0.5292307692307693,
"grad_norm": 1.3780541644452522,
"learning_rate": 4.767127190811793e-07,
"loss": 1.3152,
"step": 258
},
{
"epoch": 0.5312820512820513,
"grad_norm": 1.3069364604536544,
"learning_rate": 4.7338891257891076e-07,
"loss": 1.3299,
"step": 259
},
{
"epoch": 0.5333333333333333,
"grad_norm": 1.3707233739601012,
"learning_rate": 4.7006628497609604e-07,
"loss": 1.3201,
"step": 260
},
{
"epoch": 0.5353846153846153,
"grad_norm": 1.3491562859786448,
"learning_rate": 4.6674498346866887e-07,
"loss": 1.2785,
"step": 261
},
{
"epoch": 0.5374358974358975,
"grad_norm": 1.467464986000282,
"learning_rate": 4.634251551938161e-07,
"loss": 1.337,
"step": 262
},
{
"epoch": 0.5394871794871795,
"grad_norm": 1.29313468913082,
"learning_rate": 4.601069472234584e-07,
"loss": 1.324,
"step": 263
},
{
"epoch": 0.5415384615384615,
"grad_norm": 1.324791527915958,
"learning_rate": 4.5679050655773534e-07,
"loss": 1.316,
"step": 264
},
{
"epoch": 0.5435897435897435,
"grad_norm": 1.40484113279842,
"learning_rate": 4.5347598011849275e-07,
"loss": 1.2967,
"step": 265
},
{
"epoch": 0.5456410256410257,
"grad_norm": 1.3059231618524412,
"learning_rate": 4.501635147427745e-07,
"loss": 1.2795,
"step": 266
},
{
"epoch": 0.5476923076923077,
"grad_norm": 1.3379544072622815,
"learning_rate": 4.4685325717631734e-07,
"loss": 1.2621,
"step": 267
},
{
"epoch": 0.5497435897435897,
"grad_norm": 1.3860481263368158,
"learning_rate": 4.4354535406704907e-07,
"loss": 1.3012,
"step": 268
},
{
"epoch": 0.5517948717948717,
"grad_norm": 1.3489865311164444,
"learning_rate": 4.4023995195859313e-07,
"loss": 1.2748,
"step": 269
},
{
"epoch": 0.5538461538461539,
"grad_norm": 1.3313443764200086,
"learning_rate": 4.369371972837757e-07,
"loss": 1.3682,
"step": 270
},
{
"epoch": 0.5558974358974359,
"grad_norm": 1.4195434027790386,
"learning_rate": 4.33637236358139e-07,
"loss": 1.2826,
"step": 271
},
{
"epoch": 0.5579487179487179,
"grad_norm": 1.3431350195403668,
"learning_rate": 4.30340215373459e-07,
"loss": 1.3432,
"step": 272
},
{
"epoch": 0.56,
"grad_norm": 1.3960093820700656,
"learning_rate": 4.2704628039126914e-07,
"loss": 1.2941,
"step": 273
},
{
"epoch": 0.5620512820512821,
"grad_norm": 1.401017396814776,
"learning_rate": 4.2375557733639006e-07,
"loss": 1.319,
"step": 274
},
{
"epoch": 0.5641025641025641,
"grad_norm": 1.397892504319514,
"learning_rate": 4.20468251990464e-07,
"loss": 1.3374,
"step": 275
},
{
"epoch": 0.5661538461538461,
"grad_norm": 1.365941739199125,
"learning_rate": 4.1718444998549756e-07,
"loss": 1.344,
"step": 276
},
{
"epoch": 0.5682051282051283,
"grad_norm": 1.350043286129735,
"learning_rate": 4.1390431679740953e-07,
"loss": 1.2851,
"step": 277
},
{
"epoch": 0.5702564102564103,
"grad_norm": 1.4263357120734497,
"learning_rate": 4.106279977395858e-07,
"loss": 1.3298,
"step": 278
},
{
"epoch": 0.5723076923076923,
"grad_norm": 1.2818553970002176,
"learning_rate": 4.073556379564429e-07,
"loss": 1.2684,
"step": 279
},
{
"epoch": 0.5743589743589743,
"grad_norm": 1.413899213332057,
"learning_rate": 4.0408738241699685e-07,
"loss": 1.3092,
"step": 280
},
{
"epoch": 0.5764102564102564,
"grad_norm": 1.3497769672706679,
"learning_rate": 4.00823375908441e-07,
"loss": 1.329,
"step": 281
},
{
"epoch": 0.5784615384615385,
"grad_norm": 1.3254634061152786,
"learning_rate": 3.9756376302973325e-07,
"loss": 1.3076,
"step": 282
},
{
"epoch": 0.5805128205128205,
"grad_norm": 1.4049294607846992,
"learning_rate": 3.943086881851878e-07,
"loss": 1.2649,
"step": 283
},
{
"epoch": 0.5825641025641025,
"grad_norm": 1.5373330046399727,
"learning_rate": 3.9105829557807973e-07,
"loss": 1.3728,
"step": 284
},
{
"epoch": 0.5846153846153846,
"grad_norm": 1.4097914378402818,
"learning_rate": 3.87812729204256e-07,
"loss": 1.3186,
"step": 285
},
{
"epoch": 0.5866666666666667,
"grad_norm": 1.4677590739466415,
"learning_rate": 3.84572132845756e-07,
"loss": 1.2695,
"step": 286
},
{
"epoch": 0.5887179487179487,
"grad_norm": 1.3914941908309093,
"learning_rate": 3.8133665006444255e-07,
"loss": 1.2708,
"step": 287
},
{
"epoch": 0.5907692307692308,
"grad_norm": 1.3463645339331958,
"learning_rate": 3.781064241956414e-07,
"loss": 1.3028,
"step": 288
},
{
"epoch": 0.5928205128205128,
"grad_norm": 1.4020220379821526,
"learning_rate": 3.7488159834179135e-07,
"loss": 1.2784,
"step": 289
},
{
"epoch": 0.5948717948717949,
"grad_norm": 1.4540293204215256,
"learning_rate": 3.716623153661049e-07,
"loss": 1.3005,
"step": 290
},
{
"epoch": 0.5969230769230769,
"grad_norm": 1.3902452427671015,
"learning_rate": 3.6844871788623945e-07,
"loss": 1.2524,
"step": 291
},
{
"epoch": 0.598974358974359,
"grad_norm": 1.48338078362365,
"learning_rate": 3.652409482679783e-07,
"loss": 1.3222,
"step": 292
},
{
"epoch": 0.601025641025641,
"grad_norm": 1.2846473500863387,
"learning_rate": 3.6203914861892476e-07,
"loss": 1.3626,
"step": 293
},
{
"epoch": 0.6030769230769231,
"grad_norm": 1.471140280043153,
"learning_rate": 3.588434607822061e-07,
"loss": 1.3137,
"step": 294
},
{
"epoch": 0.6051282051282051,
"grad_norm": 1.4330668442336907,
"learning_rate": 3.5565402633018957e-07,
"loss": 1.2806,
"step": 295
},
{
"epoch": 0.6071794871794872,
"grad_norm": 1.3403409049501387,
"learning_rate": 3.5247098655821103e-07,
"loss": 1.3276,
"step": 296
},
{
"epoch": 0.6092307692307692,
"grad_norm": 1.3471334531902774,
"learning_rate": 3.4929448247831514e-07,
"loss": 1.3527,
"step": 297
},
{
"epoch": 0.6112820512820513,
"grad_norm": 1.441754768297771,
"learning_rate": 3.4612465481300867e-07,
"loss": 1.3509,
"step": 298
},
{
"epoch": 0.6133333333333333,
"grad_norm": 1.3109786154015102,
"learning_rate": 3.429616439890257e-07,
"loss": 1.3303,
"step": 299
},
{
"epoch": 0.6153846153846154,
"grad_norm": 1.3571971672387129,
"learning_rate": 3.398055901311073e-07,
"loss": 1.2926,
"step": 300
},
{
"epoch": 0.6174358974358974,
"grad_norm": 1.3873664792216218,
"learning_rate": 3.3665663305579344e-07,
"loss": 1.3244,
"step": 301
},
{
"epoch": 0.6194871794871795,
"grad_norm": 1.3799572812815109,
"learning_rate": 3.335149122652293e-07,
"loss": 1.284,
"step": 302
},
{
"epoch": 0.6215384615384615,
"grad_norm": 1.316197811127298,
"learning_rate": 3.303805669409848e-07,
"loss": 1.3153,
"step": 303
},
{
"epoch": 0.6235897435897436,
"grad_norm": 1.2600316458800467,
"learning_rate": 3.272537359378887e-07,
"loss": 1.3686,
"step": 304
},
{
"epoch": 0.6256410256410256,
"grad_norm": 1.3725839158894015,
"learning_rate": 3.2413455777787746e-07,
"loss": 1.2968,
"step": 305
},
{
"epoch": 0.6276923076923077,
"grad_norm": 1.294502428896565,
"learning_rate": 3.2102317064385876e-07,
"loss": 1.2874,
"step": 306
},
{
"epoch": 0.6297435897435898,
"grad_norm": 1.4104402124249922,
"learning_rate": 3.179197123735889e-07,
"loss": 1.2672,
"step": 307
},
{
"epoch": 0.6317948717948718,
"grad_norm": 1.3711533346685432,
"learning_rate": 3.148243204535677e-07,
"loss": 1.2661,
"step": 308
},
{
"epoch": 0.6338461538461538,
"grad_norm": 1.3385883768449498,
"learning_rate": 3.117371320129469e-07,
"loss": 1.3546,
"step": 309
},
{
"epoch": 0.6358974358974359,
"grad_norm": 1.3583569291948376,
"learning_rate": 3.086582838174551e-07,
"loss": 1.2698,
"step": 310
},
{
"epoch": 0.637948717948718,
"grad_norm": 1.2759125465275387,
"learning_rate": 3.055879122633397e-07,
"loss": 1.3022,
"step": 311
},
{
"epoch": 0.64,
"grad_norm": 1.4220971900274135,
"learning_rate": 3.025261533713235e-07,
"loss": 1.315,
"step": 312
},
{
"epoch": 0.642051282051282,
"grad_norm": 1.386745544730108,
"learning_rate": 2.994731427805792e-07,
"loss": 1.2634,
"step": 313
},
{
"epoch": 0.6441025641025641,
"grad_norm": 1.3092798515784028,
"learning_rate": 2.964290157427207e-07,
"loss": 1.2438,
"step": 314
},
{
"epoch": 0.6461538461538462,
"grad_norm": 1.4018848728602682,
"learning_rate": 2.9339390711581105e-07,
"loss": 1.394,
"step": 315
},
{
"epoch": 0.6482051282051282,
"grad_norm": 1.4469110144038708,
"learning_rate": 2.9036795135838764e-07,
"loss": 1.3446,
"step": 316
},
{
"epoch": 0.6502564102564102,
"grad_norm": 1.3545060659112242,
"learning_rate": 2.8735128252350674e-07,
"loss": 1.2794,
"step": 317
},
{
"epoch": 0.6523076923076923,
"grad_norm": 1.393409490719331,
"learning_rate": 2.843440342528035e-07,
"loss": 1.3257,
"step": 318
},
{
"epoch": 0.6543589743589744,
"grad_norm": 1.3673405096575244,
"learning_rate": 2.813463397705723e-07,
"loss": 1.3053,
"step": 319
},
{
"epoch": 0.6564102564102564,
"grad_norm": 1.2769338414370688,
"learning_rate": 2.783583318778646e-07,
"loss": 1.2706,
"step": 320
},
{
"epoch": 0.6584615384615384,
"grad_norm": 1.4095662966250955,
"learning_rate": 2.753801429466056e-07,
"loss": 1.3405,
"step": 321
},
{
"epoch": 0.6605128205128206,
"grad_norm": 1.271906555167854,
"learning_rate": 2.7241190491372987e-07,
"loss": 1.2279,
"step": 322
},
{
"epoch": 0.6625641025641026,
"grad_norm": 1.4207452998511736,
"learning_rate": 2.6945374927533697e-07,
"loss": 1.3218,
"step": 323
},
{
"epoch": 0.6646153846153846,
"grad_norm": 1.4323142733077865,
"learning_rate": 2.665058070808654e-07,
"loss": 1.4065,
"step": 324
},
{
"epoch": 0.6666666666666666,
"grad_norm": 1.265108069283216,
"learning_rate": 2.635682089272875e-07,
"loss": 1.2986,
"step": 325
},
{
"epoch": 0.6687179487179488,
"grad_norm": 1.4383291062967463,
"learning_rate": 2.6064108495332293e-07,
"loss": 1.3276,
"step": 326
},
{
"epoch": 0.6707692307692308,
"grad_norm": 1.3012684857872605,
"learning_rate": 2.5772456483367497e-07,
"loss": 1.2725,
"step": 327
},
{
"epoch": 0.6728205128205128,
"grad_norm": 1.4239883240238744,
"learning_rate": 2.5481877777328424e-07,
"loss": 1.3433,
"step": 328
},
{
"epoch": 0.6748717948717948,
"grad_norm": 1.3329136724779032,
"learning_rate": 2.5192385250160586e-07,
"loss": 1.2651,
"step": 329
},
{
"epoch": 0.676923076923077,
"grad_norm": 1.3523109345462954,
"learning_rate": 2.4903991726690583e-07,
"loss": 1.2988,
"step": 330
},
{
"epoch": 0.678974358974359,
"grad_norm": 1.311204740811716,
"learning_rate": 2.461670998305801e-07,
"loss": 1.2068,
"step": 331
},
{
"epoch": 0.681025641025641,
"grad_norm": 1.246738747824622,
"learning_rate": 2.4330552746149404e-07,
"loss": 1.2955,
"step": 332
},
{
"epoch": 0.683076923076923,
"grad_norm": 1.3933636676146037,
"learning_rate": 2.4045532693034474e-07,
"loss": 1.3791,
"step": 333
},
{
"epoch": 0.6851282051282052,
"grad_norm": 1.4374856626540078,
"learning_rate": 2.3761662450404492e-07,
"loss": 1.35,
"step": 334
},
{
"epoch": 0.6871794871794872,
"grad_norm": 1.3638334560630514,
"learning_rate": 2.347895459401288e-07,
"loss": 1.2993,
"step": 335
},
{
"epoch": 0.6892307692307692,
"grad_norm": 1.3485827756964341,
"learning_rate": 2.319742164811813e-07,
"loss": 1.3159,
"step": 336
},
{
"epoch": 0.6912820512820513,
"grad_norm": 1.418888206942911,
"learning_rate": 2.2917076084928948e-07,
"loss": 1.3593,
"step": 337
},
{
"epoch": 0.6933333333333334,
"grad_norm": 1.4828726277064257,
"learning_rate": 2.2637930324051747e-07,
"loss": 1.3679,
"step": 338
},
{
"epoch": 0.6953846153846154,
"grad_norm": 1.413301068518357,
"learning_rate": 2.2359996731940345e-07,
"loss": 1.27,
"step": 339
},
{
"epoch": 0.6974358974358974,
"grad_norm": 1.3208309322946137,
"learning_rate": 2.2083287621348256e-07,
"loss": 1.2937,
"step": 340
},
{
"epoch": 0.6994871794871795,
"grad_norm": 1.4368838454397468,
"learning_rate": 2.180781525078319e-07,
"loss": 1.2766,
"step": 341
},
{
"epoch": 0.7015384615384616,
"grad_norm": 1.4246678530032884,
"learning_rate": 2.1533591823963926e-07,
"loss": 1.2996,
"step": 342
},
{
"epoch": 0.7035897435897436,
"grad_norm": 1.3428136313711472,
"learning_rate": 2.1260629489279657e-07,
"loss": 1.3312,
"step": 343
},
{
"epoch": 0.7056410256410256,
"grad_norm": 1.3574535316266307,
"learning_rate": 2.0988940339251937e-07,
"loss": 1.3234,
"step": 344
},
{
"epoch": 0.7076923076923077,
"grad_norm": 1.2707949058163033,
"learning_rate": 2.0718536409998833e-07,
"loss": 1.2958,
"step": 345
},
{
"epoch": 0.7097435897435898,
"grad_norm": 1.4822667590568277,
"learning_rate": 2.0449429680701797e-07,
"loss": 1.2867,
"step": 346
},
{
"epoch": 0.7117948717948718,
"grad_norm": 1.2917213676654393,
"learning_rate": 2.0181632073074923e-07,
"loss": 1.3462,
"step": 347
},
{
"epoch": 0.7138461538461538,
"grad_norm": 1.4001267259726107,
"learning_rate": 1.991515545083684e-07,
"loss": 1.2215,
"step": 348
},
{
"epoch": 0.7158974358974359,
"grad_norm": 1.3397954504556553,
"learning_rate": 1.9650011619185126e-07,
"loss": 1.2748,
"step": 349
},
{
"epoch": 0.717948717948718,
"grad_norm": 1.4376449099130564,
"learning_rate": 1.938621232427327e-07,
"loss": 1.3395,
"step": 350
},
{
"epoch": 0.72,
"grad_norm": 1.3659796825711872,
"learning_rate": 1.9123769252690407e-07,
"loss": 1.342,
"step": 351
},
{
"epoch": 0.7220512820512821,
"grad_norm": 1.2551401015316006,
"learning_rate": 1.8862694030943528e-07,
"loss": 1.2282,
"step": 352
},
{
"epoch": 0.7241025641025641,
"grad_norm": 1.4075072417448022,
"learning_rate": 1.8602998224942406e-07,
"loss": 1.2872,
"step": 353
},
{
"epoch": 0.7261538461538461,
"grad_norm": 1.2962040010095723,
"learning_rate": 1.834469333948725e-07,
"loss": 1.3481,
"step": 354
},
{
"epoch": 0.7282051282051282,
"grad_norm": 1.299136753253947,
"learning_rate": 1.808779081775901e-07,
"loss": 1.2932,
"step": 355
},
{
"epoch": 0.7302564102564103,
"grad_norm": 1.4000758162190168,
"learning_rate": 1.7832302040812392e-07,
"loss": 1.3154,
"step": 356
},
{
"epoch": 0.7323076923076923,
"grad_norm": 1.252044581176086,
"learning_rate": 1.757823832707175e-07,
"loss": 1.338,
"step": 357
},
{
"epoch": 0.7343589743589743,
"grad_norm": 1.3740222857140072,
"learning_rate": 1.7325610931829616e-07,
"loss": 1.2449,
"step": 358
},
{
"epoch": 0.7364102564102564,
"grad_norm": 1.2947442493826966,
"learning_rate": 1.7074431046748074e-07,
"loss": 1.3193,
"step": 359
},
{
"epoch": 0.7384615384615385,
"grad_norm": 1.357340685900848,
"learning_rate": 1.682470979936298e-07,
"loss": 1.336,
"step": 360
},
{
"epoch": 0.7405128205128205,
"grad_norm": 1.3548253079749504,
"learning_rate": 1.6576458252590986e-07,
"loss": 1.2955,
"step": 361
},
{
"epoch": 0.7425641025641025,
"grad_norm": 1.3012853046416282,
"learning_rate": 1.6329687404239445e-07,
"loss": 1.3528,
"step": 362
},
{
"epoch": 0.7446153846153846,
"grad_norm": 1.3726340184170516,
"learning_rate": 1.6084408186519194e-07,
"loss": 1.2899,
"step": 363
},
{
"epoch": 0.7466666666666667,
"grad_norm": 1.2475355635801402,
"learning_rate": 1.584063146556025e-07,
"loss": 1.3549,
"step": 364
},
{
"epoch": 0.7487179487179487,
"grad_norm": 1.375734131748055,
"learning_rate": 1.5598368040930427e-07,
"loss": 1.3121,
"step": 365
},
{
"epoch": 0.7507692307692307,
"grad_norm": 1.410388031615801,
"learning_rate": 1.5357628645156918e-07,
"loss": 1.2698,
"step": 366
},
{
"epoch": 0.7528205128205128,
"grad_norm": 1.3473981945869655,
"learning_rate": 1.5118423943250768e-07,
"loss": 1.2902,
"step": 367
},
{
"epoch": 0.7548717948717949,
"grad_norm": 1.4495547654976086,
"learning_rate": 1.4880764532234514e-07,
"loss": 1.2196,
"step": 368
},
{
"epoch": 0.7569230769230769,
"grad_norm": 1.357106166673668,
"learning_rate": 1.4644660940672627e-07,
"loss": 1.2519,
"step": 369
},
{
"epoch": 0.7589743589743589,
"grad_norm": 1.2548591046322328,
"learning_rate": 1.4410123628205134e-07,
"loss": 1.2896,
"step": 370
},
{
"epoch": 0.7610256410256411,
"grad_norm": 1.2809196807216436,
"learning_rate": 1.417716298508424e-07,
"loss": 1.3136,
"step": 371
},
{
"epoch": 0.7630769230769231,
"grad_norm": 1.3603566060815664,
"learning_rate": 1.3945789331714013e-07,
"loss": 1.3298,
"step": 372
},
{
"epoch": 0.7651282051282051,
"grad_norm": 1.2416081192958257,
"learning_rate": 1.3716012918193205e-07,
"loss": 1.2653,
"step": 373
},
{
"epoch": 0.7671794871794871,
"grad_norm": 1.2397913153351197,
"learning_rate": 1.3487843923861098e-07,
"loss": 1.3004,
"step": 374
},
{
"epoch": 0.7692307692307693,
"grad_norm": 1.3957552308537007,
"learning_rate": 1.3261292456846646e-07,
"loss": 1.3135,
"step": 375
},
{
"epoch": 0.7712820512820513,
"grad_norm": 1.3389437330568568,
"learning_rate": 1.30363685536206e-07,
"loss": 1.2816,
"step": 376
},
{
"epoch": 0.7733333333333333,
"grad_norm": 1.4171003129680448,
"learning_rate": 1.2813082178550928e-07,
"loss": 1.3315,
"step": 377
},
{
"epoch": 0.7753846153846153,
"grad_norm": 1.2968744143026596,
"learning_rate": 1.2591443223461333e-07,
"loss": 1.3179,
"step": 378
},
{
"epoch": 0.7774358974358975,
"grad_norm": 1.3860589730680748,
"learning_rate": 1.2371461507193075e-07,
"loss": 1.309,
"step": 379
},
{
"epoch": 0.7794871794871795,
"grad_norm": 1.4261801869961688,
"learning_rate": 1.215314677516997e-07,
"loss": 1.2594,
"step": 380
},
{
"epoch": 0.7815384615384615,
"grad_norm": 1.3803317479367614,
"learning_rate": 1.1936508698966663e-07,
"loss": 1.327,
"step": 381
},
{
"epoch": 0.7835897435897435,
"grad_norm": 1.3462031002972898,
"learning_rate": 1.1721556875880167e-07,
"loss": 1.3252,
"step": 382
},
{
"epoch": 0.7856410256410257,
"grad_norm": 1.3804636579208875,
"learning_rate": 1.150830082850468e-07,
"loss": 1.2994,
"step": 383
},
{
"epoch": 0.7876923076923077,
"grad_norm": 1.3874044977427191,
"learning_rate": 1.1296750004309757e-07,
"loss": 1.342,
"step": 384
},
{
"epoch": 0.7897435897435897,
"grad_norm": 1.2538944806181445,
"learning_rate": 1.1086913775221706e-07,
"loss": 1.2532,
"step": 385
},
{
"epoch": 0.7917948717948718,
"grad_norm": 1.388891555677492,
"learning_rate": 1.0878801437208496e-07,
"loss": 1.338,
"step": 386
},
{
"epoch": 0.7938461538461539,
"grad_norm": 1.4920405662743708,
"learning_rate": 1.0672422209867876e-07,
"loss": 1.284,
"step": 387
},
{
"epoch": 0.7958974358974359,
"grad_norm": 1.375211936323982,
"learning_rate": 1.0467785236018944e-07,
"loss": 1.3315,
"step": 388
},
{
"epoch": 0.7979487179487179,
"grad_norm": 1.3363209851874036,
"learning_rate": 1.026489958129712e-07,
"loss": 1.2874,
"step": 389
},
{
"epoch": 0.8,
"grad_norm": 1.291621044229256,
"learning_rate": 1.0063774233752542e-07,
"loss": 1.3416,
"step": 390
},
{
"epoch": 0.8020512820512821,
"grad_norm": 1.3581366805811677,
"learning_rate": 9.864418103451827e-08,
"loss": 1.2981,
"step": 391
},
{
"epoch": 0.8041025641025641,
"grad_norm": 1.3003239187798818,
"learning_rate": 9.666840022083422e-08,
"loss": 1.3101,
"step": 392
},
{
"epoch": 0.8061538461538461,
"grad_norm": 1.3246739403857846,
"learning_rate": 9.471048742566312e-08,
"loss": 1.3382,
"step": 393
},
{
"epoch": 0.8082051282051282,
"grad_norm": 1.3582591260843835,
"learning_rate": 9.27705293866226e-08,
"loss": 1.3002,
"step": 394
},
{
"epoch": 0.8102564102564103,
"grad_norm": 1.2313737865301981,
"learning_rate": 9.084861204591549e-08,
"loss": 1.2978,
"step": 395
},
{
"epoch": 0.8123076923076923,
"grad_norm": 1.3353913098594299,
"learning_rate": 8.894482054652247e-08,
"loss": 1.2584,
"step": 396
},
{
"epoch": 0.8143589743589743,
"grad_norm": 1.3242269914914493,
"learning_rate": 8.705923922843039e-08,
"loss": 1.3307,
"step": 397
},
{
"epoch": 0.8164102564102564,
"grad_norm": 1.2697101868447127,
"learning_rate": 8.519195162489528e-08,
"loss": 1.2834,
"step": 398
},
{
"epoch": 0.8184615384615385,
"grad_norm": 1.4632526641918853,
"learning_rate": 8.334304045874246e-08,
"loss": 1.3194,
"step": 399
},
{
"epoch": 0.8205128205128205,
"grad_norm": 1.3907694450406674,
"learning_rate": 8.151258763870177e-08,
"loss": 1.306,
"step": 400
},
{
"epoch": 0.8225641025641026,
"grad_norm": 1.3741854129969415,
"learning_rate": 7.970067425577847e-08,
"loss": 1.3207,
"step": 401
},
{
"epoch": 0.8246153846153846,
"grad_norm": 1.2990539782457562,
"learning_rate": 7.790738057966079e-08,
"loss": 1.311,
"step": 402
},
{
"epoch": 0.8266666666666667,
"grad_norm": 1.3139752156003466,
"learning_rate": 7.613278605516454e-08,
"loss": 1.2679,
"step": 403
},
{
"epoch": 0.8287179487179487,
"grad_norm": 1.4261493929651812,
"learning_rate": 7.437696929871312e-08,
"loss": 1.4016,
"step": 404
},
{
"epoch": 0.8307692307692308,
"grad_norm": 1.4327200805455274,
"learning_rate": 7.264000809485482e-08,
"loss": 1.2647,
"step": 405
},
{
"epoch": 0.8328205128205128,
"grad_norm": 1.3748258593974458,
"learning_rate": 7.092197939281696e-08,
"loss": 1.3448,
"step": 406
},
{
"epoch": 0.8348717948717949,
"grad_norm": 1.335787936320607,
"learning_rate": 6.92229593030969e-08,
"loss": 1.2803,
"step": 407
},
{
"epoch": 0.8369230769230769,
"grad_norm": 1.3764176859888628,
"learning_rate": 6.754302309409033e-08,
"loss": 1.3138,
"step": 408
},
{
"epoch": 0.838974358974359,
"grad_norm": 1.3300596184326687,
"learning_rate": 6.588224518875646e-08,
"loss": 1.2705,
"step": 409
},
{
"epoch": 0.841025641025641,
"grad_norm": 1.394358508134729,
"learning_rate": 6.424069916132163e-08,
"loss": 1.3222,
"step": 410
},
{
"epoch": 0.8430769230769231,
"grad_norm": 1.3109916047636498,
"learning_rate": 6.261845773401937e-08,
"loss": 1.2643,
"step": 411
},
{
"epoch": 0.8451282051282051,
"grad_norm": 1.4202733175264604,
"learning_rate": 6.101559277386903e-08,
"loss": 1.3386,
"step": 412
},
{
"epoch": 0.8471794871794872,
"grad_norm": 1.2483773635147983,
"learning_rate": 5.943217528949168e-08,
"loss": 1.2888,
"step": 413
},
{
"epoch": 0.8492307692307692,
"grad_norm": 1.2779111173888642,
"learning_rate": 5.786827542796491e-08,
"loss": 1.314,
"step": 414
},
{
"epoch": 0.8512820512820513,
"grad_norm": 1.3238564515375497,
"learning_rate": 5.632396247171428e-08,
"loss": 1.2913,
"step": 415
},
{
"epoch": 0.8533333333333334,
"grad_norm": 1.4410152622622796,
"learning_rate": 5.47993048354452e-08,
"loss": 1.3451,
"step": 416
},
{
"epoch": 0.8553846153846154,
"grad_norm": 1.3897594499448358,
"learning_rate": 5.3294370063111213e-08,
"loss": 1.2569,
"step": 417
},
{
"epoch": 0.8574358974358974,
"grad_norm": 1.3902552303122406,
"learning_rate": 5.1809224824922174e-08,
"loss": 1.2562,
"step": 418
},
{
"epoch": 0.8594871794871795,
"grad_norm": 1.3609774270312844,
"learning_rate": 5.0343934914390426e-08,
"loss": 1.3177,
"step": 419
},
{
"epoch": 0.8615384615384616,
"grad_norm": 1.24678063079174,
"learning_rate": 4.8898565245416246e-08,
"loss": 1.2621,
"step": 420
},
{
"epoch": 0.8635897435897436,
"grad_norm": 1.2709715813867861,
"learning_rate": 4.747317984941213e-08,
"loss": 1.2854,
"step": 421
},
{
"epoch": 0.8656410256410256,
"grad_norm": 1.3875358765975183,
"learning_rate": 4.606784187246587e-08,
"loss": 1.2577,
"step": 422
},
{
"epoch": 0.8676923076923077,
"grad_norm": 1.3634193741456224,
"learning_rate": 4.468261357254338e-08,
"loss": 1.3436,
"step": 423
},
{
"epoch": 0.8697435897435898,
"grad_norm": 1.3526734393532784,
"learning_rate": 4.331755631673056e-08,
"loss": 1.2838,
"step": 424
},
{
"epoch": 0.8717948717948718,
"grad_norm": 1.3628760969364189,
"learning_rate": 4.197273057851464e-08,
"loss": 1.3112,
"step": 425
},
{
"epoch": 0.8738461538461538,
"grad_norm": 1.3340141060832507,
"learning_rate": 4.0648195935104767e-08,
"loss": 1.303,
"step": 426
},
{
"epoch": 0.8758974358974358,
"grad_norm": 1.3627820828373822,
"learning_rate": 3.934401106479351e-08,
"loss": 1.3302,
"step": 427
},
{
"epoch": 0.877948717948718,
"grad_norm": 1.2918615457638776,
"learning_rate": 3.806023374435663e-08,
"loss": 1.2913,
"step": 428
},
{
"epoch": 0.88,
"grad_norm": 1.3921884369956634,
"learning_rate": 3.6796920846493714e-08,
"loss": 1.3353,
"step": 429
},
{
"epoch": 0.882051282051282,
"grad_norm": 1.442462815929615,
"learning_rate": 3.555412833730881e-08,
"loss": 1.3185,
"step": 430
},
{
"epoch": 0.884102564102564,
"grad_norm": 1.4083279440312293,
"learning_rate": 3.4331911273830784e-08,
"loss": 1.3429,
"step": 431
},
{
"epoch": 0.8861538461538462,
"grad_norm": 1.396143913243508,
"learning_rate": 3.313032380157454e-08,
"loss": 1.3309,
"step": 432
},
{
"epoch": 0.8882051282051282,
"grad_norm": 1.311150147580031,
"learning_rate": 3.1949419152142e-08,
"loss": 1.2913,
"step": 433
},
{
"epoch": 0.8902564102564102,
"grad_norm": 1.3752884331352524,
"learning_rate": 3.078924964086416e-08,
"loss": 1.2808,
"step": 434
},
{
"epoch": 0.8923076923076924,
"grad_norm": 1.31858969839846,
"learning_rate": 2.9649866664483382e-08,
"loss": 1.26,
"step": 435
},
{
"epoch": 0.8943589743589744,
"grad_norm": 1.2935400196335294,
"learning_rate": 2.8531320698876428e-08,
"loss": 1.256,
"step": 436
},
{
"epoch": 0.8964102564102564,
"grad_norm": 1.3508280125945176,
"learning_rate": 2.7433661296818232e-08,
"loss": 1.291,
"step": 437
},
{
"epoch": 0.8984615384615384,
"grad_norm": 1.2947996736751957,
"learning_rate": 2.6356937085786956e-08,
"loss": 1.3182,
"step": 438
},
{
"epoch": 0.9005128205128206,
"grad_norm": 1.3268339625220218,
"learning_rate": 2.530119576580936e-08,
"loss": 1.3027,
"step": 439
},
{
"epoch": 0.9025641025641026,
"grad_norm": 1.4328144299183967,
"learning_rate": 2.426648410734794e-08,
"loss": 1.345,
"step": 440
},
{
"epoch": 0.9046153846153846,
"grad_norm": 1.3138230356748517,
"learning_rate": 2.3252847949228826e-08,
"loss": 1.2649,
"step": 441
},
{
"epoch": 0.9066666666666666,
"grad_norm": 1.2807840482032402,
"learning_rate": 2.2260332196610997e-08,
"loss": 1.2554,
"step": 442
},
{
"epoch": 0.9087179487179488,
"grad_norm": 1.4043522223341176,
"learning_rate": 2.128898081899727e-08,
"loss": 1.3474,
"step": 443
},
{
"epoch": 0.9107692307692308,
"grad_norm": 1.347315403936581,
"learning_rate": 2.03388368482858e-08,
"loss": 1.2347,
"step": 444
},
{
"epoch": 0.9128205128205128,
"grad_norm": 1.3662668916302128,
"learning_rate": 1.940994237686433e-08,
"loss": 1.3457,
"step": 445
},
{
"epoch": 0.9148717948717948,
"grad_norm": 1.4158295255358022,
"learning_rate": 1.8502338555745124e-08,
"loss": 1.3326,
"step": 446
},
{
"epoch": 0.916923076923077,
"grad_norm": 1.2780375459475717,
"learning_rate": 1.7616065592742034e-08,
"loss": 1.2814,
"step": 447
},
{
"epoch": 0.918974358974359,
"grad_norm": 1.2749072583260956,
"learning_rate": 1.6751162750689164e-08,
"loss": 1.3122,
"step": 448
},
{
"epoch": 0.921025641025641,
"grad_norm": 1.3367919003159088,
"learning_rate": 1.590766834570173e-08,
"loss": 1.257,
"step": 449
},
{
"epoch": 0.9230769230769231,
"grad_norm": 1.3152042540986686,
"learning_rate": 1.508561974547812e-08,
"loss": 1.3096,
"step": 450
},
{
"epoch": 0.9251282051282051,
"grad_norm": 1.3270136311007528,
"learning_rate": 1.4285053367645073e-08,
"loss": 1.2955,
"step": 451
},
{
"epoch": 0.9271794871794872,
"grad_norm": 1.265439090150174,
"learning_rate": 1.3506004678143834e-08,
"loss": 1.3265,
"step": 452
},
{
"epoch": 0.9292307692307692,
"grad_norm": 1.316351798054078,
"learning_rate": 1.2748508189659446e-08,
"loss": 1.2659,
"step": 453
},
{
"epoch": 0.9312820512820513,
"grad_norm": 1.36770340106091,
"learning_rate": 1.2012597460091201e-08,
"loss": 1.2548,
"step": 454
},
{
"epoch": 0.9333333333333333,
"grad_norm": 1.3009822941793905,
"learning_rate": 1.1298305091066662e-08,
"loss": 1.3421,
"step": 455
},
{
"epoch": 0.9353846153846154,
"grad_norm": 1.3976626704836301,
"learning_rate": 1.0605662726496877e-08,
"loss": 1.3743,
"step": 456
},
{
"epoch": 0.9374358974358974,
"grad_norm": 1.3323344873403382,
"learning_rate": 9.93470105117461e-09,
"loss": 1.3038,
"step": 457
},
{
"epoch": 0.9394871794871795,
"grad_norm": 1.33662126434898,
"learning_rate": 9.285449789415145e-09,
"loss": 1.3841,
"step": 458
},
{
"epoch": 0.9415384615384615,
"grad_norm": 1.2905917845938792,
"learning_rate": 8.657937703739515e-09,
"loss": 1.4017,
"step": 459
},
{
"epoch": 0.9435897435897436,
"grad_norm": 1.2878409648663358,
"learning_rate": 8.052192593599905e-09,
"loss": 1.3052,
"step": 460
},
{
"epoch": 0.9456410256410256,
"grad_norm": 1.359774229106595,
"learning_rate": 7.46824129414847e-09,
"loss": 1.2997,
"step": 461
},
{
"epoch": 0.9476923076923077,
"grad_norm": 1.3536433009615407,
"learning_rate": 6.9061096750483435e-09,
"loss": 1.2946,
"step": 462
},
{
"epoch": 0.9497435897435897,
"grad_norm": 1.2840551656174324,
"learning_rate": 6.365822639327723e-09,
"loss": 1.3496,
"step": 463
},
{
"epoch": 0.9517948717948718,
"grad_norm": 1.3775049985724006,
"learning_rate": 5.8474041222764114e-09,
"loss": 1.3167,
"step": 464
},
{
"epoch": 0.9538461538461539,
"grad_norm": 1.395262346657903,
"learning_rate": 5.35087709038573e-09,
"loss": 1.2255,
"step": 465
},
{
"epoch": 0.9558974358974359,
"grad_norm": 1.3508361630725259,
"learning_rate": 4.8762635403308275e-09,
"loss": 1.2973,
"step": 466
},
{
"epoch": 0.9579487179487179,
"grad_norm": 1.3826942707464611,
"learning_rate": 4.423584497996457e-09,
"loss": 1.2715,
"step": 467
},
{
"epoch": 0.96,
"grad_norm": 1.4244692630586457,
"learning_rate": 3.9928600175451185e-09,
"loss": 1.3069,
"step": 468
},
{
"epoch": 0.9620512820512821,
"grad_norm": 1.3821668725285425,
"learning_rate": 3.5841091805292045e-09,
"loss": 1.2713,
"step": 469
},
{
"epoch": 0.9641025641025641,
"grad_norm": 1.4398924035729572,
"learning_rate": 3.197350095045126e-09,
"loss": 1.2748,
"step": 470
},
{
"epoch": 0.9661538461538461,
"grad_norm": 1.2386387780077939,
"learning_rate": 2.832599894931453e-09,
"loss": 1.3441,
"step": 471
},
{
"epoch": 0.9682051282051282,
"grad_norm": 1.3525741526071395,
"learning_rate": 2.489874739009579e-09,
"loss": 1.2753,
"step": 472
},
{
"epoch": 0.9702564102564103,
"grad_norm": 1.4039770005597791,
"learning_rate": 2.1691898103682883e-09,
"loss": 1.3159,
"step": 473
},
{
"epoch": 0.9723076923076923,
"grad_norm": 1.3628598317312024,
"learning_rate": 1.870559315690634e-09,
"loss": 1.2887,
"step": 474
},
{
"epoch": 0.9743589743589743,
"grad_norm": 1.371403001365053,
"learning_rate": 1.5939964846249377e-09,
"loss": 1.3487,
"step": 475
},
{
"epoch": 0.9764102564102564,
"grad_norm": 1.3133007826982859,
"learning_rate": 1.339513569198536e-09,
"loss": 1.3189,
"step": 476
},
{
"epoch": 0.9784615384615385,
"grad_norm": 1.4606362529352184,
"learning_rate": 1.107121843274994e-09,
"loss": 1.3189,
"step": 477
},
{
"epoch": 0.9805128205128205,
"grad_norm": 1.3509780560710458,
"learning_rate": 8.968316020547261e-10,
"loss": 1.2825,
"step": 478
},
{
"epoch": 0.9825641025641025,
"grad_norm": 1.2243091739514533,
"learning_rate": 7.086521616190277e-10,
"loss": 1.3313,
"step": 479
},
{
"epoch": 0.9846153846153847,
"grad_norm": 1.3506027231957574,
"learning_rate": 5.425918585170164e-10,
"loss": 1.3359,
"step": 480
},
{
"epoch": 0.9866666666666667,
"grad_norm": 1.413547920911948,
"learning_rate": 3.9865804939659407e-10,
"loss": 1.271,
"step": 481
},
{
"epoch": 0.9887179487179487,
"grad_norm": 1.3106473731101602,
"learning_rate": 2.768571106784856e-10,
"loss": 1.3602,
"step": 482
},
{
"epoch": 0.9907692307692307,
"grad_norm": 1.374110583470325,
"learning_rate": 1.7719443827368674e-10,
"loss": 1.3089,
"step": 483
},
{
"epoch": 0.9928205128205129,
"grad_norm": 1.4151814700892573,
"learning_rate": 9.967444734459984e-11,
"loss": 1.3375,
"step": 484
},
{
"epoch": 0.9948717948717949,
"grad_norm": 1.306432628782312,
"learning_rate": 4.430057210913496e-11,
"loss": 1.2872,
"step": 485
},
{
"epoch": 0.9969230769230769,
"grad_norm": 1.2495964430501498,
"learning_rate": 1.1075265688775814e-11,
"loss": 1.306,
"step": 486
},
{
"epoch": 0.9989743589743589,
"grad_norm": 1.3152699338512461,
"learning_rate": 0.0,
"loss": 1.3145,
"step": 487
}
],
"logging_steps": 1,
"max_steps": 487,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 292305580720128.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}