| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9989743589743589, | |
| "eval_steps": 250, | |
| "global_step": 487, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0020512820512820513, | |
| "grad_norm": 6.810319140331888, | |
| "learning_rate": 6.666666666666667e-08, | |
| "loss": 1.7185, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0041025641025641026, | |
| "grad_norm": 6.973912436199157, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 1.7037, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.006153846153846154, | |
| "grad_norm": 6.7660565555022165, | |
| "learning_rate": 2e-07, | |
| "loss": 1.6, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.008205128205128205, | |
| "grad_norm": 6.771663318387663, | |
| "learning_rate": 2.6666666666666667e-07, | |
| "loss": 1.6726, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.010256410256410256, | |
| "grad_norm": 6.52871070669014, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.5925, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.012307692307692308, | |
| "grad_norm": 6.512202528712754, | |
| "learning_rate": 4e-07, | |
| "loss": 1.6276, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.014358974358974359, | |
| "grad_norm": 6.817275132656996, | |
| "learning_rate": 4.6666666666666666e-07, | |
| "loss": 1.6963, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01641025641025641, | |
| "grad_norm": 6.628192486672898, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": 1.6174, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.018461538461538463, | |
| "grad_norm": 6.619040148801032, | |
| "learning_rate": 6e-07, | |
| "loss": 1.6508, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.020512820512820513, | |
| "grad_norm": 6.406981751026222, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 1.6735, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.022564102564102566, | |
| "grad_norm": 6.604487207098839, | |
| "learning_rate": 7.333333333333332e-07, | |
| "loss": 1.6365, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.024615384615384615, | |
| "grad_norm": 6.631786935020852, | |
| "learning_rate": 8e-07, | |
| "loss": 1.6344, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.02666666666666667, | |
| "grad_norm": 5.9719505737206, | |
| "learning_rate": 8.666666666666667e-07, | |
| "loss": 1.5787, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.028717948717948718, | |
| "grad_norm": 6.2761075086977645, | |
| "learning_rate": 9.333333333333333e-07, | |
| "loss": 1.6528, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03076923076923077, | |
| "grad_norm": 5.8862406004197965, | |
| "learning_rate": 1e-06, | |
| "loss": 1.6318, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03282051282051282, | |
| "grad_norm": 5.538352647927954, | |
| "learning_rate": 9.99988924734311e-07, | |
| "loss": 1.5542, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.03487179487179487, | |
| "grad_norm": 4.459604234006354, | |
| "learning_rate": 9.999556994278908e-07, | |
| "loss": 1.6195, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.036923076923076927, | |
| "grad_norm": 4.173369165379434, | |
| "learning_rate": 9.999003255526553e-07, | |
| "loss": 1.5388, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.038974358974358976, | |
| "grad_norm": 3.8681669780200902, | |
| "learning_rate": 9.998228055617262e-07, | |
| "loss": 1.6043, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.041025641025641026, | |
| "grad_norm": 3.573458363070342, | |
| "learning_rate": 9.997231428893215e-07, | |
| "loss": 1.4993, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.043076923076923075, | |
| "grad_norm": 3.658332959872666, | |
| "learning_rate": 9.996013419506033e-07, | |
| "loss": 1.5278, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.04512820512820513, | |
| "grad_norm": 3.7157840687731105, | |
| "learning_rate": 9.994574081414829e-07, | |
| "loss": 1.5844, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.04717948717948718, | |
| "grad_norm": 3.629986722740094, | |
| "learning_rate": 9.992913478383809e-07, | |
| "loss": 1.6517, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.04923076923076923, | |
| "grad_norm": 3.0451360403292385, | |
| "learning_rate": 9.991031683979451e-07, | |
| "loss": 1.4926, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 2.5837734286427563, | |
| "learning_rate": 9.98892878156725e-07, | |
| "loss": 1.5223, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 2.873780501731463, | |
| "learning_rate": 9.986604864308015e-07, | |
| "loss": 1.4939, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.055384615384615386, | |
| "grad_norm": 3.1829655306698283, | |
| "learning_rate": 9.98406003515375e-07, | |
| "loss": 1.5643, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.057435897435897436, | |
| "grad_norm": 2.8311773740844983, | |
| "learning_rate": 9.981294406843093e-07, | |
| "loss": 1.5249, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.059487179487179485, | |
| "grad_norm": 2.6622418441691518, | |
| "learning_rate": 9.978308101896316e-07, | |
| "loss": 1.4994, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.06153846153846154, | |
| "grad_norm": 2.5130076146469666, | |
| "learning_rate": 9.975101252609903e-07, | |
| "loss": 1.5234, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06358974358974359, | |
| "grad_norm": 2.4697277284762684, | |
| "learning_rate": 9.971674001050686e-07, | |
| "loss": 1.44, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.06564102564102564, | |
| "grad_norm": 2.3387108497072533, | |
| "learning_rate": 9.968026499049549e-07, | |
| "loss": 1.4284, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.06769230769230769, | |
| "grad_norm": 2.1694373048980866, | |
| "learning_rate": 9.964158908194706e-07, | |
| "loss": 1.4756, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.06974358974358974, | |
| "grad_norm": 2.0195715873177162, | |
| "learning_rate": 9.960071399824547e-07, | |
| "loss": 1.5196, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.07179487179487179, | |
| "grad_norm": 1.9300579544476943, | |
| "learning_rate": 9.955764155020035e-07, | |
| "loss": 1.487, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07384615384615385, | |
| "grad_norm": 2.100972647844811, | |
| "learning_rate": 9.951237364596692e-07, | |
| "loss": 1.4524, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0758974358974359, | |
| "grad_norm": 2.0943953912186823, | |
| "learning_rate": 9.946491229096141e-07, | |
| "loss": 1.46, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.07794871794871795, | |
| "grad_norm": 1.8819330420666514, | |
| "learning_rate": 9.941525958777235e-07, | |
| "loss": 1.4445, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.857247298082436, | |
| "learning_rate": 9.936341773606722e-07, | |
| "loss": 1.4701, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.08205128205128205, | |
| "grad_norm": 1.7834716663964, | |
| "learning_rate": 9.930938903249516e-07, | |
| "loss": 1.4925, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0841025641025641, | |
| "grad_norm": 1.77948843911021, | |
| "learning_rate": 9.925317587058514e-07, | |
| "loss": 1.4404, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.08615384615384615, | |
| "grad_norm": 1.80399799518289, | |
| "learning_rate": 9.919478074064001e-07, | |
| "loss": 1.3905, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0882051282051282, | |
| "grad_norm": 1.8928509170240126, | |
| "learning_rate": 9.913420622962604e-07, | |
| "loss": 1.4511, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.09025641025641026, | |
| "grad_norm": 1.989776786423599, | |
| "learning_rate": 9.907145502105846e-07, | |
| "loss": 1.431, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.09230769230769231, | |
| "grad_norm": 1.8409975760997632, | |
| "learning_rate": 9.900652989488253e-07, | |
| "loss": 1.4704, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.09435897435897436, | |
| "grad_norm": 1.9311013876868204, | |
| "learning_rate": 9.893943372735032e-07, | |
| "loss": 1.4376, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.09641025641025641, | |
| "grad_norm": 1.965914168449665, | |
| "learning_rate": 9.887016949089332e-07, | |
| "loss": 1.4216, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.09846153846153846, | |
| "grad_norm": 1.868447251521439, | |
| "learning_rate": 9.879874025399087e-07, | |
| "loss": 1.4665, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.10051282051282051, | |
| "grad_norm": 1.9154090530688537, | |
| "learning_rate": 9.872514918103405e-07, | |
| "loss": 1.4637, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 1.6049154522002187, | |
| "learning_rate": 9.864939953218561e-07, | |
| "loss": 1.4262, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10461538461538461, | |
| "grad_norm": 1.6938863631437229, | |
| "learning_rate": 9.85714946632355e-07, | |
| "loss": 1.4541, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 1.7257827008232656, | |
| "learning_rate": 9.84914380254522e-07, | |
| "loss": 1.4412, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.10871794871794872, | |
| "grad_norm": 1.664955905322989, | |
| "learning_rate": 9.840923316542983e-07, | |
| "loss": 1.379, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.11076923076923077, | |
| "grad_norm": 1.6147134785008361, | |
| "learning_rate": 9.832488372493108e-07, | |
| "loss": 1.4204, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.11282051282051282, | |
| "grad_norm": 1.636416478063699, | |
| "learning_rate": 9.82383934407258e-07, | |
| "loss": 1.4208, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.11487179487179487, | |
| "grad_norm": 1.606350625791673, | |
| "learning_rate": 9.814976614442547e-07, | |
| "loss": 1.4269, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.11692307692307692, | |
| "grad_norm": 1.5879209023967076, | |
| "learning_rate": 9.805900576231357e-07, | |
| "loss": 1.4145, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.11897435897435897, | |
| "grad_norm": 1.6388251717824514, | |
| "learning_rate": 9.796611631517141e-07, | |
| "loss": 1.398, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.12102564102564102, | |
| "grad_norm": 1.5770065594768676, | |
| "learning_rate": 9.787110191810026e-07, | |
| "loss": 1.4293, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.12307692307692308, | |
| "grad_norm": 1.6534814499374035, | |
| "learning_rate": 9.77739667803389e-07, | |
| "loss": 1.4118, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.12512820512820513, | |
| "grad_norm": 1.5180093555981888, | |
| "learning_rate": 9.76747152050771e-07, | |
| "loss": 1.4125, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.12717948717948718, | |
| "grad_norm": 1.5377464933820018, | |
| "learning_rate": 9.75733515892652e-07, | |
| "loss": 1.3973, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.12923076923076923, | |
| "grad_norm": 1.546752794133953, | |
| "learning_rate": 9.746988042341907e-07, | |
| "loss": 1.3887, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.13128205128205128, | |
| "grad_norm": 1.5508521340777879, | |
| "learning_rate": 9.736430629142128e-07, | |
| "loss": 1.4109, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 1.5007776923133969, | |
| "learning_rate": 9.725663387031816e-07, | |
| "loss": 1.4729, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.13538461538461538, | |
| "grad_norm": 1.4673639929870512, | |
| "learning_rate": 9.714686793011235e-07, | |
| "loss": 1.3129, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.13743589743589743, | |
| "grad_norm": 1.474577715216591, | |
| "learning_rate": 9.703501333355166e-07, | |
| "loss": 1.3637, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.13948717948717948, | |
| "grad_norm": 1.3551101779554455, | |
| "learning_rate": 9.692107503591358e-07, | |
| "loss": 1.3751, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.14153846153846153, | |
| "grad_norm": 1.4084255265110892, | |
| "learning_rate": 9.680505808478581e-07, | |
| "loss": 1.3955, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.14358974358974358, | |
| "grad_norm": 1.5650402584913055, | |
| "learning_rate": 9.668696761984254e-07, | |
| "loss": 1.4009, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.14564102564102563, | |
| "grad_norm": 1.4833112999624978, | |
| "learning_rate": 9.656680887261692e-07, | |
| "loss": 1.3421, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1476923076923077, | |
| "grad_norm": 1.5883238829964639, | |
| "learning_rate": 9.644458716626911e-07, | |
| "loss": 1.3866, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.14974358974358976, | |
| "grad_norm": 1.4394317678417627, | |
| "learning_rate": 9.63203079153506e-07, | |
| "loss": 1.4153, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1517948717948718, | |
| "grad_norm": 1.5179476858030934, | |
| "learning_rate": 9.619397662556433e-07, | |
| "loss": 1.3906, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 1.4148990540769752, | |
| "learning_rate": 9.606559889352063e-07, | |
| "loss": 1.3855, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1558974358974359, | |
| "grad_norm": 1.4321199734694527, | |
| "learning_rate": 9.593518040648952e-07, | |
| "loss": 1.4001, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.15794871794871795, | |
| "grad_norm": 1.4329827399289827, | |
| "learning_rate": 9.580272694214854e-07, | |
| "loss": 1.3603, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.5010775682420985, | |
| "learning_rate": 9.566824436832695e-07, | |
| "loss": 1.3655, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.16205128205128205, | |
| "grad_norm": 1.3530276824132195, | |
| "learning_rate": 9.553173864274566e-07, | |
| "loss": 1.4273, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1641025641025641, | |
| "grad_norm": 1.454039165880218, | |
| "learning_rate": 9.539321581275342e-07, | |
| "loss": 1.428, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.16615384615384615, | |
| "grad_norm": 1.4908113453423757, | |
| "learning_rate": 9.525268201505878e-07, | |
| "loss": 1.4529, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.1682051282051282, | |
| "grad_norm": 1.4981477147110476, | |
| "learning_rate": 9.511014347545837e-07, | |
| "loss": 1.3925, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.17025641025641025, | |
| "grad_norm": 1.5127117023542747, | |
| "learning_rate": 9.496560650856096e-07, | |
| "loss": 1.4043, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.1723076923076923, | |
| "grad_norm": 1.3345405352227973, | |
| "learning_rate": 9.481907751750779e-07, | |
| "loss": 1.3832, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.17435897435897435, | |
| "grad_norm": 1.5108834135083573, | |
| "learning_rate": 9.467056299368887e-07, | |
| "loss": 1.3508, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1764102564102564, | |
| "grad_norm": 1.5258123857029053, | |
| "learning_rate": 9.452006951645548e-07, | |
| "loss": 1.3265, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.17846153846153845, | |
| "grad_norm": 1.4934293257822084, | |
| "learning_rate": 9.436760375282857e-07, | |
| "loss": 1.3619, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.18051282051282053, | |
| "grad_norm": 1.607227037073415, | |
| "learning_rate": 9.421317245720352e-07, | |
| "loss": 1.4034, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.18256410256410258, | |
| "grad_norm": 1.4141545560354007, | |
| "learning_rate": 9.405678247105082e-07, | |
| "loss": 1.3655, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.18461538461538463, | |
| "grad_norm": 1.3743360655972685, | |
| "learning_rate": 9.38984407226131e-07, | |
| "loss": 1.3442, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.18666666666666668, | |
| "grad_norm": 1.440216218296637, | |
| "learning_rate": 9.373815422659805e-07, | |
| "loss": 1.3413, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.18871794871794872, | |
| "grad_norm": 1.723193290271358, | |
| "learning_rate": 9.357593008386784e-07, | |
| "loss": 1.3816, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.19076923076923077, | |
| "grad_norm": 1.3262804575095386, | |
| "learning_rate": 9.341177548112436e-07, | |
| "loss": 1.3464, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.19282051282051282, | |
| "grad_norm": 1.5488725881566392, | |
| "learning_rate": 9.324569769059096e-07, | |
| "loss": 1.3809, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.19487179487179487, | |
| "grad_norm": 1.3796152701528939, | |
| "learning_rate": 9.30777040696903e-07, | |
| "loss": 1.3366, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.19692307692307692, | |
| "grad_norm": 1.5098823183414498, | |
| "learning_rate": 9.29078020607183e-07, | |
| "loss": 1.3543, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.19897435897435897, | |
| "grad_norm": 1.4190386613608355, | |
| "learning_rate": 9.273599919051452e-07, | |
| "loss": 1.3981, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.20102564102564102, | |
| "grad_norm": 1.340509343495511, | |
| "learning_rate": 9.256230307012869e-07, | |
| "loss": 1.356, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.20307692307692307, | |
| "grad_norm": 1.4810146382672011, | |
| "learning_rate": 9.238672139448353e-07, | |
| "loss": 1.3745, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 1.428418635076634, | |
| "learning_rate": 9.220926194203392e-07, | |
| "loss": 1.406, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20717948717948717, | |
| "grad_norm": 1.4107901013454047, | |
| "learning_rate": 9.202993257442216e-07, | |
| "loss": 1.3739, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.20923076923076922, | |
| "grad_norm": 1.306309001032096, | |
| "learning_rate": 9.184874123612981e-07, | |
| "loss": 1.329, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.21128205128205127, | |
| "grad_norm": 1.4893758607520267, | |
| "learning_rate": 9.166569595412574e-07, | |
| "loss": 1.327, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 1.4494483023674931, | |
| "learning_rate": 9.148080483751048e-07, | |
| "loss": 1.3855, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2153846153846154, | |
| "grad_norm": 1.4170674386122224, | |
| "learning_rate": 9.129407607715696e-07, | |
| "loss": 1.3565, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.21743589743589745, | |
| "grad_norm": 1.304459135205338, | |
| "learning_rate": 9.110551794534775e-07, | |
| "loss": 1.3398, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2194871794871795, | |
| "grad_norm": 1.4294708281101414, | |
| "learning_rate": 9.091513879540844e-07, | |
| "loss": 1.4091, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.22153846153846155, | |
| "grad_norm": 1.2746277106037083, | |
| "learning_rate": 9.072294706133774e-07, | |
| "loss": 1.2911, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2235897435897436, | |
| "grad_norm": 1.353520157073593, | |
| "learning_rate": 9.052895125743369e-07, | |
| "loss": 1.3424, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.22564102564102564, | |
| "grad_norm": 1.323794347591316, | |
| "learning_rate": 9.033315997791659e-07, | |
| "loss": 1.3317, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2276923076923077, | |
| "grad_norm": 1.398614118697795, | |
| "learning_rate": 9.013558189654817e-07, | |
| "loss": 1.3961, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.22974358974358974, | |
| "grad_norm": 1.2766969874630751, | |
| "learning_rate": 8.993622576624746e-07, | |
| "loss": 1.3269, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2317948717948718, | |
| "grad_norm": 1.495310177937772, | |
| "learning_rate": 8.973510041870287e-07, | |
| "loss": 1.4208, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.23384615384615384, | |
| "grad_norm": 1.3088655411190178, | |
| "learning_rate": 8.953221476398105e-07, | |
| "loss": 1.3953, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2358974358974359, | |
| "grad_norm": 1.5052199539599196, | |
| "learning_rate": 8.932757779013213e-07, | |
| "loss": 1.4416, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.23794871794871794, | |
| "grad_norm": 1.3026306985567253, | |
| "learning_rate": 8.912119856279149e-07, | |
| "loss": 1.2805, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.488343491577546, | |
| "learning_rate": 8.891308622477829e-07, | |
| "loss": 1.373, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.24205128205128204, | |
| "grad_norm": 1.369401311033249, | |
| "learning_rate": 8.870324999569024e-07, | |
| "loss": 1.3611, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2441025641025641, | |
| "grad_norm": 1.3002048421979404, | |
| "learning_rate": 8.849169917149531e-07, | |
| "loss": 1.3939, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.24615384615384617, | |
| "grad_norm": 1.426958695759786, | |
| "learning_rate": 8.827844312411982e-07, | |
| "loss": 1.4275, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.24820512820512822, | |
| "grad_norm": 1.3820975115802594, | |
| "learning_rate": 8.806349130103332e-07, | |
| "loss": 1.2887, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.25025641025641027, | |
| "grad_norm": 1.389435389906626, | |
| "learning_rate": 8.784685322483003e-07, | |
| "loss": 1.3588, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.2523076923076923, | |
| "grad_norm": 1.5275652251917355, | |
| "learning_rate": 8.762853849280691e-07, | |
| "loss": 1.2914, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.25435897435897437, | |
| "grad_norm": 1.4263989780538462, | |
| "learning_rate": 8.740855677653867e-07, | |
| "loss": 1.4078, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 1.4808394570173404, | |
| "learning_rate": 8.718691782144907e-07, | |
| "loss": 1.3716, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.25846153846153846, | |
| "grad_norm": 1.380297970298931, | |
| "learning_rate": 8.69636314463794e-07, | |
| "loss": 1.3086, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.2605128205128205, | |
| "grad_norm": 1.42784023805761, | |
| "learning_rate": 8.673870754315336e-07, | |
| "loss": 1.4023, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.26256410256410256, | |
| "grad_norm": 1.5340569739550813, | |
| "learning_rate": 8.651215607613891e-07, | |
| "loss": 1.322, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.26461538461538464, | |
| "grad_norm": 1.3976404822571311, | |
| "learning_rate": 8.628398708180679e-07, | |
| "loss": 1.3275, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 1.4130220590772273, | |
| "learning_rate": 8.605421066828598e-07, | |
| "loss": 1.344, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.26871794871794874, | |
| "grad_norm": 1.3647943645755969, | |
| "learning_rate": 8.582283701491575e-07, | |
| "loss": 1.3595, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.27076923076923076, | |
| "grad_norm": 1.462715888488961, | |
| "learning_rate": 8.558987637179487e-07, | |
| "loss": 1.338, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.27282051282051284, | |
| "grad_norm": 1.3983473187198934, | |
| "learning_rate": 8.535533905932737e-07, | |
| "loss": 1.3913, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.27487179487179486, | |
| "grad_norm": 1.3130917948802023, | |
| "learning_rate": 8.51192354677655e-07, | |
| "loss": 1.2714, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.27692307692307694, | |
| "grad_norm": 1.4479220825321475, | |
| "learning_rate": 8.488157605674924e-07, | |
| "loss": 1.3719, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.27897435897435896, | |
| "grad_norm": 1.4036608256384233, | |
| "learning_rate": 8.464237135484309e-07, | |
| "loss": 1.3593, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.28102564102564104, | |
| "grad_norm": 1.405752858435705, | |
| "learning_rate": 8.440163195906958e-07, | |
| "loss": 1.3171, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.28307692307692306, | |
| "grad_norm": 1.3478889818215098, | |
| "learning_rate": 8.415936853443974e-07, | |
| "loss": 1.3703, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.28512820512820514, | |
| "grad_norm": 1.3507769885527494, | |
| "learning_rate": 8.391559181348081e-07, | |
| "loss": 1.3835, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.28717948717948716, | |
| "grad_norm": 1.4358538509697099, | |
| "learning_rate": 8.367031259576056e-07, | |
| "loss": 1.3472, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.28923076923076924, | |
| "grad_norm": 1.4256463951812464, | |
| "learning_rate": 8.342354174740902e-07, | |
| "loss": 1.3536, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.29128205128205126, | |
| "grad_norm": 1.37330874853555, | |
| "learning_rate": 8.317529020063703e-07, | |
| "loss": 1.3144, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.29333333333333333, | |
| "grad_norm": 1.424304273714467, | |
| "learning_rate": 8.292556895325194e-07, | |
| "loss": 1.3858, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.2953846153846154, | |
| "grad_norm": 1.5014570464124226, | |
| "learning_rate": 8.267438906817039e-07, | |
| "loss": 1.4179, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.29743589743589743, | |
| "grad_norm": 1.3885797736642898, | |
| "learning_rate": 8.242176167292826e-07, | |
| "loss": 1.3554, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2994871794871795, | |
| "grad_norm": 1.399968517661764, | |
| "learning_rate": 8.216769795918762e-07, | |
| "loss": 1.2941, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.30153846153846153, | |
| "grad_norm": 1.3351303735116444, | |
| "learning_rate": 8.1912209182241e-07, | |
| "loss": 1.3682, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3035897435897436, | |
| "grad_norm": 1.4849614268873412, | |
| "learning_rate": 8.165530666051275e-07, | |
| "loss": 1.3761, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.30564102564102563, | |
| "grad_norm": 1.4111664153752073, | |
| "learning_rate": 8.139700177505759e-07, | |
| "loss": 1.3164, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 1.3001144500599642, | |
| "learning_rate": 8.113730596905648e-07, | |
| "loss": 1.3093, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.30974358974358973, | |
| "grad_norm": 1.4002532454809997, | |
| "learning_rate": 8.087623074730959e-07, | |
| "loss": 1.3857, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3117948717948718, | |
| "grad_norm": 1.4470191142442426, | |
| "learning_rate": 8.061378767572673e-07, | |
| "loss": 1.3335, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.31384615384615383, | |
| "grad_norm": 1.3514370453203643, | |
| "learning_rate": 8.034998838081489e-07, | |
| "loss": 1.3756, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3158974358974359, | |
| "grad_norm": 1.4287393724078254, | |
| "learning_rate": 8.008484454916316e-07, | |
| "loss": 1.3153, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.31794871794871793, | |
| "grad_norm": 1.3651421834133906, | |
| "learning_rate": 7.981836792692507e-07, | |
| "loss": 1.2833, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.386223426300147, | |
| "learning_rate": 7.955057031929819e-07, | |
| "loss": 1.3377, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.32205128205128203, | |
| "grad_norm": 1.3442671594803883, | |
| "learning_rate": 7.928146359000117e-07, | |
| "loss": 1.4253, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.3241025641025641, | |
| "grad_norm": 1.4640568604532251, | |
| "learning_rate": 7.901105966074806e-07, | |
| "loss": 1.4161, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3261538461538461, | |
| "grad_norm": 1.41021093543057, | |
| "learning_rate": 7.873937051072035e-07, | |
| "loss": 1.3809, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.3282051282051282, | |
| "grad_norm": 1.394834384818255, | |
| "learning_rate": 7.846640817603607e-07, | |
| "loss": 1.4037, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3302564102564103, | |
| "grad_norm": 1.4823904331092446, | |
| "learning_rate": 7.819218474921679e-07, | |
| "loss": 1.335, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.3323076923076923, | |
| "grad_norm": 1.4008041043128283, | |
| "learning_rate": 7.791671237865174e-07, | |
| "loss": 1.3413, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.3343589743589744, | |
| "grad_norm": 1.3105770564662627, | |
| "learning_rate": 7.764000326805966e-07, | |
| "loss": 1.3521, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.3364102564102564, | |
| "grad_norm": 1.4619762152088143, | |
| "learning_rate": 7.736206967594827e-07, | |
| "loss": 1.3035, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.3384615384615385, | |
| "grad_norm": 1.2996575385311386, | |
| "learning_rate": 7.708292391507105e-07, | |
| "loss": 1.3164, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3405128205128205, | |
| "grad_norm": 1.27071752376492, | |
| "learning_rate": 7.680257835188186e-07, | |
| "loss": 1.2964, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.3425641025641026, | |
| "grad_norm": 1.327855217456687, | |
| "learning_rate": 7.652104540598712e-07, | |
| "loss": 1.3476, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.3446153846153846, | |
| "grad_norm": 1.4536458066818985, | |
| "learning_rate": 7.623833754959551e-07, | |
| "loss": 1.3434, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.3466666666666667, | |
| "grad_norm": 1.4105614477006825, | |
| "learning_rate": 7.595446730696553e-07, | |
| "loss": 1.364, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.3487179487179487, | |
| "grad_norm": 1.5356583052898265, | |
| "learning_rate": 7.56694472538506e-07, | |
| "loss": 1.3487, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3507692307692308, | |
| "grad_norm": 1.4487335955760117, | |
| "learning_rate": 7.538329001694199e-07, | |
| "loss": 1.2782, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.3528205128205128, | |
| "grad_norm": 1.3795988167861302, | |
| "learning_rate": 7.509600827330942e-07, | |
| "loss": 1.4282, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.3548717948717949, | |
| "grad_norm": 1.3636686216275424, | |
| "learning_rate": 7.480761474983943e-07, | |
| "loss": 1.2897, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.3569230769230769, | |
| "grad_norm": 1.4273922371235048, | |
| "learning_rate": 7.451812222267157e-07, | |
| "loss": 1.3154, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 1.378247984135367, | |
| "learning_rate": 7.422754351663251e-07, | |
| "loss": 1.2701, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.36102564102564105, | |
| "grad_norm": 1.4547712487270863, | |
| "learning_rate": 7.39358915046677e-07, | |
| "loss": 1.356, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.3630769230769231, | |
| "grad_norm": 1.2921318934822192, | |
| "learning_rate": 7.364317910727127e-07, | |
| "loss": 1.3087, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.36512820512820515, | |
| "grad_norm": 1.3528572849054787, | |
| "learning_rate": 7.334941929191343e-07, | |
| "loss": 1.3213, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.3671794871794872, | |
| "grad_norm": 1.356101334022072, | |
| "learning_rate": 7.305462507246629e-07, | |
| "loss": 1.3622, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.36923076923076925, | |
| "grad_norm": 1.385787615622585, | |
| "learning_rate": 7.2758809508627e-07, | |
| "loss": 1.2812, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3712820512820513, | |
| "grad_norm": 1.382621750590725, | |
| "learning_rate": 7.246198570533944e-07, | |
| "loss": 1.3158, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 1.3556980794870819, | |
| "learning_rate": 7.216416681221353e-07, | |
| "loss": 1.3015, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.37538461538461537, | |
| "grad_norm": 1.3154150058043996, | |
| "learning_rate": 7.186536602294278e-07, | |
| "loss": 1.2819, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.37743589743589745, | |
| "grad_norm": 1.41202917935767, | |
| "learning_rate": 7.156559657471966e-07, | |
| "loss": 1.3517, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.37948717948717947, | |
| "grad_norm": 1.3968980770043466, | |
| "learning_rate": 7.126487174764935e-07, | |
| "loss": 1.2971, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.38153846153846155, | |
| "grad_norm": 1.4283842268074054, | |
| "learning_rate": 7.096320486416124e-07, | |
| "loss": 1.3319, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.38358974358974357, | |
| "grad_norm": 1.4389331160861967, | |
| "learning_rate": 7.06606092884189e-07, | |
| "loss": 1.3313, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.38564102564102565, | |
| "grad_norm": 1.366798910593162, | |
| "learning_rate": 7.035709842572792e-07, | |
| "loss": 1.315, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.38769230769230767, | |
| "grad_norm": 1.4801735124712503, | |
| "learning_rate": 7.005268572194207e-07, | |
| "loss": 1.368, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.38974358974358975, | |
| "grad_norm": 1.3323443914353508, | |
| "learning_rate": 6.974738466286765e-07, | |
| "loss": 1.3025, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.39179487179487177, | |
| "grad_norm": 1.3039032352168842, | |
| "learning_rate": 6.944120877366604e-07, | |
| "loss": 1.2744, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.39384615384615385, | |
| "grad_norm": 1.4073653745829573, | |
| "learning_rate": 6.913417161825449e-07, | |
| "loss": 1.344, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3958974358974359, | |
| "grad_norm": 1.2825924865288278, | |
| "learning_rate": 6.882628679870531e-07, | |
| "loss": 1.3075, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.39794871794871794, | |
| "grad_norm": 1.4518075398628796, | |
| "learning_rate": 6.851756795464323e-07, | |
| "loss": 1.3981, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.393725949206135, | |
| "learning_rate": 6.820802876264111e-07, | |
| "loss": 1.2986, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.40205128205128204, | |
| "grad_norm": 1.3851879908737978, | |
| "learning_rate": 6.789768293561413e-07, | |
| "loss": 1.3757, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4041025641025641, | |
| "grad_norm": 1.387897673235893, | |
| "learning_rate": 6.758654422221224e-07, | |
| "loss": 1.2985, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.40615384615384614, | |
| "grad_norm": 1.4919947840109538, | |
| "learning_rate": 6.727462640621112e-07, | |
| "loss": 1.3517, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4082051282051282, | |
| "grad_norm": 1.3341481537871696, | |
| "learning_rate": 6.69619433059015e-07, | |
| "loss": 1.3302, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 1.345662539893686, | |
| "learning_rate": 6.664850877347705e-07, | |
| "loss": 1.3182, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4123076923076923, | |
| "grad_norm": 1.376354584890257, | |
| "learning_rate": 6.633433669442064e-07, | |
| "loss": 1.2953, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.41435897435897434, | |
| "grad_norm": 1.44510644298783, | |
| "learning_rate": 6.601944098688927e-07, | |
| "loss": 1.3001, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.4164102564102564, | |
| "grad_norm": 1.4431030025015483, | |
| "learning_rate": 6.570383560109745e-07, | |
| "loss": 1.2941, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.41846153846153844, | |
| "grad_norm": 1.4708503974375458, | |
| "learning_rate": 6.538753451869913e-07, | |
| "loss": 1.4086, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.4205128205128205, | |
| "grad_norm": 1.421296157525184, | |
| "learning_rate": 6.507055175216849e-07, | |
| "loss": 1.2755, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.42256410256410254, | |
| "grad_norm": 1.3273597087706324, | |
| "learning_rate": 6.475290134417891e-07, | |
| "loss": 1.369, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.4246153846153846, | |
| "grad_norm": 1.458296531875952, | |
| "learning_rate": 6.443459736698105e-07, | |
| "loss": 1.3266, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 1.3887196370993375, | |
| "learning_rate": 6.41156539217794e-07, | |
| "loss": 1.3293, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.4287179487179487, | |
| "grad_norm": 1.3414834568362113, | |
| "learning_rate": 6.379608513810753e-07, | |
| "loss": 1.3066, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.4307692307692308, | |
| "grad_norm": 1.441297553114322, | |
| "learning_rate": 6.347590517320217e-07, | |
| "loss": 1.3329, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4328205128205128, | |
| "grad_norm": 1.3531042410782805, | |
| "learning_rate": 6.315512821137606e-07, | |
| "loss": 1.293, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.4348717948717949, | |
| "grad_norm": 1.3554046113834761, | |
| "learning_rate": 6.28337684633895e-07, | |
| "loss": 1.2414, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.4369230769230769, | |
| "grad_norm": 1.394677662879496, | |
| "learning_rate": 6.251184016582088e-07, | |
| "loss": 1.3264, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.438974358974359, | |
| "grad_norm": 1.4851633778642261, | |
| "learning_rate": 6.218935758043586e-07, | |
| "loss": 1.2634, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.441025641025641, | |
| "grad_norm": 1.3371557479948093, | |
| "learning_rate": 6.186633499355575e-07, | |
| "loss": 1.3876, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4430769230769231, | |
| "grad_norm": 1.4887491463790388, | |
| "learning_rate": 6.15427867154244e-07, | |
| "loss": 1.3122, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.4451282051282051, | |
| "grad_norm": 1.3232196760718127, | |
| "learning_rate": 6.121872707957441e-07, | |
| "loss": 1.3441, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.4471794871794872, | |
| "grad_norm": 1.3766112511648216, | |
| "learning_rate": 6.089417044219201e-07, | |
| "loss": 1.3255, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.4492307692307692, | |
| "grad_norm": 1.3049112726080363, | |
| "learning_rate": 6.056913118148121e-07, | |
| "loss": 1.3397, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.4512820512820513, | |
| "grad_norm": 1.3939219423691345, | |
| "learning_rate": 6.024362369702668e-07, | |
| "loss": 1.2519, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4533333333333333, | |
| "grad_norm": 1.371353907416093, | |
| "learning_rate": 5.991766240915589e-07, | |
| "loss": 1.301, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.4553846153846154, | |
| "grad_norm": 1.4850791746392926, | |
| "learning_rate": 5.959126175830033e-07, | |
| "loss": 1.2983, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.4574358974358974, | |
| "grad_norm": 1.4663453627095475, | |
| "learning_rate": 5.926443620435571e-07, | |
| "loss": 1.283, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.4594871794871795, | |
| "grad_norm": 1.4492201774552442, | |
| "learning_rate": 5.893720022604142e-07, | |
| "loss": 1.3509, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 1.4069307451775082, | |
| "learning_rate": 5.860956832025906e-07, | |
| "loss": 1.3087, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4635897435897436, | |
| "grad_norm": 1.3370341068000464, | |
| "learning_rate": 5.828155500145024e-07, | |
| "loss": 1.3227, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.46564102564102566, | |
| "grad_norm": 1.3297533267380588, | |
| "learning_rate": 5.79531748009536e-07, | |
| "loss": 1.3174, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.4676923076923077, | |
| "grad_norm": 1.347608878869153, | |
| "learning_rate": 5.7624442266361e-07, | |
| "loss": 1.2451, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.46974358974358976, | |
| "grad_norm": 1.2409369335474423, | |
| "learning_rate": 5.729537196087308e-07, | |
| "loss": 1.2842, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.4717948717948718, | |
| "grad_norm": 1.3300849973007849, | |
| "learning_rate": 5.696597846265411e-07, | |
| "loss": 1.3136, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.47384615384615386, | |
| "grad_norm": 1.4479979686773294, | |
| "learning_rate": 5.663627636418609e-07, | |
| "loss": 1.3757, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.4758974358974359, | |
| "grad_norm": 1.3087492331617634, | |
| "learning_rate": 5.630628027162243e-07, | |
| "loss": 1.3633, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.47794871794871796, | |
| "grad_norm": 1.4490486681330532, | |
| "learning_rate": 5.597600480414068e-07, | |
| "loss": 1.3271, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.5347632065237542, | |
| "learning_rate": 5.564546459329509e-07, | |
| "loss": 1.3038, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.48205128205128206, | |
| "grad_norm": 1.3875201636263441, | |
| "learning_rate": 5.531467428236827e-07, | |
| "loss": 1.3906, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.4841025641025641, | |
| "grad_norm": 1.3525087883277989, | |
| "learning_rate": 5.498364852572255e-07, | |
| "loss": 1.3648, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.48615384615384616, | |
| "grad_norm": 1.2792944836481481, | |
| "learning_rate": 5.465240198815072e-07, | |
| "loss": 1.2822, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.4882051282051282, | |
| "grad_norm": 1.4555679204072403, | |
| "learning_rate": 5.432094934422648e-07, | |
| "loss": 1.3249, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.49025641025641026, | |
| "grad_norm": 1.3529453067601664, | |
| "learning_rate": 5.398930527765415e-07, | |
| "loss": 1.3209, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.49230769230769234, | |
| "grad_norm": 1.3313720449010154, | |
| "learning_rate": 5.365748448061837e-07, | |
| "loss": 1.2981, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.49435897435897436, | |
| "grad_norm": 1.3879386825445084, | |
| "learning_rate": 5.332550165313312e-07, | |
| "loss": 1.3005, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.49641025641025643, | |
| "grad_norm": 1.3914024176149524, | |
| "learning_rate": 5.299337150239041e-07, | |
| "loss": 1.296, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.49846153846153846, | |
| "grad_norm": 1.4576866533836497, | |
| "learning_rate": 5.266110874210892e-07, | |
| "loss": 1.3351, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.5005128205128205, | |
| "grad_norm": 1.4191392954223687, | |
| "learning_rate": 5.232872809188208e-07, | |
| "loss": 1.3313, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.5025641025641026, | |
| "grad_norm": 1.2857007376482181, | |
| "learning_rate": 5.199624427652588e-07, | |
| "loss": 1.2928, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5046153846153846, | |
| "grad_norm": 1.3182105285446684, | |
| "learning_rate": 5.166367202542671e-07, | |
| "loss": 1.3421, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.5066666666666667, | |
| "grad_norm": 1.293734923156538, | |
| "learning_rate": 5.133102607188874e-07, | |
| "loss": 1.3405, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.5087179487179487, | |
| "grad_norm": 1.4051818888405565, | |
| "learning_rate": 5.099832115248123e-07, | |
| "loss": 1.2858, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.5107692307692308, | |
| "grad_norm": 1.419436972903703, | |
| "learning_rate": 5.066557200638569e-07, | |
| "loss": 1.3539, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 1.3763121975287578, | |
| "learning_rate": 5.033279337474294e-07, | |
| "loss": 1.3814, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "eval_uground_MCTS_chains_SFT_val_loss": 1.338526725769043, | |
| "eval_uground_MCTS_chains_SFT_val_runtime": 142.2738, | |
| "eval_uground_MCTS_chains_SFT_val_samples_per_second": 12.785, | |
| "eval_uground_MCTS_chains_SFT_val_steps_per_second": 1.603, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5148717948717949, | |
| "grad_norm": 1.3803064200700599, | |
| "learning_rate": 5e-07, | |
| "loss": 1.3431, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.5169230769230769, | |
| "grad_norm": 1.3364019814551773, | |
| "learning_rate": 4.966720662525707e-07, | |
| "loss": 1.3339, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.518974358974359, | |
| "grad_norm": 1.3814304512811713, | |
| "learning_rate": 4.933442799361431e-07, | |
| "loss": 1.3885, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.521025641025641, | |
| "grad_norm": 1.3302704766710616, | |
| "learning_rate": 4.900167884751877e-07, | |
| "loss": 1.2784, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.5230769230769231, | |
| "grad_norm": 1.3532645859025179, | |
| "learning_rate": 4.866897392811126e-07, | |
| "loss": 1.4133, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5251282051282051, | |
| "grad_norm": 1.3326049138231024, | |
| "learning_rate": 4.833632797457331e-07, | |
| "loss": 1.2788, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.5271794871794871, | |
| "grad_norm": 1.3680818424670418, | |
| "learning_rate": 4.800375572347413e-07, | |
| "loss": 1.3483, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.5292307692307693, | |
| "grad_norm": 1.3780541644452522, | |
| "learning_rate": 4.767127190811793e-07, | |
| "loss": 1.3152, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.5312820512820513, | |
| "grad_norm": 1.3069364604536544, | |
| "learning_rate": 4.7338891257891076e-07, | |
| "loss": 1.3299, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 1.3707233739601012, | |
| "learning_rate": 4.7006628497609604e-07, | |
| "loss": 1.3201, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5353846153846153, | |
| "grad_norm": 1.3491562859786448, | |
| "learning_rate": 4.6674498346866887e-07, | |
| "loss": 1.2785, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.5374358974358975, | |
| "grad_norm": 1.467464986000282, | |
| "learning_rate": 4.634251551938161e-07, | |
| "loss": 1.337, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.5394871794871795, | |
| "grad_norm": 1.29313468913082, | |
| "learning_rate": 4.601069472234584e-07, | |
| "loss": 1.324, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.5415384615384615, | |
| "grad_norm": 1.324791527915958, | |
| "learning_rate": 4.5679050655773534e-07, | |
| "loss": 1.316, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.5435897435897435, | |
| "grad_norm": 1.40484113279842, | |
| "learning_rate": 4.5347598011849275e-07, | |
| "loss": 1.2967, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5456410256410257, | |
| "grad_norm": 1.3059231618524412, | |
| "learning_rate": 4.501635147427745e-07, | |
| "loss": 1.2795, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.5476923076923077, | |
| "grad_norm": 1.3379544072622815, | |
| "learning_rate": 4.4685325717631734e-07, | |
| "loss": 1.2621, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.5497435897435897, | |
| "grad_norm": 1.3860481263368158, | |
| "learning_rate": 4.4354535406704907e-07, | |
| "loss": 1.3012, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.5517948717948717, | |
| "grad_norm": 1.3489865311164444, | |
| "learning_rate": 4.4023995195859313e-07, | |
| "loss": 1.2748, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.5538461538461539, | |
| "grad_norm": 1.3313443764200086, | |
| "learning_rate": 4.369371972837757e-07, | |
| "loss": 1.3682, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5558974358974359, | |
| "grad_norm": 1.4195434027790386, | |
| "learning_rate": 4.33637236358139e-07, | |
| "loss": 1.2826, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.5579487179487179, | |
| "grad_norm": 1.3431350195403668, | |
| "learning_rate": 4.30340215373459e-07, | |
| "loss": 1.3432, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.3960093820700656, | |
| "learning_rate": 4.2704628039126914e-07, | |
| "loss": 1.2941, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.5620512820512821, | |
| "grad_norm": 1.401017396814776, | |
| "learning_rate": 4.2375557733639006e-07, | |
| "loss": 1.319, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 1.397892504319514, | |
| "learning_rate": 4.20468251990464e-07, | |
| "loss": 1.3374, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5661538461538461, | |
| "grad_norm": 1.365941739199125, | |
| "learning_rate": 4.1718444998549756e-07, | |
| "loss": 1.344, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.5682051282051283, | |
| "grad_norm": 1.350043286129735, | |
| "learning_rate": 4.1390431679740953e-07, | |
| "loss": 1.2851, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.5702564102564103, | |
| "grad_norm": 1.4263357120734497, | |
| "learning_rate": 4.106279977395858e-07, | |
| "loss": 1.3298, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.5723076923076923, | |
| "grad_norm": 1.2818553970002176, | |
| "learning_rate": 4.073556379564429e-07, | |
| "loss": 1.2684, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.5743589743589743, | |
| "grad_norm": 1.413899213332057, | |
| "learning_rate": 4.0408738241699685e-07, | |
| "loss": 1.3092, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5764102564102564, | |
| "grad_norm": 1.3497769672706679, | |
| "learning_rate": 4.00823375908441e-07, | |
| "loss": 1.329, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.5784615384615385, | |
| "grad_norm": 1.3254634061152786, | |
| "learning_rate": 3.9756376302973325e-07, | |
| "loss": 1.3076, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.5805128205128205, | |
| "grad_norm": 1.4049294607846992, | |
| "learning_rate": 3.943086881851878e-07, | |
| "loss": 1.2649, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.5825641025641025, | |
| "grad_norm": 1.5373330046399727, | |
| "learning_rate": 3.9105829557807973e-07, | |
| "loss": 1.3728, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.5846153846153846, | |
| "grad_norm": 1.4097914378402818, | |
| "learning_rate": 3.87812729204256e-07, | |
| "loss": 1.3186, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 1.4677590739466415, | |
| "learning_rate": 3.84572132845756e-07, | |
| "loss": 1.2695, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.5887179487179487, | |
| "grad_norm": 1.3914941908309093, | |
| "learning_rate": 3.8133665006444255e-07, | |
| "loss": 1.2708, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.5907692307692308, | |
| "grad_norm": 1.3463645339331958, | |
| "learning_rate": 3.781064241956414e-07, | |
| "loss": 1.3028, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.5928205128205128, | |
| "grad_norm": 1.4020220379821526, | |
| "learning_rate": 3.7488159834179135e-07, | |
| "loss": 1.2784, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.5948717948717949, | |
| "grad_norm": 1.4540293204215256, | |
| "learning_rate": 3.716623153661049e-07, | |
| "loss": 1.3005, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5969230769230769, | |
| "grad_norm": 1.3902452427671015, | |
| "learning_rate": 3.6844871788623945e-07, | |
| "loss": 1.2524, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.598974358974359, | |
| "grad_norm": 1.48338078362365, | |
| "learning_rate": 3.652409482679783e-07, | |
| "loss": 1.3222, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.601025641025641, | |
| "grad_norm": 1.2846473500863387, | |
| "learning_rate": 3.6203914861892476e-07, | |
| "loss": 1.3626, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.6030769230769231, | |
| "grad_norm": 1.471140280043153, | |
| "learning_rate": 3.588434607822061e-07, | |
| "loss": 1.3137, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.6051282051282051, | |
| "grad_norm": 1.4330668442336907, | |
| "learning_rate": 3.5565402633018957e-07, | |
| "loss": 1.2806, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6071794871794872, | |
| "grad_norm": 1.3403409049501387, | |
| "learning_rate": 3.5247098655821103e-07, | |
| "loss": 1.3276, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.6092307692307692, | |
| "grad_norm": 1.3471334531902774, | |
| "learning_rate": 3.4929448247831514e-07, | |
| "loss": 1.3527, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.6112820512820513, | |
| "grad_norm": 1.441754768297771, | |
| "learning_rate": 3.4612465481300867e-07, | |
| "loss": 1.3509, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.6133333333333333, | |
| "grad_norm": 1.3109786154015102, | |
| "learning_rate": 3.429616439890257e-07, | |
| "loss": 1.3303, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 1.3571971672387129, | |
| "learning_rate": 3.398055901311073e-07, | |
| "loss": 1.2926, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6174358974358974, | |
| "grad_norm": 1.3873664792216218, | |
| "learning_rate": 3.3665663305579344e-07, | |
| "loss": 1.3244, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.6194871794871795, | |
| "grad_norm": 1.3799572812815109, | |
| "learning_rate": 3.335149122652293e-07, | |
| "loss": 1.284, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.6215384615384615, | |
| "grad_norm": 1.316197811127298, | |
| "learning_rate": 3.303805669409848e-07, | |
| "loss": 1.3153, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.6235897435897436, | |
| "grad_norm": 1.2600316458800467, | |
| "learning_rate": 3.272537359378887e-07, | |
| "loss": 1.3686, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.6256410256410256, | |
| "grad_norm": 1.3725839158894015, | |
| "learning_rate": 3.2413455777787746e-07, | |
| "loss": 1.2968, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6276923076923077, | |
| "grad_norm": 1.294502428896565, | |
| "learning_rate": 3.2102317064385876e-07, | |
| "loss": 1.2874, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.6297435897435898, | |
| "grad_norm": 1.4104402124249922, | |
| "learning_rate": 3.179197123735889e-07, | |
| "loss": 1.2672, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.6317948717948718, | |
| "grad_norm": 1.3711533346685432, | |
| "learning_rate": 3.148243204535677e-07, | |
| "loss": 1.2661, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.6338461538461538, | |
| "grad_norm": 1.3385883768449498, | |
| "learning_rate": 3.117371320129469e-07, | |
| "loss": 1.3546, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.6358974358974359, | |
| "grad_norm": 1.3583569291948376, | |
| "learning_rate": 3.086582838174551e-07, | |
| "loss": 1.2698, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.637948717948718, | |
| "grad_norm": 1.2759125465275387, | |
| "learning_rate": 3.055879122633397e-07, | |
| "loss": 1.3022, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.4220971900274135, | |
| "learning_rate": 3.025261533713235e-07, | |
| "loss": 1.315, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.642051282051282, | |
| "grad_norm": 1.386745544730108, | |
| "learning_rate": 2.994731427805792e-07, | |
| "loss": 1.2634, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.6441025641025641, | |
| "grad_norm": 1.3092798515784028, | |
| "learning_rate": 2.964290157427207e-07, | |
| "loss": 1.2438, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.6461538461538462, | |
| "grad_norm": 1.4018848728602682, | |
| "learning_rate": 2.9339390711581105e-07, | |
| "loss": 1.394, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6482051282051282, | |
| "grad_norm": 1.4469110144038708, | |
| "learning_rate": 2.9036795135838764e-07, | |
| "loss": 1.3446, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.6502564102564102, | |
| "grad_norm": 1.3545060659112242, | |
| "learning_rate": 2.8735128252350674e-07, | |
| "loss": 1.2794, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.6523076923076923, | |
| "grad_norm": 1.393409490719331, | |
| "learning_rate": 2.843440342528035e-07, | |
| "loss": 1.3257, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.6543589743589744, | |
| "grad_norm": 1.3673405096575244, | |
| "learning_rate": 2.813463397705723e-07, | |
| "loss": 1.3053, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.6564102564102564, | |
| "grad_norm": 1.2769338414370688, | |
| "learning_rate": 2.783583318778646e-07, | |
| "loss": 1.2706, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6584615384615384, | |
| "grad_norm": 1.4095662966250955, | |
| "learning_rate": 2.753801429466056e-07, | |
| "loss": 1.3405, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.6605128205128206, | |
| "grad_norm": 1.271906555167854, | |
| "learning_rate": 2.7241190491372987e-07, | |
| "loss": 1.2279, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.6625641025641026, | |
| "grad_norm": 1.4207452998511736, | |
| "learning_rate": 2.6945374927533697e-07, | |
| "loss": 1.3218, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.6646153846153846, | |
| "grad_norm": 1.4323142733077865, | |
| "learning_rate": 2.665058070808654e-07, | |
| "loss": 1.4065, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 1.265108069283216, | |
| "learning_rate": 2.635682089272875e-07, | |
| "loss": 1.2986, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.6687179487179488, | |
| "grad_norm": 1.4383291062967463, | |
| "learning_rate": 2.6064108495332293e-07, | |
| "loss": 1.3276, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.6707692307692308, | |
| "grad_norm": 1.3012684857872605, | |
| "learning_rate": 2.5772456483367497e-07, | |
| "loss": 1.2725, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.6728205128205128, | |
| "grad_norm": 1.4239883240238744, | |
| "learning_rate": 2.5481877777328424e-07, | |
| "loss": 1.3433, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.6748717948717948, | |
| "grad_norm": 1.3329136724779032, | |
| "learning_rate": 2.5192385250160586e-07, | |
| "loss": 1.2651, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.676923076923077, | |
| "grad_norm": 1.3523109345462954, | |
| "learning_rate": 2.4903991726690583e-07, | |
| "loss": 1.2988, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.678974358974359, | |
| "grad_norm": 1.311204740811716, | |
| "learning_rate": 2.461670998305801e-07, | |
| "loss": 1.2068, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.681025641025641, | |
| "grad_norm": 1.246738747824622, | |
| "learning_rate": 2.4330552746149404e-07, | |
| "loss": 1.2955, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.683076923076923, | |
| "grad_norm": 1.3933636676146037, | |
| "learning_rate": 2.4045532693034474e-07, | |
| "loss": 1.3791, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.6851282051282052, | |
| "grad_norm": 1.4374856626540078, | |
| "learning_rate": 2.3761662450404492e-07, | |
| "loss": 1.35, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.6871794871794872, | |
| "grad_norm": 1.3638334560630514, | |
| "learning_rate": 2.347895459401288e-07, | |
| "loss": 1.2993, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6892307692307692, | |
| "grad_norm": 1.3485827756964341, | |
| "learning_rate": 2.319742164811813e-07, | |
| "loss": 1.3159, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.6912820512820513, | |
| "grad_norm": 1.418888206942911, | |
| "learning_rate": 2.2917076084928948e-07, | |
| "loss": 1.3593, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 1.4828726277064257, | |
| "learning_rate": 2.2637930324051747e-07, | |
| "loss": 1.3679, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6953846153846154, | |
| "grad_norm": 1.413301068518357, | |
| "learning_rate": 2.2359996731940345e-07, | |
| "loss": 1.27, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.6974358974358974, | |
| "grad_norm": 1.3208309322946137, | |
| "learning_rate": 2.2083287621348256e-07, | |
| "loss": 1.2937, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6994871794871795, | |
| "grad_norm": 1.4368838454397468, | |
| "learning_rate": 2.180781525078319e-07, | |
| "loss": 1.2766, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.7015384615384616, | |
| "grad_norm": 1.4246678530032884, | |
| "learning_rate": 2.1533591823963926e-07, | |
| "loss": 1.2996, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.7035897435897436, | |
| "grad_norm": 1.3428136313711472, | |
| "learning_rate": 2.1260629489279657e-07, | |
| "loss": 1.3312, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.7056410256410256, | |
| "grad_norm": 1.3574535316266307, | |
| "learning_rate": 2.0988940339251937e-07, | |
| "loss": 1.3234, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.7076923076923077, | |
| "grad_norm": 1.2707949058163033, | |
| "learning_rate": 2.0718536409998833e-07, | |
| "loss": 1.2958, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7097435897435898, | |
| "grad_norm": 1.4822667590568277, | |
| "learning_rate": 2.0449429680701797e-07, | |
| "loss": 1.2867, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.7117948717948718, | |
| "grad_norm": 1.2917213676654393, | |
| "learning_rate": 2.0181632073074923e-07, | |
| "loss": 1.3462, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.7138461538461538, | |
| "grad_norm": 1.4001267259726107, | |
| "learning_rate": 1.991515545083684e-07, | |
| "loss": 1.2215, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.7158974358974359, | |
| "grad_norm": 1.3397954504556553, | |
| "learning_rate": 1.9650011619185126e-07, | |
| "loss": 1.2748, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 1.4376449099130564, | |
| "learning_rate": 1.938621232427327e-07, | |
| "loss": 1.3395, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.3659796825711872, | |
| "learning_rate": 1.9123769252690407e-07, | |
| "loss": 1.342, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.7220512820512821, | |
| "grad_norm": 1.2551401015316006, | |
| "learning_rate": 1.8862694030943528e-07, | |
| "loss": 1.2282, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.7241025641025641, | |
| "grad_norm": 1.4075072417448022, | |
| "learning_rate": 1.8602998224942406e-07, | |
| "loss": 1.2872, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.7261538461538461, | |
| "grad_norm": 1.2962040010095723, | |
| "learning_rate": 1.834469333948725e-07, | |
| "loss": 1.3481, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.7282051282051282, | |
| "grad_norm": 1.299136753253947, | |
| "learning_rate": 1.808779081775901e-07, | |
| "loss": 1.2932, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7302564102564103, | |
| "grad_norm": 1.4000758162190168, | |
| "learning_rate": 1.7832302040812392e-07, | |
| "loss": 1.3154, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.7323076923076923, | |
| "grad_norm": 1.252044581176086, | |
| "learning_rate": 1.757823832707175e-07, | |
| "loss": 1.338, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7343589743589743, | |
| "grad_norm": 1.3740222857140072, | |
| "learning_rate": 1.7325610931829616e-07, | |
| "loss": 1.2449, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.7364102564102564, | |
| "grad_norm": 1.2947442493826966, | |
| "learning_rate": 1.7074431046748074e-07, | |
| "loss": 1.3193, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7384615384615385, | |
| "grad_norm": 1.357340685900848, | |
| "learning_rate": 1.682470979936298e-07, | |
| "loss": 1.336, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7405128205128205, | |
| "grad_norm": 1.3548253079749504, | |
| "learning_rate": 1.6576458252590986e-07, | |
| "loss": 1.2955, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7425641025641025, | |
| "grad_norm": 1.3012853046416282, | |
| "learning_rate": 1.6329687404239445e-07, | |
| "loss": 1.3528, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7446153846153846, | |
| "grad_norm": 1.3726340184170516, | |
| "learning_rate": 1.6084408186519194e-07, | |
| "loss": 1.2899, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 1.2475355635801402, | |
| "learning_rate": 1.584063146556025e-07, | |
| "loss": 1.3549, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7487179487179487, | |
| "grad_norm": 1.375734131748055, | |
| "learning_rate": 1.5598368040930427e-07, | |
| "loss": 1.3121, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7507692307692307, | |
| "grad_norm": 1.410388031615801, | |
| "learning_rate": 1.5357628645156918e-07, | |
| "loss": 1.2698, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7528205128205128, | |
| "grad_norm": 1.3473981945869655, | |
| "learning_rate": 1.5118423943250768e-07, | |
| "loss": 1.2902, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7548717948717949, | |
| "grad_norm": 1.4495547654976086, | |
| "learning_rate": 1.4880764532234514e-07, | |
| "loss": 1.2196, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.7569230769230769, | |
| "grad_norm": 1.357106166673668, | |
| "learning_rate": 1.4644660940672627e-07, | |
| "loss": 1.2519, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7589743589743589, | |
| "grad_norm": 1.2548591046322328, | |
| "learning_rate": 1.4410123628205134e-07, | |
| "loss": 1.2896, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7610256410256411, | |
| "grad_norm": 1.2809196807216436, | |
| "learning_rate": 1.417716298508424e-07, | |
| "loss": 1.3136, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7630769230769231, | |
| "grad_norm": 1.3603566060815664, | |
| "learning_rate": 1.3945789331714013e-07, | |
| "loss": 1.3298, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7651282051282051, | |
| "grad_norm": 1.2416081192958257, | |
| "learning_rate": 1.3716012918193205e-07, | |
| "loss": 1.2653, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.7671794871794871, | |
| "grad_norm": 1.2397913153351197, | |
| "learning_rate": 1.3487843923861098e-07, | |
| "loss": 1.3004, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 1.3957552308537007, | |
| "learning_rate": 1.3261292456846646e-07, | |
| "loss": 1.3135, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7712820512820513, | |
| "grad_norm": 1.3389437330568568, | |
| "learning_rate": 1.30363685536206e-07, | |
| "loss": 1.2816, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7733333333333333, | |
| "grad_norm": 1.4171003129680448, | |
| "learning_rate": 1.2813082178550928e-07, | |
| "loss": 1.3315, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.7753846153846153, | |
| "grad_norm": 1.2968744143026596, | |
| "learning_rate": 1.2591443223461333e-07, | |
| "loss": 1.3179, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.7774358974358975, | |
| "grad_norm": 1.3860589730680748, | |
| "learning_rate": 1.2371461507193075e-07, | |
| "loss": 1.309, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.7794871794871795, | |
| "grad_norm": 1.4261801869961688, | |
| "learning_rate": 1.215314677516997e-07, | |
| "loss": 1.2594, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7815384615384615, | |
| "grad_norm": 1.3803317479367614, | |
| "learning_rate": 1.1936508698966663e-07, | |
| "loss": 1.327, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.7835897435897435, | |
| "grad_norm": 1.3462031002972898, | |
| "learning_rate": 1.1721556875880167e-07, | |
| "loss": 1.3252, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7856410256410257, | |
| "grad_norm": 1.3804636579208875, | |
| "learning_rate": 1.150830082850468e-07, | |
| "loss": 1.2994, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.7876923076923077, | |
| "grad_norm": 1.3874044977427191, | |
| "learning_rate": 1.1296750004309757e-07, | |
| "loss": 1.342, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7897435897435897, | |
| "grad_norm": 1.2538944806181445, | |
| "learning_rate": 1.1086913775221706e-07, | |
| "loss": 1.2532, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7917948717948718, | |
| "grad_norm": 1.388891555677492, | |
| "learning_rate": 1.0878801437208496e-07, | |
| "loss": 1.338, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7938461538461539, | |
| "grad_norm": 1.4920405662743708, | |
| "learning_rate": 1.0672422209867876e-07, | |
| "loss": 1.284, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7958974358974359, | |
| "grad_norm": 1.375211936323982, | |
| "learning_rate": 1.0467785236018944e-07, | |
| "loss": 1.3315, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7979487179487179, | |
| "grad_norm": 1.3363209851874036, | |
| "learning_rate": 1.026489958129712e-07, | |
| "loss": 1.2874, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.291621044229256, | |
| "learning_rate": 1.0063774233752542e-07, | |
| "loss": 1.3416, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8020512820512821, | |
| "grad_norm": 1.3581366805811677, | |
| "learning_rate": 9.864418103451827e-08, | |
| "loss": 1.2981, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.8041025641025641, | |
| "grad_norm": 1.3003239187798818, | |
| "learning_rate": 9.666840022083422e-08, | |
| "loss": 1.3101, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.8061538461538461, | |
| "grad_norm": 1.3246739403857846, | |
| "learning_rate": 9.471048742566312e-08, | |
| "loss": 1.3382, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.8082051282051282, | |
| "grad_norm": 1.3582591260843835, | |
| "learning_rate": 9.27705293866226e-08, | |
| "loss": 1.3002, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.8102564102564103, | |
| "grad_norm": 1.2313737865301981, | |
| "learning_rate": 9.084861204591549e-08, | |
| "loss": 1.2978, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8123076923076923, | |
| "grad_norm": 1.3353913098594299, | |
| "learning_rate": 8.894482054652247e-08, | |
| "loss": 1.2584, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.8143589743589743, | |
| "grad_norm": 1.3242269914914493, | |
| "learning_rate": 8.705923922843039e-08, | |
| "loss": 1.3307, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.8164102564102564, | |
| "grad_norm": 1.2697101868447127, | |
| "learning_rate": 8.519195162489528e-08, | |
| "loss": 1.2834, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.8184615384615385, | |
| "grad_norm": 1.4632526641918853, | |
| "learning_rate": 8.334304045874246e-08, | |
| "loss": 1.3194, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 1.3907694450406674, | |
| "learning_rate": 8.151258763870177e-08, | |
| "loss": 1.306, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8225641025641026, | |
| "grad_norm": 1.3741854129969415, | |
| "learning_rate": 7.970067425577847e-08, | |
| "loss": 1.3207, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.8246153846153846, | |
| "grad_norm": 1.2990539782457562, | |
| "learning_rate": 7.790738057966079e-08, | |
| "loss": 1.311, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.8266666666666667, | |
| "grad_norm": 1.3139752156003466, | |
| "learning_rate": 7.613278605516454e-08, | |
| "loss": 1.2679, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.8287179487179487, | |
| "grad_norm": 1.4261493929651812, | |
| "learning_rate": 7.437696929871312e-08, | |
| "loss": 1.4016, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.8307692307692308, | |
| "grad_norm": 1.4327200805455274, | |
| "learning_rate": 7.264000809485482e-08, | |
| "loss": 1.2647, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8328205128205128, | |
| "grad_norm": 1.3748258593974458, | |
| "learning_rate": 7.092197939281696e-08, | |
| "loss": 1.3448, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.8348717948717949, | |
| "grad_norm": 1.335787936320607, | |
| "learning_rate": 6.92229593030969e-08, | |
| "loss": 1.2803, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.8369230769230769, | |
| "grad_norm": 1.3764176859888628, | |
| "learning_rate": 6.754302309409033e-08, | |
| "loss": 1.3138, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.838974358974359, | |
| "grad_norm": 1.3300596184326687, | |
| "learning_rate": 6.588224518875646e-08, | |
| "loss": 1.2705, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.841025641025641, | |
| "grad_norm": 1.394358508134729, | |
| "learning_rate": 6.424069916132163e-08, | |
| "loss": 1.3222, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8430769230769231, | |
| "grad_norm": 1.3109916047636498, | |
| "learning_rate": 6.261845773401937e-08, | |
| "loss": 1.2643, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8451282051282051, | |
| "grad_norm": 1.4202733175264604, | |
| "learning_rate": 6.101559277386903e-08, | |
| "loss": 1.3386, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8471794871794872, | |
| "grad_norm": 1.2483773635147983, | |
| "learning_rate": 5.943217528949168e-08, | |
| "loss": 1.2888, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8492307692307692, | |
| "grad_norm": 1.2779111173888642, | |
| "learning_rate": 5.786827542796491e-08, | |
| "loss": 1.314, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8512820512820513, | |
| "grad_norm": 1.3238564515375497, | |
| "learning_rate": 5.632396247171428e-08, | |
| "loss": 1.2913, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 1.4410152622622796, | |
| "learning_rate": 5.47993048354452e-08, | |
| "loss": 1.3451, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8553846153846154, | |
| "grad_norm": 1.3897594499448358, | |
| "learning_rate": 5.3294370063111213e-08, | |
| "loss": 1.2569, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.8574358974358974, | |
| "grad_norm": 1.3902552303122406, | |
| "learning_rate": 5.1809224824922174e-08, | |
| "loss": 1.2562, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.8594871794871795, | |
| "grad_norm": 1.3609774270312844, | |
| "learning_rate": 5.0343934914390426e-08, | |
| "loss": 1.3177, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.8615384615384616, | |
| "grad_norm": 1.24678063079174, | |
| "learning_rate": 4.8898565245416246e-08, | |
| "loss": 1.2621, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8635897435897436, | |
| "grad_norm": 1.2709715813867861, | |
| "learning_rate": 4.747317984941213e-08, | |
| "loss": 1.2854, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.8656410256410256, | |
| "grad_norm": 1.3875358765975183, | |
| "learning_rate": 4.606784187246587e-08, | |
| "loss": 1.2577, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8676923076923077, | |
| "grad_norm": 1.3634193741456224, | |
| "learning_rate": 4.468261357254338e-08, | |
| "loss": 1.3436, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.8697435897435898, | |
| "grad_norm": 1.3526734393532784, | |
| "learning_rate": 4.331755631673056e-08, | |
| "loss": 1.2838, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 1.3628760969364189, | |
| "learning_rate": 4.197273057851464e-08, | |
| "loss": 1.3112, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8738461538461538, | |
| "grad_norm": 1.3340141060832507, | |
| "learning_rate": 4.0648195935104767e-08, | |
| "loss": 1.303, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.8758974358974358, | |
| "grad_norm": 1.3627820828373822, | |
| "learning_rate": 3.934401106479351e-08, | |
| "loss": 1.3302, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.877948717948718, | |
| "grad_norm": 1.2918615457638776, | |
| "learning_rate": 3.806023374435663e-08, | |
| "loss": 1.2913, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.3921884369956634, | |
| "learning_rate": 3.6796920846493714e-08, | |
| "loss": 1.3353, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.882051282051282, | |
| "grad_norm": 1.442462815929615, | |
| "learning_rate": 3.555412833730881e-08, | |
| "loss": 1.3185, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.884102564102564, | |
| "grad_norm": 1.4083279440312293, | |
| "learning_rate": 3.4331911273830784e-08, | |
| "loss": 1.3429, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.8861538461538462, | |
| "grad_norm": 1.396143913243508, | |
| "learning_rate": 3.313032380157454e-08, | |
| "loss": 1.3309, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.8882051282051282, | |
| "grad_norm": 1.311150147580031, | |
| "learning_rate": 3.1949419152142e-08, | |
| "loss": 1.2913, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.8902564102564102, | |
| "grad_norm": 1.3752884331352524, | |
| "learning_rate": 3.078924964086416e-08, | |
| "loss": 1.2808, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.8923076923076924, | |
| "grad_norm": 1.31858969839846, | |
| "learning_rate": 2.9649866664483382e-08, | |
| "loss": 1.26, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8943589743589744, | |
| "grad_norm": 1.2935400196335294, | |
| "learning_rate": 2.8531320698876428e-08, | |
| "loss": 1.256, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8964102564102564, | |
| "grad_norm": 1.3508280125945176, | |
| "learning_rate": 2.7433661296818232e-08, | |
| "loss": 1.291, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.8984615384615384, | |
| "grad_norm": 1.2947996736751957, | |
| "learning_rate": 2.6356937085786956e-08, | |
| "loss": 1.3182, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.9005128205128206, | |
| "grad_norm": 1.3268339625220218, | |
| "learning_rate": 2.530119576580936e-08, | |
| "loss": 1.3027, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.9025641025641026, | |
| "grad_norm": 1.4328144299183967, | |
| "learning_rate": 2.426648410734794e-08, | |
| "loss": 1.345, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9046153846153846, | |
| "grad_norm": 1.3138230356748517, | |
| "learning_rate": 2.3252847949228826e-08, | |
| "loss": 1.2649, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 1.2807840482032402, | |
| "learning_rate": 2.2260332196610997e-08, | |
| "loss": 1.2554, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.9087179487179488, | |
| "grad_norm": 1.4043522223341176, | |
| "learning_rate": 2.128898081899727e-08, | |
| "loss": 1.3474, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.9107692307692308, | |
| "grad_norm": 1.347315403936581, | |
| "learning_rate": 2.03388368482858e-08, | |
| "loss": 1.2347, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.9128205128205128, | |
| "grad_norm": 1.3662668916302128, | |
| "learning_rate": 1.940994237686433e-08, | |
| "loss": 1.3457, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9148717948717948, | |
| "grad_norm": 1.4158295255358022, | |
| "learning_rate": 1.8502338555745124e-08, | |
| "loss": 1.3326, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.916923076923077, | |
| "grad_norm": 1.2780375459475717, | |
| "learning_rate": 1.7616065592742034e-08, | |
| "loss": 1.2814, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.918974358974359, | |
| "grad_norm": 1.2749072583260956, | |
| "learning_rate": 1.6751162750689164e-08, | |
| "loss": 1.3122, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.921025641025641, | |
| "grad_norm": 1.3367919003159088, | |
| "learning_rate": 1.590766834570173e-08, | |
| "loss": 1.257, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 1.3152042540986686, | |
| "learning_rate": 1.508561974547812e-08, | |
| "loss": 1.3096, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9251282051282051, | |
| "grad_norm": 1.3270136311007528, | |
| "learning_rate": 1.4285053367645073e-08, | |
| "loss": 1.2955, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.9271794871794872, | |
| "grad_norm": 1.265439090150174, | |
| "learning_rate": 1.3506004678143834e-08, | |
| "loss": 1.3265, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.9292307692307692, | |
| "grad_norm": 1.316351798054078, | |
| "learning_rate": 1.2748508189659446e-08, | |
| "loss": 1.2659, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.9312820512820513, | |
| "grad_norm": 1.36770340106091, | |
| "learning_rate": 1.2012597460091201e-08, | |
| "loss": 1.2548, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 1.3009822941793905, | |
| "learning_rate": 1.1298305091066662e-08, | |
| "loss": 1.3421, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9353846153846154, | |
| "grad_norm": 1.3976626704836301, | |
| "learning_rate": 1.0605662726496877e-08, | |
| "loss": 1.3743, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.9374358974358974, | |
| "grad_norm": 1.3323344873403382, | |
| "learning_rate": 9.93470105117461e-09, | |
| "loss": 1.3038, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.9394871794871795, | |
| "grad_norm": 1.33662126434898, | |
| "learning_rate": 9.285449789415145e-09, | |
| "loss": 1.3841, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.9415384615384615, | |
| "grad_norm": 1.2905917845938792, | |
| "learning_rate": 8.657937703739515e-09, | |
| "loss": 1.4017, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.9435897435897436, | |
| "grad_norm": 1.2878409648663358, | |
| "learning_rate": 8.052192593599905e-09, | |
| "loss": 1.3052, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9456410256410256, | |
| "grad_norm": 1.359774229106595, | |
| "learning_rate": 7.46824129414847e-09, | |
| "loss": 1.2997, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9476923076923077, | |
| "grad_norm": 1.3536433009615407, | |
| "learning_rate": 6.9061096750483435e-09, | |
| "loss": 1.2946, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9497435897435897, | |
| "grad_norm": 1.2840551656174324, | |
| "learning_rate": 6.365822639327723e-09, | |
| "loss": 1.3496, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9517948717948718, | |
| "grad_norm": 1.3775049985724006, | |
| "learning_rate": 5.8474041222764114e-09, | |
| "loss": 1.3167, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9538461538461539, | |
| "grad_norm": 1.395262346657903, | |
| "learning_rate": 5.35087709038573e-09, | |
| "loss": 1.2255, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9558974358974359, | |
| "grad_norm": 1.3508361630725259, | |
| "learning_rate": 4.8762635403308275e-09, | |
| "loss": 1.2973, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9579487179487179, | |
| "grad_norm": 1.3826942707464611, | |
| "learning_rate": 4.423584497996457e-09, | |
| "loss": 1.2715, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.4244692630586457, | |
| "learning_rate": 3.9928600175451185e-09, | |
| "loss": 1.3069, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.9620512820512821, | |
| "grad_norm": 1.3821668725285425, | |
| "learning_rate": 3.5841091805292045e-09, | |
| "loss": 1.2713, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.9641025641025641, | |
| "grad_norm": 1.4398924035729572, | |
| "learning_rate": 3.197350095045126e-09, | |
| "loss": 1.2748, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9661538461538461, | |
| "grad_norm": 1.2386387780077939, | |
| "learning_rate": 2.832599894931453e-09, | |
| "loss": 1.3441, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.9682051282051282, | |
| "grad_norm": 1.3525741526071395, | |
| "learning_rate": 2.489874739009579e-09, | |
| "loss": 1.2753, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9702564102564103, | |
| "grad_norm": 1.4039770005597791, | |
| "learning_rate": 2.1691898103682883e-09, | |
| "loss": 1.3159, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.9723076923076923, | |
| "grad_norm": 1.3628598317312024, | |
| "learning_rate": 1.870559315690634e-09, | |
| "loss": 1.2887, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 1.371403001365053, | |
| "learning_rate": 1.5939964846249377e-09, | |
| "loss": 1.3487, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9764102564102564, | |
| "grad_norm": 1.3133007826982859, | |
| "learning_rate": 1.339513569198536e-09, | |
| "loss": 1.3189, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.9784615384615385, | |
| "grad_norm": 1.4606362529352184, | |
| "learning_rate": 1.107121843274994e-09, | |
| "loss": 1.3189, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.9805128205128205, | |
| "grad_norm": 1.3509780560710458, | |
| "learning_rate": 8.968316020547261e-10, | |
| "loss": 1.2825, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.9825641025641025, | |
| "grad_norm": 1.2243091739514533, | |
| "learning_rate": 7.086521616190277e-10, | |
| "loss": 1.3313, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.9846153846153847, | |
| "grad_norm": 1.3506027231957574, | |
| "learning_rate": 5.425918585170164e-10, | |
| "loss": 1.3359, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9866666666666667, | |
| "grad_norm": 1.413547920911948, | |
| "learning_rate": 3.9865804939659407e-10, | |
| "loss": 1.271, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.9887179487179487, | |
| "grad_norm": 1.3106473731101602, | |
| "learning_rate": 2.768571106784856e-10, | |
| "loss": 1.3602, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.9907692307692307, | |
| "grad_norm": 1.374110583470325, | |
| "learning_rate": 1.7719443827368674e-10, | |
| "loss": 1.3089, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.9928205128205129, | |
| "grad_norm": 1.4151814700892573, | |
| "learning_rate": 9.967444734459984e-11, | |
| "loss": 1.3375, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.9948717948717949, | |
| "grad_norm": 1.306432628782312, | |
| "learning_rate": 4.430057210913496e-11, | |
| "loss": 1.2872, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9969230769230769, | |
| "grad_norm": 1.2495964430501498, | |
| "learning_rate": 1.1075265688775814e-11, | |
| "loss": 1.306, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.9989743589743589, | |
| "grad_norm": 1.3152699338512461, | |
| "learning_rate": 0.0, | |
| "loss": 1.3145, | |
| "step": 487 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 487, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 292305580720128.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |