{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 620,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01615508885298869,
      "grad_norm": 1.271867275238037,
      "learning_rate": 1.5384615384615385e-06,
      "loss": 1.3063,
      "step": 5
    },
    {
      "epoch": 0.03231017770597738,
      "grad_norm": 0.926250696182251,
      "learning_rate": 3.4615384615384617e-06,
      "loss": 1.3332,
      "step": 10
    },
    {
      "epoch": 0.048465266558966075,
      "grad_norm": 0.7140916585922241,
      "learning_rate": 5.384615384615385e-06,
      "loss": 1.2903,
      "step": 15
    },
    {
      "epoch": 0.06462035541195477,
      "grad_norm": 0.9422391653060913,
      "learning_rate": 7.307692307692308e-06,
      "loss": 1.2598,
      "step": 20
    },
    {
      "epoch": 0.08077544426494346,
      "grad_norm": 0.5254983305931091,
      "learning_rate": 9.230769230769232e-06,
      "loss": 1.2614,
      "step": 25
    },
    {
      "epoch": 0.09693053311793215,
      "grad_norm": 0.6005647778511047,
      "learning_rate": 1.1153846153846154e-05,
      "loss": 1.2564,
      "step": 30
    },
    {
      "epoch": 0.11308562197092084,
      "grad_norm": 0.5752702355384827,
      "learning_rate": 1.3076923076923078e-05,
      "loss": 1.2446,
      "step": 35
    },
    {
      "epoch": 0.12924071082390953,
      "grad_norm": 0.4668022096157074,
      "learning_rate": 1.5e-05,
      "loss": 1.2149,
      "step": 40
    },
    {
      "epoch": 0.14539579967689822,
      "grad_norm": 0.530459463596344,
      "learning_rate": 1.6923076923076924e-05,
      "loss": 1.2028,
      "step": 45
    },
    {
      "epoch": 0.16155088852988692,
      "grad_norm": 0.4735831916332245,
      "learning_rate": 1.8846153846153846e-05,
      "loss": 1.2197,
      "step": 50
    },
    {
      "epoch": 0.1777059773828756,
      "grad_norm": 0.5452806949615479,
      "learning_rate": 2.076923076923077e-05,
      "loss": 1.1691,
      "step": 55
    },
    {
      "epoch": 0.1938610662358643,
      "grad_norm": 0.4458446800708771,
      "learning_rate": 2.269230769230769e-05,
      "loss": 1.17,
      "step": 60
    },
    {
      "epoch": 0.210016155088853,
      "grad_norm": 0.566159725189209,
      "learning_rate": 2.4615384615384616e-05,
      "loss": 1.1737,
      "step": 65
    },
    {
      "epoch": 0.22617124394184168,
      "grad_norm": 0.4490140974521637,
      "learning_rate": 2.6538461538461538e-05,
      "loss": 1.1417,
      "step": 70
    },
    {
      "epoch": 0.24232633279483037,
      "grad_norm": 0.45892786979675293,
      "learning_rate": 2.846153846153846e-05,
      "loss": 1.1651,
      "step": 75
    },
    {
      "epoch": 0.25848142164781907,
      "grad_norm": 0.5318244099617004,
      "learning_rate": 2.9999965837847747e-05,
      "loss": 1.1468,
      "step": 80
    },
    {
      "epoch": 0.27463651050080773,
      "grad_norm": 0.5207120776176453,
      "learning_rate": 2.999877017885749e-05,
      "loss": 1.1483,
      "step": 85
    },
    {
      "epoch": 0.29079159935379645,
      "grad_norm": 0.5204576849937439,
      "learning_rate": 2.9995866567858337e-05,
      "loss": 1.0666,
      "step": 90
    },
    {
      "epoch": 0.3069466882067851,
      "grad_norm": 0.5433797836303711,
      "learning_rate": 2.9991255335492612e-05,
      "loss": 1.0818,
      "step": 95
    },
    {
      "epoch": 0.32310177705977383,
      "grad_norm": 0.5017735958099365,
      "learning_rate": 2.9984937006854268e-05,
      "loss": 1.089,
      "step": 100
    },
    {
      "epoch": 0.3392568659127625,
      "grad_norm": 0.6111322045326233,
      "learning_rate": 2.9976912301429102e-05,
      "loss": 1.0468,
      "step": 105
    },
    {
      "epoch": 0.3554119547657512,
      "grad_norm": 0.5563653707504272,
      "learning_rate": 2.996718213301282e-05,
      "loss": 1.0671,
      "step": 110
    },
    {
      "epoch": 0.3715670436187399,
      "grad_norm": 0.7332136034965515,
      "learning_rate": 2.9955747609606973e-05,
      "loss": 1.0187,
      "step": 115
    },
    {
      "epoch": 0.3877221324717286,
      "grad_norm": 0.5513479709625244,
      "learning_rate": 2.9942610033292804e-05,
      "loss": 1.0218,
      "step": 120
    },
    {
      "epoch": 0.40387722132471726,
      "grad_norm": 0.5962786674499512,
      "learning_rate": 2.9927770900082956e-05,
      "loss": 1.0027,
      "step": 125
    },
    {
      "epoch": 0.420032310177706,
      "grad_norm": 0.5897378325462341,
      "learning_rate": 2.9911231899751127e-05,
      "loss": 0.9925,
      "step": 130
    },
    {
      "epoch": 0.43618739903069464,
      "grad_norm": 0.6131622791290283,
      "learning_rate": 2.9892994915639648e-05,
      "loss": 1.0063,
      "step": 135
    },
    {
      "epoch": 0.45234248788368336,
      "grad_norm": 0.6106935143470764,
      "learning_rate": 2.9873062024445035e-05,
      "loss": 0.9924,
      "step": 140
    },
    {
      "epoch": 0.46849757673667203,
      "grad_norm": 0.7261186838150024,
      "learning_rate": 2.9851435495981487e-05,
      "loss": 0.9812,
      "step": 145
    },
    {
      "epoch": 0.48465266558966075,
      "grad_norm": 0.6918673515319824,
      "learning_rate": 2.982811779292243e-05,
      "loss": 0.9703,
      "step": 150
    },
    {
      "epoch": 0.5008077544426495,
      "grad_norm": 0.6846053004264832,
      "learning_rate": 2.980311157052007e-05,
      "loss": 0.9455,
      "step": 155
    },
    {
      "epoch": 0.5169628432956381,
      "grad_norm": 0.7054921984672546,
      "learning_rate": 2.977641967630308e-05,
      "loss": 0.9684,
      "step": 160
    },
    {
      "epoch": 0.5331179321486268,
      "grad_norm": 0.8386259078979492,
      "learning_rate": 2.974804514975226e-05,
      "loss": 0.9483,
      "step": 165
    },
    {
      "epoch": 0.5492730210016155,
      "grad_norm": 0.8338026404380798,
      "learning_rate": 2.9717991221954516e-05,
      "loss": 0.8942,
      "step": 170
    },
    {
      "epoch": 0.5654281098546042,
      "grad_norm": 0.8031806349754333,
      "learning_rate": 2.9686261315234845e-05,
      "loss": 0.8801,
      "step": 175
    },
    {
      "epoch": 0.5815831987075929,
      "grad_norm": 0.8551956415176392,
      "learning_rate": 2.9652859042766687e-05,
      "loss": 0.9189,
      "step": 180
    },
    {
      "epoch": 0.5977382875605816,
      "grad_norm": 0.781044065952301,
      "learning_rate": 2.961778820816045e-05,
      "loss": 0.9013,
      "step": 185
    },
    {
      "epoch": 0.6138933764135702,
      "grad_norm": 0.8323260545730591,
      "learning_rate": 2.958105280503039e-05,
      "loss": 0.8815,
      "step": 190
    },
    {
      "epoch": 0.630048465266559,
      "grad_norm": 1.069571614265442,
      "learning_rate": 2.954265701653985e-05,
      "loss": 0.887,
      "step": 195
    },
    {
      "epoch": 0.6462035541195477,
      "grad_norm": 0.8414632678031921,
      "learning_rate": 2.9502605214924905e-05,
      "loss": 0.867,
      "step": 200
    },
    {
      "epoch": 0.6623586429725363,
      "grad_norm": 0.9376639127731323,
      "learning_rate": 2.9460901960996484e-05,
      "loss": 0.8239,
      "step": 205
    },
    {
      "epoch": 0.678513731825525,
      "grad_norm": 0.82874596118927,
      "learning_rate": 2.941755200362104e-05,
      "loss": 0.8348,
      "step": 210
    },
    {
      "epoch": 0.6946688206785138,
      "grad_norm": 0.8920372128486633,
      "learning_rate": 2.9372560279179733e-05,
      "loss": 0.8882,
      "step": 215
    },
    {
      "epoch": 0.7108239095315024,
      "grad_norm": 0.8290151357650757,
      "learning_rate": 2.932593191100637e-05,
      "loss": 0.783,
      "step": 220
    },
    {
      "epoch": 0.7269789983844911,
      "grad_norm": 0.9877433180809021,
      "learning_rate": 2.9277672208803948e-05,
      "loss": 0.8075,
      "step": 225
    },
    {
      "epoch": 0.7431340872374798,
      "grad_norm": 0.9012677073478699,
      "learning_rate": 2.922778666804006e-05,
      "loss": 0.8248,
      "step": 230
    },
    {
      "epoch": 0.7592891760904685,
      "grad_norm": 0.8435679078102112,
      "learning_rate": 2.917628096932108e-05,
      "loss": 0.7821,
      "step": 235
    },
    {
      "epoch": 0.7754442649434572,
      "grad_norm": 0.9440993070602417,
      "learning_rate": 2.912316097774531e-05,
      "loss": 0.8035,
      "step": 240
    },
    {
      "epoch": 0.7915993537964459,
      "grad_norm": 0.9287444353103638,
      "learning_rate": 2.9068432742235126e-05,
      "loss": 0.7929,
      "step": 245
    },
    {
      "epoch": 0.8077544426494345,
      "grad_norm": 0.9541422724723816,
      "learning_rate": 2.9012102494848125e-05,
      "loss": 0.7871,
      "step": 250
    },
    {
      "epoch": 0.8239095315024233,
      "grad_norm": 0.9758172631263733,
      "learning_rate": 2.8954176650067496e-05,
      "loss": 0.7192,
      "step": 255
    },
    {
      "epoch": 0.840064620355412,
      "grad_norm": 0.8767186999320984,
      "learning_rate": 2.8894661804071588e-05,
      "loss": 0.7646,
      "step": 260
    },
    {
      "epoch": 0.8562197092084006,
      "grad_norm": 1.0081284046173096,
      "learning_rate": 2.8833564733982746e-05,
      "loss": 0.7274,
      "step": 265
    },
    {
      "epoch": 0.8723747980613893,
      "grad_norm": 1.123024582862854,
      "learning_rate": 2.877089239709564e-05,
      "loss": 0.7234,
      "step": 270
    },
    {
      "epoch": 0.8885298869143781,
      "grad_norm": 1.1195224523544312,
      "learning_rate": 2.8706651930084965e-05,
      "loss": 0.721,
      "step": 275
    },
    {
      "epoch": 0.9046849757673667,
      "grad_norm": 1.0532171726226807,
      "learning_rate": 2.8640850648192795e-05,
      "loss": 0.7322,
      "step": 280
    },
    {
      "epoch": 0.9208400646203554,
      "grad_norm": 1.0443161725997925,
      "learning_rate": 2.8573496044395574e-05,
      "loss": 0.7272,
      "step": 285
    },
    {
      "epoch": 0.9369951534733441,
      "grad_norm": 0.9977967739105225,
      "learning_rate": 2.850459578855086e-05,
      "loss": 0.7261,
      "step": 290
    },
    {
      "epoch": 0.9531502423263328,
      "grad_norm": 0.9388869404792786,
      "learning_rate": 2.843415772652395e-05,
      "loss": 0.7133,
      "step": 295
    },
    {
      "epoch": 0.9693053311793215,
      "grad_norm": 0.9346816539764404,
      "learning_rate": 2.8362189879294434e-05,
      "loss": 0.6597,
      "step": 300
    },
    {
      "epoch": 0.9854604200323102,
      "grad_norm": 1.0388387441635132,
      "learning_rate": 2.828870044204284e-05,
      "loss": 0.6581,
      "step": 305
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5073713064193726,
      "learning_rate": 2.8213697783217413e-05,
      "loss": 0.6607,
      "step": 310
    },
    {
      "epoch": 1.0161550888529887,
      "grad_norm": 1.178043007850647,
      "learning_rate": 2.8137190443581175e-05,
      "loss": 0.6305,
      "step": 315
    },
    {
      "epoch": 1.0323101777059773,
      "grad_norm": 0.9829104542732239,
      "learning_rate": 2.8059187135239383e-05,
      "loss": 0.6241,
      "step": 320
    },
    {
      "epoch": 1.048465266558966,
      "grad_norm": 1.065422534942627,
      "learning_rate": 2.7979696740647433e-05,
      "loss": 0.6547,
      "step": 325
    },
    {
      "epoch": 1.0646203554119547,
      "grad_norm": 1.1355617046356201,
      "learning_rate": 2.7898728311599414e-05,
      "loss": 0.5667,
      "step": 330
    },
    {
      "epoch": 1.0807754442649435,
      "grad_norm": 0.9137387275695801,
      "learning_rate": 2.781629106819733e-05,
      "loss": 0.5735,
      "step": 335
    },
    {
      "epoch": 1.0969305331179322,
      "grad_norm": 1.1564706563949585,
      "learning_rate": 2.7732394397801196e-05,
      "loss": 0.5792,
      "step": 340
    },
    {
      "epoch": 1.1130856219709209,
      "grad_norm": 1.0879970788955688,
      "learning_rate": 2.764704785396007e-05,
      "loss": 0.5671,
      "step": 345
    },
    {
      "epoch": 1.1292407108239095,
      "grad_norm": 1.134347677230835,
      "learning_rate": 2.7560261155324176e-05,
      "loss": 0.5761,
      "step": 350
    },
    {
      "epoch": 1.1453957996768982,
      "grad_norm": 1.189737319946289,
      "learning_rate": 2.747204418453818e-05,
      "loss": 0.5255,
      "step": 355
    },
    {
      "epoch": 1.1615508885298869,
      "grad_norm": 1.0731488466262817,
      "learning_rate": 2.7382406987115863e-05,
      "loss": 0.5373,
      "step": 360
    },
    {
      "epoch": 1.1777059773828755,
      "grad_norm": 1.045630693435669,
      "learning_rate": 2.7291359770296196e-05,
      "loss": 0.5756,
      "step": 365
    },
    {
      "epoch": 1.1938610662358644,
      "grad_norm": 1.1830902099609375,
      "learning_rate": 2.7198912901881e-05,
      "loss": 0.4824,
      "step": 370
    },
    {
      "epoch": 1.210016155088853,
      "grad_norm": 1.1311571598052979,
      "learning_rate": 2.7105076909054357e-05,
      "loss": 0.5156,
      "step": 375
    },
    {
      "epoch": 1.2261712439418417,
      "grad_norm": 1.1190736293792725,
      "learning_rate": 2.7009862477183837e-05,
      "loss": 0.479,
      "step": 380
    },
    {
      "epoch": 1.2423263327948304,
      "grad_norm": 1.1775109767913818,
      "learning_rate": 2.6913280448603727e-05,
      "loss": 0.5447,
      "step": 385
    },
    {
      "epoch": 1.258481421647819,
      "grad_norm": 0.9826534986495972,
      "learning_rate": 2.6815341821380384e-05,
      "loss": 0.5142,
      "step": 390
    },
    {
      "epoch": 1.2746365105008077,
      "grad_norm": 1.1323788166046143,
      "learning_rate": 2.6716057748059867e-05,
      "loss": 0.5244,
      "step": 395
    },
    {
      "epoch": 1.2907915993537964,
      "grad_norm": 1.423813819885254,
      "learning_rate": 2.6615439534397934e-05,
      "loss": 0.5275,
      "step": 400
    },
    {
      "epoch": 1.306946688206785,
      "grad_norm": 1.2067975997924805,
      "learning_rate": 2.6513498638072664e-05,
      "loss": 0.5486,
      "step": 405
    },
    {
      "epoch": 1.3231017770597737,
      "grad_norm": 1.089034080505371,
      "learning_rate": 2.6410246667379698e-05,
      "loss": 0.5377,
      "step": 410
    },
    {
      "epoch": 1.3392568659127626,
      "grad_norm": 1.1521645784378052,
      "learning_rate": 2.630569537991042e-05,
      "loss": 0.5249,
      "step": 415
    },
    {
      "epoch": 1.3554119547657513,
      "grad_norm": 1.172168254852295,
      "learning_rate": 2.6199856681213023e-05,
      "loss": 0.4809,
      "step": 420
    },
    {
      "epoch": 1.37156704361874,
      "grad_norm": 1.086441159248352,
      "learning_rate": 2.6092742623436856e-05,
      "loss": 0.4989,
      "step": 425
    },
    {
      "epoch": 1.3877221324717286,
      "grad_norm": 1.279645323753357,
      "learning_rate": 2.5984365403959966e-05,
      "loss": 0.4776,
      "step": 430
    },
    {
      "epoch": 1.4038772213247173,
      "grad_norm": 1.2726421356201172,
      "learning_rate": 2.5874737364000167e-05,
      "loss": 0.4759,
      "step": 435
    },
    {
      "epoch": 1.420032310177706,
      "grad_norm": 1.104711890220642,
      "learning_rate": 2.5763870987209704e-05,
      "loss": 0.4877,
      "step": 440
    },
    {
      "epoch": 1.4361873990306946,
      "grad_norm": 1.4142299890518188,
      "learning_rate": 2.5651778898253718e-05,
      "loss": 0.494,
      "step": 445
    },
    {
      "epoch": 1.4523424878836835,
      "grad_norm": 1.147704839706421,
      "learning_rate": 2.5538473861372628e-05,
      "loss": 0.4801,
      "step": 450
    },
    {
      "epoch": 1.468497576736672,
      "grad_norm": 1.1201279163360596,
      "learning_rate": 2.542396877892862e-05,
      "loss": 0.4552,
      "step": 455
    },
    {
      "epoch": 1.4846526655896608,
      "grad_norm": 1.317685842514038,
      "learning_rate": 2.5308276689936454e-05,
      "loss": 0.4721,
      "step": 460
    },
    {
      "epoch": 1.5008077544426495,
      "grad_norm": 1.111350417137146,
      "learning_rate": 2.5191410768578643e-05,
      "loss": 0.462,
      "step": 465
    },
    {
      "epoch": 1.5169628432956381,
      "grad_norm": 1.1139695644378662,
      "learning_rate": 2.5073384322705278e-05,
      "loss": 0.4189,
      "step": 470
    },
    {
      "epoch": 1.5331179321486268,
      "grad_norm": 1.1013826131820679,
      "learning_rate": 2.4954210792318637e-05,
      "loss": 0.4015,
      "step": 475
    },
    {
      "epoch": 1.5492730210016155,
      "grad_norm": 1.0391738414764404,
      "learning_rate": 2.483390374804272e-05,
      "loss": 0.5059,
      "step": 480
    },
    {
      "epoch": 1.5654281098546043,
      "grad_norm": 1.1611058712005615,
      "learning_rate": 2.4712476889577944e-05,
      "loss": 0.4572,
      "step": 485
    },
    {
      "epoch": 1.5815831987075928,
      "grad_norm": 1.1287028789520264,
      "learning_rate": 2.458994404414109e-05,
      "loss": 0.4706,
      "step": 490
    },
    {
      "epoch": 1.5977382875605817,
      "grad_norm": 1.2190521955490112,
      "learning_rate": 2.4466319164890794e-05,
      "loss": 0.4592,
      "step": 495
    },
    {
      "epoch": 1.6138933764135701,
      "grad_norm": 1.0761996507644653,
      "learning_rate": 2.434161632933863e-05,
      "loss": 0.4168,
      "step": 500
    },
    {
      "epoch": 1.630048465266559,
      "grad_norm": 1.2677689790725708,
      "learning_rate": 2.4215849737746087e-05,
      "loss": 0.4239,
      "step": 505
    },
    {
      "epoch": 1.6462035541195477,
      "grad_norm": 1.077335238456726,
      "learning_rate": 2.4089033711507523e-05,
      "loss": 0.4223,
      "step": 510
    },
    {
      "epoch": 1.6623586429725363,
      "grad_norm": 1.092629075050354,
      "learning_rate": 2.3961182691519386e-05,
      "loss": 0.396,
      "step": 515
    },
    {
      "epoch": 1.678513731825525,
      "grad_norm": 1.0773464441299438,
      "learning_rate": 2.383231123653574e-05,
      "loss": 0.3559,
      "step": 520
    },
    {
      "epoch": 1.6946688206785137,
      "grad_norm": 1.2922345399856567,
      "learning_rate": 2.370243402151045e-05,
      "loss": 0.4258,
      "step": 525
    },
    {
      "epoch": 1.7108239095315025,
      "grad_norm": 1.1194257736206055,
      "learning_rate": 2.35715658359261e-05,
      "loss": 0.4128,
      "step": 530
    },
    {
      "epoch": 1.726978998384491,
      "grad_norm": 1.2243081331253052,
      "learning_rate": 2.3439721582109874e-05,
      "loss": 0.4641,
      "step": 535
    },
    {
      "epoch": 1.7431340872374799,
      "grad_norm": 1.1439075469970703,
      "learning_rate": 2.3306916273536564e-05,
      "loss": 0.3985,
      "step": 540
    },
    {
      "epoch": 1.7592891760904685,
      "grad_norm": 1.09407639503479,
      "learning_rate": 2.3173165033118983e-05,
      "loss": 0.3708,
      "step": 545
    },
    {
      "epoch": 1.7754442649434572,
      "grad_norm": 1.1844106912612915,
      "learning_rate": 2.303848309148584e-05,
      "loss": 0.4523,
      "step": 550
    },
    {
      "epoch": 1.7915993537964459,
      "grad_norm": 1.1165605783462524,
      "learning_rate": 2.2902885785247406e-05,
      "loss": 0.3978,
      "step": 555
    },
    {
      "epoch": 1.8077544426494345,
      "grad_norm": 1.1502916812896729,
      "learning_rate": 2.2766388555249087e-05,
      "loss": 0.3759,
      "step": 560
    },
    {
      "epoch": 1.8239095315024234,
      "grad_norm": 1.003591537475586,
      "learning_rate": 2.262900694481314e-05,
      "loss": 0.3749,
      "step": 565
    },
    {
      "epoch": 1.8400646203554119,
      "grad_norm": 1.1967657804489136,
      "learning_rate": 2.2490756597968663e-05,
      "loss": 0.375,
      "step": 570
    },
    {
      "epoch": 1.8562197092084007,
      "grad_norm": 1.1565355062484741,
      "learning_rate": 2.235165325767026e-05,
      "loss": 0.3611,
      "step": 575
    },
    {
      "epoch": 1.8723747980613892,
      "grad_norm": 1.1527678966522217,
      "learning_rate": 2.2211712764005245e-05,
      "loss": 0.4101,
      "step": 580
    },
    {
      "epoch": 1.888529886914378,
      "grad_norm": 1.2227520942687988,
      "learning_rate": 2.207095105238997e-05,
      "loss": 0.3426,
      "step": 585
    },
    {
      "epoch": 1.9046849757673667,
      "grad_norm": 1.1511895656585693,
      "learning_rate": 2.1929384151755138e-05,
      "loss": 0.3967,
      "step": 590
    },
    {
      "epoch": 1.9208400646203554,
      "grad_norm": 1.1515477895736694,
      "learning_rate": 2.1787028182720606e-05,
      "loss": 0.3717,
      "step": 595
    },
    {
      "epoch": 1.936995153473344,
      "grad_norm": 1.1370980739593506,
      "learning_rate": 2.1643899355759674e-05,
      "loss": 0.3588,
      "step": 600
    },
    {
      "epoch": 1.9531502423263327,
      "grad_norm": 1.209424376487732,
      "learning_rate": 2.1500013969353107e-05,
      "loss": 0.329,
      "step": 605
    },
    {
      "epoch": 1.9693053311793216,
      "grad_norm": 1.08406662940979,
      "learning_rate": 2.1355388408133254e-05,
      "loss": 0.3333,
      "step": 610
    },
    {
      "epoch": 1.98546042003231,
      "grad_norm": 1.0804470777511597,
      "learning_rate": 2.1210039141018204e-05,
      "loss": 0.3432,
      "step": 615
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.6371866464614868,
      "learning_rate": 2.1063982719336497e-05,
      "loss": 0.3595,
      "step": 620
    }
  ],
  "logging_steps": 5,
  "max_steps": 1550,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.190029403650785e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}