| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.995962314939435, |
| "eval_steps": 500, |
| "global_step": 1113, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.026917900403768506, |
| "grad_norm": 13.069798469543457, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 0.3801, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05383580080753701, |
| "grad_norm": 2.184347629547119, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 0.31, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08075370121130551, |
| "grad_norm": 0.8302198648452759, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 0.2645, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.10767160161507403, |
| "grad_norm": 1.0656105279922485, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.2539, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13458950201884254, |
| "grad_norm": 0.5782439708709717, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 0.2512, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16150740242261102, |
| "grad_norm": 0.5837422609329224, |
| "learning_rate": 1.0714285714285714e-05, |
| "loss": 0.2508, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.18842530282637954, |
| "grad_norm": 0.6315082907676697, |
| "learning_rate": 1.25e-05, |
| "loss": 0.2481, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.21534320323014805, |
| "grad_norm": 0.649541974067688, |
| "learning_rate": 1.4285714285714287e-05, |
| "loss": 0.2418, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.24226110363391656, |
| "grad_norm": 0.5448735356330872, |
| "learning_rate": 1.6071428571428572e-05, |
| "loss": 0.2479, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2691790040376851, |
| "grad_norm": 0.5558776259422302, |
| "learning_rate": 1.785714285714286e-05, |
| "loss": 0.2421, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2960969044414536, |
| "grad_norm": 0.3394428789615631, |
| "learning_rate": 1.9642857142857145e-05, |
| "loss": 0.2388, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.32301480484522205, |
| "grad_norm": 0.3705368936061859, |
| "learning_rate": 1.9996848199254315e-05, |
| "loss": 0.2407, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.34993270524899056, |
| "grad_norm": 0.3354800343513489, |
| "learning_rate": 1.9984047413708153e-05, |
| "loss": 0.2325, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3768506056527591, |
| "grad_norm": 0.2792787253856659, |
| "learning_rate": 1.9961413253717214e-05, |
| "loss": 0.2385, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4037685060565276, |
| "grad_norm": 0.6989262104034424, |
| "learning_rate": 1.9928968011860973e-05, |
| "loss": 0.2372, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4306864064602961, |
| "grad_norm": 0.4628732204437256, |
| "learning_rate": 1.988674364373809e-05, |
| "loss": 0.2332, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4576043068640646, |
| "grad_norm": 1.1485790014266968, |
| "learning_rate": 1.9834781736493057e-05, |
| "loss": 0.2362, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4845222072678331, |
| "grad_norm": 0.3115156292915344, |
| "learning_rate": 1.9773133467856672e-05, |
| "loss": 0.2347, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5114401076716016, |
| "grad_norm": 0.2576087415218353, |
| "learning_rate": 1.9701859555740647e-05, |
| "loss": 0.2404, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5383580080753702, |
| "grad_norm": 0.3003959059715271, |
| "learning_rate": 1.9621030198436007e-05, |
| "loss": 0.234, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5652759084791387, |
| "grad_norm": 0.22878509759902954, |
| "learning_rate": 1.9530725005474195e-05, |
| "loss": 0.2347, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5921938088829072, |
| "grad_norm": 0.26122385263442993, |
| "learning_rate": 1.9431032919218957e-05, |
| "loss": 0.2446, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6191117092866757, |
| "grad_norm": 0.22441260516643524, |
| "learning_rate": 1.9322052127266234e-05, |
| "loss": 0.2398, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6460296096904441, |
| "grad_norm": 0.2252231240272522, |
| "learning_rate": 1.9203889965738354e-05, |
| "loss": 0.2377, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6729475100942126, |
| "grad_norm": 0.30187228322029114, |
| "learning_rate": 1.9076662813567772e-05, |
| "loss": 0.2355, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6998654104979811, |
| "grad_norm": 0.2517610192298889, |
| "learning_rate": 1.894049597787443e-05, |
| "loss": 0.2402, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7267833109017496, |
| "grad_norm": 0.30307725071907043, |
| "learning_rate": 1.879552357054971e-05, |
| "loss": 0.2378, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7537012113055181, |
| "grad_norm": 0.26731035113334656, |
| "learning_rate": 1.8641888376168483e-05, |
| "loss": 0.2378, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7806191117092867, |
| "grad_norm": 0.22943764925003052, |
| "learning_rate": 1.847974171135933e-05, |
| "loss": 0.235, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8075370121130552, |
| "grad_norm": 0.19347825646400452, |
| "learning_rate": 1.830924327577149e-05, |
| "loss": 0.2329, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8344549125168237, |
| "grad_norm": 0.22859790921211243, |
| "learning_rate": 1.8130560994785325e-05, |
| "loss": 0.2289, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8613728129205922, |
| "grad_norm": 0.2617790699005127, |
| "learning_rate": 1.7943870854121126e-05, |
| "loss": 0.2294, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8882907133243607, |
| "grad_norm": 0.23600426316261292, |
| "learning_rate": 1.7749356726509286e-05, |
| "loss": 0.2304, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9152086137281292, |
| "grad_norm": 0.2116561233997345, |
| "learning_rate": 1.7547210190592446e-05, |
| "loss": 0.2379, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9421265141318977, |
| "grad_norm": 0.19537119567394257, |
| "learning_rate": 1.733763034223804e-05, |
| "loss": 0.2309, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9690444145356663, |
| "grad_norm": 0.22050656378269196, |
| "learning_rate": 1.7120823598447077e-05, |
| "loss": 0.2281, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9959623149394348, |
| "grad_norm": 0.1890714466571808, |
| "learning_rate": 1.6897003494052217e-05, |
| "loss": 0.2327, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0228802153432033, |
| "grad_norm": 0.1974857598543167, |
| "learning_rate": 1.6666390471405504e-05, |
| "loss": 0.2265, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.0497981157469718, |
| "grad_norm": 0.2218897044658661, |
| "learning_rate": 1.642921166326278e-05, |
| "loss": 0.2385, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0767160161507403, |
| "grad_norm": 0.35485249757766724, |
| "learning_rate": 1.6185700669078674e-05, |
| "loss": 0.2274, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1036339165545088, |
| "grad_norm": 0.40264761447906494, |
| "learning_rate": 1.5936097324932487e-05, |
| "loss": 0.2287, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1305518169582773, |
| "grad_norm": 0.2551412284374237, |
| "learning_rate": 1.568064746731156e-05, |
| "loss": 0.2395, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1574697173620458, |
| "grad_norm": 0.19965523481369019, |
| "learning_rate": 1.5419602690984805e-05, |
| "loss": 0.2331, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.1843876177658144, |
| "grad_norm": 0.18600021302700043, |
| "learning_rate": 1.5153220101204839e-05, |
| "loss": 0.2354, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2113055181695827, |
| "grad_norm": 0.2717427909374237, |
| "learning_rate": 1.4881762060482814e-05, |
| "loss": 0.231, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2382234185733512, |
| "grad_norm": 0.3491940498352051, |
| "learning_rate": 1.4605495930185303e-05, |
| "loss": 0.2302, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2651413189771197, |
| "grad_norm": 0.18677066266536713, |
| "learning_rate": 1.4324693807207785e-05, |
| "loss": 0.2311, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2920592193808882, |
| "grad_norm": 0.24856720864772797, |
| "learning_rate": 1.4039632255984078e-05, |
| "loss": 0.2258, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3189771197846567, |
| "grad_norm": 0.1940755695104599, |
| "learning_rate": 1.375059203609562e-05, |
| "loss": 0.2304, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.3458950201884252, |
| "grad_norm": 0.2115495502948761, |
| "learning_rate": 1.3457857825748959e-05, |
| "loss": 0.2255, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3458950201884252, |
| "eval_loss": 0.24458986520767212, |
| "eval_runtime": 62.1158, |
| "eval_samples_per_second": 85.051, |
| "eval_steps_per_second": 21.267, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.3728129205921937, |
| "grad_norm": 0.30106064677238464, |
| "learning_rate": 1.3161717941393703e-05, |
| "loss": 0.2293, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3997308209959622, |
| "grad_norm": 0.21698522567749023, |
| "learning_rate": 1.2862464053757196e-05, |
| "loss": 0.2301, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4266487213997308, |
| "grad_norm": 0.21992221474647522, |
| "learning_rate": 1.2560390900575472e-05, |
| "loss": 0.2264, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.4535666218034993, |
| "grad_norm": 0.25674089789390564, |
| "learning_rate": 1.2255795996303526e-05, |
| "loss": 0.2261, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.4804845222072678, |
| "grad_norm": 0.2653080224990845, |
| "learning_rate": 1.1948979339090758e-05, |
| "loss": 0.2243, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5074024226110363, |
| "grad_norm": 0.3156011998653412, |
| "learning_rate": 1.1640243115310219e-05, |
| "loss": 0.2353, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5343203230148048, |
| "grad_norm": 0.21554109454154968, |
| "learning_rate": 1.1329891401932631e-05, |
| "loss": 0.2294, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.5612382234185733, |
| "grad_norm": 0.18904979526996613, |
| "learning_rate": 1.1018229867038358e-05, |
| "loss": 0.2272, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.5881561238223418, |
| "grad_norm": 0.23018983006477356, |
| "learning_rate": 1.0705565468762274e-05, |
| "loss": 0.2294, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6150740242261103, |
| "grad_norm": 0.2061055600643158, |
| "learning_rate": 1.0392206152968058e-05, |
| "loss": 0.2266, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.6419919246298789, |
| "grad_norm": 0.20794202387332916, |
| "learning_rate": 1.0078460549949647e-05, |
| "loss": 0.2357, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.6689098250336474, |
| "grad_norm": 0.19699296355247498, |
| "learning_rate": 9.764637670458595e-06, |
| "loss": 0.224, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.695827725437416, |
| "grad_norm": 0.22355449199676514, |
| "learning_rate": 9.451046601356725e-06, |
| "loss": 0.2365, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7227456258411844, |
| "grad_norm": 0.20971466600894928, |
| "learning_rate": 9.137996201193807e-06, |
| "loss": 0.2328, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.749663526244953, |
| "grad_norm": 0.24429140985012054, |
| "learning_rate": 8.825794796010101e-06, |
| "loss": 0.2213, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.7765814266487214, |
| "grad_norm": 0.2615514397621155, |
| "learning_rate": 8.514749875663397e-06, |
| "loss": 0.2291, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.80349932705249, |
| "grad_norm": 0.29951363801956177, |
| "learning_rate": 8.20516779097958e-06, |
| "loss": 0.2294, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.8304172274562585, |
| "grad_norm": 0.19812524318695068, |
| "learning_rate": 7.897353452025077e-06, |
| "loss": 0.2288, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.857335127860027, |
| "grad_norm": 0.21179044246673584, |
| "learning_rate": 7.591610027798287e-06, |
| "loss": 0.2294, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.8842530282637955, |
| "grad_norm": 0.193583145737648, |
| "learning_rate": 7.2882386476358304e-06, |
| "loss": 0.227, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.911170928667564, |
| "grad_norm": 0.20502911508083344, |
| "learning_rate": 6.9875381046276605e-06, |
| "loss": 0.2258, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.9380888290713325, |
| "grad_norm": 0.19676484167575836, |
| "learning_rate": 6.689804561333164e-06, |
| "loss": 0.2272, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.965006729475101, |
| "grad_norm": 0.20092357695102692, |
| "learning_rate": 6.39533125808812e-06, |
| "loss": 0.2292, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.9919246298788695, |
| "grad_norm": 0.22104892134666443, |
| "learning_rate": 6.104408224189746e-06, |
| "loss": 0.2269, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.018842530282638, |
| "grad_norm": 0.1946035623550415, |
| "learning_rate": 5.8173219922443516e-06, |
| "loss": 0.2193, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.0457604306864066, |
| "grad_norm": 0.22905437648296356, |
| "learning_rate": 5.5343553159588884e-06, |
| "loss": 0.2353, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.072678331090175, |
| "grad_norm": 0.23081299662590027, |
| "learning_rate": 5.2557868916543996e-06, |
| "loss": 0.2229, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.0995962314939436, |
| "grad_norm": 0.21353456377983093, |
| "learning_rate": 4.981891083775597e-06, |
| "loss": 0.2215, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.126514131897712, |
| "grad_norm": 0.20833438634872437, |
| "learning_rate": 4.712937654666971e-06, |
| "loss": 0.2231, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.1534320323014806, |
| "grad_norm": 0.20027689635753632, |
| "learning_rate": 4.4491914988815055e-06, |
| "loss": 0.2281, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.180349932705249, |
| "grad_norm": 0.22123222053050995, |
| "learning_rate": 4.190912382283749e-06, |
| "loss": 0.2278, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.2072678331090176, |
| "grad_norm": 0.28094470500946045, |
| "learning_rate": 3.9383546862041955e-06, |
| "loss": 0.2228, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.234185733512786, |
| "grad_norm": 0.3237360417842865, |
| "learning_rate": 3.6917671568969006e-06, |
| "loss": 0.2291, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.2611036339165547, |
| "grad_norm": 0.21679522097110748, |
| "learning_rate": 3.4513926605471504e-06, |
| "loss": 0.2285, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.288021534320323, |
| "grad_norm": 0.21422189474105835, |
| "learning_rate": 3.2174679440704616e-06, |
| "loss": 0.2279, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.3149394347240917, |
| "grad_norm": 0.2352222353219986, |
| "learning_rate": 2.9902234019385056e-06, |
| "loss": 0.2264, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.34185733512786, |
| "grad_norm": 0.23439514636993408, |
| "learning_rate": 2.7698828492615992e-06, |
| "loss": 0.2269, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.3687752355316287, |
| "grad_norm": 0.22924348711967468, |
| "learning_rate": 2.5566633013512753e-06, |
| "loss": 0.2267, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.3956931359353972, |
| "grad_norm": 0.23167449235916138, |
| "learning_rate": 2.350774759980027e-06, |
| "loss": 0.2254, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.4226110363391653, |
| "grad_norm": 0.2599547803401947, |
| "learning_rate": 2.1524200065487565e-06, |
| "loss": 0.2291, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.449528936742934, |
| "grad_norm": 0.22817839682102203, |
| "learning_rate": 1.961794402365611e-06, |
| "loss": 0.2284, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.4764468371467023, |
| "grad_norm": 0.2169758379459381, |
| "learning_rate": 1.7790856962329584e-06, |
| "loss": 0.2286, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.503364737550471, |
| "grad_norm": 0.21095937490463257, |
| "learning_rate": 1.6044738395319648e-06, |
| "loss": 0.2253, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.5302826379542394, |
| "grad_norm": 0.21286533772945404, |
| "learning_rate": 1.4381308089869283e-06, |
| "loss": 0.2193, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.557200538358008, |
| "grad_norm": 0.2127334177494049, |
| "learning_rate": 1.2802204372839178e-06, |
| "loss": 0.2198, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.5841184387617764, |
| "grad_norm": 0.19859924912452698, |
| "learning_rate": 1.130898251710547e-06, |
| "loss": 0.2212, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.611036339165545, |
| "grad_norm": 0.23916248977184296, |
| "learning_rate": 9.903113209758098e-07, |
| "loss": 0.2245, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.6379542395693134, |
| "grad_norm": 0.24261216819286346, |
| "learning_rate": 8.585981103608343e-07, |
| "loss": 0.2241, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.664872139973082, |
| "grad_norm": 0.22423197329044342, |
| "learning_rate": 7.358883453432398e-07, |
| "loss": 0.2241, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.6917900403768504, |
| "grad_norm": 0.30151936411857605, |
| "learning_rate": 6.223028838293898e-07, |
| "loss": 0.2265, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.6917900403768504, |
| "eval_loss": 0.2421317845582962, |
| "eval_runtime": 62.8408, |
| "eval_samples_per_second": 84.07, |
| "eval_steps_per_second": 21.021, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.718707940780619, |
| "grad_norm": 0.23664213716983795, |
| "learning_rate": 5.179535971203953e-07, |
| "loss": 0.2199, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.7456258411843875, |
| "grad_norm": 0.21507257223129272, |
| "learning_rate": 4.2294325972911274e-07, |
| "loss": 0.2265, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.772543741588156, |
| "grad_norm": 0.1968134194612503, |
| "learning_rate": 3.3736544815663017e-07, |
| "loss": 0.2204, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.7994616419919245, |
| "grad_norm": 0.2121606171131134, |
| "learning_rate": 2.6130444872797143e-07, |
| "loss": 0.2187, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.826379542395693, |
| "grad_norm": 0.21338069438934326, |
| "learning_rate": 1.9483517457776436e-07, |
| "loss": 0.2156, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.8532974427994615, |
| "grad_norm": 0.22012507915496826, |
| "learning_rate": 1.3802309186764619e-07, |
| "loss": 0.2176, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.88021534320323, |
| "grad_norm": 0.2376081794500351, |
| "learning_rate": 9.092415530807975e-08, |
| "loss": 0.2206, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.9071332436069985, |
| "grad_norm": 0.21504898369312286, |
| "learning_rate": 5.3584753048073756e-08, |
| "loss": 0.2233, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.934051144010767, |
| "grad_norm": 0.2161342054605484, |
| "learning_rate": 2.604166098709504e-08, |
| "loss": 0.2263, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.9609690444145356, |
| "grad_norm": 0.26196786761283875, |
| "learning_rate": 8.322006554171147e-09, |
| "loss": 0.23, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.987886944818304, |
| "grad_norm": 0.26092347502708435, |
| "learning_rate": 4.432419898459106e-10, |
| "loss": 0.2229, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.995962314939435, |
| "step": 1113, |
| "total_flos": 5.031637962748592e+18, |
| "train_loss": 0.232770404511492, |
| "train_runtime": 3990.8809, |
| "train_samples_per_second": 35.74, |
| "train_steps_per_second": 0.279 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1113, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.031637962748592e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|