| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9992652461425422, | |
| "eval_steps": 250, | |
| "global_step": 680, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0014695077149155032, | |
| "grad_norm": 17.0145143368335, | |
| "learning_rate": 4.7619047619047613e-08, | |
| "loss": 1.4955, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0029390154298310064, | |
| "grad_norm": 16.880312139442445, | |
| "learning_rate": 9.523809523809523e-08, | |
| "loss": 1.5062, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00440852314474651, | |
| "grad_norm": 17.464051339263715, | |
| "learning_rate": 1.4285714285714285e-07, | |
| "loss": 1.4331, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.005878030859662013, | |
| "grad_norm": 17.225034100163874, | |
| "learning_rate": 1.9047619047619045e-07, | |
| "loss": 1.4944, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0073475385745775165, | |
| "grad_norm": 16.85132116546643, | |
| "learning_rate": 2.3809523809523806e-07, | |
| "loss": 1.5036, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00881704628949302, | |
| "grad_norm": 17.041527845979022, | |
| "learning_rate": 2.857142857142857e-07, | |
| "loss": 1.4956, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.010286554004408524, | |
| "grad_norm": 15.881569858334686, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.5216, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.011756061719324026, | |
| "grad_norm": 16.31701070286596, | |
| "learning_rate": 3.809523809523809e-07, | |
| "loss": 1.3997, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.01322556943423953, | |
| "grad_norm": 14.776486508864684, | |
| "learning_rate": 4.285714285714285e-07, | |
| "loss": 1.4804, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.014695077149155033, | |
| "grad_norm": 14.557720212887741, | |
| "learning_rate": 4.761904761904761e-07, | |
| "loss": 1.5391, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.016164584864070537, | |
| "grad_norm": 10.131668683974828, | |
| "learning_rate": 5.238095238095238e-07, | |
| "loss": 1.402, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01763409257898604, | |
| "grad_norm": 10.271144542549273, | |
| "learning_rate": 5.714285714285714e-07, | |
| "loss": 1.5026, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.019103600293901544, | |
| "grad_norm": 9.729493771535271, | |
| "learning_rate": 6.19047619047619e-07, | |
| "loss": 1.4665, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.020573108008817047, | |
| "grad_norm": 9.027774183461707, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 1.43, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.02204261572373255, | |
| "grad_norm": 7.227763994209855, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.4461, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02351212343864805, | |
| "grad_norm": 6.971794402178872, | |
| "learning_rate": 7.619047619047618e-07, | |
| "loss": 1.3946, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.024981631153563555, | |
| "grad_norm": 6.515516782524198, | |
| "learning_rate": 8.095238095238095e-07, | |
| "loss": 1.387, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02645113886847906, | |
| "grad_norm": 6.20741883421894, | |
| "learning_rate": 8.57142857142857e-07, | |
| "loss": 1.3349, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.027920646583394562, | |
| "grad_norm": 6.090657009640401, | |
| "learning_rate": 9.047619047619047e-07, | |
| "loss": 1.4995, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.029390154298310066, | |
| "grad_norm": 4.979473826437438, | |
| "learning_rate": 9.523809523809522e-07, | |
| "loss": 1.2842, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03085966201322557, | |
| "grad_norm": 4.732515914373556, | |
| "learning_rate": 1e-06, | |
| "loss": 1.2816, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03232916972814107, | |
| "grad_norm": 4.780927371855901, | |
| "learning_rate": 9.999943184333936e-07, | |
| "loss": 1.3153, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03379867744305658, | |
| "grad_norm": 4.494683448782365, | |
| "learning_rate": 9.999772738626954e-07, | |
| "loss": 1.3077, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.03526818515797208, | |
| "grad_norm": 4.329105160843414, | |
| "learning_rate": 9.999488666752648e-07, | |
| "loss": 1.2857, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.036737692872887584, | |
| "grad_norm": 4.180952310668344, | |
| "learning_rate": 9.99909097516691e-07, | |
| "loss": 1.3751, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03820720058780309, | |
| "grad_norm": 4.033165642753543, | |
| "learning_rate": 9.998579672907788e-07, | |
| "loss": 1.3178, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03967670830271859, | |
| "grad_norm": 3.4246600875739426, | |
| "learning_rate": 9.99795477159527e-07, | |
| "loss": 1.2873, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.041146216017634095, | |
| "grad_norm": 3.274963547004543, | |
| "learning_rate": 9.99721628543103e-07, | |
| "loss": 1.1994, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0426157237325496, | |
| "grad_norm": 3.331357181342743, | |
| "learning_rate": 9.996364231198103e-07, | |
| "loss": 1.2571, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0440852314474651, | |
| "grad_norm": 3.3784313618805992, | |
| "learning_rate": 9.9953986282605e-07, | |
| "loss": 1.2402, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.045554739162380606, | |
| "grad_norm": 3.0698830421191072, | |
| "learning_rate": 9.99431949856277e-07, | |
| "loss": 1.2305, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0470242468772961, | |
| "grad_norm": 3.4883161944534082, | |
| "learning_rate": 9.9931268666295e-07, | |
| "loss": 1.2108, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.048493754592211606, | |
| "grad_norm": 2.7892474532912663, | |
| "learning_rate": 9.991820759564766e-07, | |
| "loss": 1.2711, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04996326230712711, | |
| "grad_norm": 2.8458896007246666, | |
| "learning_rate": 9.990401207051504e-07, | |
| "loss": 1.173, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05143277002204261, | |
| "grad_norm": 2.5260926512388284, | |
| "learning_rate": 9.98886824135084e-07, | |
| "loss": 1.2025, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05290227773695812, | |
| "grad_norm": 2.3877298964215483, | |
| "learning_rate": 9.98722189730136e-07, | |
| "loss": 1.2579, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05437178545187362, | |
| "grad_norm": 2.2852004307826195, | |
| "learning_rate": 9.985462212318322e-07, | |
| "loss": 1.2378, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.055841293166789124, | |
| "grad_norm": 2.36994043645677, | |
| "learning_rate": 9.983589226392792e-07, | |
| "loss": 1.1434, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05731080088170463, | |
| "grad_norm": 2.3285859279194665, | |
| "learning_rate": 9.98160298209075e-07, | |
| "loss": 1.2527, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05878030859662013, | |
| "grad_norm": 2.051942671724978, | |
| "learning_rate": 9.97950352455211e-07, | |
| "loss": 1.1443, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.060249816311535635, | |
| "grad_norm": 2.0353118851309318, | |
| "learning_rate": 9.977290901489707e-07, | |
| "loss": 1.1874, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.06171932402645114, | |
| "grad_norm": 2.250387145318064, | |
| "learning_rate": 9.9749651631882e-07, | |
| "loss": 1.1452, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06318883174136664, | |
| "grad_norm": 2.179679997701691, | |
| "learning_rate": 9.972526362502937e-07, | |
| "loss": 1.2108, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06465833945628215, | |
| "grad_norm": 2.110595858158538, | |
| "learning_rate": 9.969974554858754e-07, | |
| "loss": 1.1414, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06612784717119764, | |
| "grad_norm": 2.2405504959664535, | |
| "learning_rate": 9.967309798248707e-07, | |
| "loss": 1.1567, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06759735488611315, | |
| "grad_norm": 2.1992469613966414, | |
| "learning_rate": 9.96453215323277e-07, | |
| "loss": 1.1649, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06906686260102865, | |
| "grad_norm": 2.1870509425005804, | |
| "learning_rate": 9.961641682936442e-07, | |
| "loss": 1.1701, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.07053637031594416, | |
| "grad_norm": 2.144331046664921, | |
| "learning_rate": 9.95863845304932e-07, | |
| "loss": 1.1498, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07200587803085966, | |
| "grad_norm": 2.1944289892742774, | |
| "learning_rate": 9.955522531823606e-07, | |
| "loss": 1.2148, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07347538574577517, | |
| "grad_norm": 2.1596366801336244, | |
| "learning_rate": 9.952293990072557e-07, | |
| "loss": 1.1744, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07494489346069066, | |
| "grad_norm": 2.1930183892423547, | |
| "learning_rate": 9.948952901168874e-07, | |
| "loss": 1.1859, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07641440117560618, | |
| "grad_norm": 2.1938721237512153, | |
| "learning_rate": 9.945499341043033e-07, | |
| "loss": 1.1199, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07788390889052167, | |
| "grad_norm": 1.9945902863761755, | |
| "learning_rate": 9.94193338818156e-07, | |
| "loss": 1.1325, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.07935341660543718, | |
| "grad_norm": 1.9432383836576688, | |
| "learning_rate": 9.938255123625251e-07, | |
| "loss": 1.1723, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08082292432035268, | |
| "grad_norm": 1.9494589261620257, | |
| "learning_rate": 9.934464630967328e-07, | |
| "loss": 1.1796, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08229243203526819, | |
| "grad_norm": 2.1536664626140762, | |
| "learning_rate": 9.930561996351533e-07, | |
| "loss": 1.1687, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08376193975018369, | |
| "grad_norm": 1.8384845414400017, | |
| "learning_rate": 9.926547308470183e-07, | |
| "loss": 1.0806, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0852314474650992, | |
| "grad_norm": 2.0289382431580183, | |
| "learning_rate": 9.922420658562144e-07, | |
| "loss": 1.11, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0867009551800147, | |
| "grad_norm": 2.048447539695954, | |
| "learning_rate": 9.91818214041076e-07, | |
| "loss": 1.1103, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0881704628949302, | |
| "grad_norm": 2.012578737651733, | |
| "learning_rate": 9.913831850341725e-07, | |
| "loss": 1.1158, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0896399706098457, | |
| "grad_norm": 1.896428918202559, | |
| "learning_rate": 9.90936988722089e-07, | |
| "loss": 1.1864, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.09110947832476121, | |
| "grad_norm": 1.9545200740474544, | |
| "learning_rate": 9.904796352452019e-07, | |
| "loss": 1.0444, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09257898603967671, | |
| "grad_norm": 2.0148450645381266, | |
| "learning_rate": 9.900111349974478e-07, | |
| "loss": 1.0916, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.0940484937545922, | |
| "grad_norm": 1.929357772553932, | |
| "learning_rate": 9.895314986260886e-07, | |
| "loss": 1.0798, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09551800146950772, | |
| "grad_norm": 1.7614353081727012, | |
| "learning_rate": 9.890407370314677e-07, | |
| "loss": 1.0602, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.09698750918442321, | |
| "grad_norm": 1.812427621605287, | |
| "learning_rate": 9.885388613667644e-07, | |
| "loss": 1.1419, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09845701689933872, | |
| "grad_norm": 1.955547061142271, | |
| "learning_rate": 9.880258830377386e-07, | |
| "loss": 1.1659, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.09992652461425422, | |
| "grad_norm": 1.884597834486459, | |
| "learning_rate": 9.875018137024721e-07, | |
| "loss": 1.0962, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10139603232916973, | |
| "grad_norm": 1.9999685034920418, | |
| "learning_rate": 9.869666652711049e-07, | |
| "loss": 1.2142, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.10286554004408523, | |
| "grad_norm": 1.8727812783053652, | |
| "learning_rate": 9.864204499055622e-07, | |
| "loss": 1.1424, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10433504775900074, | |
| "grad_norm": 1.9270273635952284, | |
| "learning_rate": 9.858631800192804e-07, | |
| "loss": 1.0969, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.10580455547391623, | |
| "grad_norm": 2.0055713055636355, | |
| "learning_rate": 9.852948682769234e-07, | |
| "loss": 1.1969, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.10727406318883174, | |
| "grad_norm": 1.8861847545246249, | |
| "learning_rate": 9.84715527594095e-07, | |
| "loss": 1.0814, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.10874357090374724, | |
| "grad_norm": 1.8457360601707475, | |
| "learning_rate": 9.841251711370457e-07, | |
| "loss": 1.1158, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11021307861866275, | |
| "grad_norm": 1.9137633871950495, | |
| "learning_rate": 9.83523812322374e-07, | |
| "loss": 1.175, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.11168258633357825, | |
| "grad_norm": 1.7751460296642658, | |
| "learning_rate": 9.829114648167206e-07, | |
| "loss": 1.1066, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11315209404849376, | |
| "grad_norm": 1.8728263698426233, | |
| "learning_rate": 9.822881425364578e-07, | |
| "loss": 1.0487, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.11462160176340926, | |
| "grad_norm": 1.720777832083087, | |
| "learning_rate": 9.81653859647374e-07, | |
| "loss": 1.1196, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11609110947832477, | |
| "grad_norm": 1.87348358379571, | |
| "learning_rate": 9.810086305643511e-07, | |
| "loss": 1.0961, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.11756061719324026, | |
| "grad_norm": 1.7818512548748757, | |
| "learning_rate": 9.803524699510372e-07, | |
| "loss": 1.0888, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11903012490815577, | |
| "grad_norm": 1.9374157076694885, | |
| "learning_rate": 9.79685392719513e-07, | |
| "loss": 1.1268, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.12049963262307127, | |
| "grad_norm": 2.026201319532345, | |
| "learning_rate": 9.790074140299535e-07, | |
| "loss": 1.1795, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12196914033798678, | |
| "grad_norm": 1.7669713418401083, | |
| "learning_rate": 9.783185492902831e-07, | |
| "loss": 1.1169, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.12343864805290228, | |
| "grad_norm": 1.8294253428890799, | |
| "learning_rate": 9.776188141558253e-07, | |
| "loss": 1.0919, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12490815576781777, | |
| "grad_norm": 1.8274541174825418, | |
| "learning_rate": 9.769082245289472e-07, | |
| "loss": 1.1123, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.12637766348273327, | |
| "grad_norm": 1.7514454813059583, | |
| "learning_rate": 9.76186796558698e-07, | |
| "loss": 1.1126, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.1278471711976488, | |
| "grad_norm": 2.0848609899286177, | |
| "learning_rate": 9.754545466404423e-07, | |
| "loss": 1.1261, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.1293166789125643, | |
| "grad_norm": 1.8956503679400867, | |
| "learning_rate": 9.747114914154862e-07, | |
| "loss": 1.13, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1307861866274798, | |
| "grad_norm": 1.8870487896185353, | |
| "learning_rate": 9.73957647770701e-07, | |
| "loss": 1.1469, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.13225569434239529, | |
| "grad_norm": 1.817308542114037, | |
| "learning_rate": 9.731930328381384e-07, | |
| "loss": 1.1055, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1337252020573108, | |
| "grad_norm": 1.9629911385480598, | |
| "learning_rate": 9.72417663994641e-07, | |
| "loss": 1.0912, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.1351947097722263, | |
| "grad_norm": 2.050021078478476, | |
| "learning_rate": 9.716315588614472e-07, | |
| "loss": 1.042, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1366642174871418, | |
| "grad_norm": 1.9655965919411047, | |
| "learning_rate": 9.708347353037924e-07, | |
| "loss": 1.0731, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.1381337252020573, | |
| "grad_norm": 1.848057789367187, | |
| "learning_rate": 9.700272114305008e-07, | |
| "loss": 1.0957, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.13960323291697282, | |
| "grad_norm": 1.9055742505733597, | |
| "learning_rate": 9.69209005593575e-07, | |
| "loss": 1.1393, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.14107274063188832, | |
| "grad_norm": 1.885959701690901, | |
| "learning_rate": 9.68380136387779e-07, | |
| "loss": 1.1503, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14254224834680382, | |
| "grad_norm": 1.9216191651411287, | |
| "learning_rate": 9.67540622650215e-07, | |
| "loss": 1.0944, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.14401175606171931, | |
| "grad_norm": 1.9221872996269755, | |
| "learning_rate": 9.66690483459896e-07, | |
| "loss": 1.125, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14548126377663484, | |
| "grad_norm": 1.7695227554963031, | |
| "learning_rate": 9.658297381373117e-07, | |
| "loss": 1.134, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.14695077149155034, | |
| "grad_norm": 1.940990407598247, | |
| "learning_rate": 9.649584062439897e-07, | |
| "loss": 1.1638, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14842027920646583, | |
| "grad_norm": 1.8521569502420978, | |
| "learning_rate": 9.640765075820508e-07, | |
| "loss": 1.1594, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.14988978692138133, | |
| "grad_norm": 1.9347402212683402, | |
| "learning_rate": 9.631840621937585e-07, | |
| "loss": 1.1594, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15135929463629685, | |
| "grad_norm": 1.9130373667873128, | |
| "learning_rate": 9.622810903610653e-07, | |
| "loss": 1.1211, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.15282880235121235, | |
| "grad_norm": 1.813394824124808, | |
| "learning_rate": 9.613676126051488e-07, | |
| "loss": 1.1007, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15429831006612785, | |
| "grad_norm": 1.9095237417724393, | |
| "learning_rate": 9.604436496859482e-07, | |
| "loss": 1.1796, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.15576781778104334, | |
| "grad_norm": 1.730720179443255, | |
| "learning_rate": 9.595092226016912e-07, | |
| "loss": 1.0689, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15723732549595884, | |
| "grad_norm": 1.8826322939474722, | |
| "learning_rate": 9.585643525884163e-07, | |
| "loss": 1.112, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.15870683321087437, | |
| "grad_norm": 1.792447444970345, | |
| "learning_rate": 9.576090611194915e-07, | |
| "loss": 1.0629, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16017634092578986, | |
| "grad_norm": 1.851908198387569, | |
| "learning_rate": 9.566433699051248e-07, | |
| "loss": 1.2135, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.16164584864070536, | |
| "grad_norm": 1.7553040384102077, | |
| "learning_rate": 9.556673008918725e-07, | |
| "loss": 1.0854, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16311535635562086, | |
| "grad_norm": 1.910506976862337, | |
| "learning_rate": 9.546808762621385e-07, | |
| "loss": 1.0775, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.16458486407053638, | |
| "grad_norm": 1.7403892411303623, | |
| "learning_rate": 9.536841184336725e-07, | |
| "loss": 1.102, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.16605437178545188, | |
| "grad_norm": 1.925268524053033, | |
| "learning_rate": 9.526770500590576e-07, | |
| "loss": 1.0036, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.16752387950036737, | |
| "grad_norm": 1.7030002409038387, | |
| "learning_rate": 9.516596940251986e-07, | |
| "loss": 1.0702, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16899338721528287, | |
| "grad_norm": 1.7840043977843556, | |
| "learning_rate": 9.506320734527997e-07, | |
| "loss": 1.0938, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1704628949301984, | |
| "grad_norm": 1.8395314071839606, | |
| "learning_rate": 9.495942116958395e-07, | |
| "loss": 1.168, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1719324026451139, | |
| "grad_norm": 1.7231121731862031, | |
| "learning_rate": 9.485461323410411e-07, | |
| "loss": 1.1674, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.1734019103600294, | |
| "grad_norm": 1.7743694765332263, | |
| "learning_rate": 9.474878592073352e-07, | |
| "loss": 1.1154, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17487141807494488, | |
| "grad_norm": 1.889101434336393, | |
| "learning_rate": 9.464194163453188e-07, | |
| "loss": 1.0967, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1763409257898604, | |
| "grad_norm": 1.791474621378483, | |
| "learning_rate": 9.45340828036709e-07, | |
| "loss": 1.0532, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1778104335047759, | |
| "grad_norm": 1.7298856198753365, | |
| "learning_rate": 9.442521187937911e-07, | |
| "loss": 1.1783, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1792799412196914, | |
| "grad_norm": 1.794443334367641, | |
| "learning_rate": 9.431533133588616e-07, | |
| "loss": 1.0877, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1807494489346069, | |
| "grad_norm": 1.8423954972585201, | |
| "learning_rate": 9.420444367036653e-07, | |
| "loss": 1.0762, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.18221895664952242, | |
| "grad_norm": 1.7929621526317383, | |
| "learning_rate": 9.409255140288288e-07, | |
| "loss": 1.0728, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18368846436443792, | |
| "grad_norm": 1.90760615166962, | |
| "learning_rate": 9.397965707632866e-07, | |
| "loss": 1.0913, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.18515797207935342, | |
| "grad_norm": 1.943714257581344, | |
| "learning_rate": 9.386576325637043e-07, | |
| "loss": 1.1129, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1866274797942689, | |
| "grad_norm": 1.8505495879818339, | |
| "learning_rate": 9.375087253138951e-07, | |
| "loss": 1.1095, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.1880969875091844, | |
| "grad_norm": 1.8351144551335388, | |
| "learning_rate": 9.363498751242307e-07, | |
| "loss": 1.0664, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18956649522409993, | |
| "grad_norm": 1.8698266517180562, | |
| "learning_rate": 9.351811083310497e-07, | |
| "loss": 1.0695, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.19103600293901543, | |
| "grad_norm": 1.8467428381107462, | |
| "learning_rate": 9.340024514960574e-07, | |
| "loss": 1.1099, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19250551065393093, | |
| "grad_norm": 1.8383445717272457, | |
| "learning_rate": 9.328139314057233e-07, | |
| "loss": 1.0571, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.19397501836884642, | |
| "grad_norm": 1.7822708434524877, | |
| "learning_rate": 9.316155750706713e-07, | |
| "loss": 1.0643, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19544452608376195, | |
| "grad_norm": 1.775080931488546, | |
| "learning_rate": 9.304074097250671e-07, | |
| "loss": 1.0923, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.19691403379867745, | |
| "grad_norm": 1.7534496873865253, | |
| "learning_rate": 9.291894628259979e-07, | |
| "loss": 1.0934, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19838354151359294, | |
| "grad_norm": 1.9045369286252938, | |
| "learning_rate": 9.279617620528496e-07, | |
| "loss": 1.1063, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.19985304922850844, | |
| "grad_norm": 1.9789176992872752, | |
| "learning_rate": 9.26724335306677e-07, | |
| "loss": 1.1022, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.20132255694342396, | |
| "grad_norm": 1.8458671473525012, | |
| "learning_rate": 9.254772107095701e-07, | |
| "loss": 1.0984, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.20279206465833946, | |
| "grad_norm": 1.6935951448294548, | |
| "learning_rate": 9.242204166040148e-07, | |
| "loss": 1.0379, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20426157237325496, | |
| "grad_norm": 1.9390186462720589, | |
| "learning_rate": 9.229539815522485e-07, | |
| "loss": 1.1088, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.20573108008817045, | |
| "grad_norm": 1.8215087232144447, | |
| "learning_rate": 9.216779343356119e-07, | |
| "loss": 1.0305, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.20720058780308598, | |
| "grad_norm": 1.8391377902744837, | |
| "learning_rate": 9.203923039538939e-07, | |
| "loss": 1.03, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.20867009551800147, | |
| "grad_norm": 1.8222570147638864, | |
| "learning_rate": 9.190971196246731e-07, | |
| "loss": 1.1201, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.21013960323291697, | |
| "grad_norm": 1.9074452445907568, | |
| "learning_rate": 9.177924107826535e-07, | |
| "loss": 1.0716, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.21160911094783247, | |
| "grad_norm": 1.8604141888521697, | |
| "learning_rate": 9.164782070789961e-07, | |
| "loss": 1.1475, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.213078618662748, | |
| "grad_norm": 1.8195345943864918, | |
| "learning_rate": 9.151545383806441e-07, | |
| "loss": 1.0211, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2145481263776635, | |
| "grad_norm": 1.7827672387312539, | |
| "learning_rate": 9.138214347696453e-07, | |
| "loss": 1.163, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.216017634092579, | |
| "grad_norm": 1.861693336411916, | |
| "learning_rate": 9.124789265424674e-07, | |
| "loss": 1.1053, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.21748714180749448, | |
| "grad_norm": 1.9925460288336267, | |
| "learning_rate": 9.1112704420931e-07, | |
| "loss": 1.0823, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21895664952240998, | |
| "grad_norm": 1.8673762233131246, | |
| "learning_rate": 9.097658184934114e-07, | |
| "loss": 1.0893, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.2204261572373255, | |
| "grad_norm": 1.8135361820690228, | |
| "learning_rate": 9.083952803303496e-07, | |
| "loss": 1.1804, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.221895664952241, | |
| "grad_norm": 1.7943832305382856, | |
| "learning_rate": 9.070154608673402e-07, | |
| "loss": 1.1108, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.2233651726671565, | |
| "grad_norm": 1.9887027909424768, | |
| "learning_rate": 9.056263914625277e-07, | |
| "loss": 1.1501, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.224834680382072, | |
| "grad_norm": 1.9146935645186336, | |
| "learning_rate": 9.042281036842739e-07, | |
| "loss": 1.1279, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.22630418809698752, | |
| "grad_norm": 1.7369719877012806, | |
| "learning_rate": 9.028206293104391e-07, | |
| "loss": 1.0497, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22777369581190302, | |
| "grad_norm": 1.8786686675134308, | |
| "learning_rate": 9.014040003276611e-07, | |
| "loss": 1.1104, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2292432035268185, | |
| "grad_norm": 1.9105299787467294, | |
| "learning_rate": 8.999782489306271e-07, | |
| "loss": 1.0936, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.230712711241734, | |
| "grad_norm": 1.8120395385442194, | |
| "learning_rate": 8.985434075213439e-07, | |
| "loss": 1.083, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.23218221895664953, | |
| "grad_norm": 1.7363191743038984, | |
| "learning_rate": 8.970995087083992e-07, | |
| "loss": 0.9772, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.23365172667156503, | |
| "grad_norm": 1.7820020797816543, | |
| "learning_rate": 8.956465853062222e-07, | |
| "loss": 1.0146, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.23512123438648053, | |
| "grad_norm": 1.7586524905646472, | |
| "learning_rate": 8.941846703343372e-07, | |
| "loss": 1.1279, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23659074210139602, | |
| "grad_norm": 1.7894968447971773, | |
| "learning_rate": 8.927137970166135e-07, | |
| "loss": 1.0527, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.23806024981631155, | |
| "grad_norm": 1.7848783538320003, | |
| "learning_rate": 8.912339987805099e-07, | |
| "loss": 1.0751, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23952975753122704, | |
| "grad_norm": 1.8693803940347191, | |
| "learning_rate": 8.897453092563153e-07, | |
| "loss": 1.0697, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.24099926524614254, | |
| "grad_norm": 1.8074883029338882, | |
| "learning_rate": 8.882477622763846e-07, | |
| "loss": 1.0944, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.24246877296105804, | |
| "grad_norm": 1.797204547554038, | |
| "learning_rate": 8.867413918743693e-07, | |
| "loss": 1.0973, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.24393828067597356, | |
| "grad_norm": 1.8392322504850838, | |
| "learning_rate": 8.852262322844444e-07, | |
| "loss": 1.0919, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24540778839088906, | |
| "grad_norm": 1.7623077046993691, | |
| "learning_rate": 8.837023179405308e-07, | |
| "loss": 1.1094, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.24687729610580456, | |
| "grad_norm": 1.7742943941570524, | |
| "learning_rate": 8.821696834755117e-07, | |
| "loss": 1.0372, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24834680382072005, | |
| "grad_norm": 1.787531814654772, | |
| "learning_rate": 8.806283637204462e-07, | |
| "loss": 1.1277, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.24981631153563555, | |
| "grad_norm": 1.8409448609213654, | |
| "learning_rate": 8.790783937037776e-07, | |
| "loss": 1.0667, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.25128581925055105, | |
| "grad_norm": 1.8241238807877374, | |
| "learning_rate": 8.775198086505375e-07, | |
| "loss": 1.0712, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.25275532696546654, | |
| "grad_norm": 1.9461903617468934, | |
| "learning_rate": 8.759526439815455e-07, | |
| "loss": 0.986, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2542248346803821, | |
| "grad_norm": 1.8059963729135269, | |
| "learning_rate": 8.743769353126029e-07, | |
| "loss": 1.1027, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.2556943423952976, | |
| "grad_norm": 1.7678488694251377, | |
| "learning_rate": 8.727927184536849e-07, | |
| "loss": 1.0659, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2571638501102131, | |
| "grad_norm": 1.7749804271044742, | |
| "learning_rate": 8.712000294081259e-07, | |
| "loss": 1.022, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2586333578251286, | |
| "grad_norm": 1.707242563839455, | |
| "learning_rate": 8.695989043718015e-07, | |
| "loss": 1.0917, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2601028655400441, | |
| "grad_norm": 1.8427148257842918, | |
| "learning_rate": 8.679893797323058e-07, | |
| "loss": 1.0724, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.2615723732549596, | |
| "grad_norm": 1.8363003275755554, | |
| "learning_rate": 8.663714920681245e-07, | |
| "loss": 1.1111, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2630418809698751, | |
| "grad_norm": 1.8365044852133108, | |
| "learning_rate": 8.64745278147804e-07, | |
| "loss": 1.0855, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.26451138868479057, | |
| "grad_norm": 1.8731659851674847, | |
| "learning_rate": 8.631107749291148e-07, | |
| "loss": 1.0675, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2659808963997061, | |
| "grad_norm": 1.9141757031458602, | |
| "learning_rate": 8.614680195582127e-07, | |
| "loss": 1.1196, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2674504041146216, | |
| "grad_norm": 1.8358916498700413, | |
| "learning_rate": 8.598170493687939e-07, | |
| "loss": 1.0301, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.2689199118295371, | |
| "grad_norm": 1.7578208636926487, | |
| "learning_rate": 8.581579018812468e-07, | |
| "loss": 1.089, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.2703894195444526, | |
| "grad_norm": 1.8746198379934478, | |
| "learning_rate": 8.564906148017992e-07, | |
| "loss": 1.1131, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2718589272593681, | |
| "grad_norm": 1.7730333983609725, | |
| "learning_rate": 8.548152260216613e-07, | |
| "loss": 1.034, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2733284349742836, | |
| "grad_norm": 1.7445016041336918, | |
| "learning_rate": 8.531317736161652e-07, | |
| "loss": 1.002, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2747979426891991, | |
| "grad_norm": 1.716730493002548, | |
| "learning_rate": 8.514402958438987e-07, | |
| "loss": 1.0445, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2762674504041146, | |
| "grad_norm": 1.753035784826817, | |
| "learning_rate": 8.497408311458362e-07, | |
| "loss": 1.0203, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2777369581190301, | |
| "grad_norm": 1.7624662841844527, | |
| "learning_rate": 8.480334181444652e-07, | |
| "loss": 1.0724, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.27920646583394565, | |
| "grad_norm": 1.8184473786367534, | |
| "learning_rate": 8.463180956429085e-07, | |
| "loss": 1.1048, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.28067597354886115, | |
| "grad_norm": 1.7695724382850178, | |
| "learning_rate": 8.445949026240424e-07, | |
| "loss": 1.1535, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.28214548126377664, | |
| "grad_norm": 1.7396814430482903, | |
| "learning_rate": 8.428638782496105e-07, | |
| "loss": 1.0529, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.28361498897869214, | |
| "grad_norm": 1.7614538875912402, | |
| "learning_rate": 8.411250618593337e-07, | |
| "loss": 1.1533, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.28508449669360764, | |
| "grad_norm": 1.8555305634941939, | |
| "learning_rate": 8.393784929700169e-07, | |
| "loss": 1.0832, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28655400440852313, | |
| "grad_norm": 1.7138697869173838, | |
| "learning_rate": 8.376242112746499e-07, | |
| "loss": 0.9737, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.28802351212343863, | |
| "grad_norm": 1.8888153946382773, | |
| "learning_rate": 8.358622566415057e-07, | |
| "loss": 1.0011, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2894930198383541, | |
| "grad_norm": 1.7635141256389373, | |
| "learning_rate": 8.340926691132348e-07, | |
| "loss": 1.0477, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2909625275532697, | |
| "grad_norm": 1.736090710284999, | |
| "learning_rate": 8.323154889059549e-07, | |
| "loss": 1.0677, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2924320352681852, | |
| "grad_norm": 1.9655528448046806, | |
| "learning_rate": 8.305307564083368e-07, | |
| "loss": 1.1974, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.29390154298310067, | |
| "grad_norm": 1.7937853629471947, | |
| "learning_rate": 8.287385121806869e-07, | |
| "loss": 1.0823, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29537105069801617, | |
| "grad_norm": 1.9050615593905893, | |
| "learning_rate": 8.26938796954025e-07, | |
| "loss": 1.1672, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.29684055841293167, | |
| "grad_norm": 1.7624130253100436, | |
| "learning_rate": 8.251316516291586e-07, | |
| "loss": 1.0203, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29831006612784716, | |
| "grad_norm": 1.816075547247136, | |
| "learning_rate": 8.233171172757539e-07, | |
| "loss": 1.0583, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.29977957384276266, | |
| "grad_norm": 1.750336923146612, | |
| "learning_rate": 8.214952351314022e-07, | |
| "loss": 1.0375, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.30124908155767816, | |
| "grad_norm": 1.766779650906924, | |
| "learning_rate": 8.196660466006823e-07, | |
| "loss": 1.0997, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3027185892725937, | |
| "grad_norm": 1.8860580284916557, | |
| "learning_rate": 8.178295932542205e-07, | |
| "loss": 1.0788, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.3041880969875092, | |
| "grad_norm": 1.7781746925928104, | |
| "learning_rate": 8.159859168277444e-07, | |
| "loss": 0.968, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.3056576047024247, | |
| "grad_norm": 1.8615566220877084, | |
| "learning_rate": 8.141350592211358e-07, | |
| "loss": 1.066, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3071271124173402, | |
| "grad_norm": 1.7774696373511238, | |
| "learning_rate": 8.122770624974778e-07, | |
| "loss": 0.9954, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.3085966201322557, | |
| "grad_norm": 1.8457945487006882, | |
| "learning_rate": 8.10411968882099e-07, | |
| "loss": 1.0606, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3100661278471712, | |
| "grad_norm": 1.8137334109126484, | |
| "learning_rate": 8.085398207616138e-07, | |
| "loss": 1.1464, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.3115356355620867, | |
| "grad_norm": 1.824787905787397, | |
| "learning_rate": 8.06660660682959e-07, | |
| "loss": 1.0186, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3130051432770022, | |
| "grad_norm": 1.8040685491884534, | |
| "learning_rate": 8.047745313524275e-07, | |
| "loss": 1.0554, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.3144746509919177, | |
| "grad_norm": 1.7370025880384308, | |
| "learning_rate": 8.028814756346967e-07, | |
| "loss": 1.0432, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.31594415870683323, | |
| "grad_norm": 1.7648192357449863, | |
| "learning_rate": 8.009815365518554e-07, | |
| "loss": 1.1266, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.31741366642174873, | |
| "grad_norm": 1.827301117863129, | |
| "learning_rate": 7.990747572824253e-07, | |
| "loss": 1.0342, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3188831741366642, | |
| "grad_norm": 2.006041793599483, | |
| "learning_rate": 7.971611811603803e-07, | |
| "loss": 1.083, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.3203526818515797, | |
| "grad_norm": 1.8903539087686725, | |
| "learning_rate": 7.952408516741607e-07, | |
| "loss": 1.059, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3218221895664952, | |
| "grad_norm": 1.8842212775756257, | |
| "learning_rate": 7.933138124656864e-07, | |
| "loss": 1.0677, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3232916972814107, | |
| "grad_norm": 1.975112068311392, | |
| "learning_rate": 7.913801073293638e-07, | |
| "loss": 1.1073, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3247612049963262, | |
| "grad_norm": 1.681430647589897, | |
| "learning_rate": 7.894397802110908e-07, | |
| "loss": 1.0563, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.3262307127112417, | |
| "grad_norm": 1.8380971719219996, | |
| "learning_rate": 7.87492875207259e-07, | |
| "loss": 1.1822, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.32770022042615726, | |
| "grad_norm": 1.7074365534791345, | |
| "learning_rate": 7.855394365637495e-07, | |
| "loss": 1.0594, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.32916972814107276, | |
| "grad_norm": 1.821964813971357, | |
| "learning_rate": 7.835795086749299e-07, | |
| "loss": 1.113, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.33063923585598826, | |
| "grad_norm": 1.8642253124188575, | |
| "learning_rate": 7.816131360826434e-07, | |
| "loss": 1.0596, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.33210874357090375, | |
| "grad_norm": 1.7175969911981157, | |
| "learning_rate": 7.796403634751973e-07, | |
| "loss": 1.053, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.33357825128581925, | |
| "grad_norm": 1.777915859356033, | |
| "learning_rate": 7.776612356863477e-07, | |
| "loss": 1.0065, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.33504775900073475, | |
| "grad_norm": 1.8085189246724207, | |
| "learning_rate": 7.756757976942798e-07, | |
| "loss": 1.0394, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.33651726671565024, | |
| "grad_norm": 1.8756745576407878, | |
| "learning_rate": 7.736840946205865e-07, | |
| "loss": 1.1273, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.33798677443056574, | |
| "grad_norm": 1.8327083341200932, | |
| "learning_rate": 7.716861717292424e-07, | |
| "loss": 1.0538, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33945628214548124, | |
| "grad_norm": 1.8631431553314504, | |
| "learning_rate": 7.696820744255756e-07, | |
| "loss": 1.1507, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.3409257898603968, | |
| "grad_norm": 1.7465550716286358, | |
| "learning_rate": 7.676718482552353e-07, | |
| "loss": 1.0697, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3423952975753123, | |
| "grad_norm": 1.7322371342891512, | |
| "learning_rate": 7.65655538903157e-07, | |
| "loss": 1.0302, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.3438648052902278, | |
| "grad_norm": 1.7814973341724534, | |
| "learning_rate": 7.636331921925241e-07, | |
| "loss": 1.0899, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3453343130051433, | |
| "grad_norm": 1.835128046923307, | |
| "learning_rate": 7.61604854083727e-07, | |
| "loss": 1.0511, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3468038207200588, | |
| "grad_norm": 1.890591777633699, | |
| "learning_rate": 7.595705706733178e-07, | |
| "loss": 1.0503, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.34827332843497427, | |
| "grad_norm": 1.802438897355832, | |
| "learning_rate": 7.575303881929632e-07, | |
| "loss": 1.1125, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.34974283614988977, | |
| "grad_norm": 1.7658119404997528, | |
| "learning_rate": 7.55484353008394e-07, | |
| "loss": 1.0737, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.35121234386480527, | |
| "grad_norm": 1.8011836883001782, | |
| "learning_rate": 7.534325116183508e-07, | |
| "loss": 1.0891, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.3526818515797208, | |
| "grad_norm": 1.7936027919741155, | |
| "learning_rate": 7.513749106535278e-07, | |
| "loss": 1.0433, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3541513592946363, | |
| "grad_norm": 1.8017282268668782, | |
| "learning_rate": 7.493115968755125e-07, | |
| "loss": 1.047, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3556208670095518, | |
| "grad_norm": 1.9342694434293286, | |
| "learning_rate": 7.472426171757238e-07, | |
| "loss": 1.098, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3570903747244673, | |
| "grad_norm": 1.8077529691670307, | |
| "learning_rate": 7.451680185743454e-07, | |
| "loss": 1.0277, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3585598824393828, | |
| "grad_norm": 1.8830252116300301, | |
| "learning_rate": 7.430878482192579e-07, | |
| "loss": 0.9846, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3600293901542983, | |
| "grad_norm": 1.8998227561518721, | |
| "learning_rate": 7.41002153384967e-07, | |
| "loss": 1.0897, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.3614988978692138, | |
| "grad_norm": 1.8229424521327513, | |
| "learning_rate": 7.389109814715292e-07, | |
| "loss": 1.0718, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3629684055841293, | |
| "grad_norm": 1.871640131350982, | |
| "learning_rate": 7.368143800034745e-07, | |
| "loss": 1.1105, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.36443791329904485, | |
| "grad_norm": 1.728622836313891, | |
| "learning_rate": 7.347123966287265e-07, | |
| "loss": 1.0658, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.36590742101396034, | |
| "grad_norm": 1.9339436433410142, | |
| "learning_rate": 7.326050791175196e-07, | |
| "loss": 1.1393, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.36737692872887584, | |
| "grad_norm": 1.716023610009682, | |
| "learning_rate": 7.304924753613127e-07, | |
| "loss": 1.0316, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.36737692872887584, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_loss": 2.707226037979126, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_runtime": 17.8654, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_samples_per_second": 5.206, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_steps_per_second": 0.672, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.36884643644379134, | |
| "grad_norm": 1.7391519225856011, | |
| "learning_rate": 7.283746333717014e-07, | |
| "loss": 1.0377, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.37031594415870683, | |
| "grad_norm": 1.818410985977693, | |
| "learning_rate": 7.262516012793276e-07, | |
| "loss": 1.0345, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.37178545187362233, | |
| "grad_norm": 1.8812075301481235, | |
| "learning_rate": 7.241234273327838e-07, | |
| "loss": 1.0111, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.3732549595885378, | |
| "grad_norm": 1.8256426567686606, | |
| "learning_rate": 7.219901598975185e-07, | |
| "loss": 1.0242, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3747244673034533, | |
| "grad_norm": 1.8032448771328675, | |
| "learning_rate": 7.198518474547354e-07, | |
| "loss": 0.9857, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3761939750183688, | |
| "grad_norm": 1.8445748755872284, | |
| "learning_rate": 7.17708538600293e-07, | |
| "loss": 1.0593, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3776634827332844, | |
| "grad_norm": 1.8408999808307878, | |
| "learning_rate": 7.155602820435992e-07, | |
| "loss": 1.0839, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.37913299044819987, | |
| "grad_norm": 1.8091016805737823, | |
| "learning_rate": 7.134071266065051e-07, | |
| "loss": 1.058, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.38060249816311537, | |
| "grad_norm": 1.8529955273165009, | |
| "learning_rate": 7.112491212221946e-07, | |
| "loss": 1.0021, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.38207200587803086, | |
| "grad_norm": 1.8092464332644904, | |
| "learning_rate": 7.09086314934073e-07, | |
| "loss": 1.0064, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.38354151359294636, | |
| "grad_norm": 1.795984690847929, | |
| "learning_rate": 7.069187568946524e-07, | |
| "loss": 1.0654, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.38501102130786186, | |
| "grad_norm": 1.7737795304586095, | |
| "learning_rate": 7.047464963644342e-07, | |
| "loss": 1.0151, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.38648052902277735, | |
| "grad_norm": 1.8528676449577293, | |
| "learning_rate": 7.025695827107901e-07, | |
| "loss": 1.0804, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.38795003673769285, | |
| "grad_norm": 1.8476585972521806, | |
| "learning_rate": 7.003880654068395e-07, | |
| "loss": 1.091, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3894195444526084, | |
| "grad_norm": 1.9052486545665286, | |
| "learning_rate": 6.98201994030326e-07, | |
| "loss": 1.0933, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.3908890521675239, | |
| "grad_norm": 1.778971704628668, | |
| "learning_rate": 6.960114182624902e-07, | |
| "loss": 1.1075, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.3923585598824394, | |
| "grad_norm": 1.7435161249168551, | |
| "learning_rate": 6.938163878869405e-07, | |
| "loss": 1.0052, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3938280675973549, | |
| "grad_norm": 1.7315215450330044, | |
| "learning_rate": 6.916169527885221e-07, | |
| "loss": 1.0512, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.3952975753122704, | |
| "grad_norm": 1.7876056692528195, | |
| "learning_rate": 6.894131629521829e-07, | |
| "loss": 1.1442, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.3967670830271859, | |
| "grad_norm": 1.7710225516067628, | |
| "learning_rate": 6.872050684618381e-07, | |
| "loss": 1.081, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3982365907421014, | |
| "grad_norm": 1.7329335705128692, | |
| "learning_rate": 6.849927194992312e-07, | |
| "loss": 0.9969, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3997060984570169, | |
| "grad_norm": 1.8592291193285087, | |
| "learning_rate": 6.827761663427943e-07, | |
| "loss": 1.1048, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4011756061719324, | |
| "grad_norm": 1.795673055617955, | |
| "learning_rate": 6.805554593665049e-07, | |
| "loss": 1.1164, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.4026451138868479, | |
| "grad_norm": 1.7367854154136189, | |
| "learning_rate": 6.783306490387414e-07, | |
| "loss": 0.9914, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4041146216017634, | |
| "grad_norm": 1.7004321271293306, | |
| "learning_rate": 6.761017859211359e-07, | |
| "loss": 1.0202, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4055841293166789, | |
| "grad_norm": 1.821764236324883, | |
| "learning_rate": 6.738689206674257e-07, | |
| "loss": 0.9857, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.4070536370315944, | |
| "grad_norm": 1.7847774281240236, | |
| "learning_rate": 6.716321040223014e-07, | |
| "loss": 1.0136, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.4085231447465099, | |
| "grad_norm": 1.8328549796172011, | |
| "learning_rate": 6.693913868202539e-07, | |
| "loss": 1.0137, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4099926524614254, | |
| "grad_norm": 1.6946865477492625, | |
| "learning_rate": 6.671468199844192e-07, | |
| "loss": 1.0017, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4114621601763409, | |
| "grad_norm": 1.7971440056715982, | |
| "learning_rate": 6.648984545254216e-07, | |
| "loss": 1.0347, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4129316678912564, | |
| "grad_norm": 1.7300255825420612, | |
| "learning_rate": 6.626463415402131e-07, | |
| "loss": 1.0327, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.41440117560617196, | |
| "grad_norm": 1.7175987687087009, | |
| "learning_rate": 6.603905322109138e-07, | |
| "loss": 1.0332, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.41587068332108745, | |
| "grad_norm": 1.8379257145846943, | |
| "learning_rate": 6.581310778036474e-07, | |
| "loss": 0.8794, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.41734019103600295, | |
| "grad_norm": 1.8820137872210625, | |
| "learning_rate": 6.558680296673766e-07, | |
| "loss": 1.0813, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.41880969875091845, | |
| "grad_norm": 1.7896323310359439, | |
| "learning_rate": 6.536014392327365e-07, | |
| "loss": 1.0797, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.42027920646583394, | |
| "grad_norm": 1.8676894344858546, | |
| "learning_rate": 6.51331358010865e-07, | |
| "loss": 1.0154, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.42174871418074944, | |
| "grad_norm": 1.9247091383586632, | |
| "learning_rate": 6.490578375922328e-07, | |
| "loss": 1.0886, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.42321822189566494, | |
| "grad_norm": 1.9871628498039662, | |
| "learning_rate": 6.467809296454708e-07, | |
| "loss": 1.0183, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.42468772961058043, | |
| "grad_norm": 1.80768668341203, | |
| "learning_rate": 6.445006859161956e-07, | |
| "loss": 1.1154, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.426157237325496, | |
| "grad_norm": 1.7983774844966378, | |
| "learning_rate": 6.422171582258334e-07, | |
| "loss": 1.0147, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4276267450404115, | |
| "grad_norm": 1.7722882920293253, | |
| "learning_rate": 6.399303984704432e-07, | |
| "loss": 1.0592, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.429096252755327, | |
| "grad_norm": 1.8457372162292673, | |
| "learning_rate": 6.376404586195364e-07, | |
| "loss": 0.9973, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4305657604702425, | |
| "grad_norm": 1.895053197799994, | |
| "learning_rate": 6.353473907148961e-07, | |
| "loss": 1.0714, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.432035268185158, | |
| "grad_norm": 1.7517450211480494, | |
| "learning_rate": 6.330512468693944e-07, | |
| "loss": 1.0012, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.43350477590007347, | |
| "grad_norm": 1.7629299026170842, | |
| "learning_rate": 6.307520792658081e-07, | |
| "loss": 1.0542, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.43497428361498897, | |
| "grad_norm": 1.8006758213208047, | |
| "learning_rate": 6.284499401556328e-07, | |
| "loss": 1.1121, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.43644379132990446, | |
| "grad_norm": 1.8081212325920115, | |
| "learning_rate": 6.261448818578952e-07, | |
| "loss": 1.0156, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.43791329904481996, | |
| "grad_norm": 1.7825996298491462, | |
| "learning_rate": 6.238369567579642e-07, | |
| "loss": 1.0645, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.4393828067597355, | |
| "grad_norm": 1.8083690724086425, | |
| "learning_rate": 6.215262173063607e-07, | |
| "loss": 1.0702, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.440852314474651, | |
| "grad_norm": 1.931050770675179, | |
| "learning_rate": 6.192127160175649e-07, | |
| "loss": 1.0715, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4423218221895665, | |
| "grad_norm": 1.7810570021280987, | |
| "learning_rate": 6.168965054688238e-07, | |
| "loss": 1.0486, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.443791329904482, | |
| "grad_norm": 1.8207804972547699, | |
| "learning_rate": 6.145776382989552e-07, | |
| "loss": 1.0234, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4452608376193975, | |
| "grad_norm": 1.9112996054609888, | |
| "learning_rate": 6.122561672071521e-07, | |
| "loss": 0.9732, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.446730345334313, | |
| "grad_norm": 1.8392870095393898, | |
| "learning_rate": 6.099321449517851e-07, | |
| "loss": 1.0694, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.4481998530492285, | |
| "grad_norm": 1.746344158928944, | |
| "learning_rate": 6.076056243492035e-07, | |
| "loss": 1.0584, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.449669360764144, | |
| "grad_norm": 1.8625991708058884, | |
| "learning_rate": 6.052766582725339e-07, | |
| "loss": 0.9656, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.45113886847905954, | |
| "grad_norm": 1.8082814780076306, | |
| "learning_rate": 6.029452996504801e-07, | |
| "loss": 1.0642, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.45260837619397504, | |
| "grad_norm": 1.7985254132681678, | |
| "learning_rate": 6.006116014661191e-07, | |
| "loss": 1.1043, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.45407788390889053, | |
| "grad_norm": 1.787084218039004, | |
| "learning_rate": 5.982756167556978e-07, | |
| "loss": 0.9985, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.45554739162380603, | |
| "grad_norm": 1.866387388466877, | |
| "learning_rate": 5.959373986074269e-07, | |
| "loss": 1.0453, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4570168993387215, | |
| "grad_norm": 1.7953700540717752, | |
| "learning_rate": 5.935970001602751e-07, | |
| "loss": 0.994, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.458486407053637, | |
| "grad_norm": 1.6903793992507545, | |
| "learning_rate": 5.912544746027612e-07, | |
| "loss": 1.0204, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4599559147685525, | |
| "grad_norm": 1.7894140258608537, | |
| "learning_rate": 5.88909875171745e-07, | |
| "loss": 1.0958, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.461425422483468, | |
| "grad_norm": 1.919168721100207, | |
| "learning_rate": 5.865632551512175e-07, | |
| "loss": 0.9804, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4628949301983835, | |
| "grad_norm": 1.6965567076432635, | |
| "learning_rate": 5.842146678710911e-07, | |
| "loss": 1.0335, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.46436443791329907, | |
| "grad_norm": 1.8629208572600364, | |
| "learning_rate": 5.818641667059856e-07, | |
| "loss": 1.0513, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.46583394562821456, | |
| "grad_norm": 2.0773647110529296, | |
| "learning_rate": 5.795118050740169e-07, | |
| "loss": 1.0175, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.46730345334313006, | |
| "grad_norm": 1.8282782276172471, | |
| "learning_rate": 5.771576364355819e-07, | |
| "loss": 1.0706, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.46877296105804556, | |
| "grad_norm": 1.8215206601821508, | |
| "learning_rate": 5.748017142921448e-07, | |
| "loss": 1.1167, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.47024246877296105, | |
| "grad_norm": 1.7965481050358185, | |
| "learning_rate": 5.724440921850195e-07, | |
| "loss": 1.1283, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.47171197648787655, | |
| "grad_norm": 1.9457437301973206, | |
| "learning_rate": 5.700848236941543e-07, | |
| "loss": 1.0308, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.47318148420279205, | |
| "grad_norm": 1.834038700326329, | |
| "learning_rate": 5.677239624369134e-07, | |
| "loss": 0.9875, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.47465099191770754, | |
| "grad_norm": 1.8030898471332173, | |
| "learning_rate": 5.653615620668589e-07, | |
| "loss": 1.0106, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.4761204996326231, | |
| "grad_norm": 1.9350537183535073, | |
| "learning_rate": 5.629976762725307e-07, | |
| "loss": 0.9902, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.4775900073475386, | |
| "grad_norm": 1.8510351646518606, | |
| "learning_rate": 5.606323587762275e-07, | |
| "loss": 1.2716, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4790595150624541, | |
| "grad_norm": 1.8453958565296336, | |
| "learning_rate": 5.582656633327848e-07, | |
| "loss": 1.0647, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4805290227773696, | |
| "grad_norm": 1.8119764316370859, | |
| "learning_rate": 5.558976437283535e-07, | |
| "loss": 1.0782, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.4819985304922851, | |
| "grad_norm": 1.757616643542913, | |
| "learning_rate": 5.535283537791785e-07, | |
| "loss": 1.085, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.4834680382072006, | |
| "grad_norm": 1.8024345990236037, | |
| "learning_rate": 5.511578473303742e-07, | |
| "loss": 1.062, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4849375459221161, | |
| "grad_norm": 1.79520109860375, | |
| "learning_rate": 5.487861782547017e-07, | |
| "loss": 1.0857, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4864070536370316, | |
| "grad_norm": 1.7467294013133847, | |
| "learning_rate": 5.464134004513442e-07, | |
| "loss": 1.0852, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.4878765613519471, | |
| "grad_norm": 1.762768838854569, | |
| "learning_rate": 5.440395678446825e-07, | |
| "loss": 1.0764, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.4893460690668626, | |
| "grad_norm": 1.8369132895032922, | |
| "learning_rate": 5.416647343830687e-07, | |
| "loss": 1.0335, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.4908155767817781, | |
| "grad_norm": 1.7227201967047996, | |
| "learning_rate": 5.392889540376006e-07, | |
| "loss": 1.0121, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4922850844966936, | |
| "grad_norm": 1.8514670496237804, | |
| "learning_rate": 5.369122808008955e-07, | |
| "loss": 1.0267, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.4937545922116091, | |
| "grad_norm": 1.8281145295077228, | |
| "learning_rate": 5.345347686858626e-07, | |
| "loss": 1.0312, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4952240999265246, | |
| "grad_norm": 2.0505787508652165, | |
| "learning_rate": 5.321564717244757e-07, | |
| "loss": 1.0363, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.4966936076414401, | |
| "grad_norm": 1.9881421461034916, | |
| "learning_rate": 5.297774439665449e-07, | |
| "loss": 1.0525, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.4981631153563556, | |
| "grad_norm": 1.7909173829595026, | |
| "learning_rate": 5.273977394784892e-07, | |
| "loss": 1.0272, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4996326230712711, | |
| "grad_norm": 1.8358490715566034, | |
| "learning_rate": 5.250174123421068e-07, | |
| "loss": 1.1935, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5011021307861866, | |
| "grad_norm": 1.821995825437999, | |
| "learning_rate": 5.226365166533458e-07, | |
| "loss": 1.0467, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.5025716385011021, | |
| "grad_norm": 1.7931454394377604, | |
| "learning_rate": 5.202551065210768e-07, | |
| "loss": 0.991, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5040411462160176, | |
| "grad_norm": 1.7628339353097855, | |
| "learning_rate": 5.178732360658605e-07, | |
| "loss": 1.063, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.5055106539309331, | |
| "grad_norm": 1.8147261888211064, | |
| "learning_rate": 5.154909594187192e-07, | |
| "loss": 1.055, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5069801616458487, | |
| "grad_norm": 1.7863871342410091, | |
| "learning_rate": 5.131083307199071e-07, | |
| "loss": 1.0626, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5084496693607642, | |
| "grad_norm": 1.730735975505189, | |
| "learning_rate": 5.107254041176788e-07, | |
| "loss": 1.0818, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5099191770756797, | |
| "grad_norm": 1.7642264640801142, | |
| "learning_rate": 5.08342233767059e-07, | |
| "loss": 1.0145, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5113886847905952, | |
| "grad_norm": 1.8089809513102568, | |
| "learning_rate": 5.059588738286118e-07, | |
| "loss": 1.1006, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5128581925055107, | |
| "grad_norm": 1.8302210019412795, | |
| "learning_rate": 5.035753784672105e-07, | |
| "loss": 1.047, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5143277002204262, | |
| "grad_norm": 1.9024588115083674, | |
| "learning_rate": 5.011918018508057e-07, | |
| "loss": 1.0686, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5157972079353417, | |
| "grad_norm": 1.759141919781444, | |
| "learning_rate": 4.988081981491944e-07, | |
| "loss": 1.018, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5172667156502572, | |
| "grad_norm": 1.889727689838186, | |
| "learning_rate": 4.964246215327894e-07, | |
| "loss": 1.1436, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5187362233651727, | |
| "grad_norm": 1.7456126845475135, | |
| "learning_rate": 4.940411261713882e-07, | |
| "loss": 1.015, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5202057310800882, | |
| "grad_norm": 1.7664326346015127, | |
| "learning_rate": 4.91657766232941e-07, | |
| "loss": 1.09, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5216752387950037, | |
| "grad_norm": 1.7222941541710777, | |
| "learning_rate": 4.892745958823213e-07, | |
| "loss": 1.0243, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5231447465099192, | |
| "grad_norm": 1.7157057108244869, | |
| "learning_rate": 4.868916692800928e-07, | |
| "loss": 1.0408, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5246142542248347, | |
| "grad_norm": 1.824734949262758, | |
| "learning_rate": 4.845090405812809e-07, | |
| "loss": 1.0149, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.5260837619397501, | |
| "grad_norm": 1.8249245650143116, | |
| "learning_rate": 4.821267639341397e-07, | |
| "loss": 0.9811, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5275532696546656, | |
| "grad_norm": 1.7495581100985684, | |
| "learning_rate": 4.797448934789232e-07, | |
| "loss": 1.0158, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5290227773695811, | |
| "grad_norm": 1.773554088711297, | |
| "learning_rate": 4.773634833466541e-07, | |
| "loss": 0.9899, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5304922850844966, | |
| "grad_norm": 1.7708057357465794, | |
| "learning_rate": 4.7498258765789335e-07, | |
| "loss": 0.9971, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.5319617927994122, | |
| "grad_norm": 1.8963492839232228, | |
| "learning_rate": 4.726022605215108e-07, | |
| "loss": 1.1343, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5334313005143277, | |
| "grad_norm": 1.8915073493453016, | |
| "learning_rate": 4.7022255603345504e-07, | |
| "loss": 0.9819, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.5349008082292432, | |
| "grad_norm": 1.876306774074019, | |
| "learning_rate": 4.6784352827552433e-07, | |
| "loss": 1.0328, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5363703159441587, | |
| "grad_norm": 1.7402642886140849, | |
| "learning_rate": 4.6546523131413737e-07, | |
| "loss": 1.0202, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5378398236590742, | |
| "grad_norm": 1.766894077634918, | |
| "learning_rate": 4.6308771919910455e-07, | |
| "loss": 1.0183, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5393093313739897, | |
| "grad_norm": 1.8710398967234652, | |
| "learning_rate": 4.607110459623994e-07, | |
| "loss": 0.9959, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5407788390889052, | |
| "grad_norm": 1.8609893131836714, | |
| "learning_rate": 4.5833526561693146e-07, | |
| "loss": 1.0279, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5422483468038207, | |
| "grad_norm": 1.8161838491547273, | |
| "learning_rate": 4.559604321553176e-07, | |
| "loss": 1.0187, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5437178545187362, | |
| "grad_norm": 1.7513215748849267, | |
| "learning_rate": 4.535865995486559e-07, | |
| "loss": 1.0668, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5451873622336517, | |
| "grad_norm": 1.9792905956550824, | |
| "learning_rate": 4.512138217452984e-07, | |
| "loss": 0.9754, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5466568699485672, | |
| "grad_norm": 1.8313364606031188, | |
| "learning_rate": 4.488421526696259e-07, | |
| "loss": 1.027, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5481263776634827, | |
| "grad_norm": 1.7125757541553044, | |
| "learning_rate": 4.464716462208216e-07, | |
| "loss": 0.9887, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5495958853783982, | |
| "grad_norm": 1.8168674762154746, | |
| "learning_rate": 4.441023562716464e-07, | |
| "loss": 1.0634, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5510653930933137, | |
| "grad_norm": 1.7943803054480878, | |
| "learning_rate": 4.417343366672154e-07, | |
| "loss": 0.9982, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5525349008082292, | |
| "grad_norm": 1.8144203745899334, | |
| "learning_rate": 4.393676412237726e-07, | |
| "loss": 1.0639, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5540044085231447, | |
| "grad_norm": 1.8410394383068294, | |
| "learning_rate": 4.370023237274693e-07, | |
| "loss": 0.9708, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5554739162380602, | |
| "grad_norm": 1.8238414471621205, | |
| "learning_rate": 4.3463843793314123e-07, | |
| "loss": 0.9859, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5569434239529758, | |
| "grad_norm": 1.8116442983581678, | |
| "learning_rate": 4.322760375630867e-07, | |
| "loss": 1.1102, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5584129316678913, | |
| "grad_norm": 1.709109580058877, | |
| "learning_rate": 4.299151763058457e-07, | |
| "loss": 1.0817, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5598824393828068, | |
| "grad_norm": 1.8180233791321092, | |
| "learning_rate": 4.2755590781498056e-07, | |
| "loss": 0.9678, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5613519470977223, | |
| "grad_norm": 1.748975999116001, | |
| "learning_rate": 4.251982857078553e-07, | |
| "loss": 0.9776, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5628214548126378, | |
| "grad_norm": 1.9292369702259817, | |
| "learning_rate": 4.2284236356441817e-07, | |
| "loss": 1.0483, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.5642909625275533, | |
| "grad_norm": 2.01824515708808, | |
| "learning_rate": 4.204881949259832e-07, | |
| "loss": 1.1453, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5657604702424688, | |
| "grad_norm": 1.7138713007750266, | |
| "learning_rate": 4.181358332940144e-07, | |
| "loss": 1.0395, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5672299779573843, | |
| "grad_norm": 1.821190672284049, | |
| "learning_rate": 4.157853321289089e-07, | |
| "loss": 1.0557, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5686994856722998, | |
| "grad_norm": 1.8426132732229954, | |
| "learning_rate": 4.1343674484878236e-07, | |
| "loss": 1.0369, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5701689933872153, | |
| "grad_norm": 1.8067549348583807, | |
| "learning_rate": 4.11090124828255e-07, | |
| "loss": 1.0121, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5716385011021308, | |
| "grad_norm": 1.7768823877409583, | |
| "learning_rate": 4.0874552539723873e-07, | |
| "loss": 1.0319, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5731080088170463, | |
| "grad_norm": 1.9410827099670018, | |
| "learning_rate": 4.064029998397247e-07, | |
| "loss": 1.0548, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5745775165319618, | |
| "grad_norm": 1.7974497316933307, | |
| "learning_rate": 4.04062601392573e-07, | |
| "loss": 1.0468, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5760470242468773, | |
| "grad_norm": 1.814540333924481, | |
| "learning_rate": 4.017243832443021e-07, | |
| "loss": 1.0443, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5775165319617928, | |
| "grad_norm": 1.7702673169768615, | |
| "learning_rate": 3.993883985338808e-07, | |
| "loss": 1.0199, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5789860396767083, | |
| "grad_norm": 1.6726234345970885, | |
| "learning_rate": 3.9705470034951986e-07, | |
| "loss": 1.0008, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5804555473916239, | |
| "grad_norm": 1.7525297959261115, | |
| "learning_rate": 3.9472334172746596e-07, | |
| "loss": 1.0525, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5819250551065394, | |
| "grad_norm": 1.8829571527912472, | |
| "learning_rate": 3.9239437565079645e-07, | |
| "loss": 1.0586, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5833945628214549, | |
| "grad_norm": 1.8402564713547034, | |
| "learning_rate": 3.900678550482147e-07, | |
| "loss": 1.0469, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5848640705363704, | |
| "grad_norm": 1.7484377822738735, | |
| "learning_rate": 3.877438327928478e-07, | |
| "loss": 1.0515, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5863335782512858, | |
| "grad_norm": 1.7734813614367146, | |
| "learning_rate": 3.854223617010448e-07, | |
| "loss": 0.9925, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.5878030859662013, | |
| "grad_norm": 1.8437569756188645, | |
| "learning_rate": 3.8310349453117617e-07, | |
| "loss": 1.0257, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5892725936811168, | |
| "grad_norm": 1.7695848999552912, | |
| "learning_rate": 3.8078728398243503e-07, | |
| "loss": 1.0602, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.5907421013960323, | |
| "grad_norm": 1.812644515523391, | |
| "learning_rate": 3.784737826936393e-07, | |
| "loss": 1.0012, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5922116091109478, | |
| "grad_norm": 1.7016974236723055, | |
| "learning_rate": 3.761630432420358e-07, | |
| "loss": 0.9757, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5936811168258633, | |
| "grad_norm": 1.8251040601296213, | |
| "learning_rate": 3.7385511814210493e-07, | |
| "loss": 1.0435, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5951506245407788, | |
| "grad_norm": 1.882206985801385, | |
| "learning_rate": 3.715500598443672e-07, | |
| "loss": 1.0125, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.5966201322556943, | |
| "grad_norm": 1.700935158915743, | |
| "learning_rate": 3.6924792073419193e-07, | |
| "loss": 1.0304, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5980896399706098, | |
| "grad_norm": 1.821591367605163, | |
| "learning_rate": 3.6694875313060567e-07, | |
| "loss": 1.1047, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5995591476855253, | |
| "grad_norm": 1.9222790614713745, | |
| "learning_rate": 3.646526092851039e-07, | |
| "loss": 1.1109, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6010286554004408, | |
| "grad_norm": 1.7353537741399607, | |
| "learning_rate": 3.623595413804636e-07, | |
| "loss": 1.0393, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.6024981631153563, | |
| "grad_norm": 1.7671934350452785, | |
| "learning_rate": 3.600696015295568e-07, | |
| "loss": 1.023, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6039676708302718, | |
| "grad_norm": 1.8243296781220124, | |
| "learning_rate": 3.577828417741665e-07, | |
| "loss": 1.0938, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.6054371785451874, | |
| "grad_norm": 1.7808942851487597, | |
| "learning_rate": 3.5549931408380446e-07, | |
| "loss": 1.049, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6069066862601029, | |
| "grad_norm": 1.7751176328929308, | |
| "learning_rate": 3.5321907035452913e-07, | |
| "loss": 0.995, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.6083761939750184, | |
| "grad_norm": 1.80138825273579, | |
| "learning_rate": 3.509421624077672e-07, | |
| "loss": 1.0378, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6098457016899339, | |
| "grad_norm": 1.734060593988662, | |
| "learning_rate": 3.486686419891349e-07, | |
| "loss": 0.973, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6113152094048494, | |
| "grad_norm": 1.814138504869964, | |
| "learning_rate": 3.4639856076726346e-07, | |
| "loss": 1.0429, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6127847171197649, | |
| "grad_norm": 1.8227189292521806, | |
| "learning_rate": 3.4413197033262343e-07, | |
| "loss": 1.0348, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.6142542248346804, | |
| "grad_norm": 1.7945450128155505, | |
| "learning_rate": 3.4186892219635254e-07, | |
| "loss": 1.0017, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6157237325495959, | |
| "grad_norm": 1.8233464040803875, | |
| "learning_rate": 3.396094677890862e-07, | |
| "loss": 1.0457, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6171932402645114, | |
| "grad_norm": 1.8995720630920003, | |
| "learning_rate": 3.373536584597869e-07, | |
| "loss": 0.9766, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6186627479794269, | |
| "grad_norm": 1.7848342121334433, | |
| "learning_rate": 3.3510154547457845e-07, | |
| "loss": 1.0378, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6201322556943424, | |
| "grad_norm": 1.7452555853428344, | |
| "learning_rate": 3.3285318001558076e-07, | |
| "loss": 1.0456, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6216017634092579, | |
| "grad_norm": 1.7706320471133385, | |
| "learning_rate": 3.306086131797462e-07, | |
| "loss": 1.028, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.6230712711241734, | |
| "grad_norm": 1.8287154878240726, | |
| "learning_rate": 3.283678959776986e-07, | |
| "loss": 1.0694, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6245407788390889, | |
| "grad_norm": 1.9361160647239, | |
| "learning_rate": 3.261310793325742e-07, | |
| "loss": 1.0285, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6260102865540044, | |
| "grad_norm": 1.7593247945393256, | |
| "learning_rate": 3.23898214078864e-07, | |
| "loss": 1.0826, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6274797942689199, | |
| "grad_norm": 1.9132628423312081, | |
| "learning_rate": 3.216693509612587e-07, | |
| "loss": 1.0267, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6289493019838354, | |
| "grad_norm": 1.8484127698802801, | |
| "learning_rate": 3.19444540633495e-07, | |
| "loss": 1.0718, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.630418809698751, | |
| "grad_norm": 1.8916521672529822, | |
| "learning_rate": 3.172238336572056e-07, | |
| "loss": 1.157, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6318883174136665, | |
| "grad_norm": 2.9742222052103013, | |
| "learning_rate": 3.1500728050076873e-07, | |
| "loss": 1.0119, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.633357825128582, | |
| "grad_norm": 1.7337670893421475, | |
| "learning_rate": 3.1279493153816183e-07, | |
| "loss": 0.9856, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6348273328434975, | |
| "grad_norm": 1.7909370519049963, | |
| "learning_rate": 3.1058683704781707e-07, | |
| "loss": 0.9805, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.636296840558413, | |
| "grad_norm": 1.7354057723295175, | |
| "learning_rate": 3.0838304721147803e-07, | |
| "loss": 1.0015, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6377663482733285, | |
| "grad_norm": 1.7551304317860974, | |
| "learning_rate": 3.0618361211305956e-07, | |
| "loss": 1.1138, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.639235855988244, | |
| "grad_norm": 1.8366440682808711, | |
| "learning_rate": 3.0398858173750994e-07, | |
| "loss": 1.0614, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6407053637031594, | |
| "grad_norm": 1.7891758756508314, | |
| "learning_rate": 3.0179800596967414e-07, | |
| "loss": 1.084, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6421748714180749, | |
| "grad_norm": 1.7187905850769671, | |
| "learning_rate": 2.996119345931607e-07, | |
| "loss": 1.0292, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6436443791329904, | |
| "grad_norm": 1.740058780022097, | |
| "learning_rate": 2.9743041728921004e-07, | |
| "loss": 0.9946, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6451138868479059, | |
| "grad_norm": 1.9099618734962607, | |
| "learning_rate": 2.952535036355659e-07, | |
| "loss": 0.9565, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6465833945628214, | |
| "grad_norm": 1.7980156429513823, | |
| "learning_rate": 2.930812431053477e-07, | |
| "loss": 1.1037, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6480529022777369, | |
| "grad_norm": 1.813220963862845, | |
| "learning_rate": 2.9091368506592704e-07, | |
| "loss": 1.0499, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6495224099926524, | |
| "grad_norm": 1.772153448992575, | |
| "learning_rate": 2.8875087877780547e-07, | |
| "loss": 0.9447, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6509919177075679, | |
| "grad_norm": 1.7658892705830358, | |
| "learning_rate": 2.865928733934951e-07, | |
| "loss": 1.0119, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.6524614254224834, | |
| "grad_norm": 1.847269826581132, | |
| "learning_rate": 2.844397179564009e-07, | |
| "loss": 1.0514, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6539309331373989, | |
| "grad_norm": 1.746147021855409, | |
| "learning_rate": 2.8229146139970725e-07, | |
| "loss": 1.0209, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6554004408523145, | |
| "grad_norm": 1.732044566838772, | |
| "learning_rate": 2.8014815254526475e-07, | |
| "loss": 0.9906, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.65686994856723, | |
| "grad_norm": 1.8978735745750135, | |
| "learning_rate": 2.780098401024816e-07, | |
| "loss": 1.0946, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6583394562821455, | |
| "grad_norm": 1.87807485244721, | |
| "learning_rate": 2.7587657266721633e-07, | |
| "loss": 0.9462, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.659808963997061, | |
| "grad_norm": 1.7847823162110357, | |
| "learning_rate": 2.737483987206725e-07, | |
| "loss": 0.9834, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6612784717119765, | |
| "grad_norm": 1.7587621212176707, | |
| "learning_rate": 2.7162536662829836e-07, | |
| "loss": 0.9779, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.662747979426892, | |
| "grad_norm": 1.774690627426415, | |
| "learning_rate": 2.695075246386874e-07, | |
| "loss": 0.977, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6642174871418075, | |
| "grad_norm": 1.7853964662457704, | |
| "learning_rate": 2.673949208824804e-07, | |
| "loss": 0.9579, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.665686994856723, | |
| "grad_norm": 1.813872909885941, | |
| "learning_rate": 2.6528760337127344e-07, | |
| "loss": 1.0073, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.6671565025716385, | |
| "grad_norm": 1.7502967135919663, | |
| "learning_rate": 2.6318561999652543e-07, | |
| "loss": 1.0442, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.668626010286554, | |
| "grad_norm": 1.8053794650101893, | |
| "learning_rate": 2.610890185284707e-07, | |
| "loss": 1.0947, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.6700955180014695, | |
| "grad_norm": 1.7729452406831345, | |
| "learning_rate": 2.5899784661503306e-07, | |
| "loss": 1.0961, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.671565025716385, | |
| "grad_norm": 1.7354885528303905, | |
| "learning_rate": 2.569121517807421e-07, | |
| "loss": 1.013, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.6730345334313005, | |
| "grad_norm": 1.7765708056979728, | |
| "learning_rate": 2.5483198142565454e-07, | |
| "loss": 1.0584, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.674504041146216, | |
| "grad_norm": 1.7791464282680176, | |
| "learning_rate": 2.5275738282427627e-07, | |
| "loss": 1.098, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.6759735488611315, | |
| "grad_norm": 1.7637882278681942, | |
| "learning_rate": 2.506884031244875e-07, | |
| "loss": 1.0006, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.677443056576047, | |
| "grad_norm": 1.7540544809473422, | |
| "learning_rate": 2.4862508934647215e-07, | |
| "loss": 1.0033, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.6789125642909625, | |
| "grad_norm": 1.7585631184248087, | |
| "learning_rate": 2.465674883816492e-07, | |
| "loss": 1.0656, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6803820720058781, | |
| "grad_norm": 1.808996319528907, | |
| "learning_rate": 2.445156469916059e-07, | |
| "loss": 0.9714, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.6818515797207936, | |
| "grad_norm": 1.809843495968243, | |
| "learning_rate": 2.424696118070367e-07, | |
| "loss": 1.0581, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.6833210874357091, | |
| "grad_norm": 1.6670442041884845, | |
| "learning_rate": 2.404294293266823e-07, | |
| "loss": 0.9425, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6847905951506246, | |
| "grad_norm": 1.76399522732867, | |
| "learning_rate": 2.3839514591627298e-07, | |
| "loss": 1.0518, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6862601028655401, | |
| "grad_norm": 1.7332927507318467, | |
| "learning_rate": 2.3636680780747574e-07, | |
| "loss": 1.0519, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.6877296105804556, | |
| "grad_norm": 1.7593209946714115, | |
| "learning_rate": 2.3434446109684303e-07, | |
| "loss": 1.0678, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6891991182953711, | |
| "grad_norm": 1.844987519782162, | |
| "learning_rate": 2.323281517447646e-07, | |
| "loss": 1.0948, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6906686260102866, | |
| "grad_norm": 1.7890685035001295, | |
| "learning_rate": 2.3031792557442426e-07, | |
| "loss": 0.9799, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.692138133725202, | |
| "grad_norm": 1.8257310859124949, | |
| "learning_rate": 2.2831382827075758e-07, | |
| "loss": 0.9977, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6936076414401176, | |
| "grad_norm": 1.863003326170686, | |
| "learning_rate": 2.2631590537941348e-07, | |
| "loss": 1.0266, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.695077149155033, | |
| "grad_norm": 1.796551415142973, | |
| "learning_rate": 2.2432420230572014e-07, | |
| "loss": 1.05, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6965466568699485, | |
| "grad_norm": 1.7791843008462636, | |
| "learning_rate": 2.223387643136524e-07, | |
| "loss": 1.0631, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.698016164584864, | |
| "grad_norm": 1.8342190635267368, | |
| "learning_rate": 2.2035963652480266e-07, | |
| "loss": 1.0214, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6994856722997795, | |
| "grad_norm": 1.8734533497691483, | |
| "learning_rate": 2.183868639173568e-07, | |
| "loss": 0.9609, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.700955180014695, | |
| "grad_norm": 1.7146756401235461, | |
| "learning_rate": 2.1642049132507013e-07, | |
| "loss": 1.0848, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.7024246877296105, | |
| "grad_norm": 1.8197278079364136, | |
| "learning_rate": 2.144605634362504e-07, | |
| "loss": 0.9978, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7038941954445261, | |
| "grad_norm": 1.8066318762651958, | |
| "learning_rate": 2.125071247927412e-07, | |
| "loss": 1.001, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.7053637031594416, | |
| "grad_norm": 1.8332236077939572, | |
| "learning_rate": 2.1056021978890915e-07, | |
| "loss": 1.0248, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7068332108743571, | |
| "grad_norm": 1.752732727944847, | |
| "learning_rate": 2.0861989267063622e-07, | |
| "loss": 1.0468, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.7083027185892726, | |
| "grad_norm": 1.760265234031662, | |
| "learning_rate": 2.0668618753431372e-07, | |
| "loss": 1.0598, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7097722263041881, | |
| "grad_norm": 1.830543381313312, | |
| "learning_rate": 2.0475914832583936e-07, | |
| "loss": 1.0581, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.7112417340191036, | |
| "grad_norm": 1.7527247281185463, | |
| "learning_rate": 2.0283881883961978e-07, | |
| "loss": 1.0606, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7127112417340191, | |
| "grad_norm": 1.7445145256230117, | |
| "learning_rate": 2.0092524271757472e-07, | |
| "loss": 1.0332, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7141807494489346, | |
| "grad_norm": 1.8241619108049254, | |
| "learning_rate": 1.990184634481446e-07, | |
| "loss": 1.0245, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7156502571638501, | |
| "grad_norm": 1.875499142180598, | |
| "learning_rate": 1.9711852436530318e-07, | |
| "loss": 1.0644, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7171197648787656, | |
| "grad_norm": 1.846389740515158, | |
| "learning_rate": 1.952254686475726e-07, | |
| "loss": 1.0553, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7185892725936811, | |
| "grad_norm": 1.8163254096673638, | |
| "learning_rate": 1.9333933931704098e-07, | |
| "loss": 0.998, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7200587803085966, | |
| "grad_norm": 1.7623549998600847, | |
| "learning_rate": 1.914601792383862e-07, | |
| "loss": 1.0027, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7215282880235121, | |
| "grad_norm": 1.714869784410914, | |
| "learning_rate": 1.8958803111790105e-07, | |
| "loss": 1.0056, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7229977957384276, | |
| "grad_norm": 1.7876284758260355, | |
| "learning_rate": 1.877229375025222e-07, | |
| "loss": 1.0857, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7244673034533431, | |
| "grad_norm": 1.6960378155490023, | |
| "learning_rate": 1.8586494077886416e-07, | |
| "loss": 1.0358, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7259368111682586, | |
| "grad_norm": 1.8958781149495285, | |
| "learning_rate": 1.840140831722557e-07, | |
| "loss": 1.0456, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7274063188831741, | |
| "grad_norm": 1.847902129513823, | |
| "learning_rate": 1.821704067457795e-07, | |
| "loss": 1.0299, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.7288758265980897, | |
| "grad_norm": 1.781753600620522, | |
| "learning_rate": 1.803339533993175e-07, | |
| "loss": 1.0461, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7303453343130052, | |
| "grad_norm": 1.7044619507439027, | |
| "learning_rate": 1.7850476486859784e-07, | |
| "loss": 1.0307, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7318148420279207, | |
| "grad_norm": 1.7635434595560795, | |
| "learning_rate": 1.766828827242461e-07, | |
| "loss": 1.0086, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7332843497428362, | |
| "grad_norm": 1.7820427578619338, | |
| "learning_rate": 1.7486834837084147e-07, | |
| "loss": 1.1165, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7347538574577517, | |
| "grad_norm": 1.9097746083160565, | |
| "learning_rate": 1.7306120304597516e-07, | |
| "loss": 1.0461, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7347538574577517, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_loss": 2.7085540294647217, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_runtime": 17.8085, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_samples_per_second": 5.222, | |
| "eval_ical_mcts_chains_sft_val_MORECHAINS_steps_per_second": 0.674, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7362233651726672, | |
| "grad_norm": 1.783765348036335, | |
| "learning_rate": 1.7126148781931309e-07, | |
| "loss": 1.1337, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.7376928728875827, | |
| "grad_norm": 1.7555139522882506, | |
| "learning_rate": 1.6946924359166332e-07, | |
| "loss": 1.0658, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7391623806024982, | |
| "grad_norm": 1.740923755234073, | |
| "learning_rate": 1.6768451109404518e-07, | |
| "loss": 1.0809, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7406318883174137, | |
| "grad_norm": 1.7576518353306747, | |
| "learning_rate": 1.659073308867653e-07, | |
| "loss": 1.0141, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7421013960323292, | |
| "grad_norm": 1.8195633960647817, | |
| "learning_rate": 1.641377433584945e-07, | |
| "loss": 1.0799, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7435709037472447, | |
| "grad_norm": 1.724184703939458, | |
| "learning_rate": 1.6237578872535023e-07, | |
| "loss": 1.0377, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7450404114621602, | |
| "grad_norm": 1.7366929295964126, | |
| "learning_rate": 1.6062150702998307e-07, | |
| "loss": 1.0373, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7465099191770757, | |
| "grad_norm": 1.828841387885778, | |
| "learning_rate": 1.5887493814066632e-07, | |
| "loss": 1.0053, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7479794268919912, | |
| "grad_norm": 1.799844847030494, | |
| "learning_rate": 1.5713612175038953e-07, | |
| "loss": 1.0182, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.7494489346069066, | |
| "grad_norm": 1.7736398311243526, | |
| "learning_rate": 1.5540509737595752e-07, | |
| "loss": 0.9885, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7509184423218221, | |
| "grad_norm": 1.7318314688384178, | |
| "learning_rate": 1.536819043570915e-07, | |
| "loss": 1.0109, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.7523879500367376, | |
| "grad_norm": 1.7009255541723038, | |
| "learning_rate": 1.5196658185553484e-07, | |
| "loss": 1.0023, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7538574577516532, | |
| "grad_norm": 1.7989686822366986, | |
| "learning_rate": 1.5025916885416385e-07, | |
| "loss": 1.0307, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7553269654665687, | |
| "grad_norm": 1.8392247482233148, | |
| "learning_rate": 1.485597041561014e-07, | |
| "loss": 1.1094, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7567964731814842, | |
| "grad_norm": 1.7569776212028672, | |
| "learning_rate": 1.4686822638383485e-07, | |
| "loss": 1.0628, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7582659808963997, | |
| "grad_norm": 1.7919919083958182, | |
| "learning_rate": 1.4518477397833868e-07, | |
| "loss": 1.0299, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7597354886113152, | |
| "grad_norm": 1.7659155447820256, | |
| "learning_rate": 1.4350938519820082e-07, | |
| "loss": 1.0487, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.7612049963262307, | |
| "grad_norm": 1.7726004655315755, | |
| "learning_rate": 1.4184209811875314e-07, | |
| "loss": 0.9892, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7626745040411462, | |
| "grad_norm": 1.7352203131127852, | |
| "learning_rate": 1.401829506312061e-07, | |
| "loss": 1.0795, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.7641440117560617, | |
| "grad_norm": 1.7457325967667972, | |
| "learning_rate": 1.385319804417872e-07, | |
| "loss": 1.0278, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7656135194709772, | |
| "grad_norm": 1.7557251595785084, | |
| "learning_rate": 1.3688922507088506e-07, | |
| "loss": 1.0023, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.7670830271858927, | |
| "grad_norm": 1.8500663234582253, | |
| "learning_rate": 1.35254721852196e-07, | |
| "loss": 1.1031, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7685525349008082, | |
| "grad_norm": 1.7675527728702798, | |
| "learning_rate": 1.3362850793187536e-07, | |
| "loss": 1.0998, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.7700220426157237, | |
| "grad_norm": 1.7802520405309632, | |
| "learning_rate": 1.3201062026769415e-07, | |
| "loss": 1.081, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7714915503306392, | |
| "grad_norm": 1.8571095737653232, | |
| "learning_rate": 1.3040109562819852e-07, | |
| "loss": 1.0501, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7729610580455547, | |
| "grad_norm": 1.737514001066965, | |
| "learning_rate": 1.2879997059187402e-07, | |
| "loss": 1.1032, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7744305657604702, | |
| "grad_norm": 1.7415002496273955, | |
| "learning_rate": 1.27207281546315e-07, | |
| "loss": 1.0373, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.7759000734753857, | |
| "grad_norm": 1.7233552370133884, | |
| "learning_rate": 1.2562306468739707e-07, | |
| "loss": 1.0011, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7773695811903012, | |
| "grad_norm": 1.7771726750205339, | |
| "learning_rate": 1.2404735601845446e-07, | |
| "loss": 1.0235, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.7788390889052168, | |
| "grad_norm": 1.7589516685296818, | |
| "learning_rate": 1.2248019134946224e-07, | |
| "loss": 1.0871, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7803085966201323, | |
| "grad_norm": 1.8904879764816582, | |
| "learning_rate": 1.2092160629622243e-07, | |
| "loss": 1.1743, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.7817781043350478, | |
| "grad_norm": 1.679478054850629, | |
| "learning_rate": 1.1937163627955388e-07, | |
| "loss": 0.9987, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7832476120499633, | |
| "grad_norm": 1.7700957005048465, | |
| "learning_rate": 1.1783031652448844e-07, | |
| "loss": 1.013, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.7847171197648788, | |
| "grad_norm": 1.698212822022776, | |
| "learning_rate": 1.1629768205946916e-07, | |
| "loss": 1.0289, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.7861866274797943, | |
| "grad_norm": 1.6982580846175706, | |
| "learning_rate": 1.1477376771555547e-07, | |
| "loss": 1.0437, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.7876561351947098, | |
| "grad_norm": 1.8951494938436424, | |
| "learning_rate": 1.1325860812563082e-07, | |
| "loss": 1.0241, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7891256429096253, | |
| "grad_norm": 1.7809478211990022, | |
| "learning_rate": 1.1175223772361548e-07, | |
| "loss": 1.0971, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.7905951506245408, | |
| "grad_norm": 1.8138774494697176, | |
| "learning_rate": 1.1025469074368465e-07, | |
| "loss": 1.0308, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.7920646583394563, | |
| "grad_norm": 1.7706413795439244, | |
| "learning_rate": 1.0876600121949014e-07, | |
| "loss": 0.9894, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.7935341660543718, | |
| "grad_norm": 1.7128344759591312, | |
| "learning_rate": 1.0728620298338647e-07, | |
| "loss": 0.9308, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7950036737692873, | |
| "grad_norm": 1.7376472280744786, | |
| "learning_rate": 1.058153296656627e-07, | |
| "loss": 0.9786, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.7964731814842028, | |
| "grad_norm": 1.6180557950522634, | |
| "learning_rate": 1.0435341469377785e-07, | |
| "loss": 0.9778, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7979426891991183, | |
| "grad_norm": 1.779926196605351, | |
| "learning_rate": 1.0290049129160083e-07, | |
| "loss": 1.0213, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.7994121969140338, | |
| "grad_norm": 1.7660627522019654, | |
| "learning_rate": 1.0145659247865606e-07, | |
| "loss": 0.9912, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8008817046289493, | |
| "grad_norm": 1.7480192933335235, | |
| "learning_rate": 1.0002175106937282e-07, | |
| "loss": 1.0209, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.8023512123438648, | |
| "grad_norm": 1.771603438272948, | |
| "learning_rate": 9.859599967233901e-08, | |
| "loss": 1.0015, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8038207200587804, | |
| "grad_norm": 1.6631521749746068, | |
| "learning_rate": 9.717937068956083e-08, | |
| "loss": 1.0255, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.8052902277736959, | |
| "grad_norm": 1.725895874046473, | |
| "learning_rate": 9.577189631572613e-08, | |
| "loss": 1.0477, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8067597354886114, | |
| "grad_norm": 1.7384294189757843, | |
| "learning_rate": 9.437360853747223e-08, | |
| "loss": 1.0759, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.8082292432035268, | |
| "grad_norm": 1.7812991412940928, | |
| "learning_rate": 9.29845391326598e-08, | |
| "loss": 0.9964, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8096987509184423, | |
| "grad_norm": 1.8853925917414667, | |
| "learning_rate": 9.16047196696505e-08, | |
| "loss": 1.0252, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.8111682586333578, | |
| "grad_norm": 1.87531769900298, | |
| "learning_rate": 9.023418150658863e-08, | |
| "loss": 1.0244, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8126377663482733, | |
| "grad_norm": 1.7505658777412774, | |
| "learning_rate": 8.887295579068988e-08, | |
| "loss": 0.9692, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.8141072740631888, | |
| "grad_norm": 1.7482379564431354, | |
| "learning_rate": 8.752107345753262e-08, | |
| "loss": 0.958, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8155767817781043, | |
| "grad_norm": 1.7375873910532686, | |
| "learning_rate": 8.617856523035466e-08, | |
| "loss": 1.034, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8170462894930198, | |
| "grad_norm": 1.7433829403227212, | |
| "learning_rate": 8.484546161935596e-08, | |
| "loss": 1.011, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8185157972079353, | |
| "grad_norm": 1.8154504530341387, | |
| "learning_rate": 8.352179292100403e-08, | |
| "loss": 1.0048, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8199853049228508, | |
| "grad_norm": 1.758428463882583, | |
| "learning_rate": 8.220758921734649e-08, | |
| "loss": 0.9964, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8214548126377663, | |
| "grad_norm": 1.7379172896067956, | |
| "learning_rate": 8.090288037532706e-08, | |
| "loss": 1.0351, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8229243203526818, | |
| "grad_norm": 1.8067076107445252, | |
| "learning_rate": 7.960769604610618e-08, | |
| "loss": 1.0556, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8243938280675973, | |
| "grad_norm": 1.7093430682749875, | |
| "learning_rate": 7.83220656643881e-08, | |
| "loss": 0.9723, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8258633357825128, | |
| "grad_norm": 1.7301749018028927, | |
| "learning_rate": 7.704601844775155e-08, | |
| "loss": 1.0169, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8273328434974284, | |
| "grad_norm": 1.9789670450938064, | |
| "learning_rate": 7.577958339598529e-08, | |
| "loss": 0.9434, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8288023512123439, | |
| "grad_norm": 1.8296698182174271, | |
| "learning_rate": 7.452278929042982e-08, | |
| "loss": 1.0645, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8302718589272594, | |
| "grad_norm": 1.732152158537409, | |
| "learning_rate": 7.327566469332303e-08, | |
| "loss": 1.0619, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8317413666421749, | |
| "grad_norm": 1.7544744051868846, | |
| "learning_rate": 7.203823794715041e-08, | |
| "loss": 1.0365, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8332108743570904, | |
| "grad_norm": 1.6893639940770133, | |
| "learning_rate": 7.08105371740021e-08, | |
| "loss": 1.0231, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8346803820720059, | |
| "grad_norm": 1.8424586490873391, | |
| "learning_rate": 6.959259027493303e-08, | |
| "loss": 1.0513, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8361498897869214, | |
| "grad_norm": 1.773145972559333, | |
| "learning_rate": 6.838442492932867e-08, | |
| "loss": 1.0079, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8376193975018369, | |
| "grad_norm": 1.8325252478515535, | |
| "learning_rate": 6.718606859427673e-08, | |
| "loss": 1.0529, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8390889052167524, | |
| "grad_norm": 1.7519464554535977, | |
| "learning_rate": 6.599754850394263e-08, | |
| "loss": 1.0249, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.8405584129316679, | |
| "grad_norm": 1.7817640686446343, | |
| "learning_rate": 6.481889166895033e-08, | |
| "loss": 0.9944, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8420279206465834, | |
| "grad_norm": 1.692594190403071, | |
| "learning_rate": 6.365012487576926e-08, | |
| "loss": 0.9993, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.8434974283614989, | |
| "grad_norm": 1.795806347454193, | |
| "learning_rate": 6.249127468610504e-08, | |
| "loss": 1.0714, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8449669360764144, | |
| "grad_norm": 1.7511008874312695, | |
| "learning_rate": 6.134236743629562e-08, | |
| "loss": 1.0483, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.8464364437913299, | |
| "grad_norm": 1.8161885855117905, | |
| "learning_rate": 6.020342923671334e-08, | |
| "loss": 0.9827, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8479059515062454, | |
| "grad_norm": 1.769523032001142, | |
| "learning_rate": 5.907448597117126e-08, | |
| "loss": 1.0706, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.8493754592211609, | |
| "grad_norm": 1.7280914783545198, | |
| "learning_rate": 5.7955563296334664e-08, | |
| "loss": 1.0984, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8508449669360764, | |
| "grad_norm": 1.748415591220106, | |
| "learning_rate": 5.6846686641138394e-08, | |
| "loss": 1.0084, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.852314474650992, | |
| "grad_norm": 1.772067208429697, | |
| "learning_rate": 5.5747881206208936e-08, | |
| "loss": 1.0337, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8537839823659075, | |
| "grad_norm": 1.794873769633449, | |
| "learning_rate": 5.465917196329106e-08, | |
| "loss": 1.0163, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.855253490080823, | |
| "grad_norm": 1.65717460494895, | |
| "learning_rate": 5.3580583654681266e-08, | |
| "loss": 0.9955, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.8567229977957385, | |
| "grad_norm": 1.8162911342334482, | |
| "learning_rate": 5.251214079266475e-08, | |
| "loss": 1.0089, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.858192505510654, | |
| "grad_norm": 1.7542998181212444, | |
| "learning_rate": 5.1453867658958704e-08, | |
| "loss": 1.0556, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8596620132255695, | |
| "grad_norm": 1.8004202990199547, | |
| "learning_rate": 5.0405788304160426e-08, | |
| "loss": 1.0302, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.861131520940485, | |
| "grad_norm": 1.8579500585467426, | |
| "learning_rate": 4.936792654720029e-08, | |
| "loss": 0.9883, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8626010286554004, | |
| "grad_norm": 1.8684587727448756, | |
| "learning_rate": 4.8340305974801266e-08, | |
| "loss": 1.0037, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.864070536370316, | |
| "grad_norm": 1.7812638910539453, | |
| "learning_rate": 4.7322949940942325e-08, | |
| "loss": 0.9856, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8655400440852314, | |
| "grad_norm": 1.7535934340855097, | |
| "learning_rate": 4.63158815663276e-08, | |
| "loss": 0.9972, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.8670095518001469, | |
| "grad_norm": 1.6677865713866784, | |
| "learning_rate": 4.53191237378614e-08, | |
| "loss": 0.9824, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8684790595150624, | |
| "grad_norm": 1.7665411769395538, | |
| "learning_rate": 4.433269910812759e-08, | |
| "loss": 1.0083, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.8699485672299779, | |
| "grad_norm": 1.7251596215832548, | |
| "learning_rate": 4.335663009487511e-08, | |
| "loss": 1.1064, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8714180749448934, | |
| "grad_norm": 1.7761940662623423, | |
| "learning_rate": 4.2390938880508595e-08, | |
| "loss": 0.9743, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.8728875826598089, | |
| "grad_norm": 1.7665619723627777, | |
| "learning_rate": 4.143564741158362e-08, | |
| "loss": 1.0002, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8743570903747244, | |
| "grad_norm": 1.7774593865163342, | |
| "learning_rate": 4.0490777398308753e-08, | |
| "loss": 1.1467, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.8758265980896399, | |
| "grad_norm": 1.8769247217431873, | |
| "learning_rate": 3.955635031405169e-08, | |
| "loss": 1.0388, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.8772961058045555, | |
| "grad_norm": 1.696854448182369, | |
| "learning_rate": 3.86323873948512e-08, | |
| "loss": 1.1079, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.878765613519471, | |
| "grad_norm": 1.740381033337976, | |
| "learning_rate": 3.771890963893476e-08, | |
| "loss": 1.0556, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8802351212343865, | |
| "grad_norm": 1.8733585897024647, | |
| "learning_rate": 3.681593780624137e-08, | |
| "loss": 1.0254, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.881704628949302, | |
| "grad_norm": 1.7391199819992769, | |
| "learning_rate": 3.5923492417949285e-08, | |
| "loss": 1.0785, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8831741366642175, | |
| "grad_norm": 1.768770527552518, | |
| "learning_rate": 3.5041593756010234e-08, | |
| "loss": 0.9474, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.884643644379133, | |
| "grad_norm": 1.7597254899905834, | |
| "learning_rate": 3.417026186268829e-08, | |
| "loss": 1.0168, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8861131520940485, | |
| "grad_norm": 1.766391016102045, | |
| "learning_rate": 3.3309516540104e-08, | |
| "loss": 1.0638, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.887582659808964, | |
| "grad_norm": 1.7917528115242387, | |
| "learning_rate": 3.2459377349784986e-08, | |
| "loss": 1.0187, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8890521675238795, | |
| "grad_norm": 1.8178797362114174, | |
| "learning_rate": 3.1619863612221075e-08, | |
| "loss": 1.1161, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.890521675238795, | |
| "grad_norm": 1.8510839028666215, | |
| "learning_rate": 3.079099440642496e-08, | |
| "loss": 1.1099, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8919911829537105, | |
| "grad_norm": 1.8373581400255845, | |
| "learning_rate": 2.997278856949914e-08, | |
| "loss": 1.0275, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.893460690668626, | |
| "grad_norm": 1.7886561701096182, | |
| "learning_rate": 2.916526469620756e-08, | |
| "loss": 1.0033, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.8949301983835415, | |
| "grad_norm": 1.807687857402709, | |
| "learning_rate": 2.836844113855269e-08, | |
| "loss": 1.1667, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.896399706098457, | |
| "grad_norm": 1.9522297555824326, | |
| "learning_rate": 2.758233600535914e-08, | |
| "loss": 1.0008, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8978692138133725, | |
| "grad_norm": 1.746748545987193, | |
| "learning_rate": 2.6806967161861593e-08, | |
| "loss": 0.9866, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.899338721528288, | |
| "grad_norm": 1.8722599434434155, | |
| "learning_rate": 2.6042352229298902e-08, | |
| "loss": 1.0111, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9008082292432035, | |
| "grad_norm": 1.9844512513700931, | |
| "learning_rate": 2.5288508584513814e-08, | |
| "loss": 1.0208, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.9022777369581191, | |
| "grad_norm": 1.8438532955412186, | |
| "learning_rate": 2.4545453359557765e-08, | |
| "loss": 1.0134, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9037472446730346, | |
| "grad_norm": 1.997698936619193, | |
| "learning_rate": 2.3813203441301778e-08, | |
| "loss": 1.0442, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.9052167523879501, | |
| "grad_norm": 1.8633689608393182, | |
| "learning_rate": 2.3091775471052734e-08, | |
| "loss": 1.0536, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9066862601028656, | |
| "grad_norm": 1.803504912241452, | |
| "learning_rate": 2.2381185844174644e-08, | |
| "loss": 0.9618, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.9081557678177811, | |
| "grad_norm": 1.842132096723327, | |
| "learning_rate": 2.168145070971683e-08, | |
| "loss": 1.0246, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9096252755326966, | |
| "grad_norm": 1.6938059931200713, | |
| "learning_rate": 2.099258597004644e-08, | |
| "loss": 0.9949, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.9110947832476121, | |
| "grad_norm": 1.8449249437590283, | |
| "learning_rate": 2.031460728048695e-08, | |
| "loss": 0.9308, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9125642909625276, | |
| "grad_norm": 1.7680792178846148, | |
| "learning_rate": 1.9647530048962747e-08, | |
| "loss": 1.0323, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.914033798677443, | |
| "grad_norm": 1.7245217442805951, | |
| "learning_rate": 1.8991369435648774e-08, | |
| "loss": 1.066, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9155033063923586, | |
| "grad_norm": 1.6824175711845812, | |
| "learning_rate": 1.8346140352625883e-08, | |
| "loss": 0.9527, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.916972814107274, | |
| "grad_norm": 1.8432023340759272, | |
| "learning_rate": 1.771185746354209e-08, | |
| "loss": 1.0189, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9184423218221895, | |
| "grad_norm": 1.710169997477168, | |
| "learning_rate": 1.7088535183279407e-08, | |
| "loss": 1.0606, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.919911829537105, | |
| "grad_norm": 1.7851652504906605, | |
| "learning_rate": 1.647618767762593e-08, | |
| "loss": 1.0862, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9213813372520205, | |
| "grad_norm": 1.7742564953179376, | |
| "learning_rate": 1.5874828862954327e-08, | |
| "loss": 0.9628, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.922850844966936, | |
| "grad_norm": 1.7396155476524742, | |
| "learning_rate": 1.5284472405905247e-08, | |
| "loss": 1.0092, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9243203526818515, | |
| "grad_norm": 1.8451127978395603, | |
| "learning_rate": 1.4705131723076692e-08, | |
| "loss": 1.0866, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.925789860396767, | |
| "grad_norm": 1.7647190484524309, | |
| "learning_rate": 1.4136819980719472e-08, | |
| "loss": 0.9677, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9272593681116826, | |
| "grad_norm": 1.7313371584788355, | |
| "learning_rate": 1.3579550094437676e-08, | |
| "loss": 1.0212, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9287288758265981, | |
| "grad_norm": 1.8145993858567757, | |
| "learning_rate": 1.3033334728895119e-08, | |
| "loss": 1.1065, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9301983835415136, | |
| "grad_norm": 1.717794777422694, | |
| "learning_rate": 1.2498186297527802e-08, | |
| "loss": 1.0607, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9316678912564291, | |
| "grad_norm": 1.792322328153233, | |
| "learning_rate": 1.1974116962261527e-08, | |
| "loss": 1.0869, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9331373989713446, | |
| "grad_norm": 1.693093472386974, | |
| "learning_rate": 1.1461138633235611e-08, | |
| "loss": 1.0148, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.9346069066862601, | |
| "grad_norm": 1.6734060797181096, | |
| "learning_rate": 1.095926296853228e-08, | |
| "loss": 1.1259, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9360764144011756, | |
| "grad_norm": 1.7040871240195783, | |
| "learning_rate": 1.0468501373911532e-08, | |
| "loss": 1.0137, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.9375459221160911, | |
| "grad_norm": 1.7386557010935322, | |
| "learning_rate": 9.988865002552138e-09, | |
| "loss": 1.0393, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9390154298310066, | |
| "grad_norm": 1.7617286641427552, | |
| "learning_rate": 9.520364754798116e-09, | |
| "loss": 1.0118, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.9404849375459221, | |
| "grad_norm": 1.6838936053556308, | |
| "learning_rate": 9.06301127791087e-09, | |
| "loss": 1.0124, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9419544452608376, | |
| "grad_norm": 1.7685007619311106, | |
| "learning_rate": 8.61681496582739e-09, | |
| "loss": 1.0152, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.9434239529757531, | |
| "grad_norm": 1.7655039690066217, | |
| "learning_rate": 8.181785958923938e-09, | |
| "loss": 1.0482, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9448934606906686, | |
| "grad_norm": 1.7291794104286682, | |
| "learning_rate": 7.757934143785561e-09, | |
| "loss": 1.0601, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.9463629684055841, | |
| "grad_norm": 1.7987300815999252, | |
| "learning_rate": 7.345269152981614e-09, | |
| "loss": 1.0143, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9478324761204996, | |
| "grad_norm": 1.9004775415700146, | |
| "learning_rate": 6.943800364846653e-09, | |
| "loss": 1.0523, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.9493019838354151, | |
| "grad_norm": 1.8776605483787512, | |
| "learning_rate": 6.5535369032672095e-09, | |
| "loss": 1.0512, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9507714915503307, | |
| "grad_norm": 1.838241026881048, | |
| "learning_rate": 6.174487637474801e-09, | |
| "loss": 1.0331, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.9522409992652462, | |
| "grad_norm": 1.8662114005878103, | |
| "learning_rate": 5.806661181843919e-09, | |
| "loss": 1.0158, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9537105069801617, | |
| "grad_norm": 1.8290957478227903, | |
| "learning_rate": 5.450065895696632e-09, | |
| "loss": 1.0203, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.9551800146950772, | |
| "grad_norm": 1.816824005969282, | |
| "learning_rate": 5.1047098831125124e-09, | |
| "loss": 1.0538, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9566495224099927, | |
| "grad_norm": 1.7373242719164779, | |
| "learning_rate": 4.770600992744178e-09, | |
| "loss": 1.0476, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.9581190301249082, | |
| "grad_norm": 1.7624643989224076, | |
| "learning_rate": 4.4477468176393196e-09, | |
| "loss": 0.9904, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9595885378398237, | |
| "grad_norm": 1.6940800434963768, | |
| "learning_rate": 4.136154695068006e-09, | |
| "loss": 0.9917, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.9610580455547392, | |
| "grad_norm": 1.7767015691295511, | |
| "learning_rate": 3.8358317063557635e-09, | |
| "loss": 0.9986, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9625275532696547, | |
| "grad_norm": 1.8376231539249999, | |
| "learning_rate": 3.546784676722925e-09, | |
| "loss": 1.0045, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.9639970609845702, | |
| "grad_norm": 1.7864023251483983, | |
| "learning_rate": 3.2690201751292002e-09, | |
| "loss": 1.0509, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9654665686994857, | |
| "grad_norm": 1.7872367497837103, | |
| "learning_rate": 3.002544514124683e-09, | |
| "loss": 1.0038, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.9669360764144012, | |
| "grad_norm": 1.813415766447048, | |
| "learning_rate": 2.747363749706244e-09, | |
| "loss": 1.0775, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9684055841293167, | |
| "grad_norm": 1.6985870578620685, | |
| "learning_rate": 2.5034836811799744e-09, | |
| "loss": 0.9979, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.9698750918442322, | |
| "grad_norm": 1.799775560049143, | |
| "learning_rate": 2.2709098510292347e-09, | |
| "loss": 1.0341, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9713445995591476, | |
| "grad_norm": 1.79062920637246, | |
| "learning_rate": 2.049647544788813e-09, | |
| "loss": 0.9512, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.9728141072740631, | |
| "grad_norm": 1.7662794380324007, | |
| "learning_rate": 1.8397017909249634e-09, | |
| "loss": 1.0268, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9742836149889786, | |
| "grad_norm": 1.8355770556221809, | |
| "learning_rate": 1.6410773607206663e-09, | |
| "loss": 1.0576, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.9757531227038942, | |
| "grad_norm": 1.8043186627448684, | |
| "learning_rate": 1.4537787681677683e-09, | |
| "loss": 1.0364, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9772226304188097, | |
| "grad_norm": 1.7794287516609661, | |
| "learning_rate": 1.2778102698638993e-09, | |
| "loss": 1.0147, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.9786921381337252, | |
| "grad_norm": 1.7980949111509996, | |
| "learning_rate": 1.1131758649160494e-09, | |
| "loss": 1.0211, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9801616458486407, | |
| "grad_norm": 1.7978516181015844, | |
| "learning_rate": 9.598792948496414e-10, | |
| "loss": 1.026, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.9816311535635562, | |
| "grad_norm": 1.749274502717889, | |
| "learning_rate": 8.179240435232659e-10, | |
| "loss": 1.0138, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9831006612784717, | |
| "grad_norm": 1.6895077719396587, | |
| "learning_rate": 6.873133370498551e-10, | |
| "loss": 0.9675, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.9845701689933872, | |
| "grad_norm": 1.7863102632275802, | |
| "learning_rate": 5.680501437230755e-10, | |
| "loss": 1.039, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9860396767083027, | |
| "grad_norm": 1.8150425834625068, | |
| "learning_rate": 4.6013717395010365e-10, | |
| "loss": 1.0414, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.9875091844232182, | |
| "grad_norm": 1.8149761019770996, | |
| "learning_rate": 3.63576880189731e-10, | |
| "loss": 1.0658, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9889786921381337, | |
| "grad_norm": 1.773148981178707, | |
| "learning_rate": 2.783714568970197e-10, | |
| "loss": 1.0608, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.9904481998530492, | |
| "grad_norm": 1.8037996402075553, | |
| "learning_rate": 2.045228404731203e-10, | |
| "loss": 0.998, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9919177075679647, | |
| "grad_norm": 1.7297660404171118, | |
| "learning_rate": 1.4203270922125143e-10, | |
| "loss": 0.9825, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.9933872152828802, | |
| "grad_norm": 1.9244611901290156, | |
| "learning_rate": 9.090248330889671e-11, | |
| "loss": 1.0265, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9948567229977957, | |
| "grad_norm": 1.8339125626229225, | |
| "learning_rate": 5.1133324735164183e-11, | |
| "loss": 0.9576, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.9963262307127112, | |
| "grad_norm": 1.8612723910880795, | |
| "learning_rate": 2.2726137304529546e-11, | |
| "loss": 0.9926, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.9977957384276267, | |
| "grad_norm": 1.7458716115831705, | |
| "learning_rate": 5.6815666063525505e-12, | |
| "loss": 1.0218, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.9992652461425422, | |
| "grad_norm": 1.7932712816026757, | |
| "learning_rate": 0.0, | |
| "loss": 1.0722, | |
| "step": 680 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 680, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 799593529671680.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |