| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 716, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006985679357317499, |
| "grad_norm": 9.417793273925781, |
| "learning_rate": 6.153846153846155e-07, |
| "loss": 0.4856, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013971358714634998, |
| "grad_norm": 7.6536865234375, |
| "learning_rate": 1.3846153846153848e-06, |
| "loss": 0.4457, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.020957038071952497, |
| "grad_norm": 6.9425950050354, |
| "learning_rate": 2.153846153846154e-06, |
| "loss": 0.4359, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.027942717429269997, |
| "grad_norm": 7.008752822875977, |
| "learning_rate": 2.9230769230769236e-06, |
| "loss": 0.4247, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0349283967865875, |
| "grad_norm": 7.943561553955078, |
| "learning_rate": 3.692307692307693e-06, |
| "loss": 0.4144, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.041914076143904994, |
| "grad_norm": 8.960087776184082, |
| "learning_rate": 4.461538461538462e-06, |
| "loss": 0.4095, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0488997555012225, |
| "grad_norm": 9.126522064208984, |
| "learning_rate": 5.230769230769232e-06, |
| "loss": 0.4195, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.055885434858539994, |
| "grad_norm": 6.81967306137085, |
| "learning_rate": 6e-06, |
| "loss": 0.409, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06287111421585749, |
| "grad_norm": 9.193132400512695, |
| "learning_rate": 6.76923076923077e-06, |
| "loss": 0.4256, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.069856793573175, |
| "grad_norm": 7.224976062774658, |
| "learning_rate": 7.538461538461539e-06, |
| "loss": 0.41, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07684247293049248, |
| "grad_norm": 7.260319232940674, |
| "learning_rate": 8.307692307692309e-06, |
| "loss": 0.4534, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.08382815228780999, |
| "grad_norm": 8.081445693969727, |
| "learning_rate": 9.076923076923078e-06, |
| "loss": 0.4435, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09081383164512749, |
| "grad_norm": 8.150150299072266, |
| "learning_rate": 9.846153846153848e-06, |
| "loss": 0.4457, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.097799511002445, |
| "grad_norm": 6.927804946899414, |
| "learning_rate": 9.999909012888162e-06, |
| "loss": 0.4559, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10478519035976248, |
| "grad_norm": 7.140408039093018, |
| "learning_rate": 9.999539383421715e-06, |
| "loss": 0.4217, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11177086971707999, |
| "grad_norm": 7.418540000915527, |
| "learning_rate": 9.998885445909666e-06, |
| "loss": 0.4857, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11875654907439749, |
| "grad_norm": 8.253525733947754, |
| "learning_rate": 9.997947237539373e-06, |
| "loss": 0.4863, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.12574222843171498, |
| "grad_norm": 6.550653457641602, |
| "learning_rate": 9.996724811663797e-06, |
| "loss": 0.4663, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13272790778903248, |
| "grad_norm": 6.81130313873291, |
| "learning_rate": 9.99521823779844e-06, |
| "loss": 0.4684, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.13971358714635, |
| "grad_norm": 7.913576602935791, |
| "learning_rate": 9.99342760161741e-06, |
| "loss": 0.5049, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1466992665036675, |
| "grad_norm": 8.523401260375977, |
| "learning_rate": 9.991353004948538e-06, |
| "loss": 0.4635, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.15368494586098497, |
| "grad_norm": 6.084781169891357, |
| "learning_rate": 9.988994565767604e-06, |
| "loss": 0.4752, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.16067062521830247, |
| "grad_norm": 5.975230693817139, |
| "learning_rate": 9.986352418191608e-06, |
| "loss": 0.4722, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.16765630457561997, |
| "grad_norm": 6.941432476043701, |
| "learning_rate": 9.983426712471151e-06, |
| "loss": 0.4991, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.17464198393293748, |
| "grad_norm": 6.190479755401611, |
| "learning_rate": 9.9802176149819e-06, |
| "loss": 0.4821, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.18162766329025498, |
| "grad_norm": 6.3899006843566895, |
| "learning_rate": 9.976725308215109e-06, |
| "loss": 0.5163, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.18861334264757249, |
| "grad_norm": 5.7873430252075195, |
| "learning_rate": 9.97294999076726e-06, |
| "loss": 0.4517, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.19559902200489, |
| "grad_norm": 6.523060321807861, |
| "learning_rate": 9.968891877328752e-06, |
| "loss": 0.4632, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.20258470136220746, |
| "grad_norm": 7.010469436645508, |
| "learning_rate": 9.964551198671709e-06, |
| "loss": 0.4643, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.20957038071952497, |
| "grad_norm": 6.9780473709106445, |
| "learning_rate": 9.95992820163684e-06, |
| "loss": 0.4737, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.21655606007684247, |
| "grad_norm": 6.335629463195801, |
| "learning_rate": 9.955023149119413e-06, |
| "loss": 0.4552, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.22354173943415998, |
| "grad_norm": 5.958488464355469, |
| "learning_rate": 9.949836320054309e-06, |
| "loss": 0.4553, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.23052741879147748, |
| "grad_norm": 6.195608139038086, |
| "learning_rate": 9.944368009400145e-06, |
| "loss": 0.5117, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.23751309814879498, |
| "grad_norm": 6.441509246826172, |
| "learning_rate": 9.93861852812251e-06, |
| "loss": 0.4652, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.24449877750611246, |
| "grad_norm": 5.994947910308838, |
| "learning_rate": 9.932588203176281e-06, |
| "loss": 0.4832, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.25148445686342996, |
| "grad_norm": 6.672873020172119, |
| "learning_rate": 9.92627737748703e-06, |
| "loss": 0.4827, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2584701362207475, |
| "grad_norm": 6.571279048919678, |
| "learning_rate": 9.919686409931523e-06, |
| "loss": 0.5003, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.26545581557806497, |
| "grad_norm": 6.049185276031494, |
| "learning_rate": 9.912815675317307e-06, |
| "loss": 0.4905, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.27244149493538244, |
| "grad_norm": 6.473181247711182, |
| "learning_rate": 9.905665564361403e-06, |
| "loss": 0.4843, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2794271742927, |
| "grad_norm": 7.581283092498779, |
| "learning_rate": 9.898236483668086e-06, |
| "loss": 0.5097, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.28641285365001745, |
| "grad_norm": 7.333038806915283, |
| "learning_rate": 9.890528855705753e-06, |
| "loss": 0.4698, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.293398533007335, |
| "grad_norm": 6.216223239898682, |
| "learning_rate": 9.882543118782913e-06, |
| "loss": 0.5034, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.30038421236465246, |
| "grad_norm": 6.861793041229248, |
| "learning_rate": 9.874279727023253e-06, |
| "loss": 0.4756, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.30736989172196993, |
| "grad_norm": 6.37476110458374, |
| "learning_rate": 9.865739150339815e-06, |
| "loss": 0.5117, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.31435557107928747, |
| "grad_norm": 5.816511154174805, |
| "learning_rate": 9.856921874408272e-06, |
| "loss": 0.4925, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.32134125043660494, |
| "grad_norm": 6.3242692947387695, |
| "learning_rate": 9.847828400639317e-06, |
| "loss": 0.4679, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3283269297939225, |
| "grad_norm": 6.17088508605957, |
| "learning_rate": 9.838459246150138e-06, |
| "loss": 0.4646, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.33531260915123995, |
| "grad_norm": 7.1427812576293945, |
| "learning_rate": 9.828814943735019e-06, |
| "loss": 0.4633, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3422982885085575, |
| "grad_norm": 6.307351112365723, |
| "learning_rate": 9.818896041835036e-06, |
| "loss": 0.4586, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.34928396786587496, |
| "grad_norm": 6.033569812774658, |
| "learning_rate": 9.808703104506874e-06, |
| "loss": 0.5144, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.35626964722319243, |
| "grad_norm": 6.253153324127197, |
| "learning_rate": 9.798236711390754e-06, |
| "loss": 0.4936, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.36325532658050996, |
| "grad_norm": 6.369323253631592, |
| "learning_rate": 9.787497457677456e-06, |
| "loss": 0.5142, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.37024100593782744, |
| "grad_norm": 7.059694766998291, |
| "learning_rate": 9.776485954074492e-06, |
| "loss": 0.508, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.37722668529514497, |
| "grad_norm": 5.437386989593506, |
| "learning_rate": 9.765202826771358e-06, |
| "loss": 0.4517, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.38421236465246245, |
| "grad_norm": 6.353536605834961, |
| "learning_rate": 9.753648717403944e-06, |
| "loss": 0.5177, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.39119804400978, |
| "grad_norm": 6.7363481521606445, |
| "learning_rate": 9.741824283018022e-06, |
| "loss": 0.4768, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.39818372336709745, |
| "grad_norm": 6.804571151733398, |
| "learning_rate": 9.729730196031908e-06, |
| "loss": 0.5009, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.40516940272441493, |
| "grad_norm": 6.188082695007324, |
| "learning_rate": 9.717367144198209e-06, |
| "loss": 0.4975, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.41215508208173246, |
| "grad_norm": 6.352089881896973, |
| "learning_rate": 9.704735830564707e-06, |
| "loss": 0.458, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.41914076143904994, |
| "grad_norm": 5.823917388916016, |
| "learning_rate": 9.6918369734344e-06, |
| "loss": 0.4863, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.42612644079636747, |
| "grad_norm": 6.301001071929932, |
| "learning_rate": 9.678671306324627e-06, |
| "loss": 0.5175, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.43311212015368494, |
| "grad_norm": 6.185693740844727, |
| "learning_rate": 9.665239577925384e-06, |
| "loss": 0.4844, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4400977995110024, |
| "grad_norm": 6.580015182495117, |
| "learning_rate": 9.65154255205672e-06, |
| "loss": 0.4907, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.44708347886831995, |
| "grad_norm": 5.962856769561768, |
| "learning_rate": 9.637581007625328e-06, |
| "loss": 0.4552, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4540691582256374, |
| "grad_norm": 6.2723774909973145, |
| "learning_rate": 9.623355738580226e-06, |
| "loss": 0.4994, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.46105483758295496, |
| "grad_norm": 6.140941143035889, |
| "learning_rate": 9.608867553867628e-06, |
| "loss": 0.4829, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.46804051694027243, |
| "grad_norm": 6.278982639312744, |
| "learning_rate": 9.594117277384928e-06, |
| "loss": 0.5118, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.47502619629758996, |
| "grad_norm": 6.221822261810303, |
| "learning_rate": 9.579105747933858e-06, |
| "loss": 0.4903, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.48201187565490744, |
| "grad_norm": 6.153563976287842, |
| "learning_rate": 9.563833819172777e-06, |
| "loss": 0.4729, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.4889975550122249, |
| "grad_norm": 5.723773956298828, |
| "learning_rate": 9.548302359568136e-06, |
| "loss": 0.536, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.49598323436954245, |
| "grad_norm": 5.7412238121032715, |
| "learning_rate": 9.532512252345086e-06, |
| "loss": 0.4818, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5029689137268599, |
| "grad_norm": 5.776915073394775, |
| "learning_rate": 9.516464395437251e-06, |
| "loss": 0.4818, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5099545930841775, |
| "grad_norm": 6.4714741706848145, |
| "learning_rate": 9.50015970143567e-06, |
| "loss": 0.486, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.516940272441495, |
| "grad_norm": 5.570414066314697, |
| "learning_rate": 9.483599097536895e-06, |
| "loss": 0.4821, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5239259517988124, |
| "grad_norm": 5.86240816116333, |
| "learning_rate": 9.466783525490271e-06, |
| "loss": 0.5014, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.5309116311561299, |
| "grad_norm": 5.690156936645508, |
| "learning_rate": 9.449713941544375e-06, |
| "loss": 0.505, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5378973105134475, |
| "grad_norm": 5.273125648498535, |
| "learning_rate": 9.432391316392639e-06, |
| "loss": 0.4738, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.5448829898707649, |
| "grad_norm": 6.2202887535095215, |
| "learning_rate": 9.414816635118154e-06, |
| "loss": 0.5023, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5518686692280824, |
| "grad_norm": 6.270415782928467, |
| "learning_rate": 9.396990897137647e-06, |
| "loss": 0.4772, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5588543485854, |
| "grad_norm": 5.550614356994629, |
| "learning_rate": 9.378915116144646e-06, |
| "loss": 0.4556, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5658400279427175, |
| "grad_norm": 5.853529930114746, |
| "learning_rate": 9.360590320051844e-06, |
| "loss": 0.4942, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5728257073000349, |
| "grad_norm": 5.692981243133545, |
| "learning_rate": 9.342017550932627e-06, |
| "loss": 0.4772, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5798113866573524, |
| "grad_norm": 5.6134185791015625, |
| "learning_rate": 9.323197864961835e-06, |
| "loss": 0.4245, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.58679706601467, |
| "grad_norm": 6.195576190948486, |
| "learning_rate": 9.304132332355685e-06, |
| "loss": 0.5083, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5937827453719874, |
| "grad_norm": 6.21861457824707, |
| "learning_rate": 9.28482203731092e-06, |
| "loss": 0.5366, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6007684247293049, |
| "grad_norm": 5.109756946563721, |
| "learning_rate": 9.265268077943152e-06, |
| "loss": 0.4774, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6077541040866224, |
| "grad_norm": 6.137323379516602, |
| "learning_rate": 9.245471566224416e-06, |
| "loss": 0.5125, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6147397834439399, |
| "grad_norm": 5.771416187286377, |
| "learning_rate": 9.225433627919927e-06, |
| "loss": 0.5453, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6217254628012574, |
| "grad_norm": 5.52296781539917, |
| "learning_rate": 9.20515540252408e-06, |
| "loss": 0.4663, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6287111421585749, |
| "grad_norm": 6.380407333374023, |
| "learning_rate": 9.184638043195628e-06, |
| "loss": 0.4752, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6356968215158925, |
| "grad_norm": 5.940698623657227, |
| "learning_rate": 9.16388271669213e-06, |
| "loss": 0.4309, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6426825008732099, |
| "grad_norm": 6.604904651641846, |
| "learning_rate": 9.142890603303573e-06, |
| "loss": 0.5043, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6496681802305274, |
| "grad_norm": 6.345200061798096, |
| "learning_rate": 9.121662896785285e-06, |
| "loss": 0.4991, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.656653859587845, |
| "grad_norm": 5.294460773468018, |
| "learning_rate": 9.100200804290014e-06, |
| "loss": 0.4788, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6636395389451624, |
| "grad_norm": 5.905223846435547, |
| "learning_rate": 9.078505546299317e-06, |
| "loss": 0.4871, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6706252183024799, |
| "grad_norm": 4.967826843261719, |
| "learning_rate": 9.056578356554124e-06, |
| "loss": 0.5174, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6776108976597974, |
| "grad_norm": 5.444486618041992, |
| "learning_rate": 9.034420481984604e-06, |
| "loss": 0.4889, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.684596577017115, |
| "grad_norm": 5.278167724609375, |
| "learning_rate": 9.012033182639238e-06, |
| "loss": 0.4793, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6915822563744324, |
| "grad_norm": 6.902711391448975, |
| "learning_rate": 8.989417731613169e-06, |
| "loss": 0.5259, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6985679357317499, |
| "grad_norm": 6.086877346038818, |
| "learning_rate": 8.966575414975813e-06, |
| "loss": 0.5203, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7055536150890674, |
| "grad_norm": 6.7947564125061035, |
| "learning_rate": 8.943507531697715e-06, |
| "loss": 0.4609, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7125392944463849, |
| "grad_norm": 6.169280052185059, |
| "learning_rate": 8.920215393576685e-06, |
| "loss": 0.4799, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7195249738037024, |
| "grad_norm": 6.514042377471924, |
| "learning_rate": 8.8967003251632e-06, |
| "loss": 0.5, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.7265106531610199, |
| "grad_norm": 7.015414714813232, |
| "learning_rate": 8.872963663685075e-06, |
| "loss": 0.5226, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7334963325183375, |
| "grad_norm": 4.968021869659424, |
| "learning_rate": 8.849006758971429e-06, |
| "loss": 0.4697, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7404820118756549, |
| "grad_norm": 6.74957799911499, |
| "learning_rate": 8.824830973375917e-06, |
| "loss": 0.5045, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7474676912329724, |
| "grad_norm": 5.4804816246032715, |
| "learning_rate": 8.800437681699264e-06, |
| "loss": 0.465, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.7544533705902899, |
| "grad_norm": 5.658529281616211, |
| "learning_rate": 8.775828271111074e-06, |
| "loss": 0.474, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7614390499476074, |
| "grad_norm": 5.418574333190918, |
| "learning_rate": 8.751004141070958e-06, |
| "loss": 0.4904, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.7684247293049249, |
| "grad_norm": 5.922712802886963, |
| "learning_rate": 8.72596670324894e-06, |
| "loss": 0.4721, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7754104086622424, |
| "grad_norm": 6.200154781341553, |
| "learning_rate": 8.70071738144519e-06, |
| "loss": 0.4902, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.78239608801956, |
| "grad_norm": 5.705479145050049, |
| "learning_rate": 8.67525761150905e-06, |
| "loss": 0.4726, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7893817673768774, |
| "grad_norm": 6.879328727722168, |
| "learning_rate": 8.649588841257383e-06, |
| "loss": 0.4757, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7963674467341949, |
| "grad_norm": 7.012612819671631, |
| "learning_rate": 8.623712530392244e-06, |
| "loss": 0.4522, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8033531260915124, |
| "grad_norm": 6.293166160583496, |
| "learning_rate": 8.597630150417867e-06, |
| "loss": 0.4651, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.8103388054488299, |
| "grad_norm": 5.768970012664795, |
| "learning_rate": 8.571343184556985e-06, |
| "loss": 0.4277, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8173244848061474, |
| "grad_norm": 6.452169895172119, |
| "learning_rate": 8.544853127666483e-06, |
| "loss": 0.5337, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.8243101641634649, |
| "grad_norm": 5.4347615242004395, |
| "learning_rate": 8.518161486152406e-06, |
| "loss": 0.468, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8312958435207825, |
| "grad_norm": 5.982383728027344, |
| "learning_rate": 8.491269777884264e-06, |
| "loss": 0.4511, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.8382815228780999, |
| "grad_norm": 5.2124457359313965, |
| "learning_rate": 8.464179532108742e-06, |
| "loss": 0.4667, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8452672022354174, |
| "grad_norm": 5.130034923553467, |
| "learning_rate": 8.436892289362728e-06, |
| "loss": 0.4551, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.8522528815927349, |
| "grad_norm": 5.86264705657959, |
| "learning_rate": 8.409409601385702e-06, |
| "loss": 0.5026, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8592385609500524, |
| "grad_norm": 6.521675109863281, |
| "learning_rate": 8.381733031031503e-06, |
| "loss": 0.5017, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.8662242403073699, |
| "grad_norm": 5.794342994689941, |
| "learning_rate": 8.353864152179445e-06, |
| "loss": 0.4638, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8732099196646874, |
| "grad_norm": 5.721254348754883, |
| "learning_rate": 8.325804549644823e-06, |
| "loss": 0.4652, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.8801955990220048, |
| "grad_norm": 5.914518356323242, |
| "learning_rate": 8.297555819088787e-06, |
| "loss": 0.4613, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8871812783793224, |
| "grad_norm": 5.939746856689453, |
| "learning_rate": 8.269119566927599e-06, |
| "loss": 0.4599, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8941669577366399, |
| "grad_norm": 5.655862808227539, |
| "learning_rate": 8.240497410241286e-06, |
| "loss": 0.4602, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9011526370939574, |
| "grad_norm": 6.510814666748047, |
| "learning_rate": 8.211690976681678e-06, |
| "loss": 0.4582, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.9081383164512749, |
| "grad_norm": 5.543778419494629, |
| "learning_rate": 8.18270190437985e-06, |
| "loss": 0.4777, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9151239958085924, |
| "grad_norm": 5.059428691864014, |
| "learning_rate": 8.153531841852969e-06, |
| "loss": 0.4756, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.9221096751659099, |
| "grad_norm": 5.040985584259033, |
| "learning_rate": 8.12418244791054e-06, |
| "loss": 0.4742, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9290953545232273, |
| "grad_norm": 5.490272521972656, |
| "learning_rate": 8.094655391560086e-06, |
| "loss": 0.4614, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.9360810338805449, |
| "grad_norm": 5.865604877471924, |
| "learning_rate": 8.064952351912227e-06, |
| "loss": 0.4671, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9430667132378624, |
| "grad_norm": 5.491858005523682, |
| "learning_rate": 8.035075018085204e-06, |
| "loss": 0.4624, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.9500523925951799, |
| "grad_norm": 5.839197158813477, |
| "learning_rate": 8.005025089108812e-06, |
| "loss": 0.4811, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9570380719524973, |
| "grad_norm": 6.362710952758789, |
| "learning_rate": 7.974804273827791e-06, |
| "loss": 0.4829, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.9640237513098149, |
| "grad_norm": 5.933167457580566, |
| "learning_rate": 7.944414290804647e-06, |
| "loss": 0.4895, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9710094306671324, |
| "grad_norm": 6.338855743408203, |
| "learning_rate": 7.913856868221922e-06, |
| "loss": 0.4729, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.9779951100244498, |
| "grad_norm": 5.022581577301025, |
| "learning_rate": 7.883133743783918e-06, |
| "loss": 0.4772, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9849807893817674, |
| "grad_norm": 5.898752212524414, |
| "learning_rate": 7.852246664617878e-06, |
| "loss": 0.457, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.9919664687390849, |
| "grad_norm": 4.9067864418029785, |
| "learning_rate": 7.821197387174638e-06, |
| "loss": 0.4577, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9989521480964024, |
| "grad_norm": 4.425784111022949, |
| "learning_rate": 7.78998767712873e-06, |
| "loss": 0.4683, |
| "step": 715 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2148, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.684922567617413e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|