{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.036250196710082,
  "eval_steps": 500,
  "global_step": 82000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0037027780092014034,
      "grad_norm": 21.161716771412177,
      "learning_rate": 3.6657162957751695e-08,
      "loss": 2.6783,
      "step": 100
    },
    {
      "epoch": 0.007405556018402807,
      "grad_norm": 17.61802823072404,
      "learning_rate": 7.368460028881402e-08,
      "loss": 2.6426,
      "step": 200
    },
    {
      "epoch": 0.01110833402760421,
      "grad_norm": 8.635640044259958,
      "learning_rate": 1.1071203761987633e-07,
      "loss": 2.5725,
      "step": 300
    },
    {
      "epoch": 0.014811112036805614,
      "grad_norm": 4.290727817061978,
      "learning_rate": 1.4773947495093866e-07,
      "loss": 2.4757,
      "step": 400
    },
    {
      "epoch": 0.018513890046007016,
      "grad_norm": 3.791476034639625,
      "learning_rate": 1.8476691228200099e-07,
      "loss": 2.367,
      "step": 500
    },
    {
      "epoch": 0.02221666805520842,
      "grad_norm": 4.547923325980467,
      "learning_rate": 2.2179434961306329e-07,
      "loss": 2.3288,
      "step": 600
    },
    {
      "epoch": 0.025919446064409823,
      "grad_norm": 4.402404364420564,
      "learning_rate": 2.5882178694412564e-07,
      "loss": 2.2759,
      "step": 700
    },
    {
      "epoch": 0.029622224073611227,
      "grad_norm": 3.6276565743103535,
      "learning_rate": 2.958492242751879e-07,
      "loss": 2.2766,
      "step": 800
    },
    {
      "epoch": 0.03332500208281263,
      "grad_norm": 4.170244852808218,
      "learning_rate": 3.3287666160625024e-07,
      "loss": 2.2338,
      "step": 900
    },
    {
      "epoch": 0.03702778009201403,
      "grad_norm": 4.412766673186702,
      "learning_rate": 3.699040989373126e-07,
      "loss": 2.1561,
      "step": 1000
    },
    {
      "epoch": 0.04073055810121544,
      "grad_norm": 4.628401400135504,
      "learning_rate": 4.069315362683749e-07,
      "loss": 2.1544,
      "step": 1100
    },
    {
      "epoch": 0.04443333611041684,
      "grad_norm": 3.7081302230204867,
      "learning_rate": 4.439589735994372e-07,
      "loss": 2.106,
      "step": 1200
    },
    {
      "epoch": 0.04813611411961825,
      "grad_norm": 4.2137908215712905,
      "learning_rate": 4.809864109304995e-07,
      "loss": 2.0794,
      "step": 1300
    },
    {
      "epoch": 0.05183889212881965,
      "grad_norm": 4.60851346975717,
      "learning_rate": 5.180138482615619e-07,
      "loss": 2.0641,
      "step": 1400
    },
    {
      "epoch": 0.05554167013802105,
      "grad_norm": 4.313023013354129,
      "learning_rate": 5.550412855926242e-07,
      "loss": 1.9983,
      "step": 1500
    },
    {
      "epoch": 0.059244448147222455,
      "grad_norm": 3.966177577383383,
      "learning_rate": 5.920687229236865e-07,
      "loss": 2.0366,
      "step": 1600
    },
    {
      "epoch": 0.06294722615642385,
      "grad_norm": 4.661276659671688,
      "learning_rate": 6.290961602547487e-07,
      "loss": 2.0112,
      "step": 1700
    },
    {
      "epoch": 0.06665000416562526,
      "grad_norm": 3.856161388179088,
      "learning_rate": 6.661235975858112e-07,
      "loss": 1.9547,
      "step": 1800
    },
    {
      "epoch": 0.07035278217482667,
      "grad_norm": 4.5116015322440015,
      "learning_rate": 7.031510349168734e-07,
      "loss": 1.9599,
      "step": 1900
    },
    {
      "epoch": 0.07405556018402806,
      "grad_norm": 3.9605628970368416,
      "learning_rate": 7.401784722479357e-07,
      "loss": 1.9673,
      "step": 2000
    },
    {
      "epoch": 0.07775833819322947,
      "grad_norm": 4.534104810402031,
      "learning_rate": 7.772059095789982e-07,
      "loss": 1.8987,
      "step": 2100
    },
    {
      "epoch": 0.08146111620243088,
      "grad_norm": 3.7768720377686815,
      "learning_rate": 8.142333469100604e-07,
      "loss": 1.9039,
      "step": 2200
    },
    {
      "epoch": 0.08516389421163227,
      "grad_norm": 3.709870476374346,
      "learning_rate": 8.512607842411227e-07,
      "loss": 1.8572,
      "step": 2300
    },
    {
      "epoch": 0.08886667222083368,
      "grad_norm": 3.861469813158571,
      "learning_rate": 8.882882215721851e-07,
      "loss": 1.831,
      "step": 2400
    },
    {
      "epoch": 0.09256945023003509,
      "grad_norm": 4.046784580473015,
      "learning_rate": 9.253156589032473e-07,
      "loss": 1.856,
      "step": 2500
    },
    {
      "epoch": 0.0962722282392365,
      "grad_norm": 4.439862554726597,
      "learning_rate": 9.623430962343098e-07,
      "loss": 1.8344,
      "step": 2600
    },
    {
      "epoch": 0.09997500624843789,
      "grad_norm": 6.222158633649652,
      "learning_rate": 9.993705335653721e-07,
      "loss": 1.8408,
      "step": 2700
    },
    {
      "epoch": 0.1036777842576393,
      "grad_norm": 3.7129912385070503,
      "learning_rate": 1.0363979708964342e-06,
      "loss": 1.8195,
      "step": 2800
    },
    {
      "epoch": 0.1073805622668407,
      "grad_norm": 4.073889809938404,
      "learning_rate": 1.0734254082274968e-06,
      "loss": 1.7714,
      "step": 2900
    },
    {
      "epoch": 0.1110833402760421,
      "grad_norm": 3.8485467079112805,
      "learning_rate": 1.1104528455585589e-06,
      "loss": 1.7782,
      "step": 3000
    },
    {
      "epoch": 0.1147861182852435,
      "grad_norm": 3.548076088775858,
      "learning_rate": 1.1474802828896212e-06,
      "loss": 1.7646,
      "step": 3100
    },
    {
      "epoch": 0.11848889629444491,
      "grad_norm": 3.754340846452376,
      "learning_rate": 1.1845077202206837e-06,
      "loss": 1.7733,
      "step": 3200
    },
    {
      "epoch": 0.12219167430364632,
      "grad_norm": 6.281470572024916,
      "learning_rate": 1.2215351575517459e-06,
      "loss": 1.7361,
      "step": 3300
    },
    {
      "epoch": 0.1258944523128477,
      "grad_norm": 4.065489405313006,
      "learning_rate": 1.2585625948828082e-06,
      "loss": 1.7397,
      "step": 3400
    },
    {
      "epoch": 0.12959723032204912,
      "grad_norm": 4.260390115086277,
      "learning_rate": 1.2955900322138707e-06,
      "loss": 1.7571,
      "step": 3500
    },
    {
      "epoch": 0.13330000833125052,
      "grad_norm": 5.9838104492225215,
      "learning_rate": 1.3326174695449328e-06,
      "loss": 1.7205,
      "step": 3600
    },
    {
      "epoch": 0.13700278634045193,
      "grad_norm": 4.7607112587769915,
      "learning_rate": 1.3696449068759952e-06,
      "loss": 1.713,
      "step": 3700
    },
    {
      "epoch": 0.14070556434965334,
      "grad_norm": 4.047013434162942,
      "learning_rate": 1.4066723442070577e-06,
      "loss": 1.7084,
      "step": 3800
    },
    {
      "epoch": 0.14440834235885472,
      "grad_norm": 6.154993338003737,
      "learning_rate": 1.4436997815381198e-06,
      "loss": 1.6784,
      "step": 3900
    },
    {
      "epoch": 0.14811112036805613,
      "grad_norm": 3.6172501531655614,
      "learning_rate": 1.4807272188691821e-06,
      "loss": 1.6514,
      "step": 4000
    },
    {
      "epoch": 0.15181389837725753,
      "grad_norm": 3.9661920321991757,
      "learning_rate": 1.5177546562002447e-06,
      "loss": 1.6642,
      "step": 4100
    },
    {
      "epoch": 0.15551667638645894,
      "grad_norm": 4.468519494026533,
      "learning_rate": 1.5547820935313068e-06,
      "loss": 1.6517,
      "step": 4200
    },
    {
      "epoch": 0.15921945439566035,
      "grad_norm": 4.809090349712038,
      "learning_rate": 1.5918095308623691e-06,
      "loss": 1.6456,
      "step": 4300
    },
    {
      "epoch": 0.16292223240486176,
      "grad_norm": 3.991862639554731,
      "learning_rate": 1.6288369681934317e-06,
      "loss": 1.6474,
      "step": 4400
    },
    {
      "epoch": 0.16662501041406316,
      "grad_norm": 4.719090103521477,
      "learning_rate": 1.6658644055244938e-06,
      "loss": 1.6042,
      "step": 4500
    },
    {
      "epoch": 0.17032778842326454,
      "grad_norm": 4.927706214636146,
      "learning_rate": 1.702891842855556e-06,
      "loss": 1.5918,
      "step": 4600
    },
    {
      "epoch": 0.17403056643246595,
      "grad_norm": 4.566524993389404,
      "learning_rate": 1.7399192801866186e-06,
      "loss": 1.6103,
      "step": 4700
    },
    {
      "epoch": 0.17773334444166736,
      "grad_norm": 4.908438965819459,
      "learning_rate": 1.7769467175176807e-06,
      "loss": 1.6101,
      "step": 4800
    },
    {
      "epoch": 0.18143612245086876,
      "grad_norm": 3.8011540864311866,
      "learning_rate": 1.813974154848743e-06,
      "loss": 1.6162,
      "step": 4900
    },
    {
      "epoch": 0.18513890046007017,
      "grad_norm": 4.241301056669864,
      "learning_rate": 1.8510015921798056e-06,
      "loss": 1.6117,
      "step": 5000
    },
    {
      "epoch": 0.18884167846927158,
      "grad_norm": 4.305210424709023,
      "learning_rate": 1.8880290295108677e-06,
      "loss": 1.5634,
      "step": 5100
    },
    {
      "epoch": 0.192544456478473,
      "grad_norm": 5.188453275523176,
      "learning_rate": 1.92505646684193e-06,
      "loss": 1.5839,
      "step": 5200
    },
    {
      "epoch": 0.19624723448767437,
      "grad_norm": 4.390695329489216,
      "learning_rate": 1.9620839041729924e-06,
      "loss": 1.5747,
      "step": 5300
    },
    {
      "epoch": 0.19995001249687577,
      "grad_norm": 4.581970539434446,
      "learning_rate": 1.999111341504055e-06,
      "loss": 1.5597,
      "step": 5400
    },
    {
      "epoch": 0.20365279050607718,
      "grad_norm": 4.0470801193804755,
      "learning_rate": 2.036138778835117e-06,
      "loss": 1.5275,
      "step": 5500
    },
    {
      "epoch": 0.2073555685152786,
      "grad_norm": 4.1287928029941385,
      "learning_rate": 2.073166216166179e-06,
      "loss": 1.5455,
      "step": 5600
    },
    {
      "epoch": 0.21105834652448,
      "grad_norm": 5.5739542390705274,
      "learning_rate": 2.1101936534972417e-06,
      "loss": 1.526,
      "step": 5700
    },
    {
      "epoch": 0.2147611245336814,
      "grad_norm": 4.072688346808655,
      "learning_rate": 2.147221090828304e-06,
      "loss": 1.5006,
      "step": 5800
    },
    {
      "epoch": 0.2184639025428828,
      "grad_norm": 5.331904305941977,
      "learning_rate": 2.1842485281593663e-06,
      "loss": 1.5417,
      "step": 5900
    },
    {
      "epoch": 0.2221666805520842,
      "grad_norm": 4.378202415797325,
      "learning_rate": 2.221275965490429e-06,
      "loss": 1.4885,
      "step": 6000
    },
    {
      "epoch": 0.2258694585612856,
      "grad_norm": 4.318453195850846,
      "learning_rate": 2.258303402821491e-06,
      "loss": 1.5359,
      "step": 6100
    },
    {
      "epoch": 0.229572236570487,
      "grad_norm": 4.475047536813248,
      "learning_rate": 2.295330840152553e-06,
      "loss": 1.5168,
      "step": 6200
    },
    {
      "epoch": 0.2332750145796884,
      "grad_norm": 4.665314820774367,
      "learning_rate": 2.3323582774836156e-06,
      "loss": 1.4688,
      "step": 6300
    },
    {
      "epoch": 0.23697779258888982,
      "grad_norm": 5.210970519844203,
      "learning_rate": 2.3693857148146778e-06,
      "loss": 1.4775,
      "step": 6400
    },
    {
      "epoch": 0.24068057059809123,
      "grad_norm": 4.410640014744449,
      "learning_rate": 2.4064131521457403e-06,
      "loss": 1.4738,
      "step": 6500
    },
    {
      "epoch": 0.24438334860729263,
      "grad_norm": 3.944391436658137,
      "learning_rate": 2.443440589476803e-06,
      "loss": 1.4469,
      "step": 6600
    },
    {
      "epoch": 0.248086126616494,
      "grad_norm": 4.96868756013431,
      "learning_rate": 2.480468026807865e-06,
      "loss": 1.4821,
      "step": 6700
    },
    {
      "epoch": 0.2517889046256954,
      "grad_norm": 4.462942085694235,
      "learning_rate": 2.517495464138927e-06,
      "loss": 1.4591,
      "step": 6800
    },
    {
      "epoch": 0.25549168263489686,
      "grad_norm": 4.378031684132469,
      "learning_rate": 2.5545229014699896e-06,
      "loss": 1.4578,
      "step": 6900
    },
    {
      "epoch": 0.25919446064409823,
      "grad_norm": 4.241655047493595,
      "learning_rate": 2.5915503388010517e-06,
      "loss": 1.4705,
      "step": 7000
    },
    {
      "epoch": 0.2628972386532996,
      "grad_norm": 3.981686552106327,
      "learning_rate": 2.6285777761321142e-06,
      "loss": 1.4538,
      "step": 7100
    },
    {
      "epoch": 0.26660001666250105,
      "grad_norm": 4.3607343276496575,
      "learning_rate": 2.6656052134631768e-06,
      "loss": 1.411,
      "step": 7200
    },
    {
      "epoch": 0.27030279467170243,
      "grad_norm": 4.807349625008967,
      "learning_rate": 2.7026326507942385e-06,
      "loss": 1.4205,
      "step": 7300
    },
    {
      "epoch": 0.27400557268090386,
      "grad_norm": 4.289974925492848,
      "learning_rate": 2.739660088125301e-06,
      "loss": 1.4374,
      "step": 7400
    },
    {
      "epoch": 0.27770835069010524,
      "grad_norm": 3.491997941988729,
      "learning_rate": 2.7766875254563636e-06,
      "loss": 1.4393,
      "step": 7500
    },
    {
      "epoch": 0.2814111286993067,
      "grad_norm": 4.317473381602871,
      "learning_rate": 2.8137149627874257e-06,
      "loss": 1.3965,
      "step": 7600
    },
    {
      "epoch": 0.28511390670850806,
      "grad_norm": 4.317169353881641,
      "learning_rate": 2.850742400118488e-06,
      "loss": 1.4139,
      "step": 7700
    },
    {
      "epoch": 0.28881668471770944,
      "grad_norm": 4.342306783527047,
      "learning_rate": 2.8877698374495503e-06,
      "loss": 1.3966,
      "step": 7800
    },
    {
      "epoch": 0.2925194627269109,
      "grad_norm": 4.561613447481574,
      "learning_rate": 2.9247972747806124e-06,
      "loss": 1.4207,
      "step": 7900
    },
    {
      "epoch": 0.29622224073611225,
      "grad_norm": 4.603226423674348,
      "learning_rate": 2.961824712111675e-06,
      "loss": 1.4369,
      "step": 8000
    },
    {
      "epoch": 0.2999250187453137,
      "grad_norm": 4.628660307539308,
      "learning_rate": 2.9988521494427375e-06,
      "loss": 1.3871,
      "step": 8100
    },
    {
      "epoch": 0.30362779675451507,
      "grad_norm": 3.8865443379767415,
      "learning_rate": 3.0358795867737996e-06,
      "loss": 1.4182,
      "step": 8200
    },
    {
      "epoch": 0.3073305747637165,
      "grad_norm": 4.038269978955141,
      "learning_rate": 3.072907024104862e-06,
      "loss": 1.3622,
      "step": 8300
    },
    {
      "epoch": 0.3110333527729179,
      "grad_norm": 5.136502242831012,
      "learning_rate": 3.1099344614359243e-06,
      "loss": 1.3889,
      "step": 8400
    },
    {
      "epoch": 0.31473613078211926,
      "grad_norm": 4.457630892251384,
      "learning_rate": 3.1469618987669864e-06,
      "loss": 1.387,
      "step": 8500
    },
    {
      "epoch": 0.3184389087913207,
      "grad_norm": 4.651901883399858,
      "learning_rate": 3.183989336098049e-06,
      "loss": 1.3741,
      "step": 8600
    },
    {
      "epoch": 0.3221416868005221,
      "grad_norm": 4.800399697463299,
      "learning_rate": 3.2210167734291115e-06,
      "loss": 1.3473,
      "step": 8700
    },
    {
      "epoch": 0.3258444648097235,
      "grad_norm": 5.122987743156367,
      "learning_rate": 3.2580442107601736e-06,
      "loss": 1.3733,
      "step": 8800
    },
    {
      "epoch": 0.3295472428189249,
      "grad_norm": 5.307686552433291,
      "learning_rate": 3.295071648091236e-06,
      "loss": 1.341,
      "step": 8900
    },
    {
      "epoch": 0.3332500208281263,
      "grad_norm": 4.4204932532106085,
      "learning_rate": 3.3320990854222982e-06,
      "loss": 1.3615,
      "step": 9000
    },
    {
      "epoch": 0.3369527988373277,
      "grad_norm": 4.937899325267276,
      "learning_rate": 3.3691265227533603e-06,
      "loss": 1.368,
      "step": 9100
    },
    {
      "epoch": 0.3406555768465291,
      "grad_norm": 3.6435580501639326,
      "learning_rate": 3.406153960084423e-06,
      "loss": 1.341,
      "step": 9200
    },
    {
      "epoch": 0.3443583548557305,
      "grad_norm": 3.884956476912519,
      "learning_rate": 3.4431813974154854e-06,
      "loss": 1.3458,
      "step": 9300
    },
    {
      "epoch": 0.3480611328649319,
      "grad_norm": 4.291808958034935,
      "learning_rate": 3.4802088347465475e-06,
      "loss": 1.301,
      "step": 9400
    },
    {
      "epoch": 0.35176391087413333,
      "grad_norm": 4.55718057133725,
      "learning_rate": 3.51723627207761e-06,
      "loss": 1.3203,
      "step": 9500
    },
    {
      "epoch": 0.3554666888833347,
      "grad_norm": 3.927788263058885,
      "learning_rate": 3.554263709408672e-06,
      "loss": 1.3087,
      "step": 9600
    },
    {
      "epoch": 0.35916946689253615,
      "grad_norm": 4.334142071876793,
      "learning_rate": 3.5912911467397343e-06,
      "loss": 1.328,
      "step": 9700
    },
    {
      "epoch": 0.36287224490173753,
      "grad_norm": 3.9826425837051715,
      "learning_rate": 3.628318584070797e-06,
      "loss": 1.3162,
      "step": 9800
    },
    {
      "epoch": 0.3665750229109389,
      "grad_norm": 3.7373318962103834,
      "learning_rate": 3.665346021401859e-06,
      "loss": 1.3427,
      "step": 9900
    },
    {
      "epoch": 0.37027780092014034,
      "grad_norm": 4.309257631492866,
      "learning_rate": 3.7023734587329215e-06,
      "loss": 1.2918,
      "step": 10000
    },
    {
      "epoch": 0.3739805789293417,
      "grad_norm": 4.000949746879435,
      "learning_rate": 3.739400896063984e-06,
      "loss": 1.2864,
      "step": 10100
    },
    {
      "epoch": 0.37768335693854316,
      "grad_norm": 4.314641431918059,
      "learning_rate": 3.7764283333950457e-06,
      "loss": 1.2784,
      "step": 10200
    },
    {
      "epoch": 0.38138613494774454,
      "grad_norm": 4.136976500403523,
      "learning_rate": 3.8134557707261083e-06,
      "loss": 1.2821,
      "step": 10300
    },
    {
      "epoch": 0.385088912956946,
      "grad_norm": 4.8607889216449385,
      "learning_rate": 3.85048320805717e-06,
      "loss": 1.281,
      "step": 10400
    },
    {
      "epoch": 0.38879169096614735,
      "grad_norm": 4.432173453928133,
      "learning_rate": 3.8875106453882325e-06,
      "loss": 1.2959,
      "step": 10500
    },
    {
      "epoch": 0.39249446897534873,
      "grad_norm": 5.118601631131207,
      "learning_rate": 3.9245380827192954e-06,
      "loss": 1.2772,
      "step": 10600
    },
    {
      "epoch": 0.39619724698455017,
      "grad_norm": 4.189210920096741,
      "learning_rate": 3.9615655200503576e-06,
      "loss": 1.311,
      "step": 10700
    },
    {
      "epoch": 0.39990002499375155,
      "grad_norm": 4.9933785952086875,
      "learning_rate": 3.99859295738142e-06,
      "loss": 1.2798,
      "step": 10800
    },
    {
      "epoch": 0.403602803002953,
      "grad_norm": 3.8535620848124785,
      "learning_rate": 4.035620394712483e-06,
      "loss": 1.2532,
      "step": 10900
    },
    {
      "epoch": 0.40730558101215436,
      "grad_norm": 4.040521438500001,
      "learning_rate": 4.072647832043545e-06,
      "loss": 1.2739,
      "step": 11000
    },
    {
      "epoch": 0.4110083590213558,
      "grad_norm": 4.280568957640515,
      "learning_rate": 4.109675269374607e-06,
      "loss": 1.2755,
      "step": 11100
    },
    {
      "epoch": 0.4147111370305572,
      "grad_norm": 4.230099582418301,
      "learning_rate": 4.146702706705669e-06,
      "loss": 1.2676,
      "step": 11200
    },
    {
      "epoch": 0.41841391503975855,
      "grad_norm": 3.807801983954757,
      "learning_rate": 4.183730144036732e-06,
      "loss": 1.2487,
      "step": 11300
    },
    {
      "epoch": 0.42211669304896,
      "grad_norm": 4.2784766116809365,
      "learning_rate": 4.220757581367794e-06,
      "loss": 1.2545,
      "step": 11400
    },
    {
      "epoch": 0.42581947105816137,
      "grad_norm": 4.855973605257093,
      "learning_rate": 4.257785018698856e-06,
      "loss": 1.2436,
      "step": 11500
    },
    {
      "epoch": 0.4295222490673628,
      "grad_norm": 3.5115767446990995,
      "learning_rate": 4.294812456029918e-06,
      "loss": 1.2505,
      "step": 11600
    },
    {
      "epoch": 0.4332250270765642,
      "grad_norm": 3.7062317227814408,
      "learning_rate": 4.33183989336098e-06,
      "loss": 1.2175,
      "step": 11700
    },
    {
      "epoch": 0.4369278050857656,
      "grad_norm": 3.9387496036649936,
      "learning_rate": 4.368867330692043e-06,
      "loss": 1.222,
      "step": 11800
    },
    {
      "epoch": 0.440630583094967,
      "grad_norm": 5.23595658030001,
      "learning_rate": 4.4058947680231055e-06,
      "loss": 1.2388,
      "step": 11900
    },
    {
      "epoch": 0.4443333611041684,
      "grad_norm": 4.759096268753192,
      "learning_rate": 4.442922205354168e-06,
      "loss": 1.2036,
      "step": 12000
    },
    {
      "epoch": 0.4480361391133698,
      "grad_norm": 4.094075469283295,
      "learning_rate": 4.4799496426852306e-06,
      "loss": 1.2543,
      "step": 12100
    },
    {
      "epoch": 0.4517389171225712,
      "grad_norm": 4.153721175621665,
      "learning_rate": 4.516977080016293e-06,
      "loss": 1.227,
      "step": 12200
    },
    {
      "epoch": 0.45544169513177263,
      "grad_norm": 4.508419619958273,
      "learning_rate": 4.554004517347355e-06,
      "loss": 1.188,
      "step": 12300
    },
    {
      "epoch": 0.459144473140974,
      "grad_norm": 5.431232416017837,
      "learning_rate": 4.591031954678417e-06,
      "loss": 1.2254,
      "step": 12400
    },
    {
      "epoch": 0.46284725115017544,
      "grad_norm": 4.195672755415828,
      "learning_rate": 4.62805939200948e-06,
      "loss": 1.2161,
      "step": 12500
    },
    {
      "epoch": 0.4665500291593768,
      "grad_norm": 4.928316304383083,
      "learning_rate": 4.665086829340542e-06,
      "loss": 1.2117,
      "step": 12600
    },
    {
      "epoch": 0.4702528071685782,
      "grad_norm": 3.591246874782549,
      "learning_rate": 4.702114266671604e-06,
      "loss": 1.2114,
      "step": 12700
    },
    {
      "epoch": 0.47395558517777964,
      "grad_norm": 3.4385074546101473,
      "learning_rate": 4.739141704002666e-06,
      "loss": 1.1622,
      "step": 12800
    },
    {
      "epoch": 0.477658363186981,
      "grad_norm": 4.266312659389707,
      "learning_rate": 4.776169141333728e-06,
      "loss": 1.1994,
      "step": 12900
    },
    {
      "epoch": 0.48136114119618245,
      "grad_norm": 4.318134728161222,
      "learning_rate": 4.813196578664791e-06,
      "loss": 1.194,
      "step": 13000
    },
    {
      "epoch": 0.48506391920538383,
      "grad_norm": 3.743433627557919,
      "learning_rate": 4.850224015995853e-06,
      "loss": 1.1862,
      "step": 13100
    },
    {
      "epoch": 0.48876669721458527,
      "grad_norm": 4.432513953759492,
      "learning_rate": 4.8872514533269155e-06,
      "loss": 1.2034,
      "step": 13200
    },
    {
      "epoch": 0.49246947522378665,
      "grad_norm": 4.932972639382486,
      "learning_rate": 4.924278890657978e-06,
      "loss": 1.1829,
      "step": 13300
    },
    {
      "epoch": 0.496172253232988,
      "grad_norm": 4.552124318708114,
      "learning_rate": 4.961306327989041e-06,
      "loss": 1.1877,
      "step": 13400
    },
    {
      "epoch": 0.49987503124218946,
      "grad_norm": 4.57039085993272,
      "learning_rate": 4.998333765320103e-06,
      "loss": 1.1804,
      "step": 13500
    },
    {
      "epoch": 0.5035778092513908,
      "grad_norm": 4.435102306960172,
      "learning_rate": 5.035361202651165e-06,
      "loss": 1.1673,
      "step": 13600
    },
    {
      "epoch": 0.5072805872605922,
      "grad_norm": 4.186670025498094,
      "learning_rate": 5.072388639982228e-06,
      "loss": 1.1933,
      "step": 13700
    },
    {
      "epoch": 0.5109833652697937,
      "grad_norm": 4.405934801625066,
      "learning_rate": 5.10941607731329e-06,
      "loss": 1.1772,
      "step": 13800
    },
    {
      "epoch": 0.5146861432789951,
      "grad_norm": 4.311924669929619,
      "learning_rate": 5.146443514644351e-06,
      "loss": 1.1665,
      "step": 13900
    },
    {
      "epoch": 0.5183889212881965,
      "grad_norm": 5.625220459288197,
      "learning_rate": 5.183470951975414e-06,
      "loss": 1.1735,
      "step": 14000
    },
    {
      "epoch": 0.5220916992973978,
      "grad_norm": 3.8405703022228606,
      "learning_rate": 5.220498389306476e-06,
      "loss": 1.1904,
      "step": 14100
    },
    {
      "epoch": 0.5257944773065992,
      "grad_norm": 4.059806709247945,
      "learning_rate": 5.257525826637538e-06,
      "loss": 1.1588,
      "step": 14200
    },
    {
      "epoch": 0.5294972553158007,
      "grad_norm": 3.8722695281592485,
      "learning_rate": 5.294553263968601e-06,
      "loss": 1.1469,
      "step": 14300
    },
    {
      "epoch": 0.5332000333250021,
      "grad_norm": 4.66275202781423,
      "learning_rate": 5.331580701299663e-06,
      "loss": 1.1442,
      "step": 14400
    },
    {
      "epoch": 0.5369028113342035,
      "grad_norm": 4.6182096996448845,
      "learning_rate": 5.3686081386307255e-06,
      "loss": 1.1543,
      "step": 14500
    },
    {
      "epoch": 0.5406055893434049,
      "grad_norm": 3.9837337174635103,
      "learning_rate": 5.4056355759617885e-06,
      "loss": 1.158,
      "step": 14600
    },
    {
      "epoch": 0.5443083673526063,
      "grad_norm": 3.8034049725872356,
      "learning_rate": 5.442663013292851e-06,
      "loss": 1.1546,
      "step": 14700
    },
    {
      "epoch": 0.5480111453618077,
      "grad_norm": 4.62083026318626,
      "learning_rate": 5.479690450623913e-06,
      "loss": 1.1553,
      "step": 14800
    },
    {
      "epoch": 0.5517139233710091,
      "grad_norm": 4.462803142186011,
      "learning_rate": 5.516717887954976e-06,
      "loss": 1.1506,
      "step": 14900
    },
    {
      "epoch": 0.5554167013802105,
      "grad_norm": 4.165198766777481,
      "learning_rate": 5.553745325286038e-06,
      "loss": 1.147,
      "step": 15000
    },
    {
      "epoch": 0.5591194793894119,
      "grad_norm": 3.5365327477483297,
      "learning_rate": 5.590772762617099e-06,
      "loss": 1.1341,
      "step": 15100
    },
    {
      "epoch": 0.5628222573986134,
      "grad_norm": 3.8324766687461653,
      "learning_rate": 5.627800199948162e-06,
      "loss": 1.1123,
      "step": 15200
    },
    {
      "epoch": 0.5665250354078147,
      "grad_norm": 4.271061462203587,
      "learning_rate": 5.664827637279224e-06,
      "loss": 1.1212,
      "step": 15300
    },
    {
      "epoch": 0.5702278134170161,
      "grad_norm": 4.5272229115194875,
      "learning_rate": 5.701855074610286e-06,
      "loss": 1.1438,
      "step": 15400
    },
    {
      "epoch": 0.5739305914262175,
      "grad_norm": 4.660071317336288,
      "learning_rate": 5.738882511941349e-06,
      "loss": 1.1174,
      "step": 15500
    },
    {
      "epoch": 0.5776333694354189,
      "grad_norm": 3.8174601055554094,
      "learning_rate": 5.775909949272411e-06,
      "loss": 1.095,
      "step": 15600
    },
    {
      "epoch": 0.5813361474446204,
      "grad_norm": 3.9772037532519784,
      "learning_rate": 5.8129373866034734e-06,
      "loss": 1.0901,
      "step": 15700
    },
    {
      "epoch": 0.5850389254538217,
      "grad_norm": 4.117368390246338,
      "learning_rate": 5.849964823934536e-06,
      "loss": 1.1218,
      "step": 15800
    },
    {
      "epoch": 0.5887417034630231,
      "grad_norm": 3.6969429985121387,
      "learning_rate": 5.8869922612655985e-06,
      "loss": 1.1172,
      "step": 15900
    },
    {
      "epoch": 0.5924444814722245,
      "grad_norm": 4.208647101328797,
      "learning_rate": 5.924019698596661e-06,
      "loss": 1.1005,
      "step": 16000
    },
    {
      "epoch": 0.596147259481426,
      "grad_norm": 4.0291894246178455,
      "learning_rate": 5.961047135927724e-06,
      "loss": 1.1129,
      "step": 16100
    },
    {
      "epoch": 0.5998500374906274,
      "grad_norm": 4.713616701400172,
      "learning_rate": 5.998074573258785e-06,
      "loss": 1.1002,
      "step": 16200
    },
    {
      "epoch": 0.6035528154998288,
      "grad_norm": 4.1930372418205355,
      "learning_rate": 6.035102010589847e-06,
      "loss": 1.0886,
      "step": 16300
    },
    {
      "epoch": 0.6072555935090301,
      "grad_norm": 4.254442138954682,
      "learning_rate": 6.07212944792091e-06,
      "loss": 1.0954,
      "step": 16400
    },
    {
      "epoch": 0.6109583715182315,
      "grad_norm": 3.7190710738003014,
      "learning_rate": 6.109156885251972e-06,
      "loss": 1.1288,
      "step": 16500
    },
    {
      "epoch": 0.614661149527433,
      "grad_norm": 5.286326819627371,
      "learning_rate": 6.146184322583034e-06,
      "loss": 1.0861,
      "step": 16600
    },
    {
      "epoch": 0.6183639275366344,
      "grad_norm": 3.5036265705416,
      "learning_rate": 6.183211759914097e-06,
      "loss": 1.1086,
      "step": 16700
    },
    {
      "epoch": 0.6220667055458358,
      "grad_norm": 3.5111130437153495,
      "learning_rate": 6.220239197245159e-06,
      "loss": 1.1022,
      "step": 16800
    },
    {
      "epoch": 0.6257694835550371,
      "grad_norm": 3.2896238161080946,
      "learning_rate": 6.257266634576221e-06,
      "loss": 1.0955,
      "step": 16900
    },
    {
      "epoch": 0.6294722615642385,
      "grad_norm": 3.6540403667854604,
      "learning_rate": 6.294294071907284e-06,
      "loss": 1.058,
      "step": 17000
    },
    {
      "epoch": 0.63317503957344,
      "grad_norm": 4.4670975803638475,
      "learning_rate": 6.3313215092383464e-06,
      "loss": 1.068,
      "step": 17100
    },
    {
      "epoch": 0.6368778175826414,
      "grad_norm": 4.530195581158383,
      "learning_rate": 6.3683489465694085e-06,
      "loss": 1.1105,
      "step": 17200
    },
    {
      "epoch": 0.6405805955918428,
      "grad_norm": 3.8288830335210995,
      "learning_rate": 6.4053763839004715e-06,
      "loss": 1.0332,
      "step": 17300
    },
    {
      "epoch": 0.6442833736010442,
      "grad_norm": 3.815833342915358,
      "learning_rate": 6.442403821231533e-06,
      "loss": 1.0794,
      "step": 17400
    },
    {
      "epoch": 0.6479861516102456,
      "grad_norm": 4.604021991413655,
      "learning_rate": 6.479431258562595e-06,
      "loss": 1.079,
      "step": 17500
    },
    {
      "epoch": 0.651688929619447,
      "grad_norm": 3.577028223699294,
      "learning_rate": 6.516458695893658e-06,
      "loss": 1.0653,
      "step": 17600
    },
    {
      "epoch": 0.6553917076286484,
      "grad_norm": 4.111818385244818,
      "learning_rate": 6.55348613322472e-06,
      "loss": 1.035,
      "step": 17700
    },
    {
      "epoch": 0.6590944856378498,
      "grad_norm": 3.56811764186164,
      "learning_rate": 6.590513570555782e-06,
      "loss": 1.0452,
      "step": 17800
    },
    {
      "epoch": 0.6627972636470512,
      "grad_norm": 3.87646009188637,
      "learning_rate": 6.627541007886845e-06,
      "loss": 1.0493,
      "step": 17900
    },
    {
      "epoch": 0.6665000416562527,
      "grad_norm": 3.406407594729227,
      "learning_rate": 6.664568445217907e-06,
      "loss": 1.0651,
      "step": 18000
    },
    {
      "epoch": 0.670202819665454,
      "grad_norm": 2.866315730154109,
      "learning_rate": 6.701595882548969e-06,
      "loss": 1.0561,
      "step": 18100
    },
    {
      "epoch": 0.6739055976746554,
      "grad_norm": 4.376427115975032,
      "learning_rate": 6.738623319880032e-06,
      "loss": 1.0602,
      "step": 18200
    },
    {
      "epoch": 0.6776083756838568,
      "grad_norm": 3.2373014543024072,
      "learning_rate": 6.775650757211094e-06,
      "loss": 1.0397,
      "step": 18300
    },
    {
      "epoch": 0.6813111536930582,
      "grad_norm": 3.9666084272474245,
      "learning_rate": 6.8126781945421565e-06,
      "loss": 1.0445,
      "step": 18400
    },
    {
      "epoch": 0.6850139317022597,
      "grad_norm": 3.800460648153449,
      "learning_rate": 6.8497056318732194e-06,
      "loss": 1.0494,
      "step": 18500
    },
    {
      "epoch": 0.688716709711461,
      "grad_norm": 4.274139967670289,
      "learning_rate": 6.886733069204281e-06,
      "loss": 1.0756,
      "step": 18600
    },
    {
      "epoch": 0.6924194877206624,
      "grad_norm": 3.9994897603573665,
      "learning_rate": 6.923760506535343e-06,
      "loss": 1.0412,
      "step": 18700
    },
    {
      "epoch": 0.6961222657298638,
      "grad_norm": 3.8633706482810553,
      "learning_rate": 6.960787943866406e-06,
      "loss": 1.0522,
      "step": 18800
    },
    {
      "epoch": 0.6998250437390653,
      "grad_norm": 4.381038145148409,
      "learning_rate": 6.997815381197468e-06,
      "loss": 1.0209,
      "step": 18900
    },
    {
      "epoch": 0.7035278217482667,
      "grad_norm": 4.1366815727300175,
      "learning_rate": 7.03484281852853e-06,
      "loss": 1.0351,
      "step": 19000
    },
    {
      "epoch": 0.707230599757468,
      "grad_norm": 3.675160954426471,
      "learning_rate": 7.071870255859593e-06,
      "loss": 1.0291,
      "step": 19100
    },
    {
      "epoch": 0.7109333777666694,
      "grad_norm": 3.603001585625626,
      "learning_rate": 7.108897693190655e-06,
      "loss": 1.0093,
      "step": 19200
    },
    {
      "epoch": 0.7146361557758708,
      "grad_norm": 3.264233985905883,
      "learning_rate": 7.145925130521717e-06,
      "loss": 1.0394,
      "step": 19300
    },
    {
      "epoch": 0.7183389337850723,
      "grad_norm": 3.8898393773576614,
      "learning_rate": 7.182952567852779e-06,
      "loss": 1.0309,
      "step": 19400
    },
    {
      "epoch": 0.7220417117942737,
      "grad_norm": 3.2365236970202917,
      "learning_rate": 7.219980005183842e-06,
      "loss": 1.0304,
      "step": 19500
    },
    {
      "epoch": 0.7257444898034751,
      "grad_norm": 3.497985907223146,
      "learning_rate": 7.257007442514904e-06,
      "loss": 1.0375,
      "step": 19600
    },
    {
      "epoch": 0.7294472678126764,
      "grad_norm": 3.6359487776078714,
      "learning_rate": 7.294034879845966e-06,
      "loss": 1.0132,
      "step": 19700
    },
    {
      "epoch": 0.7331500458218778,
      "grad_norm": 3.9379821448814343,
      "learning_rate": 7.331062317177029e-06,
      "loss": 1.019,
      "step": 19800
    },
    {
      "epoch": 0.7368528238310793,
      "grad_norm": 4.750912299244358,
      "learning_rate": 7.368089754508091e-06,
      "loss": 1.0172,
      "step": 19900
    },
    {
      "epoch": 0.7405556018402807,
      "grad_norm": 4.253487251656156,
      "learning_rate": 7.405117191839153e-06,
      "loss": 1.0093,
      "step": 20000
    },
    {
      "epoch": 0.7442583798494821,
      "grad_norm": 3.5926201949515284,
      "learning_rate": 7.442144629170216e-06,
      "loss": 1.0163,
      "step": 20100
    },
    {
      "epoch": 0.7479611578586834,
      "grad_norm": 3.8927981125403073,
      "learning_rate": 7.479172066501278e-06,
      "loss": 0.9827,
      "step": 20200
    },
    {
      "epoch": 0.7516639358678849,
      "grad_norm": 4.573750708649385,
      "learning_rate": 7.51619950383234e-06,
      "loss": 1.0087,
      "step": 20300
    },
    {
      "epoch": 0.7553667138770863,
      "grad_norm": 3.1260071677382952,
      "learning_rate": 7.553226941163403e-06,
      "loss": 1.0275,
      "step": 20400
    },
    {
      "epoch": 0.7590694918862877,
      "grad_norm": 3.965170800530151,
      "learning_rate": 7.590254378494465e-06,
      "loss": 1.0132,
      "step": 20500
    },
    {
      "epoch": 0.7627722698954891,
      "grad_norm": 3.1267324185244876,
      "learning_rate": 7.627281815825527e-06,
      "loss": 0.9803,
      "step": 20600
    },
    {
      "epoch": 0.7664750479046905,
      "grad_norm": 3.4086206206945358,
      "learning_rate": 7.66430925315659e-06,
      "loss": 0.9954,
      "step": 20700
    },
    {
      "epoch": 0.770177825913892,
      "grad_norm": 4.254989394844253,
      "learning_rate": 7.701336690487652e-06,
      "loss": 0.9922,
      "step": 20800
    },
    {
      "epoch": 0.7738806039230933,
      "grad_norm": 3.7295802263455564,
      "learning_rate": 7.738364127818714e-06,
      "loss": 1.0004,
      "step": 20900
    },
    {
      "epoch": 0.7775833819322947,
      "grad_norm": 4.671822378080011,
      "learning_rate": 7.775391565149777e-06,
      "loss": 0.9887,
      "step": 21000
    },
    {
      "epoch": 0.7812861599414961,
      "grad_norm": 3.941447202712077,
      "learning_rate": 7.81241900248084e-06,
      "loss": 0.982,
      "step": 21100
    },
    {
      "epoch": 0.7849889379506975,
      "grad_norm": 3.6530756566069633,
      "learning_rate": 7.8494464398119e-06,
      "loss": 0.9668,
      "step": 21200
    },
    {
      "epoch": 0.788691715959899,
      "grad_norm": 3.9080413448774625,
      "learning_rate": 7.886473877142964e-06,
      "loss": 1.0055,
      "step": 21300
    },
    {
      "epoch": 0.7923944939691003,
      "grad_norm": 3.603292889224154,
      "learning_rate": 7.923501314474025e-06,
      "loss": 1.0039,
      "step": 21400
    },
    {
      "epoch": 0.7960972719783017,
      "grad_norm": 3.677324652882952,
      "learning_rate": 7.960528751805088e-06,
      "loss": 1.0226,
      "step": 21500
    },
    {
      "epoch": 0.7998000499875031,
      "grad_norm": 3.6513050899215056,
      "learning_rate": 7.997556189136151e-06,
      "loss": 0.9766,
      "step": 21600
    },
    {
      "epoch": 0.8035028279967046,
      "grad_norm": 3.357793359434031,
      "learning_rate": 8.034583626467212e-06,
      "loss": 0.9813,
      "step": 21700
    },
    {
      "epoch": 0.807205606005906,
      "grad_norm": 2.760916399152151,
      "learning_rate": 8.071611063798275e-06,
      "loss": 0.979,
      "step": 21800
    },
    {
      "epoch": 0.8109083840151073,
      "grad_norm": 3.6887561411257046,
      "learning_rate": 8.108638501129338e-06,
      "loss": 0.9601,
      "step": 21900
    },
    {
      "epoch": 0.8146111620243087,
      "grad_norm": 3.652784111767623,
      "learning_rate": 8.1456659384604e-06,
      "loss": 0.9799,
      "step": 22000
    },
    {
      "epoch": 0.8183139400335101,
      "grad_norm": 3.479191284518454,
      "learning_rate": 8.182693375791462e-06,
      "loss": 0.9785,
      "step": 22100
    },
    {
      "epoch": 0.8220167180427116,
      "grad_norm": 4.128090009925586,
      "learning_rate": 8.219720813122525e-06,
      "loss": 0.9841,
      "step": 22200
    },
    {
      "epoch": 0.825719496051913,
      "grad_norm": 3.662863541747893,
      "learning_rate": 8.256748250453587e-06,
      "loss": 0.947,
      "step": 22300
    },
    {
      "epoch": 0.8294222740611144,
      "grad_norm": 3.217491905494941,
      "learning_rate": 8.29377568778465e-06,
      "loss": 0.9757,
      "step": 22400
    },
    {
      "epoch": 0.8331250520703157,
      "grad_norm": 3.326399094958347,
      "learning_rate": 8.330803125115712e-06,
      "loss": 0.9598,
      "step": 22500
    },
    {
      "epoch": 0.8368278300795171,
      "grad_norm": 2.909529562627351,
      "learning_rate": 8.367830562446774e-06,
      "loss": 0.9587,
      "step": 22600
    },
    {
      "epoch": 0.8405306080887186,
      "grad_norm": 3.1644941957196346,
      "learning_rate": 8.404857999777835e-06,
      "loss": 0.9689,
      "step": 22700
    },
    {
      "epoch": 0.84423338609792,
      "grad_norm": 3.152721404570826,
      "learning_rate": 8.441885437108898e-06,
      "loss": 0.9822,
      "step": 22800
    },
    {
      "epoch": 0.8479361641071214,
      "grad_norm": 3.879640971417378,
      "learning_rate": 8.478912874439961e-06,
      "loss": 0.9379,
      "step": 22900
    },
    {
      "epoch": 0.8516389421163227,
      "grad_norm": 4.432110940230918,
      "learning_rate": 8.515940311771022e-06,
      "loss": 0.9544,
      "step": 23000
    },
    {
      "epoch": 0.8553417201255242,
      "grad_norm": 3.2420578574260697,
      "learning_rate": 8.552967749102085e-06,
      "loss": 0.9292,
      "step": 23100
    },
    {
      "epoch": 0.8590444981347256,
      "grad_norm": 4.617591314029601,
      "learning_rate": 8.589995186433148e-06,
      "loss": 0.9477,
      "step": 23200
    },
    {
      "epoch": 0.862747276143927,
      "grad_norm": 3.0259937163445194,
      "learning_rate": 8.62702262376421e-06,
      "loss": 0.9559,
      "step": 23300
    },
    {
      "epoch": 0.8664500541531284,
      "grad_norm": 3.278192583185341,
      "learning_rate": 8.664050061095272e-06,
      "loss": 0.9583,
      "step": 23400
    },
    {
      "epoch": 0.8701528321623297,
      "grad_norm": 3.6509553777490424,
      "learning_rate": 8.701077498426335e-06,
      "loss": 0.9749,
      "step": 23500
    },
    {
      "epoch": 0.8738556101715312,
      "grad_norm": 3.4075169195241677,
      "learning_rate": 8.738104935757397e-06,
      "loss": 0.9223,
      "step": 23600
    },
    {
      "epoch": 0.8775583881807326,
      "grad_norm": 2.893435896152694,
      "learning_rate": 8.77513237308846e-06,
      "loss": 0.9721,
      "step": 23700
    },
    {
      "epoch": 0.881261166189934,
      "grad_norm": 4.064038243050667,
      "learning_rate": 8.81215981041952e-06,
      "loss": 0.9648,
      "step": 23800
    },
    {
      "epoch": 0.8849639441991354,
      "grad_norm": 3.28397127001984,
      "learning_rate": 8.849187247750584e-06,
      "loss": 0.9413,
      "step": 23900
    },
    {
      "epoch": 0.8886667222083368,
      "grad_norm": 2.7396572790329183,
      "learning_rate": 8.886214685081647e-06,
      "loss": 0.9538,
      "step": 24000
    },
    {
      "epoch": 0.8923695002175382,
      "grad_norm": 3.831935799094677,
      "learning_rate": 8.923242122412708e-06,
      "loss": 0.9488,
      "step": 24100
    },
    {
      "epoch": 0.8960722782267396,
      "grad_norm": 3.083787982483977,
      "learning_rate": 8.960269559743771e-06,
      "loss": 0.9798,
      "step": 24200
    },
    {
      "epoch": 0.899775056235941,
      "grad_norm": 3.2638182611068784,
      "learning_rate": 8.997296997074834e-06,
      "loss": 0.9364,
      "step": 24300
    },
    {
      "epoch": 0.9034778342451424,
      "grad_norm": 2.8271807284549824,
      "learning_rate": 9.034324434405895e-06,
      "loss": 0.9592,
      "step": 24400
    },
    {
      "epoch": 0.9071806122543439,
      "grad_norm": 3.1878575837021295,
      "learning_rate": 9.071351871736958e-06,
      "loss": 0.944,
      "step": 24500
    },
    {
      "epoch": 0.9108833902635453,
      "grad_norm": 3.344639834530028,
      "learning_rate": 9.108379309068021e-06,
      "loss": 0.9305,
      "step": 24600
    },
    {
      "epoch": 0.9145861682727466,
      "grad_norm": 3.271225972067076,
      "learning_rate": 9.145406746399082e-06,
      "loss": 0.9334,
      "step": 24700
    },
    {
      "epoch": 0.918288946281948,
      "grad_norm": 3.7284762668702314,
      "learning_rate": 9.182434183730145e-06,
      "loss": 0.9195,
      "step": 24800
    },
    {
      "epoch": 0.9219917242911494,
      "grad_norm": 4.098726803359245,
      "learning_rate": 9.219461621061208e-06,
      "loss": 0.9398,
      "step": 24900
    },
    {
      "epoch": 0.9256945023003509,
      "grad_norm": 2.8965683319135795,
      "learning_rate": 9.25648905839227e-06,
      "loss": 0.9365,
      "step": 25000
    },
    {
      "epoch": 0.9293972803095523,
      "grad_norm": 3.472751774536713,
      "learning_rate": 9.29351649572333e-06,
      "loss": 0.942,
      "step": 25100
    },
    {
      "epoch": 0.9331000583187536,
      "grad_norm": 2.741682625283635,
      "learning_rate": 9.330543933054394e-06,
      "loss": 0.9463,
      "step": 25200
    },
    {
      "epoch": 0.936802836327955,
      "grad_norm": 2.9695965336662584,
      "learning_rate": 9.367571370385457e-06,
      "loss": 0.9355,
      "step": 25300
    },
    {
      "epoch": 0.9405056143371564,
      "grad_norm": 3.0507053957289814,
      "learning_rate": 9.404598807716518e-06,
      "loss": 0.9123,
      "step": 25400
    },
    {
      "epoch": 0.9442083923463579,
      "grad_norm": 3.997694772330171,
      "learning_rate": 9.441626245047581e-06,
      "loss": 0.9326,
      "step": 25500
    },
    {
      "epoch": 0.9479111703555593,
      "grad_norm": 3.006394895446821,
      "learning_rate": 9.478653682378644e-06,
      "loss": 0.9343,
      "step": 25600
    },
    {
      "epoch": 0.9516139483647607,
      "grad_norm": 3.030113997176904,
      "learning_rate": 9.515681119709705e-06,
      "loss": 0.9263,
      "step": 25700
    },
    {
      "epoch": 0.955316726373962,
      "grad_norm": 2.9203358557009187,
      "learning_rate": 9.552708557040768e-06,
      "loss": 0.9432,
      "step": 25800
    },
    {
      "epoch": 0.9590195043831635,
      "grad_norm": 3.320595109219397,
      "learning_rate": 9.589735994371831e-06,
      "loss": 0.9132,
      "step": 25900
    },
    {
      "epoch": 0.9627222823923649,
      "grad_norm": 3.2953825381075883,
      "learning_rate": 9.626763431702892e-06,
      "loss": 0.945,
      "step": 26000
    },
    {
      "epoch": 0.9664250604015663,
      "grad_norm": 2.8690624980388097,
      "learning_rate": 9.663790869033955e-06,
      "loss": 0.9269,
      "step": 26100
    },
    {
      "epoch": 0.9701278384107677,
      "grad_norm": 3.238448651703217,
      "learning_rate": 9.700818306365017e-06,
      "loss": 0.9223,
      "step": 26200
    },
    {
      "epoch": 0.973830616419969,
      "grad_norm": 3.8191924390794076,
      "learning_rate": 9.73784574369608e-06,
      "loss": 0.9294,
      "step": 26300
    },
    {
      "epoch": 0.9775333944291705,
      "grad_norm": 2.89593322378588,
      "learning_rate": 9.774873181027143e-06,
      "loss": 0.92,
      "step": 26400
    },
    {
      "epoch": 0.9812361724383719,
      "grad_norm": 3.701573808189793,
      "learning_rate": 9.811900618358204e-06,
      "loss": 0.9471,
      "step": 26500
    },
    {
      "epoch": 0.9849389504475733,
      "grad_norm": 3.334857306171596,
      "learning_rate": 9.848928055689267e-06,
      "loss": 0.9205,
      "step": 26600
    },
    {
      "epoch": 0.9886417284567747,
      "grad_norm": 3.3187413615000705,
      "learning_rate": 9.88595549302033e-06,
      "loss": 0.9065,
      "step": 26700
    },
    {
      "epoch": 0.992344506465976,
      "grad_norm": 3.3451918803521945,
      "learning_rate": 9.922982930351391e-06,
      "loss": 0.8933,
      "step": 26800
    },
    {
      "epoch": 0.9960472844751775,
      "grad_norm": 3.2100420698017147,
      "learning_rate": 9.960010367682454e-06,
      "loss": 0.9112,
      "step": 26900
    },
    {
      "epoch": 0.9997500624843789,
      "grad_norm": 2.837918388436053,
      "learning_rate": 9.997037805013515e-06,
      "loss": 0.9567,
      "step": 27000
    },
    {
      "epoch": 1.0034435835485573,
      "grad_norm": 2.9548645107416087,
      "learning_rate": 9.99999646509579e-06,
      "loss": 0.8909,
      "step": 27100
    },
    {
      "epoch": 1.0071463615577587,
      "grad_norm": 3.1446751198623435,
      "learning_rate": 9.999984604128388e-06,
      "loss": 0.8726,
      "step": 27200
    },
    {
      "epoch": 1.01084913956696,
      "grad_norm": 2.976258203122292,
      "learning_rate": 9.999964390388652e-06,
      "loss": 0.893,
      "step": 27300
    },
    {
      "epoch": 1.0145519175761615,
      "grad_norm": 2.980666927786789,
      "learning_rate": 9.999935823910352e-06,
      "loss": 0.8665,
      "step": 27400
    },
    {
      "epoch": 1.0182546955853629,
      "grad_norm": 3.1487567474786795,
      "learning_rate": 9.999898904741209e-06,
      "loss": 0.8649,
      "step": 27500
    },
    {
      "epoch": 1.0219574735945642,
      "grad_norm": 2.629105326617924,
      "learning_rate": 9.999853632942897e-06,
      "loss": 0.8607,
      "step": 27600
    },
    {
      "epoch": 1.0256602516037656,
      "grad_norm": 3.8572529888050826,
      "learning_rate": 9.999800008591049e-06,
      "loss": 0.8761,
      "step": 27700
    },
    {
      "epoch": 1.0293630296129672,
      "grad_norm": 2.767732632774148,
      "learning_rate": 9.999738031775246e-06,
      "loss": 0.8778,
      "step": 27800
    },
    {
      "epoch": 1.0330658076221686,
      "grad_norm": 3.4506007181787606,
      "learning_rate": 9.99966770259902e-06,
      "loss": 0.8842,
      "step": 27900
    },
    {
      "epoch": 1.03676858563137,
      "grad_norm": 3.3838219329729764,
      "learning_rate": 9.999589021179867e-06,
      "loss": 0.8653,
      "step": 28000
    },
    {
      "epoch": 1.0404713636405714,
      "grad_norm": 2.7380768463734673,
      "learning_rate": 9.999501987649225e-06,
      "loss": 0.8715,
      "step": 28100
    },
    {
      "epoch": 1.0441741416497727,
      "grad_norm": 2.5724850888468818,
      "learning_rate": 9.999406602152487e-06,
      "loss": 0.873,
      "step": 28200
    },
    {
      "epoch": 1.0478769196589741,
      "grad_norm": 2.814547694838622,
      "learning_rate": 9.999302864849006e-06,
      "loss": 0.8652,
      "step": 28300
    },
    {
      "epoch": 1.0515796976681755,
      "grad_norm": 2.8671305113874985,
      "learning_rate": 9.999190775912075e-06,
      "loss": 0.8773,
      "step": 28400
    },
    {
      "epoch": 1.0552824756773769,
      "grad_norm": 3.4364162307062016,
      "learning_rate": 9.999070335528951e-06,
      "loss": 0.8722,
      "step": 28500
    },
    {
      "epoch": 1.0589852536865783,
      "grad_norm": 3.1668831518960747,
      "learning_rate": 9.99894154390083e-06,
      "loss": 0.878,
      "step": 28600
    },
    {
      "epoch": 1.0626880316957799,
      "grad_norm": 2.5661208890092215,
      "learning_rate": 9.998804401242874e-06,
      "loss": 0.8642,
      "step": 28700
    },
    {
      "epoch": 1.0663908097049812,
      "grad_norm": 2.702695496460383,
      "learning_rate": 9.998658907784183e-06,
      "loss": 0.8576,
      "step": 28800
    },
    {
      "epoch": 1.0700935877141826,
      "grad_norm": 2.9176557901328666,
      "learning_rate": 9.998505063767811e-06,
      "loss": 0.8705,
      "step": 28900
    },
    {
      "epoch": 1.073796365723384,
      "grad_norm": 3.7212751721205937,
      "learning_rate": 9.998342869450767e-06,
      "loss": 0.8641,
      "step": 29000
    },
    {
      "epoch": 1.0774991437325854,
      "grad_norm": 3.1124898105603767,
      "learning_rate": 9.998172325104007e-06,
      "loss": 0.8679,
      "step": 29100
    },
    {
      "epoch": 1.0812019217417868,
      "grad_norm": 3.211449706725418,
      "learning_rate": 9.997993431012433e-06,
      "loss": 0.8698,
      "step": 29200
    },
    {
      "epoch": 1.0849046997509881,
      "grad_norm": 3.3735417926416105,
      "learning_rate": 9.997806187474899e-06,
      "loss": 0.8593,
      "step": 29300
    },
    {
      "epoch": 1.0886074777601895,
      "grad_norm": 3.100346897510759,
      "learning_rate": 9.997610594804206e-06,
      "loss": 0.8852,
      "step": 29400
    },
    {
      "epoch": 1.092310255769391,
      "grad_norm": 3.046486604721806,
      "learning_rate": 9.997406653327103e-06,
      "loss": 0.8734,
      "step": 29500
    },
    {
      "epoch": 1.0960130337785925,
      "grad_norm": 3.462276437104938,
      "learning_rate": 9.99719436338429e-06,
      "loss": 0.8333,
      "step": 29600
    },
    {
      "epoch": 1.0997158117877939,
      "grad_norm": 2.8629449765918564,
      "learning_rate": 9.996973725330405e-06,
      "loss": 0.8584,
      "step": 29700
    },
    {
      "epoch": 1.1034185897969953,
      "grad_norm": 2.9165610257153873,
      "learning_rate": 9.996744739534042e-06,
      "loss": 0.8665,
      "step": 29800
    },
    {
      "epoch": 1.1071213678061966,
      "grad_norm": 3.1796750168940315,
      "learning_rate": 9.996507406377728e-06,
      "loss": 0.8787,
      "step": 29900
    },
    {
      "epoch": 1.110824145815398,
      "grad_norm": 3.160592130889014,
      "learning_rate": 9.99626172625795e-06,
      "loss": 0.8718,
      "step": 30000
    },
    {
      "epoch": 1.1145269238245994,
      "grad_norm": 2.779237960939386,
      "learning_rate": 9.99600769958513e-06,
      "loss": 0.878,
      "step": 30100
    },
    {
      "epoch": 1.1182297018338008,
      "grad_norm": 3.108400228708763,
      "learning_rate": 9.995745326783628e-06,
      "loss": 0.868,
      "step": 30200
    },
    {
      "epoch": 1.1219324798430022,
      "grad_norm": 3.0911030432916817,
      "learning_rate": 9.995474608291761e-06,
      "loss": 0.8621,
      "step": 30300
    },
    {
      "epoch": 1.1256352578522035,
      "grad_norm": 2.5787433018725,
      "learning_rate": 9.995195544561778e-06,
      "loss": 0.8754,
      "step": 30400
    },
    {
      "epoch": 1.1293380358614051,
      "grad_norm": 2.8342296943136165,
      "learning_rate": 9.994908136059868e-06,
      "loss": 0.8373,
      "step": 30500
    },
    {
      "epoch": 1.1330408138706065,
      "grad_norm": 2.476461823350524,
      "learning_rate": 9.994612383266171e-06,
      "loss": 0.842,
      "step": 30600
    },
    {
      "epoch": 1.136743591879808,
      "grad_norm": 3.9271884057807296,
      "learning_rate": 9.994308286674754e-06,
      "loss": 0.8453,
      "step": 30700
    },
    {
      "epoch": 1.1404463698890093,
      "grad_norm": 3.1786946065406236,
      "learning_rate": 9.99399584679363e-06,
      "loss": 0.8648,
      "step": 30800
    },
    {
      "epoch": 1.1441491478982106,
      "grad_norm": 2.634901131802063,
      "learning_rate": 9.99367506414475e-06,
      "loss": 0.8751,
      "step": 30900
    },
    {
      "epoch": 1.147851925907412,
      "grad_norm": 3.078376805123231,
      "learning_rate": 9.993345939264e-06,
      "loss": 0.8635,
      "step": 31000
    },
    {
      "epoch": 1.1515547039166134,
      "grad_norm": 3.1528853117678786,
      "learning_rate": 9.9930084727012e-06,
      "loss": 0.843,
      "step": 31100
    },
    {
      "epoch": 1.1552574819258148,
      "grad_norm": 2.828228109125317,
      "learning_rate": 9.992662665020112e-06,
      "loss": 0.8624,
      "step": 31200
    },
    {
      "epoch": 1.1589602599350162,
      "grad_norm": 3.1953124851506707,
      "learning_rate": 9.992308516798426e-06,
      "loss": 0.8579,
      "step": 31300
    },
    {
      "epoch": 1.1626630379442178,
      "grad_norm": 3.0902899613744603,
      "learning_rate": 9.991946028627768e-06,
      "loss": 0.8527,
      "step": 31400
    },
    {
      "epoch": 1.1663658159534191,
      "grad_norm": 2.9463681925783023,
      "learning_rate": 9.991575201113695e-06,
      "loss": 0.8268,
      "step": 31500
    },
    {
      "epoch": 1.1700685939626205,
      "grad_norm": 2.8044002498862057,
      "learning_rate": 9.991196034875698e-06,
      "loss": 0.8395,
      "step": 31600
    },
    {
      "epoch": 1.173771371971822,
      "grad_norm": 2.9461135183049936,
      "learning_rate": 9.990808530547197e-06,
      "loss": 0.858,
      "step": 31700
    },
    {
      "epoch": 1.1774741499810233,
      "grad_norm": 3.213674861669168,
      "learning_rate": 9.990412688775542e-06,
      "loss": 0.864,
      "step": 31800
    },
    {
      "epoch": 1.1811769279902247,
      "grad_norm": 2.71190688635739,
      "learning_rate": 9.99000851022201e-06,
      "loss": 0.855,
      "step": 31900
    },
    {
      "epoch": 1.184879705999426,
      "grad_norm": 3.723777231794139,
      "learning_rate": 9.9895959955618e-06,
      "loss": 0.8456,
      "step": 32000
    },
    {
      "epoch": 1.1885824840086274,
      "grad_norm": 2.4622343303272918,
      "learning_rate": 9.989175145484049e-06,
      "loss": 0.8217,
      "step": 32100
    },
    {
      "epoch": 1.1922852620178288,
      "grad_norm": 2.8875388301298472,
      "learning_rate": 9.98874596069181e-06,
      "loss": 0.8591,
      "step": 32200
    },
    {
      "epoch": 1.1959880400270302,
      "grad_norm": 2.5910572126310716,
      "learning_rate": 9.988308441902061e-06,
      "loss": 0.8453,
      "step": 32300
    },
    {
      "epoch": 1.1996908180362316,
      "grad_norm": 2.4069698963541755,
      "learning_rate": 9.987862589845703e-06,
      "loss": 0.8503,
      "step": 32400
    },
    {
      "epoch": 1.2033935960454332,
      "grad_norm": 2.914526087822122,
      "learning_rate": 9.987408405267561e-06,
      "loss": 0.8668,
      "step": 32500
    },
    {
      "epoch": 1.2070963740546345,
      "grad_norm": 2.8077292984671485,
      "learning_rate": 9.986945888926374e-06,
      "loss": 0.8314,
      "step": 32600
    },
    {
      "epoch": 1.210799152063836,
      "grad_norm": 3.6704712964311437,
      "learning_rate": 9.986475041594805e-06,
      "loss": 0.8371,
      "step": 32700
    },
    {
      "epoch": 1.2145019300730373,
      "grad_norm": 2.6706897230097297,
      "learning_rate": 9.985995864059433e-06,
      "loss": 0.876,
      "step": 32800
    },
    {
      "epoch": 1.2182047080822387,
      "grad_norm": 3.0940143448561037,
      "learning_rate": 9.98550835712075e-06,
      "loss": 0.8364,
      "step": 32900
    },
    {
      "epoch": 1.22190748609144,
      "grad_norm": 3.6081252765429963,
      "learning_rate": 9.98501252159317e-06,
      "loss": 0.8378,
      "step": 33000
    },
    {
      "epoch": 1.2256102641006414,
      "grad_norm": 2.77425534329751,
      "learning_rate": 9.984508358305012e-06,
      "loss": 0.8449,
      "step": 33100
    },
    {
      "epoch": 1.2293130421098428,
      "grad_norm": 2.847973382987711,
      "learning_rate": 9.98399586809851e-06,
      "loss": 0.8446,
      "step": 33200
    },
    {
      "epoch": 1.2330158201190442,
      "grad_norm": 3.1916476231654984,
      "learning_rate": 9.983475051829814e-06,
      "loss": 0.8499,
      "step": 33300
    },
    {
      "epoch": 1.2367185981282458,
      "grad_norm": 3.0278042743633047,
      "learning_rate": 9.982945910368974e-06,
      "loss": 0.8427,
      "step": 33400
    },
    {
      "epoch": 1.2404213761374472,
      "grad_norm": 2.9987823840994685,
      "learning_rate": 9.982408444599955e-06,
      "loss": 0.8565,
      "step": 33500
    },
    {
      "epoch": 1.2441241541466486,
      "grad_norm": 2.956201689254189,
      "learning_rate": 9.981862655420626e-06,
      "loss": 0.8303,
      "step": 33600
    },
    {
      "epoch": 1.24782693215585,
      "grad_norm": 3.306173716101804,
      "learning_rate": 9.981308543742759e-06,
      "loss": 0.8351,
      "step": 33700
    },
    {
      "epoch": 1.2515297101650513,
      "grad_norm": 2.8358161347669624,
      "learning_rate": 9.98074611049203e-06,
      "loss": 0.854,
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.2552324881742527, | |
| "grad_norm": 3.2862147001432263, | |
| "learning_rate": 9.980175356608018e-06, | |
| "loss": 0.8176, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.258935266183454, | |
| "grad_norm": 3.839933772493448, | |
| "learning_rate": 9.979596283044202e-06, | |
| "loss": 0.8353, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.2626380441926555, | |
| "grad_norm": 3.039521277363643, | |
| "learning_rate": 9.979008890767958e-06, | |
| "loss": 0.8313, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.2663408222018568, | |
| "grad_norm": 2.670418682490729, | |
| "learning_rate": 9.97841318076056e-06, | |
| "loss": 0.8299, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.2700436002110584, | |
| "grad_norm": 2.6610287960828947, | |
| "learning_rate": 9.977809154017177e-06, | |
| "loss": 0.8255, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.2737463782202598, | |
| "grad_norm": 4.255372007943821, | |
| "learning_rate": 9.977196811546874e-06, | |
| "loss": 0.8178, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.2774491562294612, | |
| "grad_norm": 2.486491204040578, | |
| "learning_rate": 9.976576154372603e-06, | |
| "loss": 0.8131, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.2811519342386626, | |
| "grad_norm": 2.5063224331855967, | |
| "learning_rate": 9.975947183531208e-06, | |
| "loss": 0.8425, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.284854712247864, | |
| "grad_norm": 2.7512179307220226, | |
| "learning_rate": 9.975309900073424e-06, | |
| "loss": 0.8593, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.2885574902570653, | |
| "grad_norm": 2.712022237784725, | |
| "learning_rate": 9.974664305063872e-06, | |
| "loss": 0.8217, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.2922602682662667, | |
| "grad_norm": 2.5781139265649213, | |
| "learning_rate": 9.974010399581056e-06, | |
| "loss": 0.8009, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.295963046275468, | |
| "grad_norm": 2.3094975445159927, | |
| "learning_rate": 9.973348184717362e-06, | |
| "loss": 0.8441, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.2996658242846695, | |
| "grad_norm": 3.222306020034265, | |
| "learning_rate": 9.972677661579062e-06, | |
| "loss": 0.8453, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.303368602293871, | |
| "grad_norm": 2.4004480742086383, | |
| "learning_rate": 9.971998831286305e-06, | |
| "loss": 0.8352, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.3070713803030725, | |
| "grad_norm": 2.9242567540358193, | |
| "learning_rate": 9.971311694973115e-06, | |
| "loss": 0.8251, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.3107741583122738, | |
| "grad_norm": 3.3760497497529234, | |
| "learning_rate": 9.970616253787394e-06, | |
| "loss": 0.8212, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.3144769363214752, | |
| "grad_norm": 2.738484575208949, | |
| "learning_rate": 9.969912508890924e-06, | |
| "loss": 0.8338, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.3181797143306766, | |
| "grad_norm": 2.403858688871253, | |
| "learning_rate": 9.969200461459344e-06, | |
| "loss": 0.8051, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.321882492339878, | |
| "grad_norm": 3.317873477816687, | |
| "learning_rate": 9.96848011268218e-06, | |
| "loss": 0.8275, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.3255852703490794, | |
| "grad_norm": 3.0383409211764465, | |
| "learning_rate": 9.967751463762811e-06, | |
| "loss": 0.8102, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.3292880483582807, | |
| "grad_norm": 2.716682345656308, | |
| "learning_rate": 9.967014515918491e-06, | |
| "loss": 0.7922, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.3329908263674821, | |
| "grad_norm": 2.5903044471345407, | |
| "learning_rate": 9.966269270380338e-06, | |
| "loss": 0.8152, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.3366936043766837, | |
| "grad_norm": 2.6042198988611505, | |
| "learning_rate": 9.965515728393324e-06, | |
| "loss": 0.816, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.3403963823858849, | |
| "grad_norm": 2.693329966895918, | |
| "learning_rate": 9.96475389121629e-06, | |
| "loss": 0.8213, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.3440991603950865, | |
| "grad_norm": 2.8636639283082683, | |
| "learning_rate": 9.963983760121927e-06, | |
| "loss": 0.8028, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.3478019384042879, | |
| "grad_norm": 2.265142729976952, | |
| "learning_rate": 9.963205336396789e-06, | |
| "loss": 0.8312, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.3515047164134892, | |
| "grad_norm": 2.346991317901365, | |
| "learning_rate": 9.962418621341275e-06, | |
| "loss": 0.8057, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.3552074944226906, | |
| "grad_norm": 2.9365358115995988, | |
| "learning_rate": 9.961623616269642e-06, | |
| "loss": 0.811, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.358910272431892, | |
| "grad_norm": 2.765547820893004, | |
| "learning_rate": 9.960820322509991e-06, | |
| "loss": 0.8176, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.3626130504410934, | |
| "grad_norm": 2.242257331515756, | |
| "learning_rate": 9.960008741404278e-06, | |
| "loss": 0.8093, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.3663158284502948, | |
| "grad_norm": 2.6584008858920396, | |
| "learning_rate": 9.959188874308289e-06, | |
| "loss": 0.8128, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.3700186064594964, | |
| "grad_norm": 2.426868840194363, | |
| "learning_rate": 9.958360722591666e-06, | |
| "loss": 0.8356, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.3737213844686975, | |
| "grad_norm": 2.372175307387934, | |
| "learning_rate": 9.957524287637887e-06, | |
| "loss": 0.7955, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.3774241624778991, | |
| "grad_norm": 3.223986523587691, | |
| "learning_rate": 9.956679570844263e-06, | |
| "loss": 0.8446, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.3811269404871005, | |
| "grad_norm": 2.9758674723626495, | |
| "learning_rate": 9.955826573621947e-06, | |
| "loss": 0.8258, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.3848297184963019, | |
| "grad_norm": 2.4986387404446972, | |
| "learning_rate": 9.954965297395917e-06, | |
| "loss": 0.838, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.3885324965055033, | |
| "grad_norm": 3.0506103639317383, | |
| "learning_rate": 9.954095743604993e-06, | |
| "loss": 0.8106, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.3922352745147046, | |
| "grad_norm": 2.596803194782613, | |
| "learning_rate": 9.953217913701809e-06, | |
| "loss": 0.8101, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.395938052523906, | |
| "grad_norm": 3.0268925428493034, | |
| "learning_rate": 9.952331809152837e-06, | |
| "loss": 0.7984, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.3996408305331074, | |
| "grad_norm": 2.811749288978374, | |
| "learning_rate": 9.951437431438368e-06, | |
| "loss": 0.7956, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.403343608542309, | |
| "grad_norm": 2.5791340034648673, | |
| "learning_rate": 9.95053478205251e-06, | |
| "loss": 0.807, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.4070463865515102, | |
| "grad_norm": 2.73932100840186, | |
| "learning_rate": 9.949623862503194e-06, | |
| "loss": 0.8045, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.4107491645607118, | |
| "grad_norm": 2.4120952001387836, | |
| "learning_rate": 9.948704674312166e-06, | |
| "loss": 0.8062, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.4144519425699131, | |
| "grad_norm": 2.876732239954283, | |
| "learning_rate": 9.947777219014985e-06, | |
| "loss": 0.8153, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.4181547205791145, | |
| "grad_norm": 2.648889760862942, | |
| "learning_rate": 9.94684149816102e-06, | |
| "loss": 0.7769, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.421857498588316, | |
| "grad_norm": 2.3672035754478253, | |
| "learning_rate": 9.945897513313446e-06, | |
| "loss": 0.8248, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.4255602765975173, | |
| "grad_norm": 2.5897674265990966, | |
| "learning_rate": 9.944945266049249e-06, | |
| "loss": 0.8168, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.4292630546067187, | |
| "grad_norm": 2.3627727017427986, | |
| "learning_rate": 9.943984757959214e-06, | |
| "loss": 0.8061, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.43296583261592, | |
| "grad_norm": 2.75793352812743, | |
| "learning_rate": 9.943015990647928e-06, | |
| "loss": 0.8406, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.4366686106251216, | |
| "grad_norm": 2.93848471854443, | |
| "learning_rate": 9.942038965733772e-06, | |
| "loss": 0.8093, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.4403713886343228, | |
| "grad_norm": 2.649269836710229, | |
| "learning_rate": 9.941053684848927e-06, | |
| "loss": 0.8156, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.4440741666435244, | |
| "grad_norm": 2.8474414546277336, | |
| "learning_rate": 9.940060149639362e-06, | |
| "loss": 0.803, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.4477769446527258, | |
| "grad_norm": 2.48930011170331, | |
| "learning_rate": 9.939058361764835e-06, | |
| "loss": 0.8149, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.4514797226619272, | |
| "grad_norm": 3.0541359408620954, | |
| "learning_rate": 9.938048322898897e-06, | |
| "loss": 0.7905, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.4551825006711285, | |
| "grad_norm": 2.4906684578035634, | |
| "learning_rate": 9.937030034728875e-06, | |
| "loss": 0.7983, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.45888527868033, | |
| "grad_norm": 2.522647352158736, | |
| "learning_rate": 9.93600349895588e-06, | |
| "loss": 0.8257, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.4625880566895313, | |
| "grad_norm": 2.3593412219963636, | |
| "learning_rate": 9.934968717294801e-06, | |
| "loss": 0.8156, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.4662908346987327, | |
| "grad_norm": 2.557490073344118, | |
| "learning_rate": 9.933925691474306e-06, | |
| "loss": 0.8168, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.469993612707934, | |
| "grad_norm": 2.338179664285831, | |
| "learning_rate": 9.932874423236827e-06, | |
| "loss": 0.8037, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.4736963907171354, | |
| "grad_norm": 2.5845812397294106, | |
| "learning_rate": 9.931814914338574e-06, | |
| "loss": 0.8037, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.477399168726337, | |
| "grad_norm": 2.738120618908721, | |
| "learning_rate": 9.930747166549517e-06, | |
| "loss": 0.8248, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.4811019467355384, | |
| "grad_norm": 3.0001861261521077, | |
| "learning_rate": 9.929671181653393e-06, | |
| "loss": 0.8158, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.4848047247447398, | |
| "grad_norm": 2.8790789963198864, | |
| "learning_rate": 9.9285869614477e-06, | |
| "loss": 0.81, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.4885075027539412, | |
| "grad_norm": 2.3530303824789263, | |
| "learning_rate": 9.927494507743693e-06, | |
| "loss": 0.8065, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.4922102807631425, | |
| "grad_norm": 2.5001939120344563, | |
| "learning_rate": 9.926393822366378e-06, | |
| "loss": 0.7986, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.495913058772344, | |
| "grad_norm": 3.322122232875802, | |
| "learning_rate": 9.925284907154518e-06, | |
| "loss": 0.8018, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.4996158367815453, | |
| "grad_norm": 2.632512694974853, | |
| "learning_rate": 9.924167763960622e-06, | |
| "loss": 0.7926, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.503318614790747, | |
| "grad_norm": 3.298194101210266, | |
| "learning_rate": 9.923042394650944e-06, | |
| "loss": 0.8012, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.507021392799948, | |
| "grad_norm": 2.5336893450551714, | |
| "learning_rate": 9.921908801105478e-06, | |
| "loss": 0.7979, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.5107241708091497, | |
| "grad_norm": 2.805390982714785, | |
| "learning_rate": 9.920766985217964e-06, | |
| "loss": 0.8007, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.5144269488183508, | |
| "grad_norm": 2.6228261966166846, | |
| "learning_rate": 9.919616948895869e-06, | |
| "loss": 0.7925, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.5181297268275524, | |
| "grad_norm": 2.608871750206852, | |
| "learning_rate": 9.918458694060401e-06, | |
| "loss": 0.8165, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.5218325048367538, | |
| "grad_norm": 2.3331476004266802, | |
| "learning_rate": 9.917292222646494e-06, | |
| "loss": 0.812, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.5255352828459552, | |
| "grad_norm": 2.256474693417922, | |
| "learning_rate": 9.916117536602805e-06, | |
| "loss": 0.8252, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.5292380608551566, | |
| "grad_norm": 2.2698106482233444, | |
| "learning_rate": 9.914934637891717e-06, | |
| "loss": 0.8049, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.532940838864358, | |
| "grad_norm": 2.505148031562781, | |
| "learning_rate": 9.913743528489335e-06, | |
| "loss": 0.7945, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.5366436168735595, | |
| "grad_norm": 2.273130621446297, | |
| "learning_rate": 9.912544210385478e-06, | |
| "loss": 0.7592, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.5403463948827607, | |
| "grad_norm": 2.7980874710174746, | |
| "learning_rate": 9.911336685583678e-06, | |
| "loss": 0.788, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.5440491728919623, | |
| "grad_norm": 2.518652055633435, | |
| "learning_rate": 9.910120956101177e-06, | |
| "loss": 0.7985, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.5477519509011635, | |
| "grad_norm": 2.8908580237428727, | |
| "learning_rate": 9.908897023968923e-06, | |
| "loss": 0.8022, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.551454728910365, | |
| "grad_norm": 2.796915452230328, | |
| "learning_rate": 9.907664891231567e-06, | |
| "loss": 0.7891, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.5551575069195664, | |
| "grad_norm": 2.58199772952833, | |
| "learning_rate": 9.906424559947463e-06, | |
| "loss": 0.8127, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.5588602849287678, | |
| "grad_norm": 2.9876242988374795, | |
| "learning_rate": 9.905176032188657e-06, | |
| "loss": 0.8103, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.5625630629379692, | |
| "grad_norm": 2.5402919151901284, | |
| "learning_rate": 9.903919310040888e-06, | |
| "loss": 0.8088, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.5662658409471706, | |
| "grad_norm": 2.759850752080656, | |
| "learning_rate": 9.902654395603585e-06, | |
| "loss": 0.7802, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.569968618956372, | |
| "grad_norm": 2.303462256584801, | |
| "learning_rate": 9.901381290989866e-06, | |
| "loss": 0.814, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.5736713969655733, | |
| "grad_norm": 2.5606229953487007, | |
| "learning_rate": 9.900099998326524e-06, | |
| "loss": 0.8199, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.577374174974775, | |
| "grad_norm": 2.429988282841457, | |
| "learning_rate": 9.898810519754038e-06, | |
| "loss": 0.8119, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.581076952983976, | |
| "grad_norm": 2.5219967389765823, | |
| "learning_rate": 9.897512857426559e-06, | |
| "loss": 0.8047, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.5847797309931777, | |
| "grad_norm": 1.9311972103887236, | |
| "learning_rate": 9.896207013511906e-06, | |
| "loss": 0.7986, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.588482509002379, | |
| "grad_norm": 2.234707722695266, | |
| "learning_rate": 9.894892990191572e-06, | |
| "loss": 0.8208, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.5921852870115805, | |
| "grad_norm": 2.52585572604136, | |
| "learning_rate": 9.89357078966071e-06, | |
| "loss": 0.8055, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.5958880650207818, | |
| "grad_norm": 2.4607414449083564, | |
| "learning_rate": 9.892240414128134e-06, | |
| "loss": 0.814, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.5995908430299832, | |
| "grad_norm": 2.397978043007156, | |
| "learning_rate": 9.890901865816318e-06, | |
| "loss": 0.7858, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.6032936210391846, | |
| "grad_norm": 2.6825010522588464, | |
| "learning_rate": 9.889555146961386e-06, | |
| "loss": 0.7643, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.606996399048386, | |
| "grad_norm": 2.296924382807737, | |
| "learning_rate": 9.888200259813112e-06, | |
| "loss": 0.774, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.6106991770575876, | |
| "grad_norm": 3.086030898842717, | |
| "learning_rate": 9.886837206634913e-06, | |
| "loss": 0.7903, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.6144019550667887, | |
| "grad_norm": 2.1770814434757852, | |
| "learning_rate": 9.885465989703855e-06, | |
| "loss": 0.7992, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.6181047330759903, | |
| "grad_norm": 2.1173853625857677, | |
| "learning_rate": 9.884086611310636e-06, | |
| "loss": 0.7562, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.6218075110851915, | |
| "grad_norm": 2.6347540026530383, | |
| "learning_rate": 9.88269907375959e-06, | |
| "loss": 0.7812, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.625510289094393, | |
| "grad_norm": 2.3677734896044367, | |
| "learning_rate": 9.881303379368679e-06, | |
| "loss": 0.7949, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.6292130671035945, | |
| "grad_norm": 1.9541227462181452, | |
| "learning_rate": 9.879899530469495e-06, | |
| "loss": 0.8014, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.6329158451127959, | |
| "grad_norm": 2.250577456663235, | |
| "learning_rate": 9.878487529407252e-06, | |
| "loss": 0.789, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.6366186231219972, | |
| "grad_norm": 2.332159230411907, | |
| "learning_rate": 9.877067378540783e-06, | |
| "loss": 0.7583, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.6403214011311986, | |
| "grad_norm": 2.620339688070587, | |
| "learning_rate": 9.875639080242532e-06, | |
| "loss": 0.7609, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.6440241791404002, | |
| "grad_norm": 2.281843212752168, | |
| "learning_rate": 9.874202636898557e-06, | |
| "loss": 0.7923, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.6477269571496014, | |
| "grad_norm": 2.7756614740728756, | |
| "learning_rate": 9.872758050908525e-06, | |
| "loss": 0.8133, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.651429735158803, | |
| "grad_norm": 2.0566706027251933, | |
| "learning_rate": 9.871305324685698e-06, | |
| "loss": 0.7771, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.6551325131680041, | |
| "grad_norm": 2.7157875508307203, | |
| "learning_rate": 9.869844460656946e-06, | |
| "loss": 0.7887, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.6588352911772057, | |
| "grad_norm": 2.3909529963011225, | |
| "learning_rate": 9.868375461262729e-06, | |
| "loss": 0.786, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.6625380691864071, | |
| "grad_norm": 3.0348798043450107, | |
| "learning_rate": 9.866898328957097e-06, | |
| "loss": 0.7658, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.6662408471956085, | |
| "grad_norm": 2.6335015906277564, | |
| "learning_rate": 9.865413066207686e-06, | |
| "loss": 0.7995, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.6699436252048099, | |
| "grad_norm": 2.385629891283996, | |
| "learning_rate": 9.863919675495718e-06, | |
| "loss": 0.7915, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.6736464032140113, | |
| "grad_norm": 2.9226049040665196, | |
| "learning_rate": 9.862418159315994e-06, | |
| "loss": 0.7846, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.6773491812232129, | |
| "grad_norm": 2.0898569179597546, | |
| "learning_rate": 9.860908520176881e-06, | |
| "loss": 0.7798, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.681051959232414, | |
| "grad_norm": 2.486543038672127, | |
| "learning_rate": 9.859390760600323e-06, | |
| "loss": 0.788, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.6847547372416156, | |
| "grad_norm": 2.150826890053404, | |
| "learning_rate": 9.857864883121829e-06, | |
| "loss": 0.8, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.6884575152508168, | |
| "grad_norm": 2.0064655253486494, | |
| "learning_rate": 9.856330890290467e-06, | |
| "loss": 0.7893, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.6921602932600184, | |
| "grad_norm": 1.974144415250403, | |
| "learning_rate": 9.854788784668862e-06, | |
| "loss": 0.8071, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.6958630712692198, | |
| "grad_norm": 2.8759598688034553, | |
| "learning_rate": 9.853238568833198e-06, | |
| "loss": 0.795, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.6995658492784211, | |
| "grad_norm": 2.4899159117702325, | |
| "learning_rate": 9.851680245373201e-06, | |
| "loss": 0.7933, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.7032686272876225, | |
| "grad_norm": 2.4900169396878535, | |
| "learning_rate": 9.85011381689214e-06, | |
| "loss": 0.7734, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.706971405296824, | |
| "grad_norm": 2.422580198248974, | |
| "learning_rate": 9.848539286006832e-06, | |
| "loss": 0.7951, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.7106741833060255, | |
| "grad_norm": 2.8907724621020634, | |
| "learning_rate": 9.846956655347621e-06, | |
| "loss": 0.7905, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.7143769613152267, | |
| "grad_norm": 2.013474316995975, | |
| "learning_rate": 9.845365927558387e-06, | |
| "loss": 0.8006, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.7180797393244283, | |
| "grad_norm": 2.1840681748691444, | |
| "learning_rate": 9.843767105296536e-06, | |
| "loss": 0.7635, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.7217825173336294, | |
| "grad_norm": 2.5617018354083934, | |
| "learning_rate": 9.842160191232996e-06, | |
| "loss": 0.7824, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.725485295342831, | |
| "grad_norm": 2.308519604503349, | |
| "learning_rate": 9.840545188052214e-06, | |
| "loss": 0.774, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.7291880733520324, | |
| "grad_norm": 2.2499525442223853, | |
| "learning_rate": 9.838922098452146e-06, | |
| "loss": 0.756, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.7328908513612338, | |
| "grad_norm": 2.3820412376888322, | |
| "learning_rate": 9.83729092514426e-06, | |
| "loss": 0.789, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.7365936293704352, | |
| "grad_norm": 2.5725331335845127, | |
| "learning_rate": 9.835651670853532e-06, | |
| "loss": 0.7854, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.7402964073796365, | |
| "grad_norm": 2.309343999229651, | |
| "learning_rate": 9.83400433831843e-06, | |
| "loss": 0.775, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.7439991853888381, | |
| "grad_norm": 2.5850280201791436, | |
| "learning_rate": 9.832348930290925e-06, | |
| "loss": 0.7802, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.7477019633980393, | |
| "grad_norm": 2.2679853588645105, | |
| "learning_rate": 9.830685449536472e-06, | |
| "loss": 0.7678, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.751404741407241, | |
| "grad_norm": 2.5086985240224635, | |
| "learning_rate": 9.829013898834014e-06, | |
| "loss": 0.7577, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.755107519416442, | |
| "grad_norm": 1.9266513380957035, | |
| "learning_rate": 9.827334280975978e-06, | |
| "loss": 0.7758, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.7588102974256437, | |
| "grad_norm": 2.2928770756948547, | |
| "learning_rate": 9.825646598768267e-06, | |
| "loss": 0.7637, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.762513075434845, | |
| "grad_norm": 2.5105321008988146, | |
| "learning_rate": 9.82395085503025e-06, | |
| "loss": 0.7832, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.7662158534440464, | |
| "grad_norm": 2.2393983265475867, | |
| "learning_rate": 9.822247052594775e-06, | |
| "loss": 0.7957, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.7699186314532478, | |
| "grad_norm": 2.2356517021326447, | |
| "learning_rate": 9.82053519430814e-06, | |
| "loss": 0.7786, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.7736214094624492, | |
| "grad_norm": 2.1349870329764467, | |
| "learning_rate": 9.818815283030107e-06, | |
| "loss": 0.7639, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.7773241874716506, | |
| "grad_norm": 2.207362188864924, | |
| "learning_rate": 9.817087321633891e-06, | |
| "loss": 0.7774, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.781026965480852, | |
| "grad_norm": 2.357569522929328, | |
| "learning_rate": 9.815351313006155e-06, | |
| "loss": 0.7903, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.7847297434900535, | |
| "grad_norm": 2.5604354649057512, | |
| "learning_rate": 9.813607260047007e-06, | |
| "loss": 0.7861, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.7884325214992547, | |
| "grad_norm": 2.3900699599516964, | |
| "learning_rate": 9.811855165669985e-06, | |
| "loss": 0.7883, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.7921352995084563, | |
| "grad_norm": 2.2498623353917093, | |
| "learning_rate": 9.810095032802075e-06, | |
| "loss": 0.7749, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.7958380775176577, | |
| "grad_norm": 2.523396892959795, | |
| "learning_rate": 9.808326864383679e-06, | |
| "loss": 0.773, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.799540855526859, | |
| "grad_norm": 2.2014201396256214, | |
| "learning_rate": 9.806550663368628e-06, | |
| "loss": 0.7784, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.8032436335360604, | |
| "grad_norm": 2.137068120876505, | |
| "learning_rate": 9.804766432724172e-06, | |
| "loss": 0.781, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.8069464115452618, | |
| "grad_norm": 2.1941678037156036, | |
| "learning_rate": 9.802974175430975e-06, | |
| "loss": 0.7813, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.8106491895544632, | |
| "grad_norm": 2.3496769370735775, | |
| "learning_rate": 9.801173894483111e-06, | |
| "loss": 0.7758, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.8143519675636646, | |
| "grad_norm": 2.6951877515683917, | |
| "learning_rate": 9.799365592888054e-06, | |
| "loss": 0.7753, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.8180547455728662, | |
| "grad_norm": 2.2987294149497504, | |
| "learning_rate": 9.797549273666682e-06, | |
| "loss": 0.7979, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.8217575235820673, | |
| "grad_norm": 2.1550107736300883, | |
| "learning_rate": 9.795724939853265e-06, | |
| "loss": 0.7547, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.825460301591269, | |
| "grad_norm": 2.1082078725588724, | |
| "learning_rate": 9.793892594495457e-06, | |
| "loss": 0.7481, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.82916307960047, | |
| "grad_norm": 2.535213902917527, | |
| "learning_rate": 9.792052240654304e-06, | |
| "loss": 0.7568, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.8328658576096717, | |
| "grad_norm": 2.3198113159972595, | |
| "learning_rate": 9.790203881404228e-06, | |
| "loss": 0.7834, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.836568635618873, | |
| "grad_norm": 2.519238802449602, | |
| "learning_rate": 9.78834751983302e-06, | |
| "loss": 0.7699, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.8402714136280744, | |
| "grad_norm": 2.3333401238694798, | |
| "learning_rate": 9.786483159041842e-06, | |
| "loss": 0.7834, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.8439741916372758, | |
| "grad_norm": 2.4205774253956385, | |
| "learning_rate": 9.784610802145222e-06, | |
| "loss": 0.7863, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.8476769696464772, | |
| "grad_norm": 2.178068900898099, | |
| "learning_rate": 9.782730452271046e-06, | |
| "loss": 0.7674, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.8513797476556788, | |
| "grad_norm": 2.6080625282619714, | |
| "learning_rate": 9.780842112560548e-06, | |
| "loss": 0.7642, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.85508252566488, | |
| "grad_norm": 2.843984991990864, | |
| "learning_rate": 9.778945786168308e-06, | |
| "loss": 0.7655, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.8587853036740816, | |
| "grad_norm": 2.2308315520099424, | |
| "learning_rate": 9.777041476262259e-06, | |
| "loss": 0.7656, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.8624880816832827, | |
| "grad_norm": 2.400873208112685, | |
| "learning_rate": 9.775129186023661e-06, | |
| "loss": 0.7588, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.8661908596924843, | |
| "grad_norm": 2.9815623334199604, | |
| "learning_rate": 9.773208918647111e-06, | |
| "loss": 0.7722, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.8698936377016857, | |
| "grad_norm": 2.6488046885793373, | |
| "learning_rate": 9.771280677340528e-06, | |
| "loss": 0.7813, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.873596415710887, | |
| "grad_norm": 2.521885076282361, | |
| "learning_rate": 9.769344465325153e-06, | |
| "loss": 0.7846, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.8772991937200885, | |
| "grad_norm": 1.8398874480846792, | |
| "learning_rate": 9.767400285835546e-06, | |
| "loss": 0.7799, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.8810019717292898, | |
| "grad_norm": 2.6273527775975114, | |
| "learning_rate": 9.765448142119575e-06, | |
| "loss": 0.7463, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.8847047497384914, | |
| "grad_norm": 2.4232551189720626, | |
| "learning_rate": 9.763488037438412e-06, | |
| "loss": 0.7763, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.8884075277476926, | |
| "grad_norm": 2.182119510957546, | |
| "learning_rate": 9.761519975066524e-06, | |
| "loss": 0.76, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.8921103057568942, | |
| "grad_norm": 2.4800365305029106, | |
| "learning_rate": 9.759543958291683e-06, | |
| "loss": 0.7878, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.8958130837660954, | |
| "grad_norm": 2.2209159681563055, | |
| "learning_rate": 9.757559990414941e-06, | |
| "loss": 0.7706, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.899515861775297, | |
| "grad_norm": 2.2456207460433175, | |
| "learning_rate": 9.755568074750635e-06, | |
| "loss": 0.7533, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.9032186397844983, | |
| "grad_norm": 2.216946549826359, | |
| "learning_rate": 9.753568214626375e-06, | |
| "loss": 0.7651, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.9069214177936997, | |
| "grad_norm": 2.1353696650613556, | |
| "learning_rate": 9.751560413383051e-06, | |
| "loss": 0.7451, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.910624195802901, | |
| "grad_norm": 2.2188964222997227, | |
| "learning_rate": 9.749544674374814e-06, | |
| "loss": 0.771, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.9143269738121025, | |
| "grad_norm": 2.6602884956835373, | |
| "learning_rate": 9.747521000969074e-06, | |
| "loss": 0.7652, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.918029751821304, | |
| "grad_norm": 2.366026652497562, | |
| "learning_rate": 9.745489396546499e-06, | |
| "loss": 0.7778, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.9217325298305052, | |
| "grad_norm": 2.4178576890485166, | |
| "learning_rate": 9.743449864501006e-06, | |
| "loss": 0.7682, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.9254353078397068, | |
| "grad_norm": 2.461073225865995, | |
| "learning_rate": 9.741402408239753e-06, | |
| "loss": 0.7379, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.929138085848908, | |
| "grad_norm": 2.3169585710466443, | |
| "learning_rate": 9.739347031183142e-06, | |
| "loss": 0.74, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.9328408638581096, | |
| "grad_norm": 2.480606692215648, | |
| "learning_rate": 9.737283736764798e-06, | |
| "loss": 0.7811, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.936543641867311, | |
| "grad_norm": 2.33302742635216, | |
| "learning_rate": 9.73521252843158e-06, | |
| "loss": 0.7853, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.9402464198765124, | |
| "grad_norm": 2.178325980295709, | |
| "learning_rate": 9.733133409643565e-06, | |
| "loss": 0.7678, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.9439491978857137, | |
| "grad_norm": 2.6202132000217, | |
| "learning_rate": 9.731046383874044e-06, | |
| "loss": 0.7496, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.9476519758949151, | |
| "grad_norm": 3.155040247361292, | |
| "learning_rate": 9.728951454609517e-06, | |
| "loss": 0.7728, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.9513547539041167, | |
| "grad_norm": 2.115338674943332, | |
| "learning_rate": 9.726848625349691e-06, | |
| "loss": 0.7625, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.9550575319133179, | |
| "grad_norm": 2.203447982138841, | |
| "learning_rate": 9.724737899607466e-06, | |
| "loss": 0.7693, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.9587603099225195, | |
| "grad_norm": 2.314534740326119, | |
| "learning_rate": 9.722619280908934e-06, | |
| "loss": 0.7628, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.9624630879317206, | |
| "grad_norm": 2.1956336825914793, | |
| "learning_rate": 9.720492772793375e-06, | |
| "loss": 0.7636, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.9661658659409222, | |
| "grad_norm": 2.237936138713292, | |
| "learning_rate": 9.718358378813248e-06, | |
| "loss": 0.7559, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.9698686439501236, | |
| "grad_norm": 2.166260133162057, | |
| "learning_rate": 9.716216102534186e-06, | |
| "loss": 0.7619, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.973571421959325, | |
| "grad_norm": 2.4562700667901933, | |
| "learning_rate": 9.714065947534987e-06, | |
| "loss": 0.7596, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.9772741999685264, | |
| "grad_norm": 2.0820216516365027, | |
| "learning_rate": 9.711907917407614e-06, | |
| "loss": 0.7526, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.9809769779777278, | |
| "grad_norm": 2.3755910874830657, | |
| "learning_rate": 9.709742015757187e-06, | |
| "loss": 0.7553, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.9846797559869291, | |
| "grad_norm": 2.369452864927645, | |
| "learning_rate": 9.707568246201972e-06, | |
| "loss": 0.753, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.9883825339961305, | |
| "grad_norm": 2.3410608706416762, | |
| "learning_rate": 9.70538661237338e-06, | |
| "loss": 0.7787, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.9920853120053321, | |
| "grad_norm": 2.087568589173381, | |
| "learning_rate": 9.70319711791596e-06, | |
| "loss": 0.7586, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.9957880900145333, | |
| "grad_norm": 2.6786991173682373, | |
| "learning_rate": 9.700999766487395e-06, | |
| "loss": 0.7465, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.9994908680237349, | |
| "grad_norm": 3.03880633662284, | |
| "learning_rate": 9.698794561758493e-06, | |
| "loss": 0.7403, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.003184389087913, | |
| "grad_norm": 2.5868756494206497, | |
| "learning_rate": 9.696581507413174e-06, | |
| "loss": 0.6992, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 2.0068871670971147, | |
| "grad_norm": 2.0423484305586994, | |
| "learning_rate": 9.694360607148484e-06, | |
| "loss": 0.6838, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 2.010589945106316, | |
| "grad_norm": 2.313387880714559, | |
| "learning_rate": 9.692131864674563e-06, | |
| "loss": 0.6912, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 2.0142927231155174, | |
| "grad_norm": 2.274235624804895, | |
| "learning_rate": 9.689895283714663e-06, | |
| "loss": 0.6854, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 2.017995501124719, | |
| "grad_norm": 2.119309140091523, | |
| "learning_rate": 9.687650868005124e-06, | |
| "loss": 0.6786, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.02169827913392, | |
| "grad_norm": 2.3759711399354413, | |
| "learning_rate": 9.685398621295377e-06, | |
| "loss": 0.6841, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 2.025401057143122, | |
| "grad_norm": 2.652183252752739, | |
| "learning_rate": 9.683138547347933e-06, | |
| "loss": 0.6938, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 2.029103835152323, | |
| "grad_norm": 2.497195487638967, | |
| "learning_rate": 9.68087064993838e-06, | |
| "loss": 0.6834, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 2.0328066131615246, | |
| "grad_norm": 2.0788434226359174, | |
| "learning_rate": 9.678594932855377e-06, | |
| "loss": 0.681, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 2.0365093911707257, | |
| "grad_norm": 2.557154847367296, | |
| "learning_rate": 9.676311399900644e-06, | |
| "loss": 0.6956, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.0402121691799273, | |
| "grad_norm": 2.5976913602453595, | |
| "learning_rate": 9.674020054888962e-06, | |
| "loss": 0.673, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 2.0439149471891285, | |
| "grad_norm": 2.1397702042236206, | |
| "learning_rate": 9.671720901648157e-06, | |
| "loss": 0.6939, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 2.04761772519833, | |
| "grad_norm": 2.384247384660651, | |
| "learning_rate": 9.669413944019099e-06, | |
| "loss": 0.6757, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 2.0513205032075312, | |
| "grad_norm": 2.1863152443770786, | |
| "learning_rate": 9.667099185855703e-06, | |
| "loss": 0.6968, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 2.055023281216733, | |
| "grad_norm": 4.405956537339234, | |
| "learning_rate": 9.664776631024908e-06, | |
| "loss": 0.683, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.0587260592259344, | |
| "grad_norm": 2.049043630631007, | |
| "learning_rate": 9.662446283406682e-06, | |
| "loss": 0.6914, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 2.0624288372351356, | |
| "grad_norm": 2.5664972711721625, | |
| "learning_rate": 9.660108146894007e-06, | |
| "loss": 0.6915, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 2.066131615244337, | |
| "grad_norm": 2.005386472768463, | |
| "learning_rate": 9.65776222539288e-06, | |
| "loss": 0.6598, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 2.0698343932535384, | |
| "grad_norm": 2.449106381543406, | |
| "learning_rate": 9.655408522822306e-06, | |
| "loss": 0.66, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 2.07353717126274, | |
| "grad_norm": 3.0381314839729177, | |
| "learning_rate": 9.653047043114281e-06, | |
| "loss": 0.6685, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.077239949271941, | |
| "grad_norm": 2.412440777034649, | |
| "learning_rate": 9.650677790213799e-06, | |
| "loss": 0.666, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 2.0809427272811427, | |
| "grad_norm": 2.3635812683682222, | |
| "learning_rate": 9.64830076807884e-06, | |
| "loss": 0.6719, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 2.084645505290344, | |
| "grad_norm": 2.337252826890419, | |
| "learning_rate": 9.64591598068036e-06, | |
| "loss": 0.6761, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 2.0883482832995455, | |
| "grad_norm": 2.1411084610979856, | |
| "learning_rate": 9.643523432002288e-06, | |
| "loss": 0.6699, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 2.092051061308747, | |
| "grad_norm": 2.396579312617091, | |
| "learning_rate": 9.64112312604152e-06, | |
| "loss": 0.6811, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.0957538393179482, | |
| "grad_norm": 1.991014001057909, | |
| "learning_rate": 9.638715066807908e-06, | |
| "loss": 0.6921, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 2.09945661732715, | |
| "grad_norm": 1.8170099574989464, | |
| "learning_rate": 9.636299258324263e-06, | |
| "loss": 0.6748, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 2.103159395336351, | |
| "grad_norm": 2.407538918588163, | |
| "learning_rate": 9.633875704626332e-06, | |
| "loss": 0.6556, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 2.1068621733455526, | |
| "grad_norm": 2.0009350084583186, | |
| "learning_rate": 9.63144440976281e-06, | |
| "loss": 0.6804, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 2.1105649513547537, | |
| "grad_norm": 2.318529373742503, | |
| "learning_rate": 9.629005377795318e-06, | |
| "loss": 0.6766, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.1142677293639554, | |
| "grad_norm": 2.901357640692631, | |
| "learning_rate": 9.626558612798404e-06, | |
| "loss": 0.6794, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 2.1179705073731565, | |
| "grad_norm": 2.154557247934532, | |
| "learning_rate": 9.624104118859535e-06, | |
| "loss": 0.6691, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 2.121673285382358, | |
| "grad_norm": 2.3903883257512577, | |
| "learning_rate": 9.62164190007909e-06, | |
| "loss": 0.6545, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 2.1253760633915597, | |
| "grad_norm": 2.1742676469308093, | |
| "learning_rate": 9.619171960570353e-06, | |
| "loss": 0.6894, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 2.129078841400761, | |
| "grad_norm": 2.654747582200517, | |
| "learning_rate": 9.616694304459504e-06, | |
| "loss": 0.6784, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.1327816194099625, | |
| "grad_norm": 2.402727194182496, | |
| "learning_rate": 9.614208935885615e-06, | |
| "loss": 0.6724, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 2.1364843974191636, | |
| "grad_norm": 2.2360152593662743, | |
| "learning_rate": 9.611715859000643e-06, | |
| "loss": 0.6622, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 2.1401871754283652, | |
| "grad_norm": 2.591011720239606, | |
| "learning_rate": 9.609215077969422e-06, | |
| "loss": 0.6981, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 2.1438899534375664, | |
| "grad_norm": 2.059137036300649, | |
| "learning_rate": 9.606706596969655e-06, | |
| "loss": 0.6665, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 2.147592731446768, | |
| "grad_norm": 2.5471008394463768, | |
| "learning_rate": 9.604190420191908e-06, | |
| "loss": 0.6725, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.151295509455969, | |
| "grad_norm": 2.3973486591141504, | |
| "learning_rate": 9.601666551839606e-06, | |
| "loss": 0.6855, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 2.1549982874651707, | |
| "grad_norm": 2.6375773664347286, | |
| "learning_rate": 9.599134996129022e-06, | |
| "loss": 0.6826, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 2.1587010654743723, | |
| "grad_norm": 2.332457102848655, | |
| "learning_rate": 9.596595757289268e-06, | |
| "loss": 0.6814, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 2.1624038434835735, | |
| "grad_norm": 2.4870013874361745, | |
| "learning_rate": 9.594048839562298e-06, | |
| "loss": 0.6792, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 2.166106621492775, | |
| "grad_norm": 2.3255182488507034, | |
| "learning_rate": 9.591494247202886e-06, | |
| "loss": 0.6954, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.1698093995019763, | |
| "grad_norm": 2.83057658696704, | |
| "learning_rate": 9.588931984478633e-06, | |
| "loss": 0.6914, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 2.173512177511178, | |
| "grad_norm": 2.354736029056531, | |
| "learning_rate": 9.58636205566995e-06, | |
| "loss": 0.6705, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 2.177214955520379, | |
| "grad_norm": 2.413081651067487, | |
| "learning_rate": 9.583784465070056e-06, | |
| "loss": 0.692, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 2.1809177335295806, | |
| "grad_norm": 1.9994020495850402, | |
| "learning_rate": 9.581199216984974e-06, | |
| "loss": 0.6789, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 2.184620511538782, | |
| "grad_norm": 2.336676864459619, | |
| "learning_rate": 9.57860631573351e-06, | |
| "loss": 0.6746, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.1883232895479834, | |
| "grad_norm": 1.9765951823994232, | |
| "learning_rate": 9.576005765647262e-06, | |
| "loss": 0.6841, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 2.192026067557185, | |
| "grad_norm": 2.6704707200624567, | |
| "learning_rate": 9.573397571070606e-06, | |
| "loss": 0.6606, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 2.195728845566386, | |
| "grad_norm": 2.7273193459509057, | |
| "learning_rate": 9.570781736360682e-06, | |
| "loss": 0.694, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 2.1994316235755877, | |
| "grad_norm": 2.4075381061640475, | |
| "learning_rate": 9.568158265887402e-06, | |
| "loss": 0.7058, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 2.203134401584789, | |
| "grad_norm": 2.7561200770361283, | |
| "learning_rate": 9.565527164033428e-06, | |
| "loss": 0.6635, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.2068371795939905, | |
| "grad_norm": 1.9296516325452246, | |
| "learning_rate": 9.562888435194171e-06, | |
| "loss": 0.6944, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 2.2105399576031917, | |
| "grad_norm": 2.3953208065104445, | |
| "learning_rate": 9.56024208377779e-06, | |
| "loss": 0.6511, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 2.2142427356123933, | |
| "grad_norm": 2.3935986970014507, | |
| "learning_rate": 9.557588114205166e-06, | |
| "loss": 0.684, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 2.2179455136215944, | |
| "grad_norm": 2.474410928209759, | |
| "learning_rate": 9.554926530909918e-06, | |
| "loss": 0.6944, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 2.221648291630796, | |
| "grad_norm": 2.300308607407991, | |
| "learning_rate": 9.552257338338377e-06, | |
| "loss": 0.6619, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.2253510696399976, | |
| "grad_norm": 2.2318245286663347, | |
| "learning_rate": 9.549580540949592e-06, | |
| "loss": 0.6737, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 2.229053847649199, | |
| "grad_norm": 3.050417412156981, | |
| "learning_rate": 9.546896143215307e-06, | |
| "loss": 0.6588, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 2.2327566256584004, | |
| "grad_norm": 2.096536346012167, | |
| "learning_rate": 9.544204149619973e-06, | |
| "loss": 0.6529, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 2.2364594036676015, | |
| "grad_norm": 2.81530679664561, | |
| "learning_rate": 9.541504564660726e-06, | |
| "loss": 0.6691, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 2.240162181676803, | |
| "grad_norm": 2.5250157137123606, | |
| "learning_rate": 9.53879739284738e-06, | |
| "loss": 0.6956, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 2.2438649596860043, | |
| "grad_norm": 2.1361057888741057, | |
| "learning_rate": 9.536082638702428e-06, | |
| "loss": 0.6622, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 2.247567737695206, | |
| "grad_norm": 2.418961423369967, | |
| "learning_rate": 9.533360306761032e-06, | |
| "loss": 0.6718, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 2.251270515704407, | |
| "grad_norm": 2.0162128525744984, | |
| "learning_rate": 9.530630401571006e-06, | |
| "loss": 0.6757, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 2.2549732937136087, | |
| "grad_norm": 1.7747207441276736, | |
| "learning_rate": 9.527892927692819e-06, | |
| "loss": 0.6895, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 2.2586760717228103, | |
| "grad_norm": 2.3868183988130007, | |
| "learning_rate": 9.525147889699587e-06, | |
| "loss": 0.6982, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 2.2623788497320114, | |
| "grad_norm": 2.370342809471196, | |
| "learning_rate": 9.52239529217706e-06, | |
| "loss": 0.6802, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 2.266081627741213, | |
| "grad_norm": 2.001291795407346, | |
| "learning_rate": 9.519635139723613e-06, | |
| "loss": 0.6836, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 2.269784405750414, | |
| "grad_norm": 2.4678585122972367, | |
| "learning_rate": 9.516867436950247e-06, | |
| "loss": 0.6709, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 2.273487183759616, | |
| "grad_norm": 2.6005957215451754, | |
| "learning_rate": 9.514092188480574e-06, | |
| "loss": 0.6818, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 2.277189961768817, | |
| "grad_norm": 2.920386936383934, | |
| "learning_rate": 9.511309398950815e-06, | |
| "loss": 0.7052, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 2.2808927397780185, | |
| "grad_norm": 2.5403436437373865, | |
| "learning_rate": 9.50851907300978e-06, | |
| "loss": 0.6955, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 2.2845955177872197, | |
| "grad_norm": 2.2492931914844347, | |
| "learning_rate": 9.505721215318879e-06, | |
| "loss": 0.6785, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 2.2882982957964213, | |
| "grad_norm": 2.6880292587251047, | |
| "learning_rate": 9.5029158305521e-06, | |
| "loss": 0.68, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 2.292001073805623, | |
| "grad_norm": 2.402997197868183, | |
| "learning_rate": 9.500102923396004e-06, | |
| "loss": 0.6927, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 2.295703851814824, | |
| "grad_norm": 1.9864142200066779, | |
| "learning_rate": 9.49728249854972e-06, | |
| "loss": 0.6919, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 2.2994066298240257, | |
| "grad_norm": 2.8456743709517163, | |
| "learning_rate": 9.494454560724938e-06, | |
| "loss": 0.6762, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 2.303109407833227, | |
| "grad_norm": 2.7748514053291484, | |
| "learning_rate": 9.491619114645892e-06, | |
| "loss": 0.6777, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 2.3068121858424284, | |
| "grad_norm": 2.559623553355795, | |
| "learning_rate": 9.48877616504937e-06, | |
| "loss": 0.6885, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 2.3105149638516296, | |
| "grad_norm": 2.1469384529226008, | |
| "learning_rate": 9.485925716684684e-06, | |
| "loss": 0.7014, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 2.314217741860831, | |
| "grad_norm": 2.6264777887477444, | |
| "learning_rate": 9.48306777431368e-06, | |
| "loss": 0.6778, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 2.3179205198700323, | |
| "grad_norm": 2.621989964486446, | |
| "learning_rate": 9.48020234271072e-06, | |
| "loss": 0.6805, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 2.321623297879234, | |
| "grad_norm": 2.272202713631239, | |
| "learning_rate": 9.47732942666268e-06, | |
| "loss": 0.6867, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 2.3253260758884355, | |
| "grad_norm": 1.9762020868593124, | |
| "learning_rate": 9.474449030968937e-06, | |
| "loss": 0.6854, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 2.3290288538976367, | |
| "grad_norm": 2.1226904406736984, | |
| "learning_rate": 9.471561160441363e-06, | |
| "loss": 0.6688, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 2.3327316319068383, | |
| "grad_norm": 2.316689916305218, | |
| "learning_rate": 9.468665819904317e-06, | |
| "loss": 0.6951, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 2.3364344099160395, | |
| "grad_norm": 2.316136479919069, | |
| "learning_rate": 9.465763014194638e-06, | |
| "loss": 0.6808, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 2.340137187925241, | |
| "grad_norm": 2.307220790631874, | |
| "learning_rate": 9.46285274816164e-06, | |
| "loss": 0.6869, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 2.343839965934442, | |
| "grad_norm": 2.213269812970463, | |
| "learning_rate": 9.459935026667089e-06, | |
| "loss": 0.6578, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 2.347542743943644, | |
| "grad_norm": 2.344279831358738, | |
| "learning_rate": 9.457009854585219e-06, | |
| "loss": 0.6971, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 2.351245521952845, | |
| "grad_norm": 2.0096880506357446, | |
| "learning_rate": 9.454077236802702e-06, | |
| "loss": 0.6828, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 2.3549482999620466, | |
| "grad_norm": 2.2548311729082253, | |
| "learning_rate": 9.45113717821865e-06, | |
| "loss": 0.6727, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 2.358651077971248, | |
| "grad_norm": 2.4341693614642996, | |
| "learning_rate": 9.448189683744608e-06, | |
| "loss": 0.6809, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 2.3623538559804493, | |
| "grad_norm": 2.419848393996797, | |
| "learning_rate": 9.445234758304537e-06, | |
| "loss": 0.6928, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 2.3660566339896505, | |
| "grad_norm": 2.7840357590734994, | |
| "learning_rate": 9.442272406834823e-06, | |
| "loss": 0.6698, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 2.369759411998852, | |
| "grad_norm": 2.3936132840359665, | |
| "learning_rate": 9.439302634284244e-06, | |
| "loss": 0.6741, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 2.3734621900080537, | |
| "grad_norm": 2.2628698955348923, | |
| "learning_rate": 9.436325445613988e-06, | |
| "loss": 0.6982, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 2.377164968017255, | |
| "grad_norm": 2.5913137677554645, | |
| "learning_rate": 9.43334084579762e-06, | |
| "loss": 0.6843, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 2.3808677460264565, | |
| "grad_norm": 2.5306835812838027, | |
| "learning_rate": 9.430348839821095e-06, | |
| "loss": 0.6931, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 2.3845705240356576, | |
| "grad_norm": 1.824497906863608, | |
| "learning_rate": 9.42734943268274e-06, | |
| "loss": 0.6784, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 2.388273302044859, | |
| "grad_norm": 2.031648470909946, | |
| "learning_rate": 9.424342629393238e-06, | |
| "loss": 0.6845, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 2.3919760800540604, | |
| "grad_norm": 2.5806566539882274, | |
| "learning_rate": 9.421328434975636e-06, | |
| "loss": 0.6893, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 2.395678858063262, | |
| "grad_norm": 2.1526340438291807, | |
| "learning_rate": 9.418306854465327e-06, | |
| "loss": 0.6973, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 2.399381636072463, | |
| "grad_norm": 2.4285535244597702, | |
| "learning_rate": 9.41527789291004e-06, | |
| "loss": 0.7019, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 2.4030844140816647, | |
| "grad_norm": 2.621188381463244, | |
| "learning_rate": 9.412241555369834e-06, | |
| "loss": 0.6653, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 2.4067871920908663, | |
| "grad_norm": 2.224098798333827, | |
| "learning_rate": 9.409197846917093e-06, | |
| "loss": 0.6725, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 2.4104899701000675, | |
| "grad_norm": 1.997533351532834, | |
| "learning_rate": 9.406146772636516e-06, | |
| "loss": 0.6812, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 2.414192748109269, | |
| "grad_norm": 2.19098514780732, | |
| "learning_rate": 9.403088337625099e-06, | |
| "loss": 0.6677, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 2.4178955261184703, | |
| "grad_norm": 2.1908878890803605, | |
| "learning_rate": 9.400022546992148e-06, | |
| "loss": 0.6813, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 2.421598304127672, | |
| "grad_norm": 2.324540384353367, | |
| "learning_rate": 9.396949405859239e-06, | |
| "loss": 0.6579, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 2.425301082136873, | |
| "grad_norm": 2.5054016122271374, | |
| "learning_rate": 9.393868919360244e-06, | |
| "loss": 0.6744, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 2.4290038601460746, | |
| "grad_norm": 2.582887067658994, | |
| "learning_rate": 9.390781092641301e-06, | |
| "loss": 0.6913, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 2.4327066381552758, | |
| "grad_norm": 2.2584713627681428, | |
| "learning_rate": 9.387685930860804e-06, | |
| "loss": 0.6645, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 2.4364094161644774, | |
| "grad_norm": 2.202586980967711, | |
| "learning_rate": 9.384583439189406e-06, | |
| "loss": 0.6599, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 2.440112194173679, | |
| "grad_norm": 2.0537705242407256, | |
| "learning_rate": 9.381473622810005e-06, | |
| "loss": 0.6524, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 2.44381497218288, | |
| "grad_norm": 2.446679586314843, | |
| "learning_rate": 9.378356486917736e-06, | |
| "loss": 0.6586, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 2.4475177501920817, | |
| "grad_norm": 2.3254324060908886, | |
| "learning_rate": 9.37523203671996e-06, | |
| "loss": 0.6716, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 2.451220528201283, | |
| "grad_norm": 2.402871716965202, | |
| "learning_rate": 9.372100277436253e-06, | |
| "loss": 0.6771, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 2.4549233062104845, | |
| "grad_norm": 2.4014908865791402, | |
| "learning_rate": 9.368961214298414e-06, | |
| "loss": 0.6892, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 2.4586260842196856, | |
| "grad_norm": 2.309859916718413, | |
| "learning_rate": 9.365814852550426e-06, | |
| "loss": 0.6725, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 2.4623288622288872, | |
| "grad_norm": 2.657756967242288, | |
| "learning_rate": 9.36266119744848e-06, | |
| "loss": 0.6835, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 2.4660316402380884, | |
| "grad_norm": 2.3060608989482327, | |
| "learning_rate": 9.35950025426094e-06, | |
| "loss": 0.6694, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 2.46973441824729, | |
| "grad_norm": 1.9200073351424498, | |
| "learning_rate": 9.356332028268356e-06, | |
| "loss": 0.6725, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 2.4734371962564916, | |
| "grad_norm": 1.7930879502348702, | |
| "learning_rate": 9.353156524763433e-06, | |
| "loss": 0.6674, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 2.4771399742656928, | |
| "grad_norm": 2.312137593139913, | |
| "learning_rate": 9.349973749051042e-06, | |
| "loss": 0.665, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 2.4808427522748944, | |
| "grad_norm": 2.7119648286693536, | |
| "learning_rate": 9.346783706448199e-06, | |
| "loss": 0.6925, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 2.4845455302840955, | |
| "grad_norm": 2.356555621714717, | |
| "learning_rate": 9.343586402284061e-06, | |
| "loss": 0.6774, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 2.488248308293297, | |
| "grad_norm": 3.4467386366257196, | |
| "learning_rate": 9.340381841899913e-06, | |
| "loss": 0.6907, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 2.4919510863024983, | |
| "grad_norm": 2.5874996764431, | |
| "learning_rate": 9.337170030649166e-06, | |
| "loss": 0.6808, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 2.4956538643117, | |
| "grad_norm": 2.4262991872836093, | |
| "learning_rate": 9.33395097389734e-06, | |
| "loss": 0.6714, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 2.499356642320901, | |
| "grad_norm": 2.2026220733741737, | |
| "learning_rate": 9.330724677022063e-06, | |
| "loss": 0.6798, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 2.5030594203301026, | |
| "grad_norm": 2.1788581179144395, | |
| "learning_rate": 9.327491145413057e-06, | |
| "loss": 0.6811, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 2.5067621983393042, | |
| "grad_norm": 2.275441203213566, | |
| "learning_rate": 9.324250384472127e-06, | |
| "loss": 0.6627, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 2.5104649763485054, | |
| "grad_norm": 2.283344949810879, | |
| "learning_rate": 9.32100239961316e-06, | |
| "loss": 0.6642, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 2.514167754357707, | |
| "grad_norm": 2.4267848884723167, | |
| "learning_rate": 9.317747196262105e-06, | |
| "loss": 0.6787, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 2.517870532366908, | |
| "grad_norm": 2.5953019278693965, | |
| "learning_rate": 9.314484779856977e-06, | |
| "loss": 0.6737, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 2.5215733103761098, | |
| "grad_norm": 2.036941282735882, | |
| "learning_rate": 9.311215155847834e-06, | |
| "loss": 0.6589, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 2.525276088385311, | |
| "grad_norm": 2.597845885761239, | |
| "learning_rate": 9.30793832969678e-06, | |
| "loss": 0.6717, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 2.5289788663945125, | |
| "grad_norm": 2.4622763848737774, | |
| "learning_rate": 9.304654306877946e-06, | |
| "loss": 0.6897, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 2.5326816444037137, | |
| "grad_norm": 2.2606318900396047, | |
| "learning_rate": 9.30136309287749e-06, | |
| "loss": 0.6811, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 2.5363844224129153, | |
| "grad_norm": 2.4860591476196423, | |
| "learning_rate": 9.298064693193581e-06, | |
| "loss": 0.6776, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 2.540087200422117, | |
| "grad_norm": 2.4829377853240837, | |
| "learning_rate": 9.29475911333639e-06, | |
| "loss": 0.7002, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 2.543789978431318, | |
| "grad_norm": 2.367231988606884, | |
| "learning_rate": 9.291446358828091e-06, | |
| "loss": 0.6675, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 2.5474927564405196, | |
| "grad_norm": 2.4582997910649484, | |
| "learning_rate": 9.288126435202831e-06, | |
| "loss": 0.6656, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 2.551195534449721, | |
| "grad_norm": 2.0143015209204185, | |
| "learning_rate": 9.284799348006743e-06, | |
| "loss": 0.6811, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 2.5548983124589224, | |
| "grad_norm": 2.9078889050531473, | |
| "learning_rate": 9.281465102797926e-06, | |
| "loss": 0.677, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 2.5586010904681236, | |
| "grad_norm": 2.2408419976033693, | |
| "learning_rate": 9.278123705146434e-06, | |
| "loss": 0.6884, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 2.562303868477325, | |
| "grad_norm": 2.626201962148744, | |
| "learning_rate": 9.27477516063427e-06, | |
| "loss": 0.6612, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 2.5660066464865263, | |
| "grad_norm": 2.1552325508134134, | |
| "learning_rate": 9.271419474855377e-06, | |
| "loss": 0.666, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 2.569709424495728, | |
| "grad_norm": 2.3227195866166768, | |
| "learning_rate": 9.268056653415632e-06, | |
| "loss": 0.6652, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 2.5734122025049295, | |
| "grad_norm": 2.753551170952296, | |
| "learning_rate": 9.264686701932825e-06, | |
| "loss": 0.6791, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 2.5771149805141307, | |
| "grad_norm": 2.2976640888247415, | |
| "learning_rate": 9.261309626036661e-06, | |
| "loss": 0.6705, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 2.5808177585233323, | |
| "grad_norm": 2.339779663329093, | |
| "learning_rate": 9.257925431368749e-06, | |
| "loss": 0.6669, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 2.5845205365325334, | |
| "grad_norm": 2.1019636120259695, | |
| "learning_rate": 9.254534123582585e-06, | |
| "loss": 0.6734, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 2.588223314541735, | |
| "grad_norm": 2.2977009075813744, | |
| "learning_rate": 9.251135708343555e-06, | |
| "loss": 0.6724, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 2.591926092550936, | |
| "grad_norm": 2.1726587191847386, | |
| "learning_rate": 9.247730191328908e-06, | |
| "loss": 0.686, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 2.595628870560138, | |
| "grad_norm": 2.2059169127907907, | |
| "learning_rate": 9.244317578227769e-06, | |
| "loss": 0.6829, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 2.599331648569339, | |
| "grad_norm": 2.2693764246927843, | |
| "learning_rate": 9.240897874741108e-06, | |
| "loss": 0.6706, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 2.6030344265785406, | |
| "grad_norm": 2.3773955458790192, | |
| "learning_rate": 9.237471086581744e-06, | |
| "loss": 0.6403, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 2.606737204587742, | |
| "grad_norm": 2.376894341944025, | |
| "learning_rate": 9.234037219474332e-06, | |
| "loss": 0.6556, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 2.6104399825969433, | |
| "grad_norm": 2.2164027402226756, | |
| "learning_rate": 9.230596279155353e-06, | |
| "loss": 0.6677, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 2.614142760606145, | |
| "grad_norm": 2.6004295191699596, | |
| "learning_rate": 9.227148271373102e-06, | |
| "loss": 0.6656, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 2.617845538615346, | |
| "grad_norm": 2.2941992876843145, | |
| "learning_rate": 9.223693201887677e-06, | |
| "loss": 0.671, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 2.6215483166245477, | |
| "grad_norm": 2.423457996166192, | |
| "learning_rate": 9.220231076470985e-06, | |
| "loss": 0.671, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 2.625251094633749, | |
| "grad_norm": 2.028224896189644, | |
| "learning_rate": 9.216761900906707e-06, | |
| "loss": 0.6633, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 2.6289538726429504, | |
| "grad_norm": 2.8043596176994234, | |
| "learning_rate": 9.213285680990311e-06, | |
| "loss": 0.6733, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 2.6326566506521516, | |
| "grad_norm": 2.631240157715802, | |
| "learning_rate": 9.209802422529028e-06, | |
| "loss": 0.6694, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 2.636359428661353, | |
| "grad_norm": 3.27526490574497, | |
| "learning_rate": 9.206312131341848e-06, | |
| "loss": 0.6736, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 2.640062206670555, | |
| "grad_norm": 2.2969788418244734, | |
| "learning_rate": 9.202814813259514e-06, | |
| "loss": 0.6685, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 2.643764984679756, | |
| "grad_norm": 2.0448759395992693, | |
| "learning_rate": 9.199310474124501e-06, | |
| "loss": 0.6734, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 2.647467762688957, | |
| "grad_norm": 2.194888247981071, | |
| "learning_rate": 9.195799119791018e-06, | |
| "loss": 0.6853, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 2.6511705406981587, | |
| "grad_norm": 2.3157294382898037, | |
| "learning_rate": 9.19228075612499e-06, | |
| "loss": 0.6936, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 2.6548733187073603, | |
| "grad_norm": 2.2600268640470516, | |
| "learning_rate": 9.188755389004056e-06, | |
| "loss": 0.6482, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 2.6585760967165615, | |
| "grad_norm": 2.7867346539584026, | |
| "learning_rate": 9.18522302431755e-06, | |
| "loss": 0.6736, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 2.662278874725763, | |
| "grad_norm": 2.4244549380103284, | |
| "learning_rate": 9.181683667966497e-06, | |
| "loss": 0.6612, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 2.6659816527349642, | |
| "grad_norm": 2.467628082595294, | |
| "learning_rate": 9.178137325863606e-06, | |
| "loss": 0.662, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 2.669684430744166, | |
| "grad_norm": 1.991818914003808, | |
| "learning_rate": 9.17458400393325e-06, | |
| "loss": 0.6546, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 2.6733872087533674, | |
| "grad_norm": 2.6084822187687893, | |
| "learning_rate": 9.171023708111467e-06, | |
| "loss": 0.6707, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 2.6770899867625686, | |
| "grad_norm": 2.4759759139487674, | |
| "learning_rate": 9.16745644434594e-06, | |
| "loss": 0.6589, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 2.6807927647717698, | |
| "grad_norm": 2.417557023636743, | |
| "learning_rate": 9.163882218595998e-06, | |
| "loss": 0.6692, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 2.6844955427809714, | |
| "grad_norm": 2.4757717167657303, | |
| "learning_rate": 9.160301036832601e-06, | |
| "loss": 0.6824, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 2.688198320790173, | |
| "grad_norm": 1.7864811103086602, | |
| "learning_rate": 9.156712905038324e-06, | |
| "loss": 0.6549, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 2.691901098799374, | |
| "grad_norm": 1.931145360031176, | |
| "learning_rate": 9.153117829207353e-06, | |
| "loss": 0.6707, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 2.6956038768085757, | |
| "grad_norm": 2.6583751811214515, | |
| "learning_rate": 9.149515815345477e-06, | |
| "loss": 0.6746, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 2.699306654817777, | |
| "grad_norm": 2.3434065726826874, | |
| "learning_rate": 9.14590686947008e-06, | |
| "loss": 0.6746, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 2.7030094328269785, | |
| "grad_norm": 2.1951946075529003, | |
| "learning_rate": 9.142290997610114e-06, | |
| "loss": 0.672, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 2.70671221083618, | |
| "grad_norm": 2.1266717085417715, | |
| "learning_rate": 9.138668205806116e-06, | |
| "loss": 0.6596, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 2.7104149888453812, | |
| "grad_norm": 2.052871241822731, | |
| "learning_rate": 9.135038500110169e-06, | |
| "loss": 0.6562, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 2.7141177668545824, | |
| "grad_norm": 2.3890278232506144, | |
| "learning_rate": 9.131401886585916e-06, | |
| "loss": 0.6791, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 2.717820544863784, | |
| "grad_norm": 2.339795856765528, | |
| "learning_rate": 9.127758371308537e-06, | |
| "loss": 0.6769, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 2.7215233228729856, | |
| "grad_norm": 2.0980772669298946, | |
| "learning_rate": 9.124107960364738e-06, | |
| "loss": 0.687, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 2.7252261008821868, | |
| "grad_norm": 1.9520671316508236, | |
| "learning_rate": 9.120450659852754e-06, | |
| "loss": 0.6619, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 2.7289288788913884, | |
| "grad_norm": 1.9489571927723024, | |
| "learning_rate": 9.116786475882318e-06, | |
| "loss": 0.6643, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 2.7326316569005895, | |
| "grad_norm": 2.1143535490363963, | |
| "learning_rate": 9.11311541457467e-06, | |
| "loss": 0.6647, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 2.736334434909791, | |
| "grad_norm": 2.4454265529124415, | |
| "learning_rate": 9.109437482062538e-06, | |
| "loss": 0.6791, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 2.7400372129189927, | |
| "grad_norm": 2.12417553054465, | |
| "learning_rate": 9.105752684490125e-06, | |
| "loss": 0.6751, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 2.743739990928194, | |
| "grad_norm": 3.3698294360651286, | |
| "learning_rate": 9.102061028013108e-06, | |
| "loss": 0.6805, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 2.747442768937395, | |
| "grad_norm": 2.6079682276880694, | |
| "learning_rate": 9.098362518798615e-06, | |
| "loss": 0.6542, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 2.7511455469465966, | |
| "grad_norm": 2.9285278794017167, | |
| "learning_rate": 9.094657163025228e-06, | |
| "loss": 0.6798, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 2.7548483249557982, | |
| "grad_norm": 2.029512121868359, | |
| "learning_rate": 9.090944966882968e-06, | |
| "loss": 0.6716, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 2.7585511029649994, | |
| "grad_norm": 2.37703823122831, | |
| "learning_rate": 9.087225936573275e-06, | |
| "loss": 0.6664, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 2.762253880974201, | |
| "grad_norm": 2.290740875061313, | |
| "learning_rate": 9.083500078309013e-06, | |
| "loss": 0.7054, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 2.765956658983402, | |
| "grad_norm": 1.9826452203518832, | |
| "learning_rate": 9.079767398314452e-06, | |
| "loss": 0.6574, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 2.7696594369926038, | |
| "grad_norm": 2.510390912417119, | |
| "learning_rate": 9.076027902825252e-06, | |
| "loss": 0.6573, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 2.7733622150018054, | |
| "grad_norm": 3.253767602420802, | |
| "learning_rate": 9.072281598088467e-06, | |
| "loss": 0.6565, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 2.7770649930110065, | |
| "grad_norm": 3.1743643654172278, | |
| "learning_rate": 9.068528490362524e-06, | |
| "loss": 0.6636, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 2.7807677710202077, | |
| "grad_norm": 2.9292198577340463, | |
| "learning_rate": 9.064768585917207e-06, | |
| "loss": 0.6763, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 2.7844705490294093, | |
| "grad_norm": 2.3225242842709766, | |
| "learning_rate": 9.061001891033666e-06, | |
| "loss": 0.6696, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 2.788173327038611, | |
| "grad_norm": 2.272648856356267, | |
| "learning_rate": 9.057228412004386e-06, | |
| "loss": 0.6585, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 2.791876105047812, | |
| "grad_norm": 2.708064532509065, | |
| "learning_rate": 9.053448155133192e-06, | |
| "loss": 0.6674, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 2.7955788830570136, | |
| "grad_norm": 2.0878561841156706, | |
| "learning_rate": 9.049661126735223e-06, | |
| "loss": 0.6523, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 2.799281661066215, | |
| "grad_norm": 2.0218162021372637, | |
| "learning_rate": 9.045867333136939e-06, | |
| "loss": 0.667, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 2.8029844390754164, | |
| "grad_norm": 1.9615749815202044, | |
| "learning_rate": 9.042066780676101e-06, | |
| "loss": 0.6644, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 2.806687217084618, | |
| "grad_norm": 2.458125241194594, | |
| "learning_rate": 9.038259475701756e-06, | |
| "loss": 0.6592, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 2.810389995093819, | |
| "grad_norm": 2.5321957606480887, | |
| "learning_rate": 9.034445424574232e-06, | |
| "loss": 0.6542, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 2.8140927731030203, | |
| "grad_norm": 2.305578502814208, | |
| "learning_rate": 9.030624633665131e-06, | |
| "loss": 0.6626, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 2.817795551112222, | |
| "grad_norm": 2.304093777477429, | |
| "learning_rate": 9.026797109357313e-06, | |
| "loss": 0.6585, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 2.8214983291214235, | |
| "grad_norm": 1.9063487829056964, | |
| "learning_rate": 9.022962858044881e-06, | |
| "loss": 0.6634, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 2.8252011071306247, | |
| "grad_norm": 2.4605756536089998, | |
| "learning_rate": 9.019121886133185e-06, | |
| "loss": 0.659, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 2.8289038851398263, | |
| "grad_norm": 2.908256690477109, | |
| "learning_rate": 9.015274200038798e-06, | |
| "loss": 0.6873, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 2.8326066631490274, | |
| "grad_norm": 2.195376131615668, | |
| "learning_rate": 9.011419806189503e-06, | |
| "loss": 0.6786, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 2.836309441158229, | |
| "grad_norm": 2.4481520740229588, | |
| "learning_rate": 9.0075587110243e-06, | |
| "loss": 0.6586, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 2.8400122191674306, | |
| "grad_norm": 2.5436298766851024, | |
| "learning_rate": 9.003690920993378e-06, | |
| "loss": 0.6732, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 2.843714997176632, | |
| "grad_norm": 2.0233903955790664, | |
| "learning_rate": 8.999816442558112e-06, | |
| "loss": 0.6694, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 2.847417775185833, | |
| "grad_norm": 1.9592757597831238, | |
| "learning_rate": 8.995935282191044e-06, | |
| "loss": 0.642, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 2.8511205531950345, | |
| "grad_norm": 2.4999659621973676, | |
| "learning_rate": 8.992047446375887e-06, | |
| "loss": 0.6758, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 2.854823331204236, | |
| "grad_norm": 2.320920562047208, | |
| "learning_rate": 8.988152941607505e-06, | |
| "loss": 0.6686, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 2.8585261092134373, | |
| "grad_norm": 2.180371204577853, | |
| "learning_rate": 8.984251774391895e-06, | |
| "loss": 0.6572, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 2.862228887222639, | |
| "grad_norm": 2.548377630577026, | |
| "learning_rate": 8.980343951246193e-06, | |
| "loss": 0.6858, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 2.86593166523184, | |
| "grad_norm": 2.2916044435835023, | |
| "learning_rate": 8.976429478698651e-06, | |
| "loss": 0.6612, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 2.8696344432410417, | |
| "grad_norm": 2.137867387232337, | |
| "learning_rate": 8.972508363288627e-06, | |
| "loss": 0.656, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 2.8733372212502433, | |
| "grad_norm": 2.6319833480679713, | |
| "learning_rate": 8.968580611566578e-06, | |
| "loss": 0.6505, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 2.8770399992594444, | |
| "grad_norm": 2.1088025728984907, | |
| "learning_rate": 8.96464623009405e-06, | |
| "loss": 0.6667, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 2.8807427772686456, | |
| "grad_norm": 1.9521003147155882, | |
| "learning_rate": 8.960705225443657e-06, | |
| "loss": 0.6596, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 2.884445555277847, | |
| "grad_norm": 2.5972066347938294, | |
| "learning_rate": 8.956757604199085e-06, | |
| "loss": 0.6545, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 2.888148333287049, | |
| "grad_norm": 2.4786047868289964, | |
| "learning_rate": 8.952803372955073e-06, | |
| "loss": 0.6722, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 2.89185111129625, | |
| "grad_norm": 2.2514808731629112, | |
| "learning_rate": 8.948842538317395e-06, | |
| "loss": 0.6556, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 2.8955538893054515, | |
| "grad_norm": 2.365087481495297, | |
| "learning_rate": 8.944875106902864e-06, | |
| "loss": 0.6482, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 2.8992566673146527, | |
| "grad_norm": 2.452402390597274, | |
| "learning_rate": 8.94090108533931e-06, | |
| "loss": 0.6893, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 2.9029594453238543, | |
| "grad_norm": 2.1846111061646885, | |
| "learning_rate": 8.936920480265576e-06, | |
| "loss": 0.6565, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 2.9066622233330555, | |
| "grad_norm": 2.5440937876149907, | |
| "learning_rate": 8.932933298331496e-06, | |
| "loss": 0.6731, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 2.910365001342257, | |
| "grad_norm": 2.1791116047812125, | |
| "learning_rate": 8.928939546197897e-06, | |
| "loss": 0.6747, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 2.914067779351458, | |
| "grad_norm": 2.5381792077290934, | |
| "learning_rate": 8.92493923053658e-06, | |
| "loss": 0.6759, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 2.91777055736066, | |
| "grad_norm": 2.3236635792732137, | |
| "learning_rate": 8.920932358030309e-06, | |
| "loss": 0.6675, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 2.9214733353698614, | |
| "grad_norm": 1.9029464622582775, | |
| "learning_rate": 8.916918935372805e-06, | |
| "loss": 0.6634, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 2.9251761133790626, | |
| "grad_norm": 2.224078219093189, | |
| "learning_rate": 8.912898969268731e-06, | |
| "loss": 0.6546, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 2.928878891388264, | |
| "grad_norm": 2.9148804782966233, | |
| "learning_rate": 8.908872466433677e-06, | |
| "loss": 0.6549, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 2.9325816693974653, | |
| "grad_norm": 2.4717406257998773, | |
| "learning_rate": 8.904839433594158e-06, | |
| "loss": 0.6522, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 2.936284447406667, | |
| "grad_norm": 2.6821434461084896, | |
| "learning_rate": 8.900799877487595e-06, | |
| "loss": 0.669, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 2.939987225415868, | |
| "grad_norm": 2.5288488175630057, | |
| "learning_rate": 8.896753804862308e-06, | |
| "loss": 0.6675, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 2.9436900034250697, | |
| "grad_norm": 2.3118984656483748, | |
| "learning_rate": 8.892701222477503e-06, | |
| "loss": 0.6428, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 2.947392781434271, | |
| "grad_norm": 1.7707450134385863, | |
| "learning_rate": 8.888642137103258e-06, | |
| "loss": 0.6423, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 2.9510955594434725, | |
| "grad_norm": 2.7951973513737016, | |
| "learning_rate": 8.884576555520521e-06, | |
| "loss": 0.6666, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 2.954798337452674, | |
| "grad_norm": 1.9441758598215642, | |
| "learning_rate": 8.880504484521084e-06, | |
| "loss": 0.6911, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 2.958501115461875, | |
| "grad_norm": 2.311415822913053, | |
| "learning_rate": 8.876425930907587e-06, | |
| "loss": 0.69, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 2.962203893471077, | |
| "grad_norm": 1.962196622233137, | |
| "learning_rate": 8.872340901493496e-06, | |
| "loss": 0.6991, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 2.965906671480278, | |
| "grad_norm": 2.2188989804402635, | |
| "learning_rate": 8.868249403103098e-06, | |
| "loss": 0.6512, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 2.9696094494894796, | |
| "grad_norm": 2.0738562772495217, | |
| "learning_rate": 8.864151442571481e-06, | |
| "loss": 0.6673, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 2.9733122274986807, | |
| "grad_norm": 2.45682348863258, | |
| "learning_rate": 8.860047026744535e-06, | |
| "loss": 0.6488, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 2.9770150055078823, | |
| "grad_norm": 2.876210559752475, | |
| "learning_rate": 8.855936162478933e-06, | |
| "loss": 0.641, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 2.9807177835170835, | |
| "grad_norm": 2.113010077915775, | |
| "learning_rate": 8.851818856642116e-06, | |
| "loss": 0.6482, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 2.984420561526285, | |
| "grad_norm": 2.2593684990909297, | |
| "learning_rate": 8.84769511611229e-06, | |
| "loss": 0.6596, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 2.9881233395354867, | |
| "grad_norm": 2.213052710368658, | |
| "learning_rate": 8.843564947778408e-06, | |
| "loss": 0.6674, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 2.991826117544688, | |
| "grad_norm": 1.9824851077389378, | |
| "learning_rate": 8.839428358540165e-06, | |
| "loss": 0.6606, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 2.9955288955538895, | |
| "grad_norm": 1.8350785430581344, | |
| "learning_rate": 8.835285355307979e-06, | |
| "loss": 0.6625, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 2.9992316735630906, | |
| "grad_norm": 2.2196935514359537, | |
| "learning_rate": 8.831135945002982e-06, | |
| "loss": 0.6483, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 3.0029251946272693, | |
| "grad_norm": 2.180481700028787, | |
| "learning_rate": 8.826980134557012e-06, | |
| "loss": 0.5716, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 3.0066279726364704, | |
| "grad_norm": 2.3154128557009166, | |
| "learning_rate": 8.8228179309126e-06, | |
| "loss": 0.5747, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 3.010330750645672, | |
| "grad_norm": 2.5911631549986316, | |
| "learning_rate": 8.818649341022954e-06, | |
| "loss": 0.5708, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 3.014033528654873, | |
| "grad_norm": 2.172878251158029, | |
| "learning_rate": 8.81447437185195e-06, | |
| "loss": 0.586, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 3.0177363066640748, | |
| "grad_norm": 2.285708121202155, | |
| "learning_rate": 8.810293030374126e-06, | |
| "loss": 0.5279, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 3.021439084673276, | |
| "grad_norm": 1.8325067800290862, | |
| "learning_rate": 8.80610532357466e-06, | |
| "loss": 0.5743, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 3.0251418626824775, | |
| "grad_norm": 2.4724163520836617, | |
| "learning_rate": 8.801911258449367e-06, | |
| "loss": 0.5686, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 3.028844640691679, | |
| "grad_norm": 2.8204386478402657, | |
| "learning_rate": 8.797710842004683e-06, | |
| "loss": 0.5661, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 3.0325474187008803, | |
| "grad_norm": 2.1624621580723504, | |
| "learning_rate": 8.793504081257653e-06, | |
| "loss": 0.5609, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 3.036250196710082, | |
| "grad_norm": 1.9578194242090217, | |
| "learning_rate": 8.789290983235925e-06, | |
| "loss": 0.5557, | |
| "step": 82000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 270070, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2119412823359488.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
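
A minimal sketch of how a state file like the one above can be consumed: it loads the `log_history` array and plots training loss against step. The filename `trainer_state.json` and the use of matplotlib are assumptions for illustration — HuggingFace `Trainer` writes a file of this shape into each checkpoint directory, but nothing here is part of the original log itself.

```python
import json

# Load the trainer state; the path is an assumption -- HuggingFace
# Trainer saves this file inside each checkpoint-* directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries step, epoch, loss, learning_rate,
# and grad_norm; keep only entries that actually logged a loss.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

# Optional plot; matplotlib is assumed to be installed.
import matplotlib.pyplot as plt

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"loss over {state['global_step']} steps "
          f"({state['epoch']:.2f} epochs)")
plt.show()
```

For this log, the sketch would trace the loss falling from roughly 2.68 at step 100 to around 0.56 by step 82000, with the visible drop near step 81100 marking the start of epoch 3.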