{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0023518344308560675,
      "grad_norm": 116.89811794868847,
      "learning_rate": 2.1126760563380282e-08,
      "loss": 1.1517,
      "step": 10
    },
    {
      "epoch": 0.004703668861712135,
      "grad_norm": 277.9710155529788,
      "learning_rate": 4.460093896713615e-08,
      "loss": 1.2647,
      "step": 20
    },
    {
      "epoch": 0.0070555032925682035,
      "grad_norm": 136.25013023553325,
      "learning_rate": 6.807511737089202e-08,
      "loss": 1.1512,
      "step": 30
    },
    {
      "epoch": 0.00940733772342427,
      "grad_norm": 113.12348827889794,
      "learning_rate": 9.154929577464789e-08,
      "loss": 1.0798,
      "step": 40
    },
    {
      "epoch": 0.011759172154280339,
      "grad_norm": 116.51120422837609,
      "learning_rate": 1.1502347417840374e-07,
      "loss": 1.0682,
      "step": 50
    },
    {
      "epoch": 0.014111006585136407,
      "grad_norm": 158.25517802003571,
      "learning_rate": 1.384976525821596e-07,
      "loss": 1.0344,
      "step": 60
    },
    {
      "epoch": 0.016462841015992474,
      "grad_norm": 100.5101888275114,
      "learning_rate": 1.619718309859155e-07,
      "loss": 0.9328,
      "step": 70
    },
    {
      "epoch": 0.01881467544684854,
      "grad_norm": 56.20830713816404,
      "learning_rate": 1.8544600938967138e-07,
      "loss": 0.8777,
      "step": 80
    },
    {
      "epoch": 0.02116650987770461,
      "grad_norm": 240.15395280772944,
      "learning_rate": 2.089201877934272e-07,
      "loss": 0.9159,
      "step": 90
    },
    {
      "epoch": 0.023518344308560677,
      "grad_norm": 203.37555869337805,
      "learning_rate": 2.323943661971831e-07,
      "loss": 0.8656,
      "step": 100
    },
    {
      "epoch": 0.025870178739416744,
      "grad_norm": 118.0083364283108,
      "learning_rate": 2.5586854460093895e-07,
      "loss": 0.7863,
      "step": 110
    },
    {
      "epoch": 0.028222013170272814,
      "grad_norm": 79.77067789322896,
      "learning_rate": 2.7934272300469483e-07,
      "loss": 0.8134,
      "step": 120
    },
    {
      "epoch": 0.03057384760112888,
      "grad_norm": 267.7680667397846,
      "learning_rate": 3.0281690140845066e-07,
      "loss": 0.7945,
      "step": 130
    },
    {
      "epoch": 0.03292568203198495,
      "grad_norm": 189.00926278298175,
      "learning_rate": 3.2629107981220654e-07,
      "loss": 0.8096,
      "step": 140
    },
    {
      "epoch": 0.03527751646284102,
      "grad_norm": 57.70972405124874,
      "learning_rate": 3.497652582159624e-07,
      "loss": 0.7752,
      "step": 150
    },
    {
      "epoch": 0.03762935089369708,
      "grad_norm": 69.99759640153404,
      "learning_rate": 3.732394366197183e-07,
      "loss": 0.7083,
      "step": 160
    },
    {
      "epoch": 0.03998118532455315,
      "grad_norm": 8809.659775787271,
      "learning_rate": 3.967136150234742e-07,
      "loss": 0.748,
      "step": 170
    },
    {
      "epoch": 0.04233301975540922,
      "grad_norm": 71.05386775505833,
      "learning_rate": 4.2018779342723e-07,
      "loss": 0.7624,
      "step": 180
    },
    {
      "epoch": 0.044684854186265284,
      "grad_norm": 35.37716956890071,
      "learning_rate": 4.436619718309859e-07,
      "loss": 0.751,
      "step": 190
    },
    {
      "epoch": 0.047036688617121354,
      "grad_norm": 80.85891409276856,
      "learning_rate": 4.671361502347418e-07,
      "loss": 0.7739,
      "step": 200
    },
    {
      "epoch": 0.049388523047977424,
      "grad_norm": 45.92516752004636,
      "learning_rate": 4.906103286384976e-07,
      "loss": 0.7545,
      "step": 210
    },
    {
      "epoch": 0.05174035747883349,
      "grad_norm": 68.60575847289506,
      "learning_rate": 5.140845070422535e-07,
      "loss": 0.726,
      "step": 220
    },
    {
      "epoch": 0.05409219190968956,
      "grad_norm": 442.4046356251326,
      "learning_rate": 5.375586854460093e-07,
      "loss": 0.7692,
      "step": 230
    },
    {
      "epoch": 0.05644402634054563,
      "grad_norm": 148.22276704875333,
      "learning_rate": 5.610328638497653e-07,
      "loss": 0.7193,
      "step": 240
    },
    {
      "epoch": 0.05879586077140169,
      "grad_norm": 121.42536705157094,
      "learning_rate": 5.845070422535211e-07,
      "loss": 0.7235,
      "step": 250
    },
    {
      "epoch": 0.06114769520225776,
      "grad_norm": 57.2265048995909,
      "learning_rate": 6.079812206572769e-07,
      "loss": 0.749,
      "step": 260
    },
    {
      "epoch": 0.06349952963311382,
      "grad_norm": 142.6135442328223,
      "learning_rate": 6.314553990610329e-07,
      "loss": 0.677,
      "step": 270
    },
    {
      "epoch": 0.0658513640639699,
      "grad_norm": 66.8189535712452,
      "learning_rate": 6.549295774647887e-07,
      "loss": 0.7162,
      "step": 280
    },
    {
      "epoch": 0.06820319849482596,
      "grad_norm": 47.59798393227095,
      "learning_rate": 6.784037558685446e-07,
      "loss": 0.7246,
      "step": 290
    },
    {
      "epoch": 0.07055503292568203,
      "grad_norm": 201.28699484373632,
      "learning_rate": 7.018779342723005e-07,
      "loss": 0.6994,
      "step": 300
    },
    {
      "epoch": 0.0729068673565381,
      "grad_norm": 236.38896159577624,
      "learning_rate": 7.253521126760564e-07,
      "loss": 0.6451,
      "step": 310
    },
    {
      "epoch": 0.07525870178739416,
      "grad_norm": 162.15190996547886,
      "learning_rate": 7.488262910798122e-07,
      "loss": 0.6597,
      "step": 320
    },
    {
      "epoch": 0.07761053621825023,
      "grad_norm": 56.18228731138108,
      "learning_rate": 7.72300469483568e-07,
      "loss": 0.6914,
      "step": 330
    },
    {
      "epoch": 0.0799623706491063,
      "grad_norm": 324.865693829362,
      "learning_rate": 7.95774647887324e-07,
      "loss": 0.6758,
      "step": 340
    },
    {
      "epoch": 0.08231420507996237,
      "grad_norm": 137.83272350854858,
      "learning_rate": 8.192488262910797e-07,
      "loss": 0.6552,
      "step": 350
    },
    {
      "epoch": 0.08466603951081844,
      "grad_norm": 51.74661746915652,
      "learning_rate": 8.427230046948356e-07,
      "loss": 0.6257,
      "step": 360
    },
    {
      "epoch": 0.08701787394167451,
      "grad_norm": 94.85986008371302,
      "learning_rate": 8.661971830985915e-07,
      "loss": 0.6168,
      "step": 370
    },
    {
      "epoch": 0.08936970837253057,
      "grad_norm": 83.03153262845805,
      "learning_rate": 8.896713615023473e-07,
      "loss": 0.6406,
      "step": 380
    },
    {
      "epoch": 0.09172154280338664,
      "grad_norm": 295.36784847538064,
      "learning_rate": 9.131455399061032e-07,
      "loss": 0.6493,
      "step": 390
    },
    {
      "epoch": 0.09407337723424271,
      "grad_norm": 87.86096387558246,
      "learning_rate": 9.366197183098591e-07,
      "loss": 0.6093,
      "step": 400
    },
    {
      "epoch": 0.09642521166509878,
      "grad_norm": 55.21521035928988,
      "learning_rate": 9.60093896713615e-07,
      "loss": 0.681,
      "step": 410
    },
    {
      "epoch": 0.09877704609595485,
      "grad_norm": 149.75943970393467,
      "learning_rate": 9.83568075117371e-07,
      "loss": 0.6635,
      "step": 420
    },
    {
      "epoch": 0.10112888052681092,
      "grad_norm": 96.72986417277598,
      "learning_rate": 9.999984829771844e-07,
      "loss": 0.6782,
      "step": 430
    },
    {
      "epoch": 0.10348071495766697,
      "grad_norm": 42.184520996896104,
      "learning_rate": 9.999715139387692e-07,
      "loss": 0.6748,
      "step": 440
    },
    {
      "epoch": 0.10583254938852305,
      "grad_norm": 93.47313883617879,
      "learning_rate": 9.99910835375207e-07,
      "loss": 0.6883,
      "step": 450
    },
    {
      "epoch": 0.10818438381937912,
      "grad_norm": 51.181550134553135,
      "learning_rate": 9.99816451377622e-07,
      "loss": 0.6567,
      "step": 460
    },
    {
      "epoch": 0.11053621825023519,
      "grad_norm": 88.52012869382804,
      "learning_rate": 9.996883683096559e-07,
      "loss": 0.6579,
      "step": 470
    },
    {
      "epoch": 0.11288805268109126,
      "grad_norm": 56.48549654574926,
      "learning_rate": 9.995265948070397e-07,
      "loss": 0.6076,
      "step": 480
    },
    {
      "epoch": 0.11523988711194733,
      "grad_norm": 50.9131497233012,
      "learning_rate": 9.99331141777011e-07,
      "loss": 0.5999,
      "step": 490
    },
    {
      "epoch": 0.11759172154280338,
      "grad_norm": 70.73723550031531,
      "learning_rate": 9.991020223975778e-07,
      "loss": 0.63,
      "step": 500
    },
    {
      "epoch": 0.11994355597365945,
      "grad_norm": 88.64709002603719,
      "learning_rate": 9.988392521166315e-07,
      "loss": 0.6039,
      "step": 510
    },
    {
      "epoch": 0.12229539040451552,
      "grad_norm": 75.39690510928469,
      "learning_rate": 9.98542848650904e-07,
      "loss": 0.586,
      "step": 520
    },
    {
      "epoch": 0.12464722483537159,
      "grad_norm": 113.20042490187971,
      "learning_rate": 9.98212831984774e-07,
      "loss": 0.5865,
      "step": 530
    },
    {
      "epoch": 0.12699905926622765,
      "grad_norm": 94.68667178533609,
      "learning_rate": 9.978492243689197e-07,
      "loss": 0.6069,
      "step": 540
    },
    {
      "epoch": 0.12935089369708372,
      "grad_norm": 34.52540469666706,
      "learning_rate": 9.974520503188178e-07,
      "loss": 0.6206,
      "step": 550
    },
    {
      "epoch": 0.1317027281279398,
      "grad_norm": 117.14521192427702,
      "learning_rate": 9.970213366130908e-07,
      "loss": 0.6274,
      "step": 560
    },
    {
      "epoch": 0.13405456255879586,
      "grad_norm": 53.776543081710706,
      "learning_rate": 9.965571122917027e-07,
      "loss": 0.6426,
      "step": 570
    },
    {
      "epoch": 0.13640639698965193,
      "grad_norm": 124.09156480152814,
      "learning_rate": 9.960594086539992e-07,
      "loss": 0.6068,
      "step": 580
    },
    {
      "epoch": 0.138758231420508,
      "grad_norm": 58.91257394960272,
      "learning_rate": 9.95528259256599e-07,
      "loss": 0.6136,
      "step": 590
    },
    {
      "epoch": 0.14111006585136407,
      "grad_norm": 44.14364925959302,
      "learning_rate": 9.949636999111302e-07,
      "loss": 0.6182,
      "step": 600
    },
    {
      "epoch": 0.14346190028222014,
      "grad_norm": 102.41392308653452,
      "learning_rate": 9.94365768681816e-07,
      "loss": 0.5759,
      "step": 610
    },
    {
      "epoch": 0.1458137347130762,
      "grad_norm": 105.69613721621276,
      "learning_rate": 9.937345058829093e-07,
      "loss": 0.5786,
      "step": 620
    },
    {
      "epoch": 0.14816556914393228,
      "grad_norm": 38.98828708919228,
      "learning_rate": 9.930699540759728e-07,
      "loss": 0.6084,
      "step": 630
    },
    {
      "epoch": 0.15051740357478832,
      "grad_norm": 354.4940196727006,
      "learning_rate": 9.923721580670113e-07,
      "loss": 0.5689,
      "step": 640
    },
    {
      "epoch": 0.1528692380056444,
      "grad_norm": 153.95231269118221,
      "learning_rate": 9.916411649034491e-07,
      "loss": 0.5657,
      "step": 650
    },
    {
      "epoch": 0.15522107243650046,
      "grad_norm": 928.564170107045,
      "learning_rate": 9.908770238709592e-07,
      "loss": 0.6084,
      "step": 660
    },
    {
      "epoch": 0.15757290686735653,
      "grad_norm": 92.12341812343223,
      "learning_rate": 9.900797864901394e-07,
      "loss": 0.5847,
      "step": 670
    },
    {
      "epoch": 0.1599247412982126,
      "grad_norm": 50.21703372463601,
      "learning_rate": 9.892495065130394e-07,
      "loss": 0.6206,
      "step": 680
    },
    {
      "epoch": 0.16227657572906867,
      "grad_norm": 38.305107297666325,
      "learning_rate": 9.883862399195357e-07,
      "loss": 0.6206,
      "step": 690
    },
    {
      "epoch": 0.16462841015992474,
      "grad_norm": 105.72112815211388,
      "learning_rate": 9.874900449135582e-07,
      "loss": 0.5821,
      "step": 700
    },
    {
      "epoch": 0.1669802445907808,
      "grad_norm": 162.13055243234115,
      "learning_rate": 9.865609819191659e-07,
      "loss": 0.5742,
      "step": 710
    },
    {
      "epoch": 0.16933207902163688,
      "grad_norm": 58.70026610582205,
      "learning_rate": 9.855991135764718e-07,
      "loss": 0.547,
      "step": 720
    },
    {
      "epoch": 0.17168391345249295,
      "grad_norm": 100.43914638169913,
      "learning_rate": 9.846045047374215e-07,
      "loss": 0.5794,
      "step": 730
    },
    {
      "epoch": 0.17403574788334902,
      "grad_norm": 139.51085792167504,
      "learning_rate": 9.83577222461418e-07,
      "loss": 0.5835,
      "step": 740
    },
    {
      "epoch": 0.1763875823142051,
      "grad_norm": 91.55902670992894,
      "learning_rate": 9.825173360108034e-07,
      "loss": 0.568,
      "step": 750
    },
    {
      "epoch": 0.17873941674506114,
      "grad_norm": 49.09585389161329,
      "learning_rate": 9.814249168461868e-07,
      "loss": 0.5416,
      "step": 760
    },
    {
      "epoch": 0.1810912511759172,
      "grad_norm": 39.30524914731049,
      "learning_rate": 9.80300038621627e-07,
      "loss": 0.5974,
      "step": 770
    },
    {
      "epoch": 0.18344308560677328,
      "grad_norm": 437.03477978317704,
      "learning_rate": 9.79142777179666e-07,
      "loss": 0.614,
      "step": 780
    },
    {
      "epoch": 0.18579492003762935,
      "grad_norm": 113.50973293517667,
      "learning_rate": 9.779532105462173e-07,
      "loss": 0.5798,
      "step": 790
    },
    {
      "epoch": 0.18814675446848542,
      "grad_norm": 310.99758984616733,
      "learning_rate": 9.767314189253023e-07,
      "loss": 0.5609,
      "step": 800
    },
    {
      "epoch": 0.1904985888993415,
      "grad_norm": 78.52431684901823,
      "learning_rate": 9.754774846936455e-07,
      "loss": 0.582,
      "step": 810
    },
    {
      "epoch": 0.19285042333019756,
      "grad_norm": 34.79880147456029,
      "learning_rate": 9.74191492395118e-07,
      "loss": 0.5538,
      "step": 820
    },
    {
      "epoch": 0.19520225776105363,
      "grad_norm": 48.17636180244228,
      "learning_rate": 9.728735287350395e-07,
      "loss": 0.5686,
      "step": 830
    },
    {
      "epoch": 0.1975540921919097,
      "grad_norm": 222.11427267611163,
      "learning_rate": 9.715236825743306e-07,
      "loss": 0.5695,
      "step": 840
    },
    {
      "epoch": 0.19990592662276577,
      "grad_norm": 163.48188564313824,
      "learning_rate": 9.701420449235224e-07,
      "loss": 0.5993,
      "step": 850
    },
    {
      "epoch": 0.20225776105362184,
      "grad_norm": 180.8729345274767,
      "learning_rate": 9.687287089366208e-07,
      "loss": 0.5765,
      "step": 860
    },
    {
      "epoch": 0.20460959548447788,
      "grad_norm": 64.88971976462376,
      "learning_rate": 9.672837699048247e-07,
      "loss": 0.6076,
      "step": 870
    },
    {
      "epoch": 0.20696142991533395,
      "grad_norm": 45.66335791522097,
      "learning_rate": 9.65807325250101e-07,
      "loss": 0.5102,
      "step": 880
    },
    {
      "epoch": 0.20931326434619002,
      "grad_norm": 69.3940533519506,
      "learning_rate": 9.642994745186186e-07,
      "loss": 0.596,
      "step": 890
    },
    {
      "epoch": 0.2116650987770461,
      "grad_norm": 90.66599474951411,
      "learning_rate": 9.627603193740329e-07,
      "loss": 0.5539,
      "step": 900
    },
    {
      "epoch": 0.21401693320790216,
      "grad_norm": 203.88445976449455,
      "learning_rate": 9.611899635906345e-07,
      "loss": 0.5494,
      "step": 910
    },
    {
      "epoch": 0.21636876763875823,
      "grad_norm": 35.55143547858949,
      "learning_rate": 9.595885130463512e-07,
      "loss": 0.5821,
      "step": 920
    },
    {
      "epoch": 0.2187206020696143,
      "grad_norm": 135.08153342145144,
      "learning_rate": 9.579560757156092e-07,
      "loss": 0.5466,
      "step": 930
    },
    {
      "epoch": 0.22107243650047037,
      "grad_norm": 89.381610871162,
      "learning_rate": 9.562927616620534e-07,
      "loss": 0.532,
      "step": 940
    },
    {
      "epoch": 0.22342427093132644,
      "grad_norm": 178.58317872348456,
      "learning_rate": 9.545986830311271e-07,
      "loss": 0.6014,
      "step": 950
    },
    {
      "epoch": 0.2257761053621825,
      "grad_norm": 84.70137793347699,
      "learning_rate": 9.528739540425097e-07,
      "loss": 0.5751,
      "step": 960
    },
    {
      "epoch": 0.22812793979303858,
      "grad_norm": 26.339901114155346,
      "learning_rate": 9.511186909824171e-07,
      "loss": 0.5783,
      "step": 970
    },
    {
      "epoch": 0.23047977422389465,
      "grad_norm": 67.79506982367464,
      "learning_rate": 9.493330121957599e-07,
      "loss": 0.563,
      "step": 980
    },
    {
      "epoch": 0.2328316086547507,
      "grad_norm": 119.24074854076687,
      "learning_rate": 9.475170380781654e-07,
      "loss": 0.5572,
      "step": 990
    },
    {
      "epoch": 0.23518344308560676,
      "grad_norm": 146.1667351412439,
      "learning_rate": 9.456708910678595e-07,
      "loss": 0.5367,
      "step": 1000
    },
    {
      "epoch": 0.23753527751646283,
      "grad_norm": 88.88690460802079,
      "learning_rate": 9.437946956374117e-07,
      "loss": 0.5375,
      "step": 1010
    },
    {
      "epoch": 0.2398871119473189,
      "grad_norm": 81.31694958580168,
      "learning_rate": 9.41888578285343e-07,
      "loss": 0.5458,
      "step": 1020
    },
    {
      "epoch": 0.24223894637817497,
      "grad_norm": 45.5082728792024,
      "learning_rate": 9.399526675275968e-07,
      "loss": 0.5648,
      "step": 1030
    },
    {
      "epoch": 0.24459078080903104,
      "grad_norm": 54.62994929806069,
      "learning_rate": 9.379870938888743e-07,
      "loss": 0.5464,
      "step": 1040
    },
    {
      "epoch": 0.24694261523988711,
      "grad_norm": 95.25990550468921,
      "learning_rate": 9.359919898938336e-07,
      "loss": 0.5276,
      "step": 1050
    },
    {
      "epoch": 0.24929444967074318,
      "grad_norm": 56.86836827412844,
      "learning_rate": 9.33967490058155e-07,
      "loss": 0.5037,
      "step": 1060
    },
    {
      "epoch": 0.2516462841015992,
      "grad_norm": 263.1519863594519,
      "learning_rate": 9.319137308794712e-07,
      "loss": 0.5763,
      "step": 1070
    },
    {
      "epoch": 0.2539981185324553,
      "grad_norm": 48.975787835710975,
      "learning_rate": 9.298308508281645e-07,
      "loss": 0.5204,
      "step": 1080
    },
    {
      "epoch": 0.25634995296331137,
      "grad_norm": 618.5840505320624,
      "learning_rate": 9.277189903380308e-07,
      "loss": 0.5375,
      "step": 1090
    },
    {
      "epoch": 0.25870178739416744,
      "grad_norm": 113.884112676073,
      "learning_rate": 9.255782917968107e-07,
      "loss": 0.568,
      "step": 1100
    },
    {
      "epoch": 0.2610536218250235,
      "grad_norm": 385.97433675604555,
      "learning_rate": 9.234088995365898e-07,
      "loss": 0.5479,
      "step": 1110
    },
    {
      "epoch": 0.2634054562558796,
      "grad_norm": 235.00439426458524,
      "learning_rate": 9.212109598240669e-07,
      "loss": 0.5667,
      "step": 1120
    },
    {
      "epoch": 0.26575729068673565,
      "grad_norm": 526.3091500577647,
      "learning_rate": 9.189846208506931e-07,
      "loss": 0.519,
      "step": 1130
    },
    {
      "epoch": 0.2681091251175917,
      "grad_norm": 150.0750656097666,
      "learning_rate": 9.167300327226794e-07,
      "loss": 0.4965,
      "step": 1140
    },
    {
      "epoch": 0.2704609595484478,
      "grad_norm": 30.016558195777204,
      "learning_rate": 9.144473474508765e-07,
      "loss": 0.4756,
      "step": 1150
    },
    {
      "epoch": 0.27281279397930386,
      "grad_norm": 60.193732037518764,
      "learning_rate": 9.12136718940526e-07,
      "loss": 0.5377,
      "step": 1160
    },
    {
      "epoch": 0.27516462841015993,
      "grad_norm": 219.87409938307607,
      "learning_rate": 9.097983029808831e-07,
      "loss": 0.5231,
      "step": 1170
    },
    {
      "epoch": 0.277516462841016,
      "grad_norm": 691.5188548277733,
      "learning_rate": 9.074322572347135e-07,
      "loss": 0.5553,
      "step": 1180
    },
    {
      "epoch": 0.27986829727187207,
      "grad_norm": 58.14856175427267,
      "learning_rate": 9.050387412276628e-07,
      "loss": 0.5261,
      "step": 1190
    },
    {
      "epoch": 0.28222013170272814,
      "grad_norm": 72.18580518142099,
      "learning_rate": 9.026179163375012e-07,
      "loss": 0.5083,
      "step": 1200
    },
    {
      "epoch": 0.2845719661335842,
      "grad_norm": 48.319128442643155,
      "learning_rate": 9.001699457832425e-07,
      "loss": 0.5661,
      "step": 1210
    },
    {
      "epoch": 0.2869238005644403,
      "grad_norm": 85.74869777359726,
      "learning_rate": 8.976949946141399e-07,
      "loss": 0.5502,
      "step": 1220
    },
    {
      "epoch": 0.28927563499529635,
      "grad_norm": 27.470838511127315,
      "learning_rate": 8.951932296985576e-07,
      "loss": 0.5194,
      "step": 1230
    },
    {
      "epoch": 0.2916274694261524,
      "grad_norm": 103.72100326338919,
      "learning_rate": 8.926648197127202e-07,
      "loss": 0.5538,
      "step": 1240
    },
    {
      "epoch": 0.2939793038570085,
      "grad_norm": 37.395281219640474,
      "learning_rate": 8.901099351293397e-07,
      "loss": 0.5585,
      "step": 1250
    },
    {
      "epoch": 0.29633113828786456,
      "grad_norm": 76.92500519691636,
      "learning_rate": 8.875287482061225e-07,
      "loss": 0.5556,
      "step": 1260
    },
    {
      "epoch": 0.29868297271872063,
      "grad_norm": 115.83402029314064,
      "learning_rate": 8.849214329741542e-07,
      "loss": 0.5347,
      "step": 1270
    },
    {
      "epoch": 0.30103480714957664,
      "grad_norm": 26.861221309569398,
      "learning_rate": 8.822881652261671e-07,
      "loss": 0.5434,
      "step": 1280
    },
    {
      "epoch": 0.3033866415804327,
      "grad_norm": 47.06480089234017,
      "learning_rate": 8.796291225046866e-07,
      "loss": 0.515,
      "step": 1290
    },
    {
      "epoch": 0.3057384760112888,
      "grad_norm": 242.58390403927206,
      "learning_rate": 8.76944484090062e-07,
      "loss": 0.5204,
      "step": 1300
    },
    {
      "epoch": 0.30809031044214485,
      "grad_norm": 69.9924587472249,
      "learning_rate": 8.742344309883776e-07,
      "loss": 0.5266,
      "step": 1310
    },
    {
      "epoch": 0.3104421448730009,
      "grad_norm": 174.79009098016476,
      "learning_rate": 8.7149914591925e-07,
      "loss": 0.5269,
      "step": 1320
    },
    {
      "epoch": 0.312793979303857,
      "grad_norm": 51.84251807068747,
      "learning_rate": 8.687388133035074e-07,
      "loss": 0.513,
      "step": 1330
    },
    {
      "epoch": 0.31514581373471307,
      "grad_norm": 101.12451895232418,
      "learning_rate": 8.659536192507564e-07,
      "loss": 0.5578,
      "step": 1340
    },
    {
      "epoch": 0.31749764816556914,
      "grad_norm": 91.89776616663202,
      "learning_rate": 8.631437515468336e-07,
      "loss": 0.509,
      "step": 1350
    },
    {
      "epoch": 0.3198494825964252,
      "grad_norm": 363.4004724877564,
      "learning_rate": 8.603093996411444e-07,
      "loss": 0.5398,
      "step": 1360
    },
    {
      "epoch": 0.3222013170272813,
      "grad_norm": 66.77614323450521,
      "learning_rate": 8.574507546338895e-07,
      "loss": 0.4921,
      "step": 1370
    },
    {
      "epoch": 0.32455315145813735,
      "grad_norm": 49.08933240076176,
      "learning_rate": 8.545680092631815e-07,
      "loss": 0.5313,
      "step": 1380
    },
    {
      "epoch": 0.3269049858889934,
      "grad_norm": 49.554772352006694,
      "learning_rate": 8.516613578920488e-07,
      "loss": 0.447,
      "step": 1390
    },
    {
      "epoch": 0.3292568203198495,
      "grad_norm": 43.55060672795549,
      "learning_rate": 8.487309964953311e-07,
      "loss": 0.5278,
      "step": 1400
    },
    {
      "epoch": 0.33160865475070556,
      "grad_norm": 53.0380482362615,
      "learning_rate": 8.457771226464674e-07,
      "loss": 0.505,
      "step": 1410
    },
    {
      "epoch": 0.3339604891815616,
      "grad_norm": 79.64660023563455,
      "learning_rate": 8.427999355041735e-07,
      "loss": 0.5226,
      "step": 1420
    },
    {
      "epoch": 0.3363123236124177,
      "grad_norm": 55.95587949885615,
      "learning_rate": 8.397996357990153e-07,
      "loss": 0.5258,
      "step": 1430
    },
    {
      "epoch": 0.33866415804327377,
      "grad_norm": 411.2146929812851,
      "learning_rate": 8.367764258198744e-07,
      "loss": 0.5709,
      "step": 1440
    },
    {
      "epoch": 0.34101599247412984,
      "grad_norm": 43.734123642110056,
      "learning_rate": 8.337305094003091e-07,
      "loss": 0.5129,
      "step": 1450
    },
    {
      "epoch": 0.3433678269049859,
      "grad_norm": 227.72574989130484,
      "learning_rate": 8.306620919048115e-07,
      "loss": 0.5365,
      "step": 1460
    },
    {
      "epoch": 0.345719661335842,
      "grad_norm": 230.09797130940922,
      "learning_rate": 8.275713802149622e-07,
      "loss": 0.4906,
      "step": 1470
    },
    {
      "epoch": 0.34807149576669805,
      "grad_norm": 54.19875482542778,
      "learning_rate": 8.244585827154795e-07,
      "loss": 0.499,
      "step": 1480
    },
    {
      "epoch": 0.3504233301975541,
      "grad_norm": 55.791984177409674,
      "learning_rate": 8.213239092801718e-07,
      "loss": 0.5155,
      "step": 1490
    },
    {
      "epoch": 0.3527751646284102,
      "grad_norm": 38.61566415419522,
      "learning_rate": 8.181675712577864e-07,
      "loss": 0.4924,
      "step": 1500
    },
    {
      "epoch": 0.3551269990592662,
      "grad_norm": 59.61394018369017,
      "learning_rate": 8.149897814577589e-07,
      "loss": 0.4948,
      "step": 1510
    },
    {
      "epoch": 0.35747883349012227,
      "grad_norm": 52.530770820466806,
      "learning_rate": 8.117907541358664e-07,
      "loss": 0.4625,
      "step": 1520
    },
    {
      "epoch": 0.35983066792097834,
      "grad_norm": 53.03111212968418,
      "learning_rate": 8.08570704979781e-07,
      "loss": 0.4853,
      "step": 1530
    },
    {
      "epoch": 0.3621825023518344,
      "grad_norm": 65.37886086917705,
      "learning_rate": 8.053298510945279e-07,
      "loss": 0.499,
      "step": 1540
    },
    {
      "epoch": 0.3645343367826905,
      "grad_norm": 274.0211083038785,
      "learning_rate": 8.020684109878465e-07,
      "loss": 0.5308,
      "step": 1550
    },
    {
      "epoch": 0.36688617121354655,
      "grad_norm": 207.52089838136985,
      "learning_rate": 7.987866045554598e-07,
      "loss": 0.5111,
      "step": 1560
    },
    {
      "epoch": 0.3692380056444026,
      "grad_norm": 201.91551525915267,
      "learning_rate": 7.954846530662467e-07,
      "loss": 0.5095,
      "step": 1570
    },
    {
      "epoch": 0.3715898400752587,
      "grad_norm": 66.61996761679119,
      "learning_rate": 7.921627791473242e-07,
      "loss": 0.4668,
      "step": 1580
    },
    {
      "epoch": 0.37394167450611476,
      "grad_norm": 29.688431988926887,
      "learning_rate": 7.888212067690372e-07,
      "loss": 0.5093,
      "step": 1590
    },
    {
      "epoch": 0.37629350893697083,
      "grad_norm": 96.58502652217979,
      "learning_rate": 7.854601612298577e-07,
      "loss": 0.5492,
      "step": 1600
    },
    {
      "epoch": 0.3786453433678269,
      "grad_norm": 160.08236167143332,
      "learning_rate": 7.820798691411945e-07,
      "loss": 0.5056,
      "step": 1610
    },
    {
      "epoch": 0.380997177798683,
      "grad_norm": 94.89520708968821,
      "learning_rate": 7.786805584121143e-07,
      "loss": 0.5017,
      "step": 1620
    },
    {
      "epoch": 0.38334901222953904,
      "grad_norm": 111.60483541208626,
      "learning_rate": 7.75262458233976e-07,
      "loss": 0.4888,
      "step": 1630
    },
    {
      "epoch": 0.3857008466603951,
      "grad_norm": 52.130149242500984,
      "learning_rate": 7.718257990649766e-07,
      "loss": 0.5116,
      "step": 1640
    },
    {
      "epoch": 0.3880526810912512,
      "grad_norm": 132.947730554617,
      "learning_rate": 7.683708126146146e-07,
      "loss": 0.4888,
      "step": 1650
    },
    {
      "epoch": 0.39040451552210725,
      "grad_norm": 119.19032436133351,
      "learning_rate": 7.648977318280667e-07,
      "loss": 0.5562,
      "step": 1660
    },
    {
      "epoch": 0.3927563499529633,
      "grad_norm": 79.62177704099146,
      "learning_rate": 7.614067908704822e-07,
      "loss": 0.4979,
      "step": 1670
    },
    {
      "epoch": 0.3951081843838194,
      "grad_norm": 276.43996320587473,
      "learning_rate": 7.578982251111941e-07,
      "loss": 0.5253,
      "step": 1680
    },
    {
      "epoch": 0.39746001881467546,
      "grad_norm": 33.12487289775825,
      "learning_rate": 7.543722711078515e-07,
      "loss": 0.4725,
      "step": 1690
    },
    {
      "epoch": 0.39981185324553153,
      "grad_norm": 196.6122063462283,
      "learning_rate": 7.508291665904684e-07,
      "loss": 0.4929,
      "step": 1700
    },
    {
      "epoch": 0.4021636876763876,
      "grad_norm": 65.99306854525632,
      "learning_rate": 7.472691504453963e-07,
      "loss": 0.5353,
      "step": 1710
    },
    {
      "epoch": 0.4045155221072437,
      "grad_norm": 419.0366078449193,
      "learning_rate": 7.436924626992177e-07,
      "loss": 0.5149,
      "step": 1720
    },
    {
      "epoch": 0.40686735653809974,
      "grad_norm": 63.6596547753076,
      "learning_rate": 7.400993445025623e-07,
      "loss": 0.5043,
      "step": 1730
    },
    {
      "epoch": 0.40921919096895576,
      "grad_norm": 94.23732036703356,
      "learning_rate": 7.364900381138488e-07,
      "loss": 0.5123,
      "step": 1740
    },
    {
      "epoch": 0.41157102539981183,
      "grad_norm": 125.17343539075726,
      "learning_rate": 7.3286478688295e-07,
      "loss": 0.5495,
      "step": 1750
    },
    {
      "epoch": 0.4139228598306679,
      "grad_norm": 490.21785388787504,
      "learning_rate": 7.292238352347865e-07,
      "loss": 0.5369,
      "step": 1760
    },
    {
      "epoch": 0.41627469426152397,
      "grad_norm": 35.884085037680386,
      "learning_rate": 7.255674286528462e-07,
      "loss": 0.5539,
      "step": 1770
    },
    {
      "epoch": 0.41862652869238004,
      "grad_norm": 106.9642458702286,
      "learning_rate": 7.218958136626336e-07,
      "loss": 0.441,
      "step": 1780
    },
    {
      "epoch": 0.4209783631232361,
      "grad_norm": 71.82566114451349,
      "learning_rate": 7.182092378150479e-07,
      "loss": 0.5205,
      "step": 1790
    },
    {
      "epoch": 0.4233301975540922,
      "grad_norm": 779.4365683391414,
      "learning_rate": 7.145079496696924e-07,
      "loss": 0.4959,
      "step": 1800
    },
    {
      "epoch": 0.42568203198494825,
      "grad_norm": 44.667117126603884,
      "learning_rate": 7.107921987781162e-07,
      "loss": 0.4753,
      "step": 1810
    },
    {
      "epoch": 0.4280338664158043,
      "grad_norm": 110.85747495169443,
      "learning_rate": 7.070622356669887e-07,
      "loss": 0.4958,
      "step": 1820
    },
    {
      "epoch": 0.4303857008466604,
      "grad_norm": 113.10527215049324,
      "learning_rate": 7.03318311821208e-07,
      "loss": 0.454,
      "step": 1830
    },
    {
      "epoch": 0.43273753527751646,
      "grad_norm": 17.84232109387917,
      "learning_rate": 6.995606796669454e-07,
      "loss": 0.5019,
      "step": 1840
    },
    {
      "epoch": 0.43508936970837253,
      "grad_norm": 110.74344931358036,
      "learning_rate": 6.957895925546262e-07,
      "loss": 0.4719,
      "step": 1850
    },
    {
      "epoch": 0.4374412041392286,
      "grad_norm": 41.489250407979306,
      "learning_rate": 6.920053047418475e-07,
      "loss": 0.5075,
      "step": 1860
    },
    {
      "epoch": 0.43979303857008467,
      "grad_norm": 30.1685856705556,
      "learning_rate": 6.88208071376236e-07,
      "loss": 0.4843,
      "step": 1870
    },
    {
      "epoch": 0.44214487300094074,
      "grad_norm": 145.26880468994483,
      "learning_rate": 6.843981484782452e-07,
      "loss": 0.4794,
      "step": 1880
    },
    {
      "epoch": 0.4444967074317968,
      "grad_norm": 108.30784248543398,
      "learning_rate": 6.80575792923893e-07,
      "loss": 0.4918,
      "step": 1890
    },
    {
      "epoch": 0.4468485418626529,
      "grad_norm": 272.0716196572391,
      "learning_rate": 6.767412624274434e-07,
      "loss": 0.4926,
      "step": 1900
    },
    {
      "epoch": 0.44920037629350895,
      "grad_norm": 53.98848770173013,
      "learning_rate": 6.728948155240303e-07,
      "loss": 0.5,
      "step": 1910
    },
    {
      "epoch": 0.451552210724365,
      "grad_norm": 90.77607158800988,
      "learning_rate": 6.690367115522257e-07,
      "loss": 0.4901,
      "step": 1920
    },
    {
      "epoch": 0.4539040451552211,
      "grad_norm": 42.55048877269962,
      "learning_rate": 6.651672106365554e-07,
      "loss": 0.5189,
      "step": 1930
    },
    {
      "epoch": 0.45625587958607716,
      "grad_norm": 44.97567866783233,
      "learning_rate": 6.612865736699598e-07,
      "loss": 0.5033,
      "step": 1940
    },
    {
      "epoch": 0.45860771401693323,
      "grad_norm": 67.49282430103631,
      "learning_rate": 6.573950622962039e-07,
      "loss": 0.5276,
      "step": 1950
    },
    {
      "epoch": 0.4609595484477893,
      "grad_norm": 59.17058897377058,
      "learning_rate": 6.534929388922374e-07,
      "loss": 0.4735,
      "step": 1960
    },
    {
      "epoch": 0.4633113828786453,
      "grad_norm": 111.86181053840893,
      "learning_rate": 6.495804665505029e-07,
      "loss": 0.4788,
      "step": 1970
    },
    {
      "epoch": 0.4656632173095014,
      "grad_norm": 189.94498833584603,
      "learning_rate": 6.456579090611987e-07,
      "loss": 0.5144,
      "step": 1980
    },
    {
      "epoch": 0.46801505174035746,
      "grad_norm": 122.35102212459157,
      "learning_rate": 6.417255308944928e-07,
      "loss": 0.5283,
      "step": 1990
    },
    {
      "epoch": 0.4703668861712135,
      "grad_norm": 186.0617213529026,
      "learning_rate": 6.37783597182692e-07,
      "loss": 0.4672,
      "step": 2000
    },
    {
      "epoch": 0.4727187206020696,
      "grad_norm": 115.36410352920348,
      "learning_rate": 6.338323737023651e-07,
      "loss": 0.4965,
      "step": 2010
    },
    {
      "epoch": 0.47507055503292567,
      "grad_norm": 85.9333267678821,
      "learning_rate": 6.298721268564243e-07,
      "loss": 0.4895,
      "step": 2020
    },
    {
      "epoch": 0.47742238946378174,
      "grad_norm": 211.95325107965013,
      "learning_rate": 6.259031236561632e-07,
      "loss": 0.5538,
      "step": 2030
    },
    {
      "epoch": 0.4797742238946378,
      "grad_norm": 91.27430764319159,
      "learning_rate": 6.219256317032537e-07,
      "loss": 0.4437,
      "step": 2040
    },
    {
      "epoch": 0.4821260583254939,
      "grad_norm": 298.2235290567097,
      "learning_rate": 6.179399191717046e-07,
      "loss": 0.4904,
      "step": 2050
    },
    {
      "epoch": 0.48447789275634995,
      "grad_norm": 36.409310791398326,
      "learning_rate": 6.139462547897793e-07,
      "loss": 0.5011,
      "step": 2060
    },
    {
      "epoch": 0.486829727187206,
      "grad_norm": 61.35140792241313,
      "learning_rate": 6.099449078218781e-07,
      "loss": 0.4999,
      "step": 2070
    },
    {
      "epoch": 0.4891815616180621,
      "grad_norm": 192.0635061382374,
      "learning_rate": 6.059361480503839e-07,
      "loss": 0.4978,
      "step": 2080
    },
    {
      "epoch": 0.49153339604891816,
      "grad_norm": 1602.6959579230695,
      "learning_rate": 6.019202457574717e-07,
      "loss": 0.5296,
      "step": 2090
    },
    {
      "epoch": 0.49388523047977423,
      "grad_norm": 46.88396962107588,
      "learning_rate": 5.97897471706886e-07,
      "loss": 0.4666,
      "step": 2100
    },
    {
      "epoch": 0.4962370649106303,
      "grad_norm": 42.5404300683936,
      "learning_rate": 5.938680971256855e-07,
      "loss": 0.4917,
      "step": 2110
    },
    {
      "epoch": 0.49858889934148637,
      "grad_norm": 168.45591180658585,
      "learning_rate": 5.898323936859554e-07,
      "loss": 0.4982,
      "step": 2120
    },
    {
      "epoch": 0.5009407337723424,
      "grad_norm": 98.29895214863133,
      "learning_rate": 5.857906334864908e-07,
      "loss": 0.4613,
      "step": 2130
    },
    {
      "epoch": 0.5032925682031985,
      "grad_norm": 84.14383238818485,
      "learning_rate": 5.817430890344514e-07,
      "loss": 0.4573,
      "step": 2140
    },
    {
      "epoch": 0.5056444026340545,
      "grad_norm": 63.91190561893289,
      "learning_rate": 5.776900332269874e-07,
      "loss": 0.4658,
      "step": 2150
    },
    {
      "epoch": 0.5079962370649106,
      "grad_norm": 105.97500836643457,
      "learning_rate": 5.73631739332841e-07,
      "loss": 0.4943,
      "step": 2160
    },
    {
      "epoch": 0.5103480714957667,
      "grad_norm": 84.24147849783279,
      "learning_rate": 5.695684809739212e-07,
      "loss": 0.4429,
      "step": 2170
    },
    {
      "epoch": 0.5126999059266227,
      "grad_norm": 189.39210679587683,
      "learning_rate": 5.655005321068556e-07,
      "loss": 0.4739,
      "step": 2180
    },
    {
      "epoch": 0.5150517403574788,
      "grad_norm": 154.99555922544172,
      "learning_rate": 5.614281670045191e-07,
      "loss": 0.4914,
      "step": 2190
    },
    {
      "epoch": 0.5174035747883349,
      "grad_norm": 54.60327223607897,
      "learning_rate": 5.573516602375427e-07,
      "loss": 0.4801,
      "step": 2200
    },
    {
      "epoch": 0.519755409219191,
      "grad_norm": 83.19875850848948,
      "learning_rate": 5.532712866557994e-07,
      "loss": 0.5323,
      "step": 2210
    },
    {
      "epoch": 0.522107243650047,
      "grad_norm": 42.60436730500025,
      "learning_rate": 5.491873213698749e-07,
      "loss": 0.4573,
      "step": 2220
    },
    {
      "epoch": 0.5244590780809031,
      "grad_norm": 65.42258392696083,
      "learning_rate": 5.451000397325176e-07,
      "loss": 0.4814,
      "step": 2230
    },
    {
      "epoch": 0.5268109125117592,
      "grad_norm": 115.80636504665334,
      "learning_rate": 5.410097173200738e-07,
      "loss": 0.4587,
      "step": 2240
    },
    {
      "epoch": 0.5291627469426152,
      "grad_norm": 69.60117550551328,
      "learning_rate": 5.36916629913908e-07,
      "loss": 0.5132,
      "step": 2250
    },
    {
      "epoch": 0.5315145813734713,
      "grad_norm": 66.375344126355,
      "learning_rate": 5.32821053481808e-07,
      "loss": 0.4671,
      "step": 2260
    },
    {
      "epoch": 0.5338664158043274,
      "grad_norm": 35.28013711011858,
      "learning_rate": 5.287232641593799e-07,
      "loss": 0.4786,
      "step": 2270
    },
    {
      "epoch": 0.5362182502351834,
      "grad_norm": 786.6344541862824,
      "learning_rate": 5.24623538231428e-07,
      "loss": 0.446,
      "step": 2280
    },
    {
      "epoch": 0.5385700846660395,
      "grad_norm": 123.10978108283794,
      "learning_rate": 5.205221521133293e-07,
      "loss": 0.4589,
      "step": 2290
    },
    {
      "epoch": 0.5409219190968956,
      "grad_norm": 219.42520028684848,
      "learning_rate": 5.164193823323949e-07,
      "loss": 0.5345,
      "step": 2300
    },
    {
      "epoch": 0.5432737535277516,
      "grad_norm": 42.749936673742305,
      "learning_rate": 5.123155055092266e-07,
      "loss": 0.5589,
      "step": 2310
    },
    {
      "epoch": 0.5456255879586077,
      "grad_norm": 132.15751551013375,
      "learning_rate": 5.082107983390663e-07,
      "loss": 0.4669,
      "step": 2320
    },
    {
      "epoch": 0.5479774223894638,
      "grad_norm": 102.8246996509365,
      "learning_rate": 5.041055375731404e-07,
      "loss": 0.4539,
      "step": 2330
    },
    {
      "epoch": 0.5503292568203199,
      "grad_norm": 38.44411987577502,
      "learning_rate": 5e-07,
      "loss": 0.5467,
      "step": 2340
    },
    {
      "epoch": 0.5526810912511759,
      "grad_norm": 74.77277783685864,
      "learning_rate": 4.958944624268596e-07,
      "loss": 0.5021,
      "step": 2350
    },
    {
      "epoch": 0.555032925682032,
      "grad_norm": 38.676119419707206,
      "learning_rate": 4.917892016609336e-07,
      "loss": 0.4276,
      "step": 2360
    },
    {
      "epoch": 0.5573847601128881,
      "grad_norm": 108.70553186871935,
      "learning_rate": 4.876844944907734e-07,
      "loss": 0.4719,
      "step": 2370
    },
    {
      "epoch": 0.5597365945437441,
      "grad_norm": 295.3911082440077,
      "learning_rate": 4.835806176676051e-07,
      "loss": 0.4445,
      "step": 2380
    },
    {
      "epoch": 0.5620884289746002,
      "grad_norm": 49.65291441301108,
      "learning_rate": 4.794778478866707e-07,
      "loss": 0.4846,
      "step": 2390
    },
    {
      "epoch": 0.5644402634054563,
      "grad_norm": 60.72248586512295,
      "learning_rate": 4.753764617685719e-07,
      "loss": 0.4897,
      "step": 2400
    },
    {
      "epoch": 0.5667920978363123,
      "grad_norm": 64.47512708646681,
      "learning_rate": 4.7127673584062015e-07,
      "loss": 0.4741,
      "step": 2410
    },
    {
      "epoch": 0.5691439322671684,
      "grad_norm": 47.47422172419129,
      "learning_rate": 4.671789465181919e-07,
      "loss": 0.4669,
      "step": 2420
    },
    {
      "epoch": 0.5714957666980245,
      "grad_norm": 23.291621066236825,
      "learning_rate": 4.6308337008609215e-07,
      "loss": 0.4387,
      "step": 2430
    },
    {
      "epoch": 0.5738476011288806,
      "grad_norm": 38.681058186421566,
      "learning_rate": 4.5899028267992613e-07,
      "loss": 0.478,
      "step": 2440
    },
    {
      "epoch": 0.5761994355597366,
      "grad_norm": 49.47985557527302,
      "learning_rate": 4.548999602674824e-07,
      "loss": 0.4768,
      "step": 2450
    },
    {
      "epoch": 0.5785512699905927,
      "grad_norm": 61.860260636343476,
      "learning_rate": 4.5081267863012504e-07,
      "loss": 0.4633,
      "step": 2460
    },
    {
      "epoch": 0.5809031044214488,
      "grad_norm": 71.87446520905469,
      "learning_rate": 4.4672871334420054e-07,
      "loss": 0.4902,
      "step": 2470
    },
    {
      "epoch": 0.5832549388523048,
      "grad_norm": 59.913860308252595,
      "learning_rate": 4.4264833976245736e-07,
      "loss": 0.4773,
      "step": 2480
    },
    {
      "epoch": 0.5856067732831609,
      "grad_norm": 66.42267107533596,
      "learning_rate": 4.3857183299548084e-07,
      "loss": 0.4871,
      "step": 2490
    },
    {
      "epoch": 0.587958607714017,
      "grad_norm": 31.108180028188862,
      "learning_rate": 4.344994678931445e-07,
      "loss": 0.4645,
      "step": 2500
    },
    {
      "epoch": 0.590310442144873,
      "grad_norm": 28.34707994823098,
      "learning_rate": 4.304315190260787e-07,
      "loss": 0.4702,
      "step": 2510
    },
    {
      "epoch": 0.5926622765757291,
      "grad_norm": 171.3392721123324,
      "learning_rate": 4.2636826066715895e-07,
      "loss": 0.4394,
      "step": 2520
    },
    {
      "epoch": 0.5950141110065852,
      "grad_norm": 52.496086063752934,
      "learning_rate": 4.2230996677301265e-07,
      "loss": 0.468,
      "step": 2530
    },
    {
      "epoch": 0.5973659454374413,
      "grad_norm": 152.40165257833203,
      "learning_rate": 4.182569109655488e-07,
      "loss": 0.4449,
      "step": 2540
    },
    {
      "epoch": 0.5997177798682972,
      "grad_norm": 49.22527606031466,
      "learning_rate": 4.142093665135092e-07,
      "loss": 0.4621,
      "step": 2550
    },
    {
      "epoch": 0.6020696142991533,
      "grad_norm": 204.6300032968629,
      "learning_rate": 4.101676063140447e-07,
      "loss": 0.4697,
      "step": 2560
    },
    {
      "epoch": 0.6044214487300094,
      "grad_norm": 49.04313064544541,
      "learning_rate": 4.0613190287431457e-07,
      "loss": 0.4672,
      "step": 2570
    },
    {
      "epoch": 0.6067732831608654,
      "grad_norm": 75.74845619142411,
      "learning_rate": 4.0210252829311384e-07,
      "loss": 0.4547,
      "step": 2580
    },
    {
      "epoch": 0.6091251175917215,
      "grad_norm": 41.52522300616638,
      "learning_rate": 3.980797542425284e-07,
      "loss": 0.4841,
      "step": 2590
    },
    {
      "epoch": 0.6114769520225776,
      "grad_norm": 24.660847040050093,
      "learning_rate": 3.9406385194961617e-07,
      "loss": 0.505,
      "step": 2600
    },
    {
      "epoch": 0.6138287864534336,
      "grad_norm": 32.07608549927192,
      "learning_rate": 3.9005509217812195e-07,
      "loss": 0.4588,
      "step": 2610
    },
    {
      "epoch": 0.6161806208842897,
      "grad_norm": 39.70294250231196,
      "learning_rate": 3.8605374521022074e-07,
      "loss": 0.4559,
      "step": 2620
    },
    {
      "epoch": 0.6185324553151458,
      "grad_norm": 77.97272809649765,
      "learning_rate": 3.8206008082829546e-07,
      "loss": 0.4406,
      "step": 2630
    },
    {
      "epoch": 0.6208842897460018,
      "grad_norm": 45.67613106527001,
      "learning_rate": 3.7807436829674625e-07,
      "loss": 0.4385,
      "step": 2640
    },
    {
      "epoch": 0.6232361241768579,
      "grad_norm": 76.59247815028435,
      "learning_rate": 3.740968763438369e-07,
      "loss": 0.4869,
      "step": 2650
    },
    {
      "epoch": 0.625587958607714,
      "grad_norm": 39.68012913221431,
      "learning_rate": 3.7012787314357564e-07,
      "loss": 0.4721,
      "step": 2660
    },
    {
      "epoch": 0.6279397930385701,
      "grad_norm": 141.25589970566216,
      "learning_rate": 3.6616762629763485e-07,
      "loss": 0.5125,
      "step": 2670
    },
    {
      "epoch": 0.6302916274694261,
      "grad_norm": 23.55149435862232,
      "learning_rate": 3.6221640281730807e-07,
      "loss": 0.4115,
      "step": 2680
    },
    {
      "epoch": 0.6326434619002822,
      "grad_norm": 143.42525243309444,
      "learning_rate": 3.5827446910550706e-07,
      "loss": 0.4668,
      "step": 2690
    },
    {
      "epoch": 0.6349952963311383,
      "grad_norm": 77.81781401332093,
      "learning_rate": 3.543420909388013e-07,
      "loss": 0.4647,
      "step": 2700
    },
    {
      "epoch": 0.6373471307619943,
      "grad_norm": 368.31014969438706,
      "learning_rate": 3.5041953344949713e-07,
      "loss": 0.5143,
      "step": 2710
    },
    {
      "epoch": 0.6396989651928504,
      "grad_norm": 191.99282418445105,
      "learning_rate": 3.4650706110776263e-07,
      "loss": 0.4714,
      "step": 2720
    },
    {
      "epoch": 0.6420507996237065,
      "grad_norm": 172.6805749126617,
      "learning_rate": 3.4260493770379594e-07,
      "loss": 0.4876,
      "step": 2730
    },
    {
      "epoch": 0.6444026340545626,
      "grad_norm": 61.17327984316229,
      "learning_rate": 3.387134263300403e-07,
      "loss": 0.4178,
      "step": 2740
    },
    {
      "epoch": 0.6467544684854186,
      "grad_norm": 114.17761831777952,
      "learning_rate": 3.3483278936344473e-07,
      "loss": 0.4889,
      "step": 2750
    },
    {
      "epoch": 0.6491063029162747,
      "grad_norm": 29.259182763118652,
      "learning_rate": 3.3096328844777445e-07,
      "loss": 0.4508,
      "step": 2760
    },
    {
      "epoch": 0.6514581373471308,
      "grad_norm": 119.4854028220211,
      "learning_rate": 3.2710518447596975e-07,
      "loss": 0.4392,
      "step": 2770
    },
    {
      "epoch": 0.6538099717779868,
      "grad_norm": 25.936405577527868,
      "learning_rate": 3.232587375725566e-07,
      "loss": 0.4725,
      "step": 2780
    },
    {
      "epoch": 0.6561618062088429,
      "grad_norm": 67.32516092680038,
      "learning_rate": 3.1942420707610713e-07,
      "loss": 0.4363,
      "step": 2790
    },
    {
      "epoch": 0.658513640639699,
      "grad_norm": 49.096928986568244,
      "learning_rate": 3.156018515217549e-07,
      "loss": 0.4891,
      "step": 2800
    },
    {
      "epoch": 0.660865475070555,
      "grad_norm": 48.270407025181726,
      "learning_rate": 3.11791928623764e-07,
      "loss": 0.4846,
      "step": 2810
    },
    {
      "epoch": 0.6632173095014111,
      "grad_norm": 33.146338229003334,
      "learning_rate": 3.079946952581526e-07,
      "loss": 0.4372,
      "step": 2820
    },
    {
      "epoch": 0.6655691439322672,
      "grad_norm": 65.86339846284933,
      "learning_rate": 3.042104074453739e-07,
      "loss": 0.4938,
      "step": 2830
    },
    {
      "epoch": 0.6679209783631233,
      "grad_norm": 81.43044910317796,
      "learning_rate": 3.0043932033305455e-07,
      "loss": 0.5127,
      "step": 2840
    },
    {
      "epoch": 0.6702728127939793,
      "grad_norm": 28.25621020100993,
      "learning_rate": 2.9668168817879205e-07,
      "loss": 0.5283,
      "step": 2850
    },
    {
      "epoch": 0.6726246472248354,
      "grad_norm": 70.5644459258473,
      "learning_rate": 2.9293776433301144e-07,
      "loss": 0.4676,
      "step": 2860
    },
    {
      "epoch": 0.6749764816556915,
      "grad_norm": 833.1173554079337,
      "learning_rate": 2.8920780122188393e-07,
      "loss": 0.4238,
      "step": 2870
    },
    {
      "epoch": 0.6773283160865475,
      "grad_norm": 50.15505341992137,
      "learning_rate": 2.854920503303076e-07,
      "loss": 0.4317,
      "step": 2880
    },
    {
      "epoch": 0.6796801505174036,
      "grad_norm": 53.62160731503517,
      "learning_rate": 2.8179076218495213e-07,
      "loss": 0.5074,
      "step": 2890
    },
    {
      "epoch": 0.6820319849482597,
      "grad_norm": 82.67486758357786,
      "learning_rate": 2.7810418633736637e-07,
      "loss": 0.451,
      "step": 2900
    },
    {
      "epoch": 0.6843838193791157,
      "grad_norm": 70.65167543458996,
      "learning_rate": 2.744325713471536e-07,
      "loss": 0.4832,
      "step": 2910
    },
    {
      "epoch": 0.6867356538099718,
      "grad_norm": 70.82701033248681,
      "learning_rate": 2.707761647652135e-07,
      "loss": 0.4679,
      "step": 2920
    },
    {
      "epoch": 0.6890874882408279,
      "grad_norm": 119.20686302974886,
      "learning_rate": 2.6713521311705e-07,
      "loss": 0.4545,
      "step": 2930
    },
    {
      "epoch": 0.691439322671684,
      "grad_norm": 46.64006057310895,
      "learning_rate": 2.635099618861513e-07,
      "loss": 0.5192,
      "step": 2940
    },
    {
      "epoch": 0.69379115710254,
      "grad_norm": 62.9139122700984,
      "learning_rate": 2.5990065549743766e-07,
      "loss": 0.4554,
      "step": 2950
    },
    {
      "epoch": 0.6961429915333961,
      "grad_norm": 134.31949058487535,
      "learning_rate": 2.5630753730078236e-07,
      "loss": 0.5069,
      "step": 2960
    },
    {
      "epoch": 0.6984948259642522,
      "grad_norm": 91.61539909081964,
      "learning_rate": 2.527308495546038e-07,
      "loss": 0.4289,
      "step": 2970
    },
    {
      "epoch": 0.7008466603951082,
      "grad_norm": 104.86257682935386,
      "learning_rate": 2.4917083340953175e-07,
      "loss": 0.433,
      "step": 2980
    },
    {
      "epoch": 0.7031984948259643,
      "grad_norm": 21.474222567958595,
      "learning_rate": 2.456277288921485e-07,
      "loss": 0.4365,
      "step": 2990
    },
    {
      "epoch": 0.7055503292568204,
      "grad_norm": 324.3124016674876,
      "learning_rate": 2.4210177488880587e-07,
      "loss": 0.4179,
      "step": 3000
    },
    {
      "epoch": 0.7079021636876763,
      "grad_norm": 39.24490171045256,
      "learning_rate": 2.3859320912951797e-07,
      "loss": 0.4258,
      "step": 3010
    },
    {
      "epoch": 0.7102539981185324,
      "grad_norm": 26.55750023296075,
      "learning_rate": 2.3510226817193319e-07,
      "loss": 0.4575,
      "step": 3020
    },
    {
      "epoch": 0.7126058325493885,
      "grad_norm": 76.18210272558196,
      "learning_rate": 2.3162918738538539e-07,
      "loss": 0.4474,
      "step": 3030
    },
    {
      "epoch": 0.7149576669802445,
      "grad_norm": 163.82835782749893,
      "learning_rate": 2.281742009350235e-07,
      "loss": 0.5178,
      "step": 3040
    },
    {
      "epoch": 0.7173095014111006,
      "grad_norm": 71.20621946298115,
      "learning_rate": 2.247375417660241e-07,
      "loss": 0.4542,
      "step": 3050
    },
    {
      "epoch": 0.7196613358419567,
      "grad_norm": 29.44780246695801,
      "learning_rate": 2.2131944158788545e-07,
      "loss": 0.4696,
      "step": 3060
    },
    {
      "epoch": 0.7220131702728128,
      "grad_norm": 28.85017992785853,
      "learning_rate": 2.1792013085880539e-07,
      "loss": 0.4262,
      "step": 3070
    },
    {
      "epoch": 0.7243650047036688,
      "grad_norm": 161.1358114916103,
      "learning_rate": 2.1453983877014224e-07,
      "loss": 0.4506,
      "step": 3080
    },
    {
      "epoch": 0.7267168391345249,
      "grad_norm": 48.21294290482945,
      "learning_rate": 2.1117879323096283e-07,
      "loss": 0.4762,
      "step": 3090
    },
    {
      "epoch": 0.729068673565381,
      "grad_norm": 92.39652488615809,
      "learning_rate": 2.0783722085267592e-07,
      "loss": 0.4569,
      "step": 3100
    },
    {
      "epoch": 0.731420507996237,
      "grad_norm": 72.21113504991627,
      "learning_rate": 2.0451534693375344e-07,
      "loss": 0.4738,
      "step": 3110
    },
    {
      "epoch": 0.7337723424270931,
      "grad_norm": 92.26016205643168,
      "learning_rate": 2.0121339544454035e-07,
      "loss": 0.4435,
      "step": 3120
    },
    {
      "epoch": 0.7361241768579492,
      "grad_norm": 68.48228287299885,
      "learning_rate": 1.9793158901215346e-07,
      "loss": 0.4313,
      "step": 3130
    },
    {
      "epoch": 0.7384760112888052,
      "grad_norm": 164.80360664767645,
      "learning_rate": 1.9467014890547223e-07,
      "loss": 0.4148,
      "step": 3140
    },
    {
      "epoch": 0.7408278457196613,
      "grad_norm": 66.07514802799065,
      "learning_rate": 1.9142929502021904e-07,
      "loss": 0.4557,
      "step": 3150
    },
    {
      "epoch": 0.7431796801505174,
      "grad_norm": 54.18230405158542,
      "learning_rate": 1.8820924586413373e-07,
      "loss": 0.4389,
      "step": 3160
    },
    {
      "epoch": 0.7455315145813735,
      "grad_norm": 54.933584541808706,
      "learning_rate": 1.8501021854224115e-07,
      "loss": 0.4405,
      "step": 3170
    },
    {
      "epoch": 0.7478833490122295,
      "grad_norm": 73.88687819153088,
      "learning_rate": 1.8183242874221365e-07,
      "loss": 0.444,
      "step": 3180
    },
    {
      "epoch": 0.7502351834430856,
      "grad_norm": 155.9296620837369,
      "learning_rate": 1.786760907198281e-07,
      "loss": 0.4456,
      "step": 3190
    },
    {
      "epoch": 0.7525870178739417,
      "grad_norm": 98.13292721707111,
      "learning_rate": 1.7554141728452038e-07,
      "loss": 0.4804,
      "step": 3200
    },
    {
      "epoch": 0.7549388523047977,
      "grad_norm": 20.87180402977683,
      "learning_rate": 1.7242861978503782e-07,
      "loss": 0.3997,
      "step": 3210
    },
    {
      "epoch": 0.7572906867356538,
      "grad_norm": 139.8758062253345,
      "learning_rate": 1.6933790809518839e-07,
      "loss": 0.4336,
      "step": 3220
    },
    {
      "epoch": 0.7596425211665099,
      "grad_norm": 67.67816700607545,
      "learning_rate": 1.6626949059969098e-07,
      "loss": 0.4371,
      "step": 3230
    },
    {
      "epoch": 0.761994355597366,
      "grad_norm": 31.044524767135282,
      "learning_rate": 1.632235741801255e-07,
      "loss": 0.4678,
      "step": 3240
    },
    {
      "epoch": 0.764346190028222,
      "grad_norm": 31.59032271901563,
      "learning_rate": 1.6020036420098455e-07,
      "loss": 0.4474,
      "step": 3250
    },
    {
      "epoch": 0.7666980244590781,
      "grad_norm": 56.09519646865281,
      "learning_rate": 1.5720006449582635e-07,
      "loss": 0.4464,
      "step": 3260
    },
    {
      "epoch": 0.7690498588899342,
      "grad_norm": 60.68844089135062,
      "learning_rate": 1.5422287735353257e-07,
      "loss": 0.4525,
      "step": 3270
    },
    {
      "epoch": 0.7714016933207902,
      "grad_norm": 44.61096552113006,
      "learning_rate": 1.5126900350466886e-07,
      "loss": 0.4514,
      "step": 3280
    },
    {
      "epoch": 0.7737535277516463,
      "grad_norm": 79.00286386953672,
      "learning_rate": 1.4833864210795132e-07,
      "loss": 0.4546,
      "step": 3290
    },
    {
      "epoch": 0.7761053621825024,
      "grad_norm": 96.77266999382469,
      "learning_rate": 1.4543199073681855e-07,
      "loss": 0.4188,
      "step": 3300
    },
    {
      "epoch": 0.7784571966133584,
      "grad_norm": 62.93527984098046,
      "learning_rate": 1.4254924536611046e-07,
      "loss": 0.4931,
      "step": 3310
    },
    {
      "epoch": 0.7808090310442145,
      "grad_norm": 78.89136393447265,
      "learning_rate": 1.396906003588557e-07,
      "loss": 0.478,
      "step": 3320
    },
    {
      "epoch": 0.7831608654750706,
      "grad_norm": 123.25424347020814,
      "learning_rate": 1.368562484531664e-07,
      "loss": 0.4439,
      "step": 3330
    },
    {
      "epoch": 0.7855126999059266,
      "grad_norm": 72.17648953992217,
      "learning_rate": 1.3404638074924356e-07,
      "loss": 0.4661,
      "step": 3340
    },
    {
      "epoch": 0.7878645343367827,
      "grad_norm": 29.181609218156055,
      "learning_rate": 1.3126118669649255e-07,
      "loss": 0.4775,
      "step": 3350
    },
    {
      "epoch": 0.7902163687676388,
      "grad_norm": 26.80987859871448,
      "learning_rate": 1.2850085408075e-07,
      "loss": 0.432,
      "step": 3360
    },
    {
      "epoch": 0.7925682031984949,
      "grad_norm": 42.77599635619014,
      "learning_rate": 1.2576556901162234e-07,
      "loss": 0.4705,
      "step": 3370
    },
    {
      "epoch": 0.7949200376293509,
      "grad_norm": 45.81217647451442,
      "learning_rate": 1.2305551590993806e-07,
      "loss": 0.47,
      "step": 3380
    },
    {
      "epoch": 0.797271872060207,
      "grad_norm": 101.96913312576068,
      "learning_rate": 1.2037087749531328e-07,
      "loss": 0.4229,
      "step": 3390
    },
    {
      "epoch": 0.7996237064910631,
      "grad_norm": 215.88495935464798,
| "learning_rate": 1.177118347738329e-07, | |
| "loss": 0.4447, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8019755409219191, | |
| "grad_norm": 72.55346736718074, | |
| "learning_rate": 1.1507856702584573e-07, | |
| "loss": 0.4563, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.8043273753527752, | |
| "grad_norm": 267.60170600595404, | |
| "learning_rate": 1.1247125179387734e-07, | |
| "loss": 0.4835, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8066792097836313, | |
| "grad_norm": 38.91519432313084, | |
| "learning_rate": 1.098900648706601e-07, | |
| "loss": 0.445, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.8090310442144873, | |
| "grad_norm": 107.08048276724904, | |
| "learning_rate": 1.0733518028727973e-07, | |
| "loss": 0.437, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8113828786453434, | |
| "grad_norm": 39.41515052926627, | |
| "learning_rate": 1.048067703014423e-07, | |
| "loss": 0.4016, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8137347130761995, | |
| "grad_norm": 64.66608261177336, | |
| "learning_rate": 1.0230500538586012e-07, | |
| "loss": 0.4537, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8160865475070554, | |
| "grad_norm": 61.054737141139334, | |
| "learning_rate": 9.983005421675761e-08, | |
| "loss": 0.4124, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.8184383819379115, | |
| "grad_norm": 59.104871236625186, | |
| "learning_rate": 9.738208366249895e-08, | |
| "loss": 0.447, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.8207902163687676, | |
| "grad_norm": 110.57928796201509, | |
| "learning_rate": 9.496125877233736e-08, | |
| "loss": 0.461, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.8231420507996237, | |
| "grad_norm": 234.52785886205078, | |
| "learning_rate": 9.256774276528655e-08, | |
| "loss": 0.4856, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8254938852304797, | |
| "grad_norm": 63.124590798290406, | |
| "learning_rate": 9.020169701911695e-08, | |
| "loss": 0.4501, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.8278457196613358, | |
| "grad_norm": 146.5239433816864, | |
| "learning_rate": 8.786328105947405e-08, | |
| "loss": 0.4902, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.8301975540921919, | |
| "grad_norm": 34.913788680016566, | |
| "learning_rate": 8.555265254912337e-08, | |
| "loss": 0.4275, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.8325493885230479, | |
| "grad_norm": 79.65521261255103, | |
| "learning_rate": 8.326996727732055e-08, | |
| "loss": 0.4531, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.834901222953904, | |
| "grad_norm": 47.72263945629518, | |
| "learning_rate": 8.101537914930684e-08, | |
| "loss": 0.4062, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.8372530573847601, | |
| "grad_norm": 94.48797265907596, | |
| "learning_rate": 7.878904017593302e-08, | |
| "loss": 0.4462, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.8396048918156162, | |
| "grad_norm": 89.24680993944979, | |
| "learning_rate": 7.659110046341016e-08, | |
| "loss": 0.4334, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.8419567262464722, | |
| "grad_norm": 31.758114332081302, | |
| "learning_rate": 7.442170820318922e-08, | |
| "loss": 0.423, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.8443085606773283, | |
| "grad_norm": 108.36139891290028, | |
| "learning_rate": 7.228100966196916e-08, | |
| "loss": 0.46, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.8466603951081844, | |
| "grad_norm": 45.42121511612232, | |
| "learning_rate": 7.016914917183541e-08, | |
| "loss": 0.4393, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.8490122295390404, | |
| "grad_norm": 38.06302239041297, | |
| "learning_rate": 6.808626912052878e-08, | |
| "loss": 0.4123, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.8513640639698965, | |
| "grad_norm": 144.0496907397513, | |
| "learning_rate": 6.603250994184506e-08, | |
| "loss": 0.4226, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.8537158984007526, | |
| "grad_norm": 57.78752403702503, | |
| "learning_rate": 6.40080101061664e-08, | |
| "loss": 0.4378, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.8560677328316086, | |
| "grad_norm": 37.60367979124333, | |
| "learning_rate": 6.201290611112564e-08, | |
| "loss": 0.4561, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.8584195672624647, | |
| "grad_norm": 53.18398073653025, | |
| "learning_rate": 6.004733247240317e-08, | |
| "loss": 0.4678, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.8607714016933208, | |
| "grad_norm": 133.8658799415832, | |
| "learning_rate": 5.8111421714657105e-08, | |
| "loss": 0.4677, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.8631232361241769, | |
| "grad_norm": 53.25914607379931, | |
| "learning_rate": 5.620530436258841e-08, | |
| "loss": 0.5194, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.8654750705550329, | |
| "grad_norm": 70.42760542084339, | |
| "learning_rate": 5.4329108932140546e-08, | |
| "loss": 0.427, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.867826904985889, | |
| "grad_norm": 64.38658865994383, | |
| "learning_rate": 5.2482961921834604e-08, | |
| "loss": 0.4482, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.8701787394167451, | |
| "grad_norm": 42.99630254744219, | |
| "learning_rate": 5.066698780424006e-08, | |
| "loss": 0.4426, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.8725305738476011, | |
| "grad_norm": 51.11741118831597, | |
| "learning_rate": 4.888130901758292e-08, | |
| "loss": 0.434, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.8748824082784572, | |
| "grad_norm": 53.96851327406331, | |
| "learning_rate": 4.71260459574902e-08, | |
| "loss": 0.3842, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.8772342427093133, | |
| "grad_norm": 56.59443774149579, | |
| "learning_rate": 4.5401316968873004e-08, | |
| "loss": 0.4481, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.8795860771401693, | |
| "grad_norm": 83.85863914669255, | |
| "learning_rate": 4.370723833794665e-08, | |
| "loss": 0.4266, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.8819379115710254, | |
| "grad_norm": 39.06100931375012, | |
| "learning_rate": 4.2043924284390854e-08, | |
| "loss": 0.4238, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.8842897460018815, | |
| "grad_norm": 107.61806818011783, | |
| "learning_rate": 4.041148695364882e-08, | |
| "loss": 0.4485, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.8866415804327376, | |
| "grad_norm": 33.85213758711094, | |
| "learning_rate": 3.881003640936548e-08, | |
| "loss": 0.446, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.8889934148635936, | |
| "grad_norm": 61.187373812156466, | |
| "learning_rate": 3.723968062596711e-08, | |
| "loss": 0.4344, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.8913452492944497, | |
| "grad_norm": 736.9312025906532, | |
| "learning_rate": 3.570052548138147e-08, | |
| "loss": 0.4541, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.8936970837253058, | |
| "grad_norm": 119.33344609512177, | |
| "learning_rate": 3.4192674749898785e-08, | |
| "loss": 0.4908, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.8960489181561618, | |
| "grad_norm": 25.5797619702394, | |
| "learning_rate": 3.2716230095175435e-08, | |
| "loss": 0.4004, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.8984007525870179, | |
| "grad_norm": 49.359917397601, | |
| "learning_rate": 3.127129106337917e-08, | |
| "loss": 0.4518, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.900752587017874, | |
| "grad_norm": 25.07989197247865, | |
| "learning_rate": 2.985795507647754e-08, | |
| "loss": 0.4131, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.90310442144873, | |
| "grad_norm": 64.61035042248321, | |
| "learning_rate": 2.8476317425669527e-08, | |
| "loss": 0.447, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9054562558795861, | |
| "grad_norm": 93.42176266818919, | |
| "learning_rate": 2.7126471264960593e-08, | |
| "loss": 0.4329, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9078080903104422, | |
| "grad_norm": 89.24070781803138, | |
| "learning_rate": 2.580850760488196e-08, | |
| "loss": 0.4753, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.9101599247412983, | |
| "grad_norm": 26.373133567746557, | |
| "learning_rate": 2.4522515306354517e-08, | |
| "loss": 0.4606, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.9125117591721543, | |
| "grad_norm": 473.7854777781819, | |
| "learning_rate": 2.3268581074697536e-08, | |
| "loss": 0.5009, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.9148635936030104, | |
| "grad_norm": 32.436085698187235, | |
| "learning_rate": 2.204678945378269e-08, | |
| "loss": 0.4162, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.9172154280338665, | |
| "grad_norm": 170.05001192780844, | |
| "learning_rate": 2.0857222820333808e-08, | |
| "loss": 0.4458, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9195672624647225, | |
| "grad_norm": 48.18623345953366, | |
| "learning_rate": 1.9699961378373097e-08, | |
| "loss": 0.4123, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.9219190968955786, | |
| "grad_norm": 1358.9466832068094, | |
| "learning_rate": 1.8575083153813175e-08, | |
| "loss": 0.4355, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.9242709313264346, | |
| "grad_norm": 113.07515347161218, | |
| "learning_rate": 1.7482663989196456e-08, | |
| "loss": 0.4339, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.9266227657572906, | |
| "grad_norm": 70.74972682616625, | |
| "learning_rate": 1.64227775385819e-08, | |
| "loss": 0.4448, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.9289746001881467, | |
| "grad_norm": 42.63045190997477, | |
| "learning_rate": 1.539549526257866e-08, | |
| "loss": 0.446, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.9313264346190028, | |
| "grad_norm": 297.25978639344856, | |
| "learning_rate": 1.4400886423528103e-08, | |
| "loss": 0.422, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.9336782690498588, | |
| "grad_norm": 67.23940820577212, | |
| "learning_rate": 1.3439018080834142e-08, | |
| "loss": 0.4835, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.9360301034807149, | |
| "grad_norm": 77.9763692203697, | |
| "learning_rate": 1.2509955086441758e-08, | |
| "loss": 0.4637, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.938381937911571, | |
| "grad_norm": 118.62750518663206, | |
| "learning_rate": 1.1613760080464385e-08, | |
| "loss": 0.44, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.940733772342427, | |
| "grad_norm": 34.90355156969723, | |
| "learning_rate": 1.0750493486960666e-08, | |
| "loss": 0.4278, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.9430856067732831, | |
| "grad_norm": 25.802830924781176, | |
| "learning_rate": 9.920213509860498e-09, | |
| "loss": 0.4353, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.9454374412041392, | |
| "grad_norm": 44.67589158879238, | |
| "learning_rate": 9.122976129040782e-09, | |
| "loss": 0.4026, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.9477892756349953, | |
| "grad_norm": 31.063785760081664, | |
| "learning_rate": 8.358835096550886e-09, | |
| "loss": 0.4056, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.9501411100658513, | |
| "grad_norm": 29.538919263142972, | |
| "learning_rate": 7.627841932988765e-09, | |
| "loss": 0.392, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.9524929444967074, | |
| "grad_norm": 48.153765650560715, | |
| "learning_rate": 6.9300459240271835e-09, | |
| "loss": 0.434, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.9548447789275635, | |
| "grad_norm": 97.9376546668311, | |
| "learning_rate": 6.265494117090764e-09, | |
| "loss": 0.4637, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.9571966133584195, | |
| "grad_norm": 46.369837122592855, | |
| "learning_rate": 5.634231318183913e-09, | |
| "loss": 0.4483, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.9595484477892756, | |
| "grad_norm": 34.112979889172955, | |
| "learning_rate": 5.036300088869794e-09, | |
| "loss": 0.3958, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.9619002822201317, | |
| "grad_norm": 67.68046313703896, | |
| "learning_rate": 4.471740743400843e-09, | |
| "loss": 0.3998, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.9642521166509878, | |
| "grad_norm": 64.94301439710068, | |
| "learning_rate": 3.94059134600061e-09, | |
| "loss": 0.5025, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.9666039510818438, | |
| "grad_norm": 72.14615132171306, | |
| "learning_rate": 3.4428877082972597e-09, | |
| "loss": 0.4421, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.9689557855126999, | |
| "grad_norm": 24.113378758695696, | |
| "learning_rate": 2.9786633869091083e-09, | |
| "loss": 0.4162, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.971307619943556, | |
| "grad_norm": 241.97699208841857, | |
| "learning_rate": 2.5479496811823264e-09, | |
| "loss": 0.4298, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.973659454374412, | |
| "grad_norm": 133.43074381257875, | |
| "learning_rate": 2.1507756310802926e-09, | |
| "loss": 0.4428, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.9760112888052681, | |
| "grad_norm": 26.108621458874143, | |
| "learning_rate": 1.7871680152258816e-09, | |
| "loss": 0.478, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.9783631232361242, | |
| "grad_norm": 48.05086153194788, | |
| "learning_rate": 1.4571513490960197e-09, | |
| "loss": 0.4364, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.9807149576669802, | |
| "grad_norm": 135.6321121517264, | |
| "learning_rate": 1.1607478833685624e-09, | |
| "loss": 0.4654, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.9830667920978363, | |
| "grad_norm": 43.352481280379564, | |
| "learning_rate": 8.97977602422162e-10, | |
| "loss": 0.4522, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.9854186265286924, | |
| "grad_norm": 400.18828509454596, | |
| "learning_rate": 6.688582229890106e-10, | |
| "loss": 0.4308, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.9877704609595485, | |
| "grad_norm": 118.9697384571146, | |
| "learning_rate": 4.734051929601857e-10, | |
| "loss": 0.4335, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.9901222953904045, | |
| "grad_norm": 620.0407392361634, | |
| "learning_rate": 3.116316903440941e-10, | |
| "loss": 0.4353, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.9924741298212606, | |
| "grad_norm": 93.82732661926246, | |
| "learning_rate": 1.8354862237812685e-10, | |
| "loss": 0.4672, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.9948259642521167, | |
| "grad_norm": 39.8947952442234, | |
| "learning_rate": 8.916462479297005e-11, | |
| "loss": 0.4519, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.9971777986829727, | |
| "grad_norm": 220.48761793592806, | |
| "learning_rate": 2.8486061230736934e-11, | |
| "loss": 0.4331, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.9995296331138288, | |
| "grad_norm": 32.77645356865427, | |
| "learning_rate": 1.5170228155891861e-12, | |
| "loss": 0.4223, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 4252, | |
| "total_flos": 4.149639304655667e+17, | |
| "train_loss": 0.5222998527373667, | |
| "train_runtime": 59750.607, | |
| "train_samples_per_second": 1.139, | |
| "train_steps_per_second": 0.071 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4252, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 4000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.149639304655667e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
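
The object above matches the shape of a Hugging Face `Trainer` state file (`trainer_state.json`): `log_history` holds one entry per `logging_steps` interval (here 10) with `loss`, `learning_rate`, and `grad_norm`, plus a final summary entry carrying `train_loss` and runtime totals. Below is a minimal sketch of how such a file could be inspected; the filename `trainer_state.json` and the use of matplotlib are assumptions for illustration, not part of the log itself.

```python
# Minimal sketch: load a Trainer state file like the one above and
# plot loss and learning rate over training steps.
# Assumes the JSON was saved as "trainer_state.json" (hypothetical path)
# and that matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step logging entries; the final entry carries run
# totals (train_loss, train_runtime) rather than a per-step "loss" key.
logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

print(f"{len(logs)} logged steps; final logged loss {losses[-1]:.4f}")

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

Read this way, the tail of the schedule (learning rate decaying to roughly 1.5e-12 by step 4250) together with `max_steps: 4252`, `num_train_epochs: 1`, and `should_training_stop: true` is consistent with a single full epoch completing normally.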