| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.5026072752403091, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00025130363762015457, |
| "grad_norm": 2.0144803524017334, |
| "learning_rate": 0.0, |
| "loss": 2.7742, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0005026072752403091, |
| "grad_norm": 1.5290026664733887, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.9485, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0007539109128604636, |
| "grad_norm": 1.7576098442077637, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 3.2699, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0010052145504806183, |
| "grad_norm": 1.4803149700164795, |
| "learning_rate": 6e-06, |
| "loss": 2.6531, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0012565181881007727, |
| "grad_norm": 1.2919175624847412, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.7198, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0015078218257209273, |
| "grad_norm": 1.0887187719345093, |
| "learning_rate": 1e-05, |
| "loss": 2.9058, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.001759125463341082, |
| "grad_norm": 1.176196575164795, |
| "learning_rate": 1.2e-05, |
| "loss": 2.769, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0020104291009612365, |
| "grad_norm": 1.4506360292434692, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 2.8493, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.002261732738581391, |
| "grad_norm": 0.8232998251914978, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 2.7691, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0025130363762015454, |
| "grad_norm": 0.8385952711105347, |
| "learning_rate": 1.8e-05, |
| "loss": 2.7073, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0027643400138217, |
| "grad_norm": 0.594434916973114, |
| "learning_rate": 2e-05, |
| "loss": 2.1646, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0030156436514418546, |
| "grad_norm": 0.8067689538002014, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 2.5741, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0032669472890620092, |
| "grad_norm": 0.8038071393966675, |
| "learning_rate": 2.4e-05, |
| "loss": 2.6733, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.003518250926682164, |
| "grad_norm": 0.48843103647232056, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 2.598, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0037695545643023185, |
| "grad_norm": 0.8283182978630066, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 2.8211, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004020858201922473, |
| "grad_norm": 0.4953489601612091, |
| "learning_rate": 3e-05, |
| "loss": 2.3855, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.004272161839542627, |
| "grad_norm": 0.4421069025993347, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 2.6665, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.004523465477162782, |
| "grad_norm": 0.6795067191123962, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 2.5962, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0047747691147829365, |
| "grad_norm": 0.5411877036094666, |
| "learning_rate": 3.6e-05, |
| "loss": 2.4962, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.005026072752403091, |
| "grad_norm": 0.5864161252975464, |
| "learning_rate": 3.8e-05, |
| "loss": 2.4219, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.005277376390023246, |
| "grad_norm": 0.5454627871513367, |
| "learning_rate": 4e-05, |
| "loss": 2.8872, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0055286800276434, |
| "grad_norm": 0.4071284532546997, |
| "learning_rate": 4.2e-05, |
| "loss": 2.4573, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.005779983665263555, |
| "grad_norm": 0.2068735957145691, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.8764, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.006031287302883709, |
| "grad_norm": 0.5132932662963867, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 2.4306, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.006282590940503863, |
| "grad_norm": 0.3241199851036072, |
| "learning_rate": 4.8e-05, |
| "loss": 2.1521, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0065338945781240184, |
| "grad_norm": 0.4157683551311493, |
| "learning_rate": 5e-05, |
| "loss": 2.7139, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.006785198215744173, |
| "grad_norm": 0.5854943990707397, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 2.2383, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.007036501853364328, |
| "grad_norm": 0.5483741760253906, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 2.4569, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.007287805490984482, |
| "grad_norm": 0.41493722796440125, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 2.9473, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.007539109128604637, |
| "grad_norm": 0.3696433901786804, |
| "learning_rate": 5.8e-05, |
| "loss": 2.2744, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.007790412766224791, |
| "grad_norm": 0.18437762558460236, |
| "learning_rate": 6e-05, |
| "loss": 1.0493, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.008041716403844946, |
| "grad_norm": 0.46951478719711304, |
| "learning_rate": 6.2e-05, |
| "loss": 2.4904, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0082930200414651, |
| "grad_norm": 0.32901376485824585, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 2.0627, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.008544323679085255, |
| "grad_norm": 0.668229877948761, |
| "learning_rate": 6.6e-05, |
| "loss": 2.3694, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.008795627316705409, |
| "grad_norm": 0.9680635333061218, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 2.3719, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.009046930954325565, |
| "grad_norm": 0.6750502586364746, |
| "learning_rate": 7e-05, |
| "loss": 2.3538, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.009298234591945719, |
| "grad_norm": 0.1759599894285202, |
| "learning_rate": 7.2e-05, |
| "loss": 1.0368, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.009549538229565873, |
| "grad_norm": 0.5437096953392029, |
| "learning_rate": 7.4e-05, |
| "loss": 2.7952, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.009800841867186027, |
| "grad_norm": 0.31723931431770325, |
| "learning_rate": 7.6e-05, |
| "loss": 2.4566, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.010052145504806181, |
| "grad_norm": 0.34227266907691956, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 2.281, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.010303449142426337, |
| "grad_norm": 0.3893303871154785, |
| "learning_rate": 8e-05, |
| "loss": 2.7128, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.010554752780046492, |
| "grad_norm": 0.32380804419517517, |
| "learning_rate": 8.2e-05, |
| "loss": 2.7928, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.010806056417666646, |
| "grad_norm": 0.29380202293395996, |
| "learning_rate": 8.4e-05, |
| "loss": 2.305, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0110573600552868, |
| "grad_norm": 0.4324714243412018, |
| "learning_rate": 8.6e-05, |
| "loss": 2.4824, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.011308663692906954, |
| "grad_norm": 0.4421226978302002, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 2.1697, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01155996733052711, |
| "grad_norm": 0.5161215662956238, |
| "learning_rate": 9e-05, |
| "loss": 2.4065, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.011811270968147264, |
| "grad_norm": 0.3017684817314148, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 2.4247, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.012062574605767418, |
| "grad_norm": 0.5015223622322083, |
| "learning_rate": 9.4e-05, |
| "loss": 2.3724, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.012313878243387573, |
| "grad_norm": 0.3141859471797943, |
| "learning_rate": 9.6e-05, |
| "loss": 1.8871, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.012565181881007727, |
| "grad_norm": 0.1826818883419037, |
| "learning_rate": 9.8e-05, |
| "loss": 1.9633, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012816485518627883, |
| "grad_norm": 0.37197422981262207, |
| "learning_rate": 0.0001, |
| "loss": 2.4161, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.013067789156248037, |
| "grad_norm": 0.3732236623764038, |
| "learning_rate": 9.99999840163606e-05, |
| "loss": 2.2943, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.013319092793868191, |
| "grad_norm": 0.5340951681137085, |
| "learning_rate": 9.99999360654526e-05, |
| "loss": 2.2039, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.013570396431488345, |
| "grad_norm": 0.394661545753479, |
| "learning_rate": 9.999985614730664e-05, |
| "loss": 2.3037, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.013821700069108501, |
| "grad_norm": 0.3042058050632477, |
| "learning_rate": 9.999974426197384e-05, |
| "loss": 2.5117, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.014073003706728655, |
| "grad_norm": 0.362106591463089, |
| "learning_rate": 9.999960040952574e-05, |
| "loss": 2.6846, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.01432430734434881, |
| "grad_norm": 0.728489875793457, |
| "learning_rate": 9.99994245900543e-05, |
| "loss": 2.4936, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.014575610981968964, |
| "grad_norm": 0.2623450756072998, |
| "learning_rate": 9.999921680367191e-05, |
| "loss": 2.6129, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.014826914619589118, |
| "grad_norm": 0.24758170545101166, |
| "learning_rate": 9.999897705051145e-05, |
| "loss": 2.1818, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.015078218257209274, |
| "grad_norm": 0.3045499324798584, |
| "learning_rate": 9.99987053307262e-05, |
| "loss": 2.3918, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.015329521894829428, |
| "grad_norm": 0.3922071158885956, |
| "learning_rate": 9.999840164448984e-05, |
| "loss": 2.2996, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.015580825532449582, |
| "grad_norm": 0.20261235535144806, |
| "learning_rate": 9.999806599199659e-05, |
| "loss": 1.9872, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.015832129170069736, |
| "grad_norm": 0.3178957402706146, |
| "learning_rate": 9.999769837346103e-05, |
| "loss": 2.4412, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.016083432807689892, |
| "grad_norm": 0.304047167301178, |
| "learning_rate": 9.999729878911816e-05, |
| "loss": 2.705, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.016334736445310045, |
| "grad_norm": 0.5113909840583801, |
| "learning_rate": 9.99968672392235e-05, |
| "loss": 2.533, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0165860400829302, |
| "grad_norm": 0.29634010791778564, |
| "learning_rate": 9.999640372405295e-05, |
| "loss": 2.4283, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.016837343720550357, |
| "grad_norm": 0.3125896453857422, |
| "learning_rate": 9.999590824390281e-05, |
| "loss": 2.4421, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.01708864735817051, |
| "grad_norm": 0.38444283604621887, |
| "learning_rate": 9.999538079908993e-05, |
| "loss": 2.1928, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.017339950995790665, |
| "grad_norm": 0.3269999325275421, |
| "learning_rate": 9.999482138995149e-05, |
| "loss": 2.6259, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.017591254633410817, |
| "grad_norm": 0.30345460772514343, |
| "learning_rate": 9.999423001684513e-05, |
| "loss": 2.6393, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.017842558271030973, |
| "grad_norm": 1.1956677436828613, |
| "learning_rate": 9.9993606680149e-05, |
| "loss": 2.9455, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.01809386190865113, |
| "grad_norm": 0.4432305097579956, |
| "learning_rate": 9.999295138026157e-05, |
| "loss": 2.3158, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.018345165546271282, |
| "grad_norm": 0.24386799335479736, |
| "learning_rate": 9.999226411760185e-05, |
| "loss": 2.6777, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.018596469183891438, |
| "grad_norm": 0.34685081243515015, |
| "learning_rate": 9.99915448926092e-05, |
| "loss": 2.4423, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.01884777282151159, |
| "grad_norm": 0.5708550214767456, |
| "learning_rate": 9.999079370574347e-05, |
| "loss": 2.2399, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.019099076459131746, |
| "grad_norm": 0.2775181829929352, |
| "learning_rate": 9.999001055748492e-05, |
| "loss": 1.4073, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.019350380096751902, |
| "grad_norm": 0.44185924530029297, |
| "learning_rate": 9.998919544833427e-05, |
| "loss": 2.2447, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.019601683734372054, |
| "grad_norm": 0.35135698318481445, |
| "learning_rate": 9.998834837881263e-05, |
| "loss": 2.4701, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.01985298737199221, |
| "grad_norm": 0.2450733631849289, |
| "learning_rate": 9.998746934946159e-05, |
| "loss": 2.1066, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.020104291009612363, |
| "grad_norm": 0.3851965367794037, |
| "learning_rate": 9.998655836084316e-05, |
| "loss": 2.2371, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02035559464723252, |
| "grad_norm": 0.2562990188598633, |
| "learning_rate": 9.998561541353976e-05, |
| "loss": 2.5131, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.020606898284852675, |
| "grad_norm": 0.35332947969436646, |
| "learning_rate": 9.998464050815426e-05, |
| "loss": 2.4474, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.020858201922472827, |
| "grad_norm": 0.3566109836101532, |
| "learning_rate": 9.998363364530998e-05, |
| "loss": 2.5007, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.021109505560092983, |
| "grad_norm": 0.27204859256744385, |
| "learning_rate": 9.998259482565063e-05, |
| "loss": 2.221, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.021360809197713135, |
| "grad_norm": 0.1774623841047287, |
| "learning_rate": 9.998152404984036e-05, |
| "loss": 1.5693, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.02161211283533329, |
| "grad_norm": 0.31386587023735046, |
| "learning_rate": 9.998042131856382e-05, |
| "loss": 2.1838, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.021863416472953447, |
| "grad_norm": 0.4387069344520569, |
| "learning_rate": 9.997928663252601e-05, |
| "loss": 2.2176, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0221147201105736, |
| "grad_norm": 0.5427992343902588, |
| "learning_rate": 9.997811999245236e-05, |
| "loss": 2.2632, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.022366023748193756, |
| "grad_norm": 0.14437389373779297, |
| "learning_rate": 9.997692139908879e-05, |
| "loss": 1.0033, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.022617327385813908, |
| "grad_norm": 0.18704228103160858, |
| "learning_rate": 9.99756908532016e-05, |
| "loss": 1.1516, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.022868631023434064, |
| "grad_norm": 0.23126636445522308, |
| "learning_rate": 9.997442835557753e-05, |
| "loss": 1.9892, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.02311993466105422, |
| "grad_norm": 0.2967139780521393, |
| "learning_rate": 9.997313390702377e-05, |
| "loss": 2.0742, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.023371238298674372, |
| "grad_norm": 0.25964412093162537, |
| "learning_rate": 9.997180750836792e-05, |
| "loss": 2.4486, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.02362254193629453, |
| "grad_norm": 0.21999791264533997, |
| "learning_rate": 9.997044916045799e-05, |
| "loss": 1.0311, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.02387384557391468, |
| "grad_norm": 0.22438450157642365, |
| "learning_rate": 9.996905886416244e-05, |
| "loss": 1.2788, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.024125149211534837, |
| "grad_norm": 0.3627947270870209, |
| "learning_rate": 9.996763662037014e-05, |
| "loss": 2.2382, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.024376452849154993, |
| "grad_norm": 0.2582647502422333, |
| "learning_rate": 9.996618242999042e-05, |
| "loss": 2.2999, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.024627756486775145, |
| "grad_norm": 0.34755995869636536, |
| "learning_rate": 9.9964696293953e-05, |
| "loss": 2.2329, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0248790601243953, |
| "grad_norm": 0.15312296152114868, |
| "learning_rate": 9.996317821320802e-05, |
| "loss": 0.6043, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.025130363762015454, |
| "grad_norm": 0.22593067586421967, |
| "learning_rate": 9.996162818872607e-05, |
| "loss": 2.3993, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02538166739963561, |
| "grad_norm": 0.3924887776374817, |
| "learning_rate": 9.996004622149814e-05, |
| "loss": 2.3133, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.025632971037255765, |
| "grad_norm": 0.5138020515441895, |
| "learning_rate": 9.995843231253569e-05, |
| "loss": 2.4782, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.025884274674875918, |
| "grad_norm": 0.42053958773612976, |
| "learning_rate": 9.995678646287053e-05, |
| "loss": 2.0569, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.026135578312496074, |
| "grad_norm": 0.2818872034549713, |
| "learning_rate": 9.995510867355494e-05, |
| "loss": 2.7051, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.02638688195011623, |
| "grad_norm": 0.4185803532600403, |
| "learning_rate": 9.995339894566158e-05, |
| "loss": 2.2182, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.026638185587736382, |
| "grad_norm": 0.3512636721134186, |
| "learning_rate": 9.995165728028359e-05, |
| "loss": 2.5701, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.026889489225356538, |
| "grad_norm": 0.3916986584663391, |
| "learning_rate": 9.994988367853451e-05, |
| "loss": 2.034, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.02714079286297669, |
| "grad_norm": 0.3517094850540161, |
| "learning_rate": 9.994807814154824e-05, |
| "loss": 2.0577, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.027392096500596846, |
| "grad_norm": 0.3244116008281708, |
| "learning_rate": 9.994624067047917e-05, |
| "loss": 2.1661, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.027643400138217002, |
| "grad_norm": 0.42575743794441223, |
| "learning_rate": 9.994437126650207e-05, |
| "loss": 2.5129, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.027894703775837155, |
| "grad_norm": 0.1691725254058838, |
| "learning_rate": 9.994246993081213e-05, |
| "loss": 1.5939, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.02814600741345731, |
| "grad_norm": 0.33062514662742615, |
| "learning_rate": 9.994053666462498e-05, |
| "loss": 2.222, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.028397311051077463, |
| "grad_norm": 0.3275945484638214, |
| "learning_rate": 9.993857146917662e-05, |
| "loss": 2.3336, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.02864861468869762, |
| "grad_norm": 0.3942461907863617, |
| "learning_rate": 9.993657434572353e-05, |
| "loss": 2.3289, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.028899918326317775, |
| "grad_norm": 0.3180122971534729, |
| "learning_rate": 9.993454529554251e-05, |
| "loss": 2.4353, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.029151221963937927, |
| "grad_norm": 0.7072325944900513, |
| "learning_rate": 9.993248431993086e-05, |
| "loss": 1.9157, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.029402525601558083, |
| "grad_norm": 0.32835853099823, |
| "learning_rate": 9.993039142020622e-05, |
| "loss": 2.8819, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.029653829239178236, |
| "grad_norm": 0.2762772738933563, |
| "learning_rate": 9.992826659770672e-05, |
| "loss": 2.504, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.029905132876798392, |
| "grad_norm": 0.2669197618961334, |
| "learning_rate": 9.992610985379082e-05, |
| "loss": 2.3047, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.030156436514418548, |
| "grad_norm": 0.47167858481407166, |
| "learning_rate": 9.992392118983746e-05, |
| "loss": 2.2432, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0304077401520387, |
| "grad_norm": 0.18089920282363892, |
| "learning_rate": 9.992170060724593e-05, |
| "loss": 1.5848, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.030659043789658856, |
| "grad_norm": 0.33109933137893677, |
| "learning_rate": 9.991944810743597e-05, |
| "loss": 2.3503, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.03091034742727901, |
| "grad_norm": 0.24957267940044403, |
| "learning_rate": 9.991716369184765e-05, |
| "loss": 2.2894, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.031161651064899164, |
| "grad_norm": 0.3461008667945862, |
| "learning_rate": 9.991484736194157e-05, |
| "loss": 2.2987, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.03141295470251932, |
| "grad_norm": 0.2855774760246277, |
| "learning_rate": 9.991249911919862e-05, |
| "loss": 2.5193, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03166425834013947, |
| "grad_norm": 0.30880868434906006, |
| "learning_rate": 9.991011896512016e-05, |
| "loss": 2.5596, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.031915561977759625, |
| "grad_norm": 0.4237726032733917, |
| "learning_rate": 9.990770690122793e-05, |
| "loss": 2.1401, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.032166865615379785, |
| "grad_norm": 0.2743227183818817, |
| "learning_rate": 9.990526292906405e-05, |
| "loss": 1.858, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.03241816925299994, |
| "grad_norm": 0.28934118151664734, |
| "learning_rate": 9.99027870501911e-05, |
| "loss": 1.9751, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.03266947289062009, |
| "grad_norm": 0.4869619905948639, |
| "learning_rate": 9.990027926619197e-05, |
| "loss": 2.7332, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03292077652824025, |
| "grad_norm": 0.2246621996164322, |
| "learning_rate": 9.989773957867006e-05, |
| "loss": 2.2951, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0331720801658604, |
| "grad_norm": 0.46311891078948975, |
| "learning_rate": 9.989516798924908e-05, |
| "loss": 2.0357, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.033423383803480554, |
| "grad_norm": 0.45459482073783875, |
| "learning_rate": 9.989256449957316e-05, |
| "loss": 2.2661, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.03367468744110071, |
| "grad_norm": 0.4114730656147003, |
| "learning_rate": 9.988992911130683e-05, |
| "loss": 2.096, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.033925991078720866, |
| "grad_norm": 0.468787282705307, |
| "learning_rate": 9.988726182613502e-05, |
| "loss": 2.2083, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.03417729471634102, |
| "grad_norm": 0.30824795365333557, |
| "learning_rate": 9.988456264576305e-05, |
| "loss": 2.4525, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.03442859835396117, |
| "grad_norm": 0.22959741950035095, |
| "learning_rate": 9.988183157191662e-05, |
| "loss": 2.4861, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.03467990199158133, |
| "grad_norm": 0.37763601541519165, |
| "learning_rate": 9.987906860634184e-05, |
| "loss": 2.4736, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.03493120562920148, |
| "grad_norm": 0.3474363386631012, |
| "learning_rate": 9.987627375080519e-05, |
| "loss": 2.4363, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.035182509266821635, |
| "grad_norm": 0.3558574616909027, |
| "learning_rate": 9.987344700709356e-05, |
| "loss": 2.0973, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.035433812904441794, |
| "grad_norm": 0.19336606562137604, |
| "learning_rate": 9.98705883770142e-05, |
| "loss": 1.3915, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.03568511654206195, |
| "grad_norm": 0.4760534465312958, |
| "learning_rate": 9.986769786239477e-05, |
| "loss": 2.0163, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.0359364201796821, |
| "grad_norm": 0.2839144170284271, |
| "learning_rate": 9.98647754650833e-05, |
| "loss": 1.7691, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.03618772381730226, |
| "grad_norm": 0.3936741352081299, |
| "learning_rate": 9.986182118694825e-05, |
| "loss": 2.1423, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.03643902745492241, |
| "grad_norm": 0.35313600301742554, |
| "learning_rate": 9.985883502987838e-05, |
| "loss": 2.3156, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.036690331092542564, |
| "grad_norm": 0.3352813720703125, |
| "learning_rate": 9.985581699578287e-05, |
| "loss": 2.2992, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.036941634730162716, |
| "grad_norm": 0.43075451254844666, |
| "learning_rate": 9.985276708659134e-05, |
| "loss": 2.7181, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.037192938367782875, |
| "grad_norm": 0.4140123128890991, |
| "learning_rate": 9.984968530425369e-05, |
| "loss": 2.2454, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.03744424200540303, |
| "grad_norm": 0.31063738465309143, |
| "learning_rate": 9.984657165074027e-05, |
| "loss": 2.4994, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.03769554564302318, |
| "grad_norm": 0.22530898451805115, |
| "learning_rate": 9.984342612804176e-05, |
| "loss": 2.0645, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03794684928064334, |
| "grad_norm": 0.2811500132083893, |
| "learning_rate": 9.984024873816924e-05, |
| "loss": 2.3546, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.03819815291826349, |
| "grad_norm": 0.4520877003669739, |
| "learning_rate": 9.983703948315417e-05, |
| "loss": 2.1208, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.038449456555883645, |
| "grad_norm": 0.37516894936561584, |
| "learning_rate": 9.983379836504838e-05, |
| "loss": 2.3911, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.038700760193503804, |
| "grad_norm": 0.4740954637527466, |
| "learning_rate": 9.983052538592404e-05, |
| "loss": 2.4538, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.038952063831123956, |
| "grad_norm": 0.34146809577941895, |
| "learning_rate": 9.982722054787372e-05, |
| "loss": 2.5654, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.03920336746874411, |
| "grad_norm": 0.3912610709667206, |
| "learning_rate": 9.982388385301038e-05, |
| "loss": 2.4403, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.03945467110636426, |
| "grad_norm": 0.5822238922119141, |
| "learning_rate": 9.98205153034673e-05, |
| "loss": 2.3008, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.03970597474398442, |
| "grad_norm": 0.1491788774728775, |
| "learning_rate": 9.981711490139814e-05, |
| "loss": 1.1215, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.03995727838160457, |
| "grad_norm": 0.3438681960105896, |
| "learning_rate": 9.981368264897694e-05, |
| "loss": 2.7011, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.040208582019224726, |
| "grad_norm": 0.3378736972808838, |
| "learning_rate": 9.98102185483981e-05, |
| "loss": 2.4961, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.040459885656844885, |
| "grad_norm": 0.2592676281929016, |
| "learning_rate": 9.980672260187638e-05, |
| "loss": 1.8838, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.04071118929446504, |
| "grad_norm": 0.31087106466293335, |
| "learning_rate": 9.980319481164688e-05, |
| "loss": 2.2719, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.04096249293208519, |
| "grad_norm": 0.33119067549705505, |
| "learning_rate": 9.979963517996509e-05, |
| "loss": 2.0298, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.04121379656970535, |
| "grad_norm": 0.4013742506504059, |
| "learning_rate": 9.979604370910685e-05, |
| "loss": 2.5852, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0414651002073255, |
| "grad_norm": 0.21013818681240082, |
| "learning_rate": 9.979242040136835e-05, |
| "loss": 1.1489, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.041716403844945654, |
| "grad_norm": 0.5467535257339478, |
| "learning_rate": 9.978876525906613e-05, |
| "loss": 2.2094, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.04196770748256581, |
| "grad_norm": 0.3550753593444824, |
| "learning_rate": 9.978507828453708e-05, |
| "loss": 2.2849, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.042219011120185966, |
| "grad_norm": 0.2620997726917267, |
| "learning_rate": 9.978135948013847e-05, |
| "loss": 2.3229, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.04247031475780612, |
| "grad_norm": 0.2706509530544281, |
| "learning_rate": 9.977760884824788e-05, |
| "loss": 2.2731, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.04272161839542627, |
| "grad_norm": 0.41886723041534424, |
| "learning_rate": 9.977382639126328e-05, |
| "loss": 2.42, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04297292203304643, |
| "grad_norm": 0.33128440380096436, |
| "learning_rate": 9.977001211160296e-05, |
| "loss": 2.3203, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.04322422567066658, |
| "grad_norm": 0.24908378720283508, |
| "learning_rate": 9.976616601170557e-05, |
| "loss": 1.8275, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.043475529308286735, |
| "grad_norm": 0.3278106451034546, |
| "learning_rate": 9.976228809403008e-05, |
| "loss": 2.6484, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.043726832945906895, |
| "grad_norm": 0.5737075209617615, |
| "learning_rate": 9.975837836105581e-05, |
| "loss": 2.2196, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.04397813658352705, |
| "grad_norm": 0.3396255373954773, |
| "learning_rate": 9.975443681528247e-05, |
| "loss": 2.1756, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0442294402211472, |
| "grad_norm": 0.4173082411289215, |
| "learning_rate": 9.975046345923004e-05, |
| "loss": 1.9394, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.04448074385876736, |
| "grad_norm": 0.2630142867565155, |
| "learning_rate": 9.974645829543889e-05, |
| "loss": 1.9798, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.04473204749638751, |
| "grad_norm": 0.23479297757148743, |
| "learning_rate": 9.974242132646967e-05, |
| "loss": 1.8764, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.044983351134007664, |
| "grad_norm": 0.2806346118450165, |
| "learning_rate": 9.973835255490343e-05, |
| "loss": 2.2942, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.045234654771627816, |
| "grad_norm": 0.2745998203754425, |
| "learning_rate": 9.97342519833415e-05, |
| "loss": 2.1606, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.045485958409247976, |
| "grad_norm": 0.46454527974128723, |
| "learning_rate": 9.973011961440559e-05, |
| "loss": 2.2312, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.04573726204686813, |
| "grad_norm": 0.34017616510391235, |
| "learning_rate": 9.972595545073769e-05, |
| "loss": 2.3171, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.04598856568448828, |
| "grad_norm": 0.34439125657081604, |
| "learning_rate": 9.972175949500012e-05, |
| "loss": 2.3098, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.04623986932210844, |
| "grad_norm": 0.47229647636413574, |
| "learning_rate": 9.97175317498756e-05, |
| "loss": 2.1515, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.04649117295972859, |
| "grad_norm": 0.3148500621318817, |
| "learning_rate": 9.971327221806706e-05, |
| "loss": 2.5827, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.046742476597348745, |
| "grad_norm": 0.20196139812469482, |
| "learning_rate": 9.970898090229785e-05, |
| "loss": 1.5597, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.046993780234968904, |
| "grad_norm": 0.26498332619667053, |
| "learning_rate": 9.97046578053116e-05, |
| "loss": 2.4454, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.04724508387258906, |
| "grad_norm": 0.4051288068294525, |
| "learning_rate": 9.970030292987225e-05, |
| "loss": 2.3758, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.04749638751020921, |
| "grad_norm": 0.3003399074077606, |
| "learning_rate": 9.969591627876409e-05, |
| "loss": 2.691, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.04774769114782936, |
| "grad_norm": 0.22101780772209167, |
| "learning_rate": 9.96914978547917e-05, |
| "loss": 2.361, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04799899478544952, |
| "grad_norm": 0.342926025390625, |
| "learning_rate": 9.968704766077997e-05, |
| "loss": 2.4845, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.048250298423069674, |
| "grad_norm": 0.3619941174983978, |
| "learning_rate": 9.968256569957411e-05, |
| "loss": 2.6731, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.048501602060689826, |
| "grad_norm": 0.4262664020061493, |
| "learning_rate": 9.967805197403965e-05, |
| "loss": 2.6423, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.048752905698309985, |
| "grad_norm": 0.21453841030597687, |
| "learning_rate": 9.96735064870624e-05, |
| "loss": 1.0993, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.04900420933593014, |
| "grad_norm": 0.5756468176841736, |
| "learning_rate": 9.966892924154853e-05, |
| "loss": 2.1122, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.04925551297355029, |
| "grad_norm": 0.5691533088684082, |
| "learning_rate": 9.96643202404245e-05, |
| "loss": 2.3865, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.04950681661117045, |
| "grad_norm": 0.4420826733112335, |
| "learning_rate": 9.965967948663698e-05, |
| "loss": 2.1786, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.0497581202487906, |
| "grad_norm": 0.4547330141067505, |
| "learning_rate": 9.965500698315306e-05, |
| "loss": 2.5167, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.050009423886410755, |
| "grad_norm": 0.18379782140254974, |
| "learning_rate": 9.96503027329601e-05, |
| "loss": 1.0926, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.05026072752403091, |
| "grad_norm": 0.20280440151691437, |
| "learning_rate": 9.964556673906572e-05, |
| "loss": 2.0984, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.050512031161651066, |
| "grad_norm": 0.5554583072662354, |
| "learning_rate": 9.964079900449785e-05, |
| "loss": 2.8414, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.05076333479927122, |
| "grad_norm": 0.3430071473121643, |
| "learning_rate": 9.963599953230473e-05, |
| "loss": 2.4415, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.05101463843689137, |
| "grad_norm": 0.34130024909973145, |
| "learning_rate": 9.96311683255549e-05, |
| "loss": 2.1872, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.05126594207451153, |
| "grad_norm": 0.3914225995540619, |
| "learning_rate": 9.962630538733715e-05, |
| "loss": 2.3896, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.05151724571213168, |
| "grad_norm": 0.6636744141578674, |
| "learning_rate": 9.962141072076057e-05, |
| "loss": 2.5256, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.051768549349751836, |
| "grad_norm": 0.419360876083374, |
| "learning_rate": 9.961648432895454e-05, |
| "loss": 2.1897, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.052019852987371995, |
| "grad_norm": 0.3426320552825928, |
| "learning_rate": 9.961152621506876e-05, |
| "loss": 2.433, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.05227115662499215, |
| "grad_norm": 0.24097320437431335, |
| "learning_rate": 9.960653638227315e-05, |
| "loss": 1.899, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0525224602626123, |
| "grad_norm": 0.40818917751312256, |
| "learning_rate": 9.960151483375795e-05, |
| "loss": 2.4313, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.05277376390023246, |
| "grad_norm": 0.34848183393478394, |
| "learning_rate": 9.959646157273366e-05, |
| "loss": 1.9527, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05302506753785261, |
| "grad_norm": 0.18201400339603424, |
| "learning_rate": 9.959137660243105e-05, |
| "loss": 1.8784, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.053276371175472764, |
| "grad_norm": 0.3388751745223999, |
| "learning_rate": 9.95862599261012e-05, |
| "loss": 2.4083, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.05352767481309292, |
| "grad_norm": 0.2567930519580841, |
| "learning_rate": 9.958111154701542e-05, |
| "loss": 1.9071, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.053778978450713076, |
| "grad_norm": 0.39157772064208984, |
| "learning_rate": 9.957593146846529e-05, |
| "loss": 2.072, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.05403028208833323, |
| "grad_norm": 0.40348386764526367, |
| "learning_rate": 9.95707196937627e-05, |
| "loss": 2.427, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.05428158572595338, |
| "grad_norm": 0.47843194007873535, |
| "learning_rate": 9.956547622623973e-05, |
| "loss": 2.0144, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.05453288936357354, |
| "grad_norm": 0.337272971868515, |
| "learning_rate": 9.956020106924882e-05, |
| "loss": 1.7783, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.05478419300119369, |
| "grad_norm": 0.40009111166000366, |
| "learning_rate": 9.955489422616258e-05, |
| "loss": 2.2961, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.055035496638813845, |
| "grad_norm": 0.28611692786216736, |
| "learning_rate": 9.954955570037395e-05, |
| "loss": 2.44, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.055286800276434005, |
| "grad_norm": 0.3352760374546051, |
| "learning_rate": 9.954418549529605e-05, |
| "loss": 2.2449, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05553810391405416, |
| "grad_norm": 0.295691579580307, |
| "learning_rate": 9.953878361436232e-05, |
| "loss": 1.9398, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.05578940755167431, |
| "grad_norm": 0.41174155473709106, |
| "learning_rate": 9.953335006102643e-05, |
| "loss": 2.241, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.05604071118929446, |
| "grad_norm": 0.32655176520347595, |
| "learning_rate": 9.95278848387623e-05, |
| "loss": 2.078, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.05629201482691462, |
| "grad_norm": 0.26872923970222473, |
| "learning_rate": 9.95223879510641e-05, |
| "loss": 1.9153, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.056543318464534774, |
| "grad_norm": 0.37920647859573364, |
| "learning_rate": 9.951685940144622e-05, |
| "loss": 2.5823, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.056794622102154926, |
| "grad_norm": 0.18780489265918732, |
| "learning_rate": 9.951129919344334e-05, |
| "loss": 0.9544, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.057045925739775086, |
| "grad_norm": 0.3492078185081482, |
| "learning_rate": 9.950570733061033e-05, |
| "loss": 2.3073, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.05729722937739524, |
| "grad_norm": 0.29748043417930603, |
| "learning_rate": 9.950008381652235e-05, |
| "loss": 1.6764, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.05754853301501539, |
| "grad_norm": 0.30301064252853394, |
| "learning_rate": 9.949442865477474e-05, |
| "loss": 2.4839, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.05779983665263555, |
| "grad_norm": 1.056869387626648, |
| "learning_rate": 9.948874184898313e-05, |
| "loss": 1.9098, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0580511402902557, |
| "grad_norm": 0.33054810762405396, |
| "learning_rate": 9.948302340278333e-05, |
| "loss": 2.6708, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.058302443927875855, |
| "grad_norm": 0.37433305382728577, |
| "learning_rate": 9.94772733198314e-05, |
| "loss": 2.4231, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.05855374756549601, |
| "grad_norm": 0.35789650678634644, |
| "learning_rate": 9.947149160380366e-05, |
| "loss": 2.1926, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.05880505120311617, |
| "grad_norm": 0.3637252151966095, |
| "learning_rate": 9.94656782583966e-05, |
| "loss": 2.0955, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.05905635484073632, |
| "grad_norm": 0.37392285466194153, |
| "learning_rate": 9.945983328732698e-05, |
| "loss": 2.2662, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.05930765847835647, |
| "grad_norm": 0.36526796221733093, |
| "learning_rate": 9.945395669433172e-05, |
| "loss": 2.4013, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.05955896211597663, |
| "grad_norm": 0.3178480267524719, |
| "learning_rate": 9.944804848316802e-05, |
| "loss": 2.4751, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.059810265753596784, |
| "grad_norm": 0.5924585461616516, |
| "learning_rate": 9.944210865761328e-05, |
| "loss": 2.4306, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.060061569391216936, |
| "grad_norm": 0.19464784860610962, |
| "learning_rate": 9.943613722146505e-05, |
| "loss": 1.7291, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.060312873028837095, |
| "grad_norm": 0.43970218300819397, |
| "learning_rate": 9.943013417854122e-05, |
| "loss": 2.6384, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06056417666645725, |
| "grad_norm": 0.24550621211528778, |
| "learning_rate": 9.942409953267972e-05, |
| "loss": 1.5974, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0608154803040774, |
| "grad_norm": 0.33134451508522034, |
| "learning_rate": 9.941803328773885e-05, |
| "loss": 2.4358, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.06106678394169755, |
| "grad_norm": 0.5241190791130066, |
| "learning_rate": 9.941193544759699e-05, |
| "loss": 2.3483, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.06131808757931771, |
| "grad_norm": 0.5123705863952637, |
| "learning_rate": 9.940580601615279e-05, |
| "loss": 1.9421, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.061569391216937865, |
| "grad_norm": 0.3747979998588562, |
| "learning_rate": 9.939964499732507e-05, |
| "loss": 2.207, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.06182069485455802, |
| "grad_norm": 0.3442586362361908, |
| "learning_rate": 9.939345239505284e-05, |
| "loss": 2.2754, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.062071998492178176, |
| "grad_norm": 0.3288307785987854, |
| "learning_rate": 9.938722821329532e-05, |
| "loss": 2.3217, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.06232330212979833, |
| "grad_norm": 0.33456501364707947, |
| "learning_rate": 9.938097245603193e-05, |
| "loss": 2.1507, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.06257460576741848, |
| "grad_norm": 0.4627265930175781, |
| "learning_rate": 9.937468512726223e-05, |
| "loss": 2.4072, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.06282590940503864, |
| "grad_norm": 0.2955438196659088, |
| "learning_rate": 9.9368366231006e-05, |
| "loss": 2.452, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06307721304265879, |
| "grad_norm": 0.7906093001365662, |
| "learning_rate": 9.936201577130324e-05, |
| "loss": 2.1179, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.06332851668027895, |
| "grad_norm": 0.41081616282463074, |
| "learning_rate": 9.935563375221404e-05, |
| "loss": 2.0524, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0635798203178991, |
| "grad_norm": 0.18861036002635956, |
| "learning_rate": 9.934922017781873e-05, |
| "loss": 1.4761, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.06383112395551925, |
| "grad_norm": 0.27071478962898254, |
| "learning_rate": 9.93427750522178e-05, |
| "loss": 2.2497, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.06408242759313941, |
| "grad_norm": 0.335267573595047, |
| "learning_rate": 9.933629837953191e-05, |
| "loss": 2.3577, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.06433373123075957, |
| "grad_norm": 0.3902337849140167, |
| "learning_rate": 9.932979016390192e-05, |
| "loss": 2.2602, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.06458503486837971, |
| "grad_norm": 0.550452470779419, |
| "learning_rate": 9.932325040948878e-05, |
| "loss": 2.1322, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.06483633850599987, |
| "grad_norm": 0.42229825258255005, |
| "learning_rate": 9.93166791204737e-05, |
| "loss": 2.449, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.06508764214362003, |
| "grad_norm": 0.2767508924007416, |
| "learning_rate": 9.931007630105798e-05, |
| "loss": 2.552, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.06533894578124018, |
| "grad_norm": 0.3925112783908844, |
| "learning_rate": 9.93034419554631e-05, |
| "loss": 2.5078, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06559024941886034, |
| "grad_norm": 0.38622385263442993, |
| "learning_rate": 9.929677608793072e-05, |
| "loss": 2.5889, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.0658415530564805, |
| "grad_norm": 0.3186779022216797, |
| "learning_rate": 9.929007870272262e-05, |
| "loss": 2.3722, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.06609285669410064, |
| "grad_norm": 0.308403342962265, |
| "learning_rate": 9.928334980412073e-05, |
| "loss": 2.8672, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0663441603317208, |
| "grad_norm": 0.37506890296936035, |
| "learning_rate": 9.927658939642716e-05, |
| "loss": 2.3802, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.06659546396934096, |
| "grad_norm": 0.43554747104644775, |
| "learning_rate": 9.926979748396415e-05, |
| "loss": 2.1515, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.06684676760696111, |
| "grad_norm": 0.4737273156642914, |
| "learning_rate": 9.926297407107406e-05, |
| "loss": 2.4804, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.06709807124458127, |
| "grad_norm": 0.30481746792793274, |
| "learning_rate": 9.925611916211943e-05, |
| "loss": 2.5455, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.06734937488220143, |
| "grad_norm": 0.29122671484947205, |
| "learning_rate": 9.92492327614829e-05, |
| "loss": 1.9563, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.06760067851982157, |
| "grad_norm": 0.2516386806964874, |
| "learning_rate": 9.924231487356725e-05, |
| "loss": 2.1242, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.06785198215744173, |
| "grad_norm": 0.1990005224943161, |
| "learning_rate": 9.923536550279544e-05, |
| "loss": 1.6236, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06810328579506188, |
| "grad_norm": 0.3531213402748108, |
| "learning_rate": 9.92283846536105e-05, |
| "loss": 2.3706, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.06835458943268204, |
| "grad_norm": 0.358190655708313, |
| "learning_rate": 9.922137233047558e-05, |
| "loss": 2.3467, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.0686058930703022, |
| "grad_norm": 0.3931328058242798, |
| "learning_rate": 9.9214328537874e-05, |
| "loss": 2.359, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.06885719670792234, |
| "grad_norm": 0.30483195185661316, |
| "learning_rate": 9.92072532803092e-05, |
| "loss": 2.8789, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0691085003455425, |
| "grad_norm": 0.155193030834198, |
| "learning_rate": 9.920014656230468e-05, |
| "loss": 1.4218, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.06935980398316266, |
| "grad_norm": 0.2718715965747833, |
| "learning_rate": 9.919300838840409e-05, |
| "loss": 2.6687, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.0696111076207828, |
| "grad_norm": 0.45890912413597107, |
| "learning_rate": 9.91858387631712e-05, |
| "loss": 2.4566, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.06986241125840296, |
| "grad_norm": 0.39152559638023376, |
| "learning_rate": 9.917863769118988e-05, |
| "loss": 2.7171, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.07011371489602312, |
| "grad_norm": 0.423405259847641, |
| "learning_rate": 9.91714051770641e-05, |
| "loss": 2.4389, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.07036501853364327, |
| "grad_norm": 0.2914890944957733, |
| "learning_rate": 9.916414122541794e-05, |
| "loss": 2.8702, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.07061632217126343, |
| "grad_norm": 0.45607659220695496, |
| "learning_rate": 9.915684584089557e-05, |
| "loss": 2.2563, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.07086762580888359, |
| "grad_norm": 0.8603537678718567, |
| "learning_rate": 9.914951902816128e-05, |
| "loss": 2.0017, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.07111892944650373, |
| "grad_norm": 0.34362930059432983, |
| "learning_rate": 9.91421607918994e-05, |
| "loss": 2.5167, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.0713702330841239, |
| "grad_norm": 0.3992220163345337, |
| "learning_rate": 9.913477113681441e-05, |
| "loss": 2.1986, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.07162153672174405, |
| "grad_norm": 0.409201443195343, |
| "learning_rate": 9.912735006763085e-05, |
| "loss": 1.8954, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.0718728403593642, |
| "grad_norm": 0.293445348739624, |
| "learning_rate": 9.911989758909335e-05, |
| "loss": 2.3246, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.07212414399698436, |
| "grad_norm": 0.3958424925804138, |
| "learning_rate": 9.911241370596663e-05, |
| "loss": 2.35, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.07237544763460452, |
| "grad_norm": 0.5355744957923889, |
| "learning_rate": 9.910489842303544e-05, |
| "loss": 2.3103, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.07262675127222466, |
| "grad_norm": 0.4253835678100586, |
| "learning_rate": 9.909735174510467e-05, |
| "loss": 2.5327, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.07287805490984482, |
| "grad_norm": 0.2699333727359772, |
| "learning_rate": 9.908977367699926e-05, |
| "loss": 1.4629, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07312935854746497, |
| "grad_norm": 0.44158613681793213, |
| "learning_rate": 9.90821642235642e-05, |
| "loss": 2.1242, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.07338066218508513, |
| "grad_norm": 0.33713415265083313, |
| "learning_rate": 9.907452338966457e-05, |
| "loss": 2.5233, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.07363196582270529, |
| "grad_norm": 0.21191152930259705, |
| "learning_rate": 9.906685118018549e-05, |
| "loss": 1.9315, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.07388326946032543, |
| "grad_norm": 0.3546162247657776, |
| "learning_rate": 9.905914760003216e-05, |
| "loss": 2.3661, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.07413457309794559, |
| "grad_norm": 0.33006587624549866, |
| "learning_rate": 9.905141265412984e-05, |
| "loss": 1.7989, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.07438587673556575, |
| "grad_norm": 0.4529229700565338, |
| "learning_rate": 9.904364634742385e-05, |
| "loss": 2.4695, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.0746371803731859, |
| "grad_norm": 0.29525983333587646, |
| "learning_rate": 9.90358486848795e-05, |
| "loss": 2.1218, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.07488848401080606, |
| "grad_norm": 0.273483544588089, |
| "learning_rate": 9.902801967148219e-05, |
| "loss": 2.4696, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.07513978764842622, |
| "grad_norm": 0.3772584795951843, |
| "learning_rate": 9.902015931223742e-05, |
| "loss": 2.3749, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.07539109128604636, |
| "grad_norm": 0.15759634971618652, |
| "learning_rate": 9.901226761217062e-05, |
| "loss": 0.7498, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07564239492366652, |
| "grad_norm": 0.2802472710609436, |
| "learning_rate": 9.900434457632734e-05, |
| "loss": 2.3353, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.07589369856128668, |
| "grad_norm": 0.7529959678649902, |
| "learning_rate": 9.899639020977314e-05, |
| "loss": 1.7885, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.07614500219890682, |
| "grad_norm": 0.3906523585319519, |
| "learning_rate": 9.89884045175936e-05, |
| "loss": 2.4677, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.07639630583652698, |
| "grad_norm": 0.31307291984558105, |
| "learning_rate": 9.898038750489433e-05, |
| "loss": 2.408, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.07664760947414714, |
| "grad_norm": 0.3155834674835205, |
| "learning_rate": 9.897233917680098e-05, |
| "loss": 2.0308, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.07689891311176729, |
| "grad_norm": 0.2838704586029053, |
| "learning_rate": 9.896425953845923e-05, |
| "loss": 2.1338, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.07715021674938745, |
| "grad_norm": 0.3904295563697815, |
| "learning_rate": 9.895614859503472e-05, |
| "loss": 2.4403, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.07740152038700761, |
| "grad_norm": 0.27329105138778687, |
| "learning_rate": 9.89480063517132e-05, |
| "loss": 1.7174, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.07765282402462775, |
| "grad_norm": 0.20863182842731476, |
| "learning_rate": 9.893983281370034e-05, |
| "loss": 2.1524, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.07790412766224791, |
| "grad_norm": 0.3970401883125305, |
| "learning_rate": 9.893162798622185e-05, |
| "loss": 3.0095, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.07815543129986807, |
| "grad_norm": 0.3557833731174469, |
| "learning_rate": 9.892339187452347e-05, |
| "loss": 2.2407, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.07840673493748822, |
| "grad_norm": 0.3668101131916046, |
| "learning_rate": 9.891512448387092e-05, |
| "loss": 1.8899, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.07865803857510838, |
| "grad_norm": 0.2897646129131317, |
| "learning_rate": 9.890682581954991e-05, |
| "loss": 2.4487, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.07890934221272852, |
| "grad_norm": 0.31541863083839417, |
| "learning_rate": 9.889849588686617e-05, |
| "loss": 1.9136, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.07916064585034868, |
| "grad_norm": 0.4142214357852936, |
| "learning_rate": 9.889013469114539e-05, |
| "loss": 2.4315, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.07941194948796884, |
| "grad_norm": 0.39966338872909546, |
| "learning_rate": 9.888174223773325e-05, |
| "loss": 2.0768, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.07966325312558899, |
| "grad_norm": 0.3038378357887268, |
| "learning_rate": 9.887331853199546e-05, |
| "loss": 2.4825, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.07991455676320915, |
| "grad_norm": 0.36689433455467224, |
| "learning_rate": 9.886486357931767e-05, |
| "loss": 2.3153, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.0801658604008293, |
| "grad_norm": 0.3301517963409424, |
| "learning_rate": 9.885637738510551e-05, |
| "loss": 2.4927, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.08041716403844945, |
| "grad_norm": 0.26406022906303406, |
| "learning_rate": 9.884785995478458e-05, |
| "loss": 2.3793, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.08066846767606961, |
| "grad_norm": 0.3765343427658081, |
| "learning_rate": 9.883931129380049e-05, |
| "loss": 2.3813, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.08091977131368977, |
| "grad_norm": 0.42643749713897705, |
| "learning_rate": 9.883073140761876e-05, |
| "loss": 2.444, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.08117107495130992, |
| "grad_norm": 0.42273804545402527, |
| "learning_rate": 9.882212030172493e-05, |
| "loss": 2.3634, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.08142237858893007, |
| "grad_norm": 0.3113279938697815, |
| "learning_rate": 9.881347798162443e-05, |
| "loss": 2.236, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.08167368222655023, |
| "grad_norm": 0.4435007870197296, |
| "learning_rate": 9.880480445284274e-05, |
| "loss": 2.3091, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.08192498586417038, |
| "grad_norm": 0.5760218501091003, |
| "learning_rate": 9.879609972092522e-05, |
| "loss": 1.7405, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.08217628950179054, |
| "grad_norm": 0.3486250638961792, |
| "learning_rate": 9.878736379143719e-05, |
| "loss": 1.9632, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0824275931394107, |
| "grad_norm": 0.33690646290779114, |
| "learning_rate": 9.877859666996395e-05, |
| "loss": 2.454, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.08267889677703084, |
| "grad_norm": 0.4438174068927765, |
| "learning_rate": 9.876979836211069e-05, |
| "loss": 2.0577, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.082930200414651, |
| "grad_norm": 0.13432446122169495, |
| "learning_rate": 9.87609688735026e-05, |
| "loss": 0.8846, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08318150405227116, |
| "grad_norm": 0.4359930753707886, |
| "learning_rate": 9.875210820978475e-05, |
| "loss": 2.1561, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.08343280768989131, |
| "grad_norm": 0.38827580213546753, |
| "learning_rate": 9.87432163766222e-05, |
| "loss": 2.2525, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.08368411132751147, |
| "grad_norm": 0.3862955868244171, |
| "learning_rate": 9.873429337969985e-05, |
| "loss": 2.3337, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.08393541496513161, |
| "grad_norm": 0.3881922960281372, |
| "learning_rate": 9.872533922472264e-05, |
| "loss": 2.3624, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.08418671860275177, |
| "grad_norm": 0.21210597455501556, |
| "learning_rate": 9.871635391741533e-05, |
| "loss": 2.0269, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.08443802224037193, |
| "grad_norm": 0.38510724902153015, |
| "learning_rate": 9.870733746352265e-05, |
| "loss": 2.1879, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.08468932587799208, |
| "grad_norm": 0.31321218609809875, |
| "learning_rate": 9.869828986880924e-05, |
| "loss": 1.8977, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.08494062951561224, |
| "grad_norm": 0.3835349678993225, |
| "learning_rate": 9.868921113905961e-05, |
| "loss": 2.5596, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.0851919331532324, |
| "grad_norm": 0.2992005944252014, |
| "learning_rate": 9.868010128007823e-05, |
| "loss": 2.3065, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.08544323679085254, |
| "grad_norm": 0.31576502323150635, |
| "learning_rate": 9.867096029768943e-05, |
| "loss": 2.4326, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0856945404284727, |
| "grad_norm": 0.265114426612854, |
| "learning_rate": 9.866178819773747e-05, |
| "loss": 2.4567, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.08594584406609286, |
| "grad_norm": 0.40755563974380493, |
| "learning_rate": 9.86525849860865e-05, |
| "loss": 2.4134, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.086197147703713, |
| "grad_norm": 0.35845133662223816, |
| "learning_rate": 9.864335066862054e-05, |
| "loss": 2.1912, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.08644845134133317, |
| "grad_norm": 0.2640887498855591, |
| "learning_rate": 9.863408525124349e-05, |
| "loss": 2.3818, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.08669975497895333, |
| "grad_norm": 0.19019848108291626, |
| "learning_rate": 9.862478873987919e-05, |
| "loss": 1.2064, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.08695105861657347, |
| "grad_norm": 0.3398876488208771, |
| "learning_rate": 9.861546114047131e-05, |
| "loss": 2.513, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.08720236225419363, |
| "grad_norm": 0.3897905945777893, |
| "learning_rate": 9.86061024589834e-05, |
| "loss": 2.3802, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.08745366589181379, |
| "grad_norm": 0.5403574109077454, |
| "learning_rate": 9.859671270139892e-05, |
| "loss": 2.492, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.08770496952943393, |
| "grad_norm": 0.38538798689842224, |
| "learning_rate": 9.858729187372114e-05, |
| "loss": 2.6697, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.0879562731670541, |
| "grad_norm": 0.47243911027908325, |
| "learning_rate": 9.857783998197321e-05, |
| "loss": 2.5493, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08820757680467425, |
| "grad_norm": 0.24692249298095703, |
| "learning_rate": 9.85683570321982e-05, |
| "loss": 1.9117, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.0884588804422944, |
| "grad_norm": 0.35984736680984497, |
| "learning_rate": 9.855884303045897e-05, |
| "loss": 2.0952, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.08871018407991456, |
| "grad_norm": 0.4461250901222229, |
| "learning_rate": 9.854929798283826e-05, |
| "loss": 2.1836, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.08896148771753472, |
| "grad_norm": 0.4806773066520691, |
| "learning_rate": 9.853972189543864e-05, |
| "loss": 2.0937, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.08921279135515486, |
| "grad_norm": 0.5709269642829895, |
| "learning_rate": 9.853011477438254e-05, |
| "loss": 1.9677, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.08946409499277502, |
| "grad_norm": 0.3929988443851471, |
| "learning_rate": 9.852047662581225e-05, |
| "loss": 2.5909, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.08971539863039517, |
| "grad_norm": 0.33998608589172363, |
| "learning_rate": 9.851080745588987e-05, |
| "loss": 2.0388, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.08996670226801533, |
| "grad_norm": 0.4756825268268585, |
| "learning_rate": 9.850110727079735e-05, |
| "loss": 2.1712, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.09021800590563549, |
| "grad_norm": 0.3752896785736084, |
| "learning_rate": 9.849137607673643e-05, |
| "loss": 2.4498, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.09046930954325563, |
| "grad_norm": 0.4232666790485382, |
| "learning_rate": 9.848161387992874e-05, |
| "loss": 2.0123, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.09072061318087579, |
| "grad_norm": 0.4741215705871582, |
| "learning_rate": 9.847182068661567e-05, |
| "loss": 2.0118, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.09097191681849595, |
| "grad_norm": 0.41064271330833435, |
| "learning_rate": 9.846199650305846e-05, |
| "loss": 2.2685, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.0912232204561161, |
| "grad_norm": 0.31049537658691406, |
| "learning_rate": 9.845214133553817e-05, |
| "loss": 1.9623, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.09147452409373626, |
| "grad_norm": 0.2547619342803955, |
| "learning_rate": 9.844225519035565e-05, |
| "loss": 1.8739, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.09172582773135642, |
| "grad_norm": 0.26991865038871765, |
| "learning_rate": 9.843233807383159e-05, |
| "loss": 2.2907, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.09197713136897656, |
| "grad_norm": 0.3443757891654968, |
| "learning_rate": 9.84223899923064e-05, |
| "loss": 2.1534, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.09222843500659672, |
| "grad_norm": 0.2624412477016449, |
| "learning_rate": 9.841241095214038e-05, |
| "loss": 2.4799, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.09247973864421688, |
| "grad_norm": 0.34518447518348694, |
| "learning_rate": 9.840240095971358e-05, |
| "loss": 2.2512, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.09273104228183703, |
| "grad_norm": 0.4448896646499634, |
| "learning_rate": 9.839236002142584e-05, |
| "loss": 2.3634, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.09298234591945718, |
| "grad_norm": 0.1713920682668686, |
| "learning_rate": 9.83822881436968e-05, |
| "loss": 1.9883, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.09323364955707734, |
| "grad_norm": 0.26538336277008057, |
| "learning_rate": 9.837218533296587e-05, |
| "loss": 2.3259, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.09348495319469749, |
| "grad_norm": 0.3171433210372925, |
| "learning_rate": 9.83620515956922e-05, |
| "loss": 2.3368, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.09373625683231765, |
| "grad_norm": 0.47525712847709656, |
| "learning_rate": 9.83518869383548e-05, |
| "loss": 2.0206, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.09398756046993781, |
| "grad_norm": 0.6884750723838806, |
| "learning_rate": 9.834169136745237e-05, |
| "loss": 2.2423, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.09423886410755795, |
| "grad_norm": 0.3786754012107849, |
| "learning_rate": 9.833146488950342e-05, |
| "loss": 2.5151, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.09449016774517811, |
| "grad_norm": 0.15857549011707306, |
| "learning_rate": 9.832120751104617e-05, |
| "loss": 1.4356, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.09474147138279826, |
| "grad_norm": 0.6414403915405273, |
| "learning_rate": 9.831091923863868e-05, |
| "loss": 2.2734, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.09499277502041842, |
| "grad_norm": 0.25082263350486755, |
| "learning_rate": 9.830060007885868e-05, |
| "loss": 2.8101, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.09524407865803858, |
| "grad_norm": 0.29801589250564575, |
| "learning_rate": 9.829025003830368e-05, |
| "loss": 1.8527, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.09549538229565872, |
| "grad_norm": 0.293905109167099, |
| "learning_rate": 9.827986912359094e-05, |
| "loss": 2.1692, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.09574668593327888, |
| "grad_norm": 0.5134365558624268, |
| "learning_rate": 9.826945734135744e-05, |
| "loss": 2.1052, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.09599798957089904, |
| "grad_norm": 0.24011516571044922, |
| "learning_rate": 9.825901469825994e-05, |
| "loss": 1.8188, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.09624929320851919, |
| "grad_norm": 0.39140889048576355, |
| "learning_rate": 9.824854120097485e-05, |
| "loss": 2.1372, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.09650059684613935, |
| "grad_norm": 0.35121777653694153, |
| "learning_rate": 9.82380368561984e-05, |
| "loss": 2.2186, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.0967519004837595, |
| "grad_norm": 0.3034665286540985, |
| "learning_rate": 9.822750167064645e-05, |
| "loss": 2.2167, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.09700320412137965, |
| "grad_norm": 0.18376107513904572, |
| "learning_rate": 9.821693565105465e-05, |
| "loss": 1.573, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.09725450775899981, |
| "grad_norm": 0.18543782830238342, |
| "learning_rate": 9.820633880417836e-05, |
| "loss": 2.0766, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.09750581139661997, |
| "grad_norm": 0.3020473122596741, |
| "learning_rate": 9.819571113679258e-05, |
| "loss": 2.4098, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.09775711503424012, |
| "grad_norm": 0.43857908248901367, |
| "learning_rate": 9.818505265569209e-05, |
| "loss": 2.3636, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.09800841867186028, |
| "grad_norm": 0.31539830565452576, |
| "learning_rate": 9.817436336769135e-05, |
| "loss": 2.3027, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.09825972230948044, |
| "grad_norm": 0.38662660121917725, |
| "learning_rate": 9.816364327962449e-05, |
| "loss": 2.7305, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.09851102594710058, |
| "grad_norm": 0.6164030432701111, |
| "learning_rate": 9.815289239834536e-05, |
| "loss": 1.8858, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.09876232958472074, |
| "grad_norm": 0.2521904408931732, |
| "learning_rate": 9.814211073072748e-05, |
| "loss": 1.9274, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.0990136332223409, |
| "grad_norm": 0.35303381085395813, |
| "learning_rate": 9.813129828366407e-05, |
| "loss": 2.1945, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.09926493685996104, |
| "grad_norm": 0.276737242937088, |
| "learning_rate": 9.812045506406803e-05, |
| "loss": 2.2649, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.0995162404975812, |
| "grad_norm": 0.45490264892578125, |
| "learning_rate": 9.81095810788719e-05, |
| "loss": 2.37, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.09976754413520136, |
| "grad_norm": 0.5738433599472046, |
| "learning_rate": 9.809867633502794e-05, |
| "loss": 2.1393, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.10001884777282151, |
| "grad_norm": 0.2685263454914093, |
| "learning_rate": 9.808774083950802e-05, |
| "loss": 2.6325, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.10027015141044167, |
| "grad_norm": 0.41066989302635193, |
| "learning_rate": 9.807677459930374e-05, |
| "loss": 2.1897, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.10052145504806181, |
| "grad_norm": 0.41453301906585693, |
| "learning_rate": 9.806577762142628e-05, |
| "loss": 2.243, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10077275868568197, |
| "grad_norm": 0.39212143421173096, |
| "learning_rate": 9.805474991290652e-05, |
| "loss": 2.502, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.10102406232330213, |
| "grad_norm": 0.23721270263195038, |
| "learning_rate": 9.804369148079498e-05, |
| "loss": 1.7259, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.10127536596092228, |
| "grad_norm": 0.22118382155895233, |
| "learning_rate": 9.803260233216184e-05, |
| "loss": 1.1238, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.10152666959854244, |
| "grad_norm": 0.386078804731369, |
| "learning_rate": 9.802148247409686e-05, |
| "loss": 2.1155, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1017779732361626, |
| "grad_norm": 0.3332570195198059, |
| "learning_rate": 9.80103319137095e-05, |
| "loss": 2.5907, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.10202927687378274, |
| "grad_norm": 0.20631489157676697, |
| "learning_rate": 9.799915065812882e-05, |
| "loss": 2.3401, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1022805805114029, |
| "grad_norm": 0.529591977596283, |
| "learning_rate": 9.798793871450346e-05, |
| "loss": 2.3916, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.10253188414902306, |
| "grad_norm": 0.32578417658805847, |
| "learning_rate": 9.79766960900018e-05, |
| "loss": 2.3661, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.10278318778664321, |
| "grad_norm": 0.4134072959423065, |
| "learning_rate": 9.796542279181172e-05, |
| "loss": 2.0656, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.10303449142426337, |
| "grad_norm": 0.3845951557159424, |
| "learning_rate": 9.795411882714076e-05, |
| "loss": 2.3176, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.10328579506188353, |
| "grad_norm": 0.6262491345405579, |
| "learning_rate": 9.794278420321605e-05, |
| "loss": 2.42, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.10353709869950367, |
| "grad_norm": 0.4291568696498871, |
| "learning_rate": 9.793141892728436e-05, |
| "loss": 1.9455, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.10378840233712383, |
| "grad_norm": 0.24446240067481995, |
| "learning_rate": 9.792002300661201e-05, |
| "loss": 2.6102, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.10403970597474399, |
| "grad_norm": 0.4551761746406555, |
| "learning_rate": 9.79085964484849e-05, |
| "loss": 2.0785, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.10429100961236414, |
| "grad_norm": 0.384036123752594, |
| "learning_rate": 9.789713926020863e-05, |
| "loss": 2.3401, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1045423132499843, |
| "grad_norm": 0.2475469559431076, |
| "learning_rate": 9.788565144910822e-05, |
| "loss": 2.2387, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.10479361688760445, |
| "grad_norm": 0.269940048456192, |
| "learning_rate": 9.78741330225284e-05, |
| "loss": 2.2894, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.1050449205252246, |
| "grad_norm": 0.27409201860427856, |
| "learning_rate": 9.786258398783341e-05, |
| "loss": 2.2478, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.10529622416284476, |
| "grad_norm": 0.28689828515052795, |
| "learning_rate": 9.785100435240706e-05, |
| "loss": 1.9406, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.10554752780046492, |
| "grad_norm": 0.5282906293869019, |
| "learning_rate": 9.783939412365278e-05, |
| "loss": 2.1423, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.10579883143808506, |
| "grad_norm": 0.2722564935684204, |
| "learning_rate": 9.782775330899347e-05, |
| "loss": 2.3398, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.10605013507570522, |
| "grad_norm": 0.22443710267543793, |
| "learning_rate": 9.781608191587166e-05, |
| "loss": 2.2233, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.10630143871332537, |
| "grad_norm": 1.7654945850372314, |
| "learning_rate": 9.78043799517494e-05, |
| "loss": 2.2441, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.10655274235094553, |
| "grad_norm": 0.272491455078125, |
| "learning_rate": 9.779264742410829e-05, |
| "loss": 2.4267, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.10680404598856569, |
| "grad_norm": 0.4860396683216095, |
| "learning_rate": 9.778088434044945e-05, |
| "loss": 2.3139, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.10705534962618583, |
| "grad_norm": 0.5187298059463501, |
| "learning_rate": 9.77690907082936e-05, |
| "loss": 2.0393, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.10730665326380599, |
| "grad_norm": 0.25615808367729187, |
| "learning_rate": 9.775726653518091e-05, |
| "loss": 2.4811, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.10755795690142615, |
| "grad_norm": 0.26843705773353577, |
| "learning_rate": 9.774541182867112e-05, |
| "loss": 2.0025, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1078092605390463, |
| "grad_norm": 0.42573508620262146, |
| "learning_rate": 9.773352659634348e-05, |
| "loss": 2.3841, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.10806056417666646, |
| "grad_norm": 0.34817081689834595, |
| "learning_rate": 9.772161084579679e-05, |
| "loss": 2.4695, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.10831186781428662, |
| "grad_norm": 0.3741927742958069, |
| "learning_rate": 9.770966458464927e-05, |
| "loss": 2.099, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.10856317145190676, |
| "grad_norm": 0.4013387858867645, |
| "learning_rate": 9.769768782053879e-05, |
| "loss": 2.2978, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.10881447508952692, |
| "grad_norm": 0.4488285779953003, |
| "learning_rate": 9.768568056112258e-05, |
| "loss": 2.064, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.10906577872714708, |
| "grad_norm": 0.43745100498199463, |
| "learning_rate": 9.767364281407745e-05, |
| "loss": 2.0517, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.10931708236476723, |
| "grad_norm": 0.2905375361442566, |
| "learning_rate": 9.766157458709967e-05, |
| "loss": 2.43, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.10956838600238739, |
| "grad_norm": 0.3817865252494812, |
| "learning_rate": 9.764947588790502e-05, |
| "loss": 2.439, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.10981968964000755, |
| "grad_norm": 0.44527363777160645, |
| "learning_rate": 9.763734672422876e-05, |
| "loss": 2.4307, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.11007099327762769, |
| "grad_norm": 0.5595135688781738, |
| "learning_rate": 9.76251871038256e-05, |
| "loss": 2.1648, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.11032229691524785, |
| "grad_norm": 0.4044279456138611, |
| "learning_rate": 9.761299703446973e-05, |
| "loss": 2.3435, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.11057360055286801, |
| "grad_norm": 0.1558169573545456, |
| "learning_rate": 9.760077652395483e-05, |
| "loss": 0.9334, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.11082490419048815, |
| "grad_norm": 0.42228519916534424, |
| "learning_rate": 9.758852558009404e-05, |
| "loss": 2.4764, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.11107620782810831, |
| "grad_norm": 0.18882694840431213, |
| "learning_rate": 9.757624421071993e-05, |
| "loss": 1.3726, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.11132751146572846, |
| "grad_norm": 0.37905827164649963, |
| "learning_rate": 9.756393242368453e-05, |
| "loss": 1.8814, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.11157881510334862, |
| "grad_norm": 0.347260445356369, |
| "learning_rate": 9.755159022685936e-05, |
| "loss": 2.6184, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.11183011874096878, |
| "grad_norm": 0.4600488543510437, |
| "learning_rate": 9.753921762813534e-05, |
| "loss": 2.2642, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.11208142237858892, |
| "grad_norm": 0.3841269612312317, |
| "learning_rate": 9.75268146354228e-05, |
| "loss": 2.2473, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.11233272601620908, |
| "grad_norm": 0.4765447676181793, |
| "learning_rate": 9.751438125665158e-05, |
| "loss": 2.3474, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.11258402965382924, |
| "grad_norm": 0.3766055405139923, |
| "learning_rate": 9.750191749977089e-05, |
| "loss": 1.809, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.11283533329144939, |
| "grad_norm": 0.3452647626399994, |
| "learning_rate": 9.748942337274938e-05, |
| "loss": 2.5878, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.11308663692906955, |
| "grad_norm": 0.21866516768932343, |
| "learning_rate": 9.747689888357509e-05, |
| "loss": 0.992, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11333794056668971, |
| "grad_norm": 0.36812421679496765, |
| "learning_rate": 9.746434404025555e-05, |
| "loss": 2.0005, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.11358924420430985, |
| "grad_norm": 0.44335392117500305, |
| "learning_rate": 9.74517588508176e-05, |
| "loss": 2.4726, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.11384054784193001, |
| "grad_norm": 0.448779433965683, |
| "learning_rate": 9.743914332330754e-05, |
| "loss": 2.2657, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.11409185147955017, |
| "grad_norm": 0.5500572323799133, |
| "learning_rate": 9.742649746579105e-05, |
| "loss": 2.4726, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.11434315511717032, |
| "grad_norm": 0.47495442628860474, |
| "learning_rate": 9.741382128635321e-05, |
| "loss": 2.4044, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.11459445875479048, |
| "grad_norm": 0.4240530729293823, |
| "learning_rate": 9.740111479309847e-05, |
| "loss": 2.6976, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.11484576239241064, |
| "grad_norm": 0.48315781354904175, |
| "learning_rate": 9.738837799415067e-05, |
| "loss": 2.4035, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.11509706603003078, |
| "grad_norm": 0.28931689262390137, |
| "learning_rate": 9.737561089765303e-05, |
| "loss": 1.9762, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.11534836966765094, |
| "grad_norm": 0.2892288267612457, |
| "learning_rate": 9.736281351176813e-05, |
| "loss": 2.1718, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.1155996733052711, |
| "grad_norm": 0.4304305911064148, |
| "learning_rate": 9.734998584467794e-05, |
| "loss": 2.2799, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.11585097694289125, |
| "grad_norm": 0.2887556552886963, |
| "learning_rate": 9.733712790458375e-05, |
| "loss": 2.4617, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.1161022805805114, |
| "grad_norm": 0.37903836369514465, |
| "learning_rate": 9.732423969970626e-05, |
| "loss": 2.4154, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.11635358421813156, |
| "grad_norm": 0.34230297803878784, |
| "learning_rate": 9.731132123828543e-05, |
| "loss": 1.9664, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.11660488785575171, |
| "grad_norm": 0.4099084138870239, |
| "learning_rate": 9.729837252858067e-05, |
| "loss": 2.473, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.11685619149337187, |
| "grad_norm": 0.4079136252403259, |
| "learning_rate": 9.728539357887068e-05, |
| "loss": 1.8914, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.11710749513099201, |
| "grad_norm": 0.2663559317588806, |
| "learning_rate": 9.727238439745346e-05, |
| "loss": 2.1168, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.11735879876861217, |
| "grad_norm": 0.35732075572013855, |
| "learning_rate": 9.72593449926464e-05, |
| "loss": 2.3938, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.11761010240623233, |
| "grad_norm": 0.26939693093299866, |
| "learning_rate": 9.724627537278616e-05, |
| "loss": 1.9446, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.11786140604385248, |
| "grad_norm": 0.3639311194419861, |
| "learning_rate": 9.72331755462288e-05, |
| "loss": 2.8929, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.11811270968147264, |
| "grad_norm": 0.26295900344848633, |
| "learning_rate": 9.722004552134956e-05, |
| "loss": 2.3815, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1183640133190928, |
| "grad_norm": 0.4366108179092407, |
| "learning_rate": 9.720688530654311e-05, |
| "loss": 2.1099, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.11861531695671294, |
| "grad_norm": 0.5005189776420593, |
| "learning_rate": 9.719369491022339e-05, |
| "loss": 2.7562, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.1188666205943331, |
| "grad_norm": 0.3057880401611328, |
| "learning_rate": 9.718047434082357e-05, |
| "loss": 2.5181, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.11911792423195326, |
| "grad_norm": 0.2341679483652115, |
| "learning_rate": 9.716722360679619e-05, |
| "loss": 1.2066, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.11936922786957341, |
| "grad_norm": 0.27327990531921387, |
| "learning_rate": 9.715394271661306e-05, |
| "loss": 2.3469, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.11962053150719357, |
| "grad_norm": 0.39172980189323425, |
| "learning_rate": 9.714063167876527e-05, |
| "loss": 2.2347, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.11987183514481373, |
| "grad_norm": 0.49665653705596924, |
| "learning_rate": 9.71272905017631e-05, |
| "loss": 1.954, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.12012313878243387, |
| "grad_norm": 0.2704184949398041, |
| "learning_rate": 9.711391919413626e-05, |
| "loss": 2.3236, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.12037444242005403, |
| "grad_norm": 0.17571642994880676, |
| "learning_rate": 9.710051776443358e-05, |
| "loss": 1.277, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.12062574605767419, |
| "grad_norm": 0.2642328143119812, |
| "learning_rate": 9.708708622122322e-05, |
| "loss": 2.4537, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.12087704969529434, |
| "grad_norm": 0.21277543902397156, |
| "learning_rate": 9.707362457309261e-05, |
| "loss": 1.3385, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.1211283533329145, |
| "grad_norm": 0.3256551921367645, |
| "learning_rate": 9.706013282864834e-05, |
| "loss": 2.1567, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.12137965697053466, |
| "grad_norm": 0.4194876551628113, |
| "learning_rate": 9.704661099651633e-05, |
| "loss": 2.2617, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1216309606081548, |
| "grad_norm": 0.13989388942718506, |
| "learning_rate": 9.70330590853417e-05, |
| "loss": 0.8074, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.12188226424577496, |
| "grad_norm": 0.2916125953197479, |
| "learning_rate": 9.701947710378881e-05, |
| "loss": 2.0217, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.1221335678833951, |
| "grad_norm": 0.17038998007774353, |
| "learning_rate": 9.700586506054121e-05, |
| "loss": 0.8153, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.12238487152101526, |
| "grad_norm": 0.39059990644454956, |
| "learning_rate": 9.699222296430172e-05, |
| "loss": 2.161, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.12263617515863542, |
| "grad_norm": 0.5335647463798523, |
| "learning_rate": 9.697855082379239e-05, |
| "loss": 2.2604, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.12288747879625557, |
| "grad_norm": 0.4468785524368286, |
| "learning_rate": 9.696484864775437e-05, |
| "loss": 2.3022, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.12313878243387573, |
| "grad_norm": 0.38395336270332336, |
| "learning_rate": 9.695111644494814e-05, |
| "loss": 2.2467, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.12339008607149589, |
| "grad_norm": 0.35352465510368347, |
| "learning_rate": 9.693735422415332e-05, |
| "loss": 2.7816, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.12364138970911603, |
| "grad_norm": 0.5462369322776794, |
| "learning_rate": 9.692356199416868e-05, |
| "loss": 2.2723, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.1238926933467362, |
| "grad_norm": 0.24535368382930756, |
| "learning_rate": 9.690973976381228e-05, |
| "loss": 2.0469, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.12414399698435635, |
| "grad_norm": 0.3857629597187042, |
| "learning_rate": 9.689588754192126e-05, |
| "loss": 2.1396, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.1243953006219765, |
| "grad_norm": 0.4369061291217804, |
| "learning_rate": 9.688200533735199e-05, |
| "loss": 2.3099, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.12464660425959666, |
| "grad_norm": 0.22359801828861237, |
| "learning_rate": 9.686809315898e-05, |
| "loss": 2.113, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.12489790789721682, |
| "grad_norm": 0.3624935746192932, |
| "learning_rate": 9.685415101569999e-05, |
| "loss": 2.2168, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.12514921153483696, |
| "grad_norm": 0.4480370581150055, |
| "learning_rate": 9.684017891642578e-05, |
| "loss": 2.5805, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.12540051517245712, |
| "grad_norm": 0.47670865058898926, |
| "learning_rate": 9.682617687009039e-05, |
| "loss": 1.5993, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.12565181881007728, |
| "grad_norm": 0.37690502405166626, |
| "learning_rate": 9.681214488564596e-05, |
| "loss": 2.125, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12590312244769744, |
| "grad_norm": 0.29597729444503784, |
| "learning_rate": 9.679808297206377e-05, |
| "loss": 2.6068, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.12615442608531757, |
| "grad_norm": 0.33130696415901184, |
| "learning_rate": 9.678399113833425e-05, |
| "loss": 2.2649, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.12640572972293773, |
| "grad_norm": 0.4030790627002716, |
| "learning_rate": 9.676986939346696e-05, |
| "loss": 1.8589, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.1266570333605579, |
| "grad_norm": 0.37136292457580566, |
| "learning_rate": 9.675571774649057e-05, |
| "loss": 2.1863, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.12690833699817805, |
| "grad_norm": 0.439748615026474, |
| "learning_rate": 9.674153620645287e-05, |
| "loss": 2.6519, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1271596406357982, |
| "grad_norm": 0.3839961588382721, |
| "learning_rate": 9.672732478242075e-05, |
| "loss": 2.4758, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.12741094427341837, |
| "grad_norm": 0.2992021441459656, |
| "learning_rate": 9.671308348348025e-05, |
| "loss": 2.4348, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1276622479110385, |
| "grad_norm": 0.3640328645706177, |
| "learning_rate": 9.669881231873646e-05, |
| "loss": 2.6048, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.12791355154865866, |
| "grad_norm": 0.27911099791526794, |
| "learning_rate": 9.66845112973136e-05, |
| "loss": 2.4449, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.12816485518627882, |
| "grad_norm": 0.4001411199569702, |
| "learning_rate": 9.667018042835496e-05, |
| "loss": 3.0271, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.12841615882389898, |
| "grad_norm": 0.4469778537750244, |
| "learning_rate": 9.665581972102291e-05, |
| "loss": 2.192, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.12866746246151914, |
| "grad_norm": 0.4696493446826935, |
| "learning_rate": 9.66414291844989e-05, |
| "loss": 2.0927, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.1289187660991393, |
| "grad_norm": 0.485324889421463, |
| "learning_rate": 9.662700882798348e-05, |
| "loss": 1.8437, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.12917006973675943, |
| "grad_norm": 0.2740216851234436, |
| "learning_rate": 9.661255866069622e-05, |
| "loss": 2.0895, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.1294213733743796, |
| "grad_norm": 0.48760735988616943, |
| "learning_rate": 9.659807869187578e-05, |
| "loss": 2.3416, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.12967267701199975, |
| "grad_norm": 0.14928022027015686, |
| "learning_rate": 9.658356893077987e-05, |
| "loss": 1.1877, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.1299239806496199, |
| "grad_norm": 0.46163851022720337, |
| "learning_rate": 9.656902938668524e-05, |
| "loss": 2.6305, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.13017528428724007, |
| "grad_norm": 0.30622944235801697, |
| "learning_rate": 9.655446006888766e-05, |
| "loss": 1.5633, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1304265879248602, |
| "grad_norm": 0.30690157413482666, |
| "learning_rate": 9.653986098670198e-05, |
| "loss": 2.5689, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.13067789156248036, |
| "grad_norm": 0.3105219602584839, |
| "learning_rate": 9.652523214946205e-05, |
| "loss": 2.564, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.13092919520010052, |
| "grad_norm": 0.2955935299396515, |
| "learning_rate": 9.651057356652077e-05, |
| "loss": 2.5073, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.13118049883772068, |
| "grad_norm": 0.19297459721565247, |
| "learning_rate": 9.649588524725002e-05, |
| "loss": 1.8412, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.13143180247534084, |
| "grad_norm": 0.3480035066604614, |
| "learning_rate": 9.64811672010407e-05, |
| "loss": 2.5528, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.131683106112961, |
| "grad_norm": 0.15509484708309174, |
| "learning_rate": 9.646641943730277e-05, |
| "loss": 0.7748, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.13193440975058113, |
| "grad_norm": 0.33523187041282654, |
| "learning_rate": 9.645164196546512e-05, |
| "loss": 1.939, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.1321857133882013, |
| "grad_norm": 0.3940199613571167, |
| "learning_rate": 9.643683479497567e-05, |
| "loss": 2.3232, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.13243701702582145, |
| "grad_norm": 0.3470746576786041, |
| "learning_rate": 9.64219979353013e-05, |
| "loss": 1.933, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.1326883206634416, |
| "grad_norm": 0.3262689709663391, |
| "learning_rate": 9.640713139592792e-05, |
| "loss": 2.3454, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.13293962430106177, |
| "grad_norm": 0.5404649972915649, |
| "learning_rate": 9.639223518636036e-05, |
| "loss": 2.4712, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.13319092793868192, |
| "grad_norm": 0.30311527848243713, |
| "learning_rate": 9.637730931612245e-05, |
| "loss": 2.0744, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.13344223157630206, |
| "grad_norm": 0.9992802739143372, |
| "learning_rate": 9.6362353794757e-05, |
| "loss": 2.1378, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.13369353521392222, |
| "grad_norm": 0.2992432415485382, |
| "learning_rate": 9.634736863182574e-05, |
| "loss": 2.4675, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.13394483885154237, |
| "grad_norm": 0.3760550618171692, |
| "learning_rate": 9.633235383690937e-05, |
| "loss": 2.3762, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.13419614248916253, |
| "grad_norm": 0.3664836287498474, |
| "learning_rate": 9.631730941960752e-05, |
| "loss": 2.2417, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.1344474461267827, |
| "grad_norm": 0.4195888042449951, |
| "learning_rate": 9.630223538953881e-05, |
| "loss": 1.9261, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.13469874976440285, |
| "grad_norm": 0.39428257942199707, |
| "learning_rate": 9.628713175634072e-05, |
| "loss": 2.2189, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.13495005340202298, |
| "grad_norm": 0.3145690858364105, |
| "learning_rate": 9.627199852966969e-05, |
| "loss": 2.1252, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.13520135703964314, |
| "grad_norm": 0.41197821497917175, |
| "learning_rate": 9.625683571920108e-05, |
| "loss": 2.0061, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.1354526606772633, |
| "grad_norm": 0.4640690088272095, |
| "learning_rate": 9.62416433346292e-05, |
| "loss": 1.9037, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.13570396431488346, |
| "grad_norm": 0.39173510670661926, |
| "learning_rate": 9.62264213856672e-05, |
| "loss": 2.1323, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.13595526795250362, |
| "grad_norm": 0.33818063139915466, |
| "learning_rate": 9.62111698820472e-05, |
| "loss": 2.4662, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.13620657159012375, |
| "grad_norm": 0.3888367712497711, |
| "learning_rate": 9.619588883352011e-05, |
| "loss": 1.987, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1364578752277439, |
| "grad_norm": 0.49469324946403503, |
| "learning_rate": 9.61805782498559e-05, |
| "loss": 2.075, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.13670917886536407, |
| "grad_norm": 0.3214045464992523, |
| "learning_rate": 9.616523814084324e-05, |
| "loss": 2.6687, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.13696048250298423, |
| "grad_norm": 0.3943631649017334, |
| "learning_rate": 9.61498685162898e-05, |
| "loss": 1.9817, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.1372117861406044, |
| "grad_norm": 0.46393269300460815, |
| "learning_rate": 9.613446938602209e-05, |
| "loss": 2.2147, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.13746308977822455, |
| "grad_norm": 0.3647920489311218, |
| "learning_rate": 9.611904075988544e-05, |
| "loss": 1.9163, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.13771439341584468, |
| "grad_norm": 0.3861480951309204, |
| "learning_rate": 9.610358264774411e-05, |
| "loss": 2.7924, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.13796569705346484, |
| "grad_norm": 0.4985499083995819, |
| "learning_rate": 9.608809505948114e-05, |
| "loss": 2.0298, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.138217000691085, |
| "grad_norm": 0.4473382532596588, |
| "learning_rate": 9.607257800499849e-05, |
| "loss": 2.0965, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.13846830432870516, |
| "grad_norm": 0.4468131959438324, |
| "learning_rate": 9.60570314942169e-05, |
| "loss": 2.311, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.13871960796632532, |
| "grad_norm": 0.1555897295475006, |
| "learning_rate": 9.604145553707595e-05, |
| "loss": 0.8849, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.13897091160394548, |
| "grad_norm": 0.17197562754154205, |
| "learning_rate": 9.602585014353409e-05, |
| "loss": 1.0516, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.1392222152415656, |
| "grad_norm": 0.22020426392555237, |
| "learning_rate": 9.601021532356854e-05, |
| "loss": 1.9116, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.13947351887918577, |
| "grad_norm": 0.33761653304100037, |
| "learning_rate": 9.599455108717535e-05, |
| "loss": 2.3363, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.13972482251680593, |
| "grad_norm": 0.5171981453895569, |
| "learning_rate": 9.59788574443694e-05, |
| "loss": 2.0701, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.1399761261544261, |
| "grad_norm": 0.22373630106449127, |
| "learning_rate": 9.596313440518432e-05, |
| "loss": 1.0946, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.14022742979204625, |
| "grad_norm": 0.5129598379135132, |
| "learning_rate": 9.594738197967259e-05, |
| "loss": 2.3418, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.1404787334296664, |
| "grad_norm": 0.4349968433380127, |
| "learning_rate": 9.593160017790546e-05, |
| "loss": 2.0397, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.14073003706728654, |
| "grad_norm": 0.39511239528656006, |
| "learning_rate": 9.591578900997292e-05, |
| "loss": 2.2823, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1409813407049067, |
| "grad_norm": 0.25913137197494507, |
| "learning_rate": 9.58999484859838e-05, |
| "loss": 2.3511, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.14123264434252686, |
| "grad_norm": 0.19200782477855682, |
| "learning_rate": 9.588407861606566e-05, |
| "loss": 1.6859, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.14148394798014702, |
| "grad_norm": 0.30504798889160156, |
| "learning_rate": 9.586817941036483e-05, |
| "loss": 2.1132, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.14173525161776718, |
| "grad_norm": 0.46058428287506104, |
| "learning_rate": 9.585225087904641e-05, |
| "loss": 2.4114, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.1419865552553873, |
| "grad_norm": 0.2597549557685852, |
| "learning_rate": 9.583629303229423e-05, |
| "loss": 2.4956, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.14223785889300747, |
| "grad_norm": 0.35064586997032166, |
| "learning_rate": 9.582030588031084e-05, |
| "loss": 2.4188, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.14248916253062763, |
| "grad_norm": 0.21857944130897522, |
| "learning_rate": 9.580428943331758e-05, |
| "loss": 1.4052, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.1427404661682478, |
| "grad_norm": 0.4945662021636963, |
| "learning_rate": 9.578824370155451e-05, |
| "loss": 2.6965, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.14299176980586795, |
| "grad_norm": 0.37999972701072693, |
| "learning_rate": 9.577216869528038e-05, |
| "loss": 2.5524, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.1432430734434881, |
| "grad_norm": 0.47479113936424255, |
| "learning_rate": 9.575606442477267e-05, |
| "loss": 2.166, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.14349437708110824, |
| "grad_norm": 0.42284709215164185, |
| "learning_rate": 9.573993090032758e-05, |
| "loss": 1.7279, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.1437456807187284, |
| "grad_norm": 0.412218302488327, |
| "learning_rate": 9.572376813225999e-05, |
| "loss": 2.1049, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.14399698435634856, |
| "grad_norm": 0.6507567167282104, |
| "learning_rate": 9.570757613090353e-05, |
| "loss": 2.5453, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.14424828799396872, |
| "grad_norm": 0.31104111671447754, |
| "learning_rate": 9.569135490661046e-05, |
| "loss": 2.6578, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.14449959163158888, |
| "grad_norm": 0.3139590620994568, |
| "learning_rate": 9.567510446975176e-05, |
| "loss": 2.5413, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.14475089526920903, |
| "grad_norm": 0.5592718720436096, |
| "learning_rate": 9.565882483071706e-05, |
| "loss": 2.3341, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.14500219890682917, |
| "grad_norm": 0.4113386869430542, |
| "learning_rate": 9.564251599991467e-05, |
| "loss": 2.7414, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.14525350254444933, |
| "grad_norm": 0.2924419045448303, |
| "learning_rate": 9.56261779877716e-05, |
| "loss": 1.882, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.14550480618206948, |
| "grad_norm": 0.3123188316822052, |
| "learning_rate": 9.560981080473346e-05, |
| "loss": 2.2884, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.14575610981968964, |
| "grad_norm": 0.3279459476470947, |
| "learning_rate": 9.559341446126455e-05, |
| "loss": 1.8245, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1460074134573098, |
| "grad_norm": 0.2936881184577942, |
| "learning_rate": 9.55769889678478e-05, |
| "loss": 2.2238, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.14625871709492994, |
| "grad_norm": 0.3411659598350525, |
| "learning_rate": 9.556053433498475e-05, |
| "loss": 2.1781, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.1465100207325501, |
| "grad_norm": 0.42901405692100525, |
| "learning_rate": 9.554405057319565e-05, |
| "loss": 2.5198, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.14676132437017025, |
| "grad_norm": 0.37800028920173645, |
| "learning_rate": 9.552753769301925e-05, |
| "loss": 2.009, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.1470126280077904, |
| "grad_norm": 0.35843050479888916, |
| "learning_rate": 9.551099570501305e-05, |
| "loss": 2.0567, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.14726393164541057, |
| "grad_norm": 0.4403095245361328, |
| "learning_rate": 9.549442461975306e-05, |
| "loss": 2.242, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.14751523528303073, |
| "grad_norm": 0.3680509626865387, |
| "learning_rate": 9.547782444783393e-05, |
| "loss": 1.9327, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.14776653892065086, |
| "grad_norm": 0.26541033387184143, |
| "learning_rate": 9.546119519986894e-05, |
| "loss": 2.5879, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.14801784255827102, |
| "grad_norm": 0.3599735200405121, |
| "learning_rate": 9.544453688648989e-05, |
| "loss": 2.448, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.14826914619589118, |
| "grad_norm": 0.46123915910720825, |
| "learning_rate": 9.542784951834721e-05, |
| "loss": 2.1933, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.14852044983351134, |
| "grad_norm": 0.3718903064727783, |
| "learning_rate": 9.54111331061099e-05, |
| "loss": 2.0662, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.1487717534711315, |
| "grad_norm": 0.5671700239181519, |
| "learning_rate": 9.539438766046554e-05, |
| "loss": 2.2881, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.14902305710875166, |
| "grad_norm": 0.415791779756546, |
| "learning_rate": 9.537761319212021e-05, |
| "loss": 2.1724, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.1492743607463718, |
| "grad_norm": 0.33377009630203247, |
| "learning_rate": 9.536080971179864e-05, |
| "loss": 2.2665, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.14952566438399195, |
| "grad_norm": 0.48479607701301575, |
| "learning_rate": 9.534397723024402e-05, |
| "loss": 1.9262, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.1497769680216121, |
| "grad_norm": 0.4698795974254608, |
| "learning_rate": 9.532711575821816e-05, |
| "loss": 2.3923, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.15002827165923227, |
| "grad_norm": 0.45373061299324036, |
| "learning_rate": 9.531022530650135e-05, |
| "loss": 2.2584, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.15027957529685243, |
| "grad_norm": 0.25719451904296875, |
| "learning_rate": 9.529330588589243e-05, |
| "loss": 2.1529, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.1505308789344726, |
| "grad_norm": 0.27690425515174866, |
| "learning_rate": 9.527635750720875e-05, |
| "loss": 2.1076, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.15078218257209272, |
| "grad_norm": 0.11603706330060959, |
| "learning_rate": 9.525938018128617e-05, |
| "loss": 0.5727, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.15103348620971288, |
| "grad_norm": 0.39744696021080017, |
| "learning_rate": 9.524237391897909e-05, |
| "loss": 2.1513, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.15128478984733304, |
| "grad_norm": 0.3867836594581604, |
| "learning_rate": 9.522533873116041e-05, |
| "loss": 2.3665, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.1515360934849532, |
| "grad_norm": 0.16841183602809906, |
| "learning_rate": 9.520827462872144e-05, |
| "loss": 1.1614, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.15178739712257336, |
| "grad_norm": 0.34245559573173523, |
| "learning_rate": 9.519118162257209e-05, |
| "loss": 2.2823, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.1520387007601935, |
| "grad_norm": 0.47992077469825745, |
| "learning_rate": 9.517405972364067e-05, |
| "loss": 1.9997, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.15229000439781365, |
| "grad_norm": 0.2888699471950531, |
| "learning_rate": 9.5156908942874e-05, |
| "loss": 2.2298, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.1525413080354338, |
| "grad_norm": 0.1741029918193817, |
| "learning_rate": 9.513972929123737e-05, |
| "loss": 0.7344, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.15279261167305397, |
| "grad_norm": 0.3610229790210724, |
| "learning_rate": 9.512252077971448e-05, |
| "loss": 2.0037, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.15304391531067413, |
| "grad_norm": 0.30153217911720276, |
| "learning_rate": 9.510528341930756e-05, |
| "loss": 2.1396, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.1532952189482943, |
| "grad_norm": 0.5049226880073547, |
| "learning_rate": 9.50880172210372e-05, |
| "loss": 2.1197, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.15354652258591442, |
| "grad_norm": 0.27209773659706116, |
| "learning_rate": 9.507072219594249e-05, |
| "loss": 2.37, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.15379782622353458, |
| "grad_norm": 0.33535251021385193, |
| "learning_rate": 9.505339835508091e-05, |
| "loss": 2.4393, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.15404912986115474, |
| "grad_norm": 0.4262019991874695, |
| "learning_rate": 9.50360457095284e-05, |
| "loss": 2.0283, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.1543004334987749, |
| "grad_norm": 0.5614545941352844, |
| "learning_rate": 9.50186642703793e-05, |
| "loss": 2.0785, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.15455173713639506, |
| "grad_norm": 0.3979303240776062, |
| "learning_rate": 9.500125404874631e-05, |
| "loss": 2.6146, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.15480304077401522, |
| "grad_norm": 0.39508217573165894, |
| "learning_rate": 9.498381505576064e-05, |
| "loss": 2.2959, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.15505434441163535, |
| "grad_norm": 0.36526814103126526, |
| "learning_rate": 9.49663473025718e-05, |
| "loss": 2.3149, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.1553056480492555, |
| "grad_norm": 0.6422840356826782, |
| "learning_rate": 9.494885080034774e-05, |
| "loss": 2.0929, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.15555695168687567, |
| "grad_norm": 0.32328036427497864, |
| "learning_rate": 9.493132556027475e-05, |
| "loss": 2.2233, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.15580825532449583, |
| "grad_norm": 0.25220245122909546, |
| "learning_rate": 9.491377159355752e-05, |
| "loss": 2.5095, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.15605955896211599, |
| "grad_norm": 0.32473161816596985, |
| "learning_rate": 9.489618891141911e-05, |
| "loss": 2.6889, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.15631086259973614, |
| "grad_norm": 0.4301775395870209, |
| "learning_rate": 9.487857752510093e-05, |
| "loss": 2.227, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.15656216623735628, |
| "grad_norm": 0.4302082061767578, |
| "learning_rate": 9.486093744586271e-05, |
| "loss": 2.2468, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.15681346987497644, |
| "grad_norm": 0.39020925760269165, |
| "learning_rate": 9.484326868498261e-05, |
| "loss": 2.3726, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.1570647735125966, |
| "grad_norm": 0.3023484945297241, |
| "learning_rate": 9.482557125375704e-05, |
| "loss": 2.4235, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.15731607715021675, |
| "grad_norm": 0.3555540442466736, |
| "learning_rate": 9.480784516350079e-05, |
| "loss": 2.4532, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.1575673807878369, |
| "grad_norm": 0.3152056634426117, |
| "learning_rate": 9.479009042554694e-05, |
| "loss": 2.7172, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.15781868442545705, |
| "grad_norm": 0.5882668495178223, |
| "learning_rate": 9.477230705124692e-05, |
| "loss": 2.5912, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.1580699880630772, |
| "grad_norm": 0.36808767914772034, |
| "learning_rate": 9.475449505197043e-05, |
| "loss": 2.2798, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.15832129170069736, |
| "grad_norm": 0.44441547989845276, |
| "learning_rate": 9.473665443910551e-05, |
| "loss": 1.9648, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.15857259533831752, |
| "grad_norm": 0.40813305974006653, |
| "learning_rate": 9.471878522405849e-05, |
| "loss": 1.7885, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.15882389897593768, |
| "grad_norm": 0.32029062509536743, |
| "learning_rate": 9.470088741825394e-05, |
| "loss": 2.6975, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.15907520261355784, |
| "grad_norm": 0.270475834608078, |
| "learning_rate": 9.468296103313476e-05, |
| "loss": 0.8822, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.15932650625117797, |
| "grad_norm": 0.3409525454044342, |
| "learning_rate": 9.46650060801621e-05, |
| "loss": 1.7725, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.15957780988879813, |
| "grad_norm": 0.4669889807701111, |
| "learning_rate": 9.464702257081539e-05, |
| "loss": 2.379, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1598291135264183, |
| "grad_norm": 0.5293301939964294, |
| "learning_rate": 9.462901051659232e-05, |
| "loss": 2.3118, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.16008041716403845, |
| "grad_norm": 0.38586729764938354, |
| "learning_rate": 9.461096992900879e-05, |
| "loss": 2.0312, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.1603317208016586, |
| "grad_norm": 0.3819951117038727, |
| "learning_rate": 9.459290081959897e-05, |
| "loss": 1.973, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.16058302443927877, |
| "grad_norm": 0.37507110834121704, |
| "learning_rate": 9.457480319991529e-05, |
| "loss": 2.3668, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.1608343280768989, |
| "grad_norm": 0.3354203701019287, |
| "learning_rate": 9.455667708152836e-05, |
| "loss": 1.7392, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.16108563171451906, |
| "grad_norm": 0.31567585468292236, |
| "learning_rate": 9.453852247602704e-05, |
| "loss": 2.2258, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.16133693535213922, |
| "grad_norm": 0.35445067286491394, |
| "learning_rate": 9.452033939501839e-05, |
| "loss": 2.5792, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.16158823898975938, |
| "grad_norm": 0.3172896206378937, |
| "learning_rate": 9.45021278501277e-05, |
| "loss": 2.4493, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.16183954262737954, |
| "grad_norm": 0.4169299602508545, |
| "learning_rate": 9.448388785299842e-05, |
| "loss": 2.3125, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.1620908462649997, |
| "grad_norm": 0.5329498648643494, |
| "learning_rate": 9.446561941529224e-05, |
| "loss": 2.1722, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.16234214990261983, |
| "grad_norm": 0.21547356247901917, |
| "learning_rate": 9.444732254868898e-05, |
| "loss": 2.1758, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.16259345354024, |
| "grad_norm": 0.29865655303001404, |
| "learning_rate": 9.442899726488665e-05, |
| "loss": 1.5479, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.16284475717786015, |
| "grad_norm": 0.38035672903060913, |
| "learning_rate": 9.441064357560147e-05, |
| "loss": 2.8413, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.1630960608154803, |
| "grad_norm": 0.6323506832122803, |
| "learning_rate": 9.439226149256779e-05, |
| "loss": 2.1557, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.16334736445310047, |
| "grad_norm": 0.5052780508995056, |
| "learning_rate": 9.43738510275381e-05, |
| "loss": 1.7929, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.1635986680907206, |
| "grad_norm": 0.3237319588661194, |
| "learning_rate": 9.435541219228303e-05, |
| "loss": 2.336, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.16384997172834076, |
| "grad_norm": 0.18192382156848907, |
| "learning_rate": 9.433694499859141e-05, |
| "loss": 0.9273, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.16410127536596092, |
| "grad_norm": 0.31093844771385193, |
| "learning_rate": 9.431844945827014e-05, |
| "loss": 2.633, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.16435257900358108, |
| "grad_norm": 0.40673384070396423, |
| "learning_rate": 9.429992558314423e-05, |
| "loss": 1.8868, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.16460388264120124, |
| "grad_norm": 0.30386146903038025, |
| "learning_rate": 9.428137338505687e-05, |
| "loss": 2.1055, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.1648551862788214, |
| "grad_norm": 0.3704879581928253, |
| "learning_rate": 9.426279287586934e-05, |
| "loss": 1.9851, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.16510648991644153, |
| "grad_norm": 0.17675453424453735, |
| "learning_rate": 9.424418406746098e-05, |
| "loss": 1.0456, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.1653577935540617, |
| "grad_norm": 0.2338314950466156, |
| "learning_rate": 9.422554697172925e-05, |
| "loss": 1.8722, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.16560909719168185, |
| "grad_norm": 0.2262151688337326, |
| "learning_rate": 9.420688160058972e-05, |
| "loss": 1.2118, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.165860400829302, |
| "grad_norm": 0.32973191142082214, |
| "learning_rate": 9.418818796597597e-05, |
| "loss": 1.974, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.16611170446692217, |
| "grad_norm": 0.3016055226325989, |
| "learning_rate": 9.416946607983975e-05, |
| "loss": 1.8877, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.16636300810454233, |
| "grad_norm": 0.39721089601516724, |
| "learning_rate": 9.415071595415075e-05, |
| "loss": 2.1139, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.16661431174216246, |
| "grad_norm": 0.3398868143558502, |
| "learning_rate": 9.413193760089682e-05, |
| "loss": 1.9081, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.16686561537978262, |
| "grad_norm": 0.3406602144241333, |
| "learning_rate": 9.411313103208382e-05, |
| "loss": 2.1456, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.16711691901740278, |
| "grad_norm": 0.4096097946166992, |
| "learning_rate": 9.409429625973563e-05, |
| "loss": 2.294, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.16736822265502294, |
| "grad_norm": 0.2669360041618347, |
| "learning_rate": 9.407543329589418e-05, |
| "loss": 1.8453, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.1676195262926431, |
| "grad_norm": 0.5418170690536499, |
| "learning_rate": 9.405654215261944e-05, |
| "loss": 2.3642, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.16787082993026323, |
| "grad_norm": 0.32345104217529297, |
| "learning_rate": 9.403762284198936e-05, |
| "loss": 2.1211, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.1681221335678834, |
| "grad_norm": 0.49764448404312134, |
| "learning_rate": 9.401867537609991e-05, |
| "loss": 1.9661, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.16837343720550355, |
| "grad_norm": 1.5639890432357788, |
| "learning_rate": 9.399969976706509e-05, |
| "loss": 1.9938, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.1686247408431237, |
| "grad_norm": 1.2142359018325806, |
| "learning_rate": 9.398069602701687e-05, |
| "loss": 2.148, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.16887604448074386, |
| "grad_norm": 0.2211383879184723, |
| "learning_rate": 9.396166416810519e-05, |
| "loss": 2.0333, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.16912734811836402, |
| "grad_norm": 0.3100007176399231, |
| "learning_rate": 9.394260420249801e-05, |
| "loss": 2.529, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.16937865175598416, |
| "grad_norm": 0.39237165451049805, |
| "learning_rate": 9.39235161423812e-05, |
| "loss": 2.2316, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.16962995539360431, |
| "grad_norm": 0.553925096988678, |
| "learning_rate": 9.390439999995865e-05, |
| "loss": 2.1865, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.16988125903122447, |
| "grad_norm": 0.45522618293762207, |
| "learning_rate": 9.38852557874522e-05, |
| "loss": 2.346, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.17013256266884463, |
| "grad_norm": 0.4678781032562256, |
| "learning_rate": 9.386608351710157e-05, |
| "loss": 1.9335, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.1703838663064648, |
| "grad_norm": 0.3529011905193329, |
| "learning_rate": 9.38468832011645e-05, |
| "loss": 2.6414, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.17063516994408495, |
| "grad_norm": 0.26705560088157654, |
| "learning_rate": 9.382765485191662e-05, |
| "loss": 2.3328, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.17088647358170508, |
| "grad_norm": 0.3495092988014221, |
| "learning_rate": 9.380839848165149e-05, |
| "loss": 2.3455, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.17113777721932524, |
| "grad_norm": 0.32814642786979675, |
| "learning_rate": 9.378911410268058e-05, |
| "loss": 2.4395, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.1713890808569454, |
| "grad_norm": 0.33247315883636475, |
| "learning_rate": 9.376980172733329e-05, |
| "loss": 1.6115, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.17164038449456556, |
| "grad_norm": 0.44238927960395813, |
| "learning_rate": 9.375046136795686e-05, |
| "loss": 2.1676, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.17189168813218572, |
| "grad_norm": 0.3289899528026581, |
| "learning_rate": 9.373109303691652e-05, |
| "loss": 2.3906, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.17214299176980588, |
| "grad_norm": 0.3287547826766968, |
| "learning_rate": 9.371169674659529e-05, |
| "loss": 2.1608, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.172394295407426, |
| "grad_norm": 0.2546299397945404, |
| "learning_rate": 9.36922725093941e-05, |
| "loss": 2.4981, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.17264559904504617, |
| "grad_norm": 0.2905830144882202, |
| "learning_rate": 9.367282033773177e-05, |
| "loss": 2.3735, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.17289690268266633, |
| "grad_norm": 0.5941200256347656, |
| "learning_rate": 9.365334024404495e-05, |
| "loss": 2.7027, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.1731482063202865, |
| "grad_norm": 0.3145928680896759, |
| "learning_rate": 9.363383224078814e-05, |
| "loss": 2.1827, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.17339950995790665, |
| "grad_norm": 0.3063415586948395, |
| "learning_rate": 9.361429634043372e-05, |
| "loss": 2.0369, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.17365081359552678, |
| "grad_norm": 0.40637439489364624, |
| "learning_rate": 9.359473255547186e-05, |
| "loss": 2.4201, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.17390211723314694, |
| "grad_norm": 0.5028929710388184, |
| "learning_rate": 9.357514089841061e-05, |
| "loss": 1.7536, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.1741534208707671, |
| "grad_norm": 0.19582106173038483, |
| "learning_rate": 9.355552138177577e-05, |
| "loss": 1.6803, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.17440472450838726, |
| "grad_norm": 0.42780154943466187, |
| "learning_rate": 9.353587401811101e-05, |
| "loss": 2.0536, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.17465602814600742, |
| "grad_norm": 0.41308510303497314, |
| "learning_rate": 9.351619881997779e-05, |
| "loss": 2.1088, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.17490733178362758, |
| "grad_norm": 0.37217557430267334, |
| "learning_rate": 9.349649579995536e-05, |
| "loss": 2.3313, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.1751586354212477, |
| "grad_norm": 0.49139076471328735, |
| "learning_rate": 9.347676497064074e-05, |
| "loss": 2.048, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.17540993905886787, |
| "grad_norm": 0.49698978662490845, |
| "learning_rate": 9.345700634464876e-05, |
| "loss": 2.2736, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.17566124269648803, |
| "grad_norm": 0.3499569296836853, |
| "learning_rate": 9.343721993461203e-05, |
| "loss": 2.4421, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.1759125463341082, |
| "grad_norm": 0.2008151412010193, |
| "learning_rate": 9.341740575318088e-05, |
| "loss": 1.8728, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.17616384997172835, |
| "grad_norm": 0.43613699078559875, |
| "learning_rate": 9.339756381302341e-05, |
| "loss": 2.1268, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.1764151536093485, |
| "grad_norm": 0.24621760845184326, |
| "learning_rate": 9.337769412682551e-05, |
| "loss": 2.3428, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.17666645724696864, |
| "grad_norm": 0.6878277659416199, |
| "learning_rate": 9.335779670729075e-05, |
| "loss": 2.1971, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.1769177608845888, |
| "grad_norm": 0.3094477653503418, |
| "learning_rate": 9.333787156714047e-05, |
| "loss": 1.6276, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.17716906452220896, |
| "grad_norm": 0.2712198495864868, |
| "learning_rate": 9.331791871911371e-05, |
| "loss": 2.1207, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.17742036815982912, |
| "grad_norm": 0.40410006046295166, |
| "learning_rate": 9.329793817596724e-05, |
| "loss": 2.5001, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.17767167179744928, |
| "grad_norm": 0.37285852432250977, |
| "learning_rate": 9.327792995047553e-05, |
| "loss": 2.513, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.17792297543506944, |
| "grad_norm": 0.3589307963848114, |
| "learning_rate": 9.325789405543075e-05, |
| "loss": 2.3407, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.17817427907268957, |
| "grad_norm": 0.16013433039188385, |
| "learning_rate": 9.323783050364276e-05, |
| "loss": 1.0699, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.17842558271030973, |
| "grad_norm": 0.3747367858886719, |
| "learning_rate": 9.321773930793914e-05, |
| "loss": 2.2041, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1786768863479299, |
| "grad_norm": 0.48945263028144836, |
| "learning_rate": 9.319762048116503e-05, |
| "loss": 1.8284, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.17892818998555005, |
| "grad_norm": 0.5572097897529602, |
| "learning_rate": 9.317747403618337e-05, |
| "loss": 1.8432, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.1791794936231702, |
| "grad_norm": 0.4351899027824402, |
| "learning_rate": 9.31572999858747e-05, |
| "loss": 2.0469, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.17943079726079034, |
| "grad_norm": 0.747898519039154, |
| "learning_rate": 9.31370983431372e-05, |
| "loss": 2.3444, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.1796821008984105, |
| "grad_norm": 0.3677506148815155, |
| "learning_rate": 9.311686912088669e-05, |
| "loss": 1.7389, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.17993340453603066, |
| "grad_norm": 0.7408022880554199, |
| "learning_rate": 9.309661233205663e-05, |
| "loss": 2.3839, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.18018470817365081, |
| "grad_norm": 0.46684297919273376, |
| "learning_rate": 9.307632798959813e-05, |
| "loss": 2.4899, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.18043601181127097, |
| "grad_norm": 0.5166415572166443, |
| "learning_rate": 9.305601610647989e-05, |
| "loss": 2.2496, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.18068731544889113, |
| "grad_norm": 0.17087407410144806, |
| "learning_rate": 9.30356766956882e-05, |
| "loss": 0.624, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.18093861908651127, |
| "grad_norm": 0.3293837904930115, |
| "learning_rate": 9.301530977022701e-05, |
| "loss": 2.1589, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.18118992272413142, |
| "grad_norm": 0.5174190402030945, |
| "learning_rate": 9.29949153431178e-05, |
| "loss": 1.9908, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.18144122636175158, |
| "grad_norm": 0.6492531895637512, |
| "learning_rate": 9.297449342739964e-05, |
| "loss": 2.3542, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.18169252999937174, |
| "grad_norm": 0.3231172263622284, |
| "learning_rate": 9.295404403612924e-05, |
| "loss": 2.0346, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.1819438336369919, |
| "grad_norm": 0.3993067145347595, |
| "learning_rate": 9.293356718238077e-05, |
| "loss": 2.0821, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.18219513727461206, |
| "grad_norm": 0.3920503854751587, |
| "learning_rate": 9.291306287924608e-05, |
| "loss": 2.1273, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.1824464409122322, |
| "grad_norm": 0.3167310655117035, |
| "learning_rate": 9.289253113983444e-05, |
| "loss": 2.0075, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.18269774454985235, |
| "grad_norm": 0.3710818290710449, |
| "learning_rate": 9.287197197727277e-05, |
| "loss": 2.0204, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.1829490481874725, |
| "grad_norm": 0.18097934126853943, |
| "learning_rate": 9.285138540470546e-05, |
| "loss": 0.749, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.18320035182509267, |
| "grad_norm": 0.18432289361953735, |
| "learning_rate": 9.283077143529446e-05, |
| "loss": 0.8822, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.18345165546271283, |
| "grad_norm": 0.3951958417892456, |
| "learning_rate": 9.281013008221921e-05, |
| "loss": 2.1613, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.183702959100333, |
| "grad_norm": 0.3688110113143921, |
| "learning_rate": 9.278946135867665e-05, |
| "loss": 2.8058, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.18395426273795312, |
| "grad_norm": 0.29696959257125854, |
| "learning_rate": 9.276876527788127e-05, |
| "loss": 1.7129, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.18420556637557328, |
| "grad_norm": 0.405823677778244, |
| "learning_rate": 9.274804185306503e-05, |
| "loss": 2.334, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.18445687001319344, |
| "grad_norm": 0.4110073447227478, |
| "learning_rate": 9.27272910974773e-05, |
| "loss": 2.4929, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.1847081736508136, |
| "grad_norm": 0.3715936541557312, |
| "learning_rate": 9.270651302438502e-05, |
| "loss": 1.7891, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.18495947728843376, |
| "grad_norm": 0.1682804673910141, |
| "learning_rate": 9.268570764707257e-05, |
| "loss": 0.726, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.1852107809260539, |
| "grad_norm": 0.2395019680261612, |
| "learning_rate": 9.266487497884176e-05, |
| "loss": 2.4391, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.18546208456367405, |
| "grad_norm": 0.3601885437965393, |
| "learning_rate": 9.264401503301185e-05, |
| "loss": 2.5297, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.1857133882012942, |
| "grad_norm": 0.26369959115982056, |
| "learning_rate": 9.262312782291959e-05, |
| "loss": 1.7459, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.18596469183891437, |
| "grad_norm": 0.15436404943466187, |
| "learning_rate": 9.26022133619191e-05, |
| "loss": 1.0022, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.18621599547653453, |
| "grad_norm": 0.3303896486759186, |
| "learning_rate": 9.258127166338196e-05, |
| "loss": 2.3745, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.1864672991141547, |
| "grad_norm": 0.3118177056312561, |
| "learning_rate": 9.256030274069713e-05, |
| "loss": 2.8046, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.18671860275177482, |
| "grad_norm": 0.17974944412708282, |
| "learning_rate": 9.253930660727104e-05, |
| "loss": 0.8354, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.18696990638939498, |
| "grad_norm": 0.2209557741880417, |
| "learning_rate": 9.251828327652742e-05, |
| "loss": 2.1792, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.18722121002701514, |
| "grad_norm": 0.4132773280143738, |
| "learning_rate": 9.24972327619075e-05, |
| "loss": 2.3944, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1874725136646353, |
| "grad_norm": 0.47139856219291687, |
| "learning_rate": 9.24761550768698e-05, |
| "loss": 2.3182, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.18772381730225546, |
| "grad_norm": 0.40354403853416443, |
| "learning_rate": 9.245505023489024e-05, |
| "loss": 2.1719, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.18797512093987562, |
| "grad_norm": 0.23668596148490906, |
| "learning_rate": 9.243391824946213e-05, |
| "loss": 1.9976, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.18822642457749575, |
| "grad_norm": 0.48701080679893494, |
| "learning_rate": 9.24127591340961e-05, |
| "loss": 2.3892, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.1884777282151159, |
| "grad_norm": 0.48195892572402954, |
| "learning_rate": 9.239157290232014e-05, |
| "loss": 2.2488, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.18872903185273607, |
| "grad_norm": 0.3957456946372986, |
| "learning_rate": 9.237035956767956e-05, |
| "loss": 2.2675, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.18898033549035623, |
| "grad_norm": 0.419040709733963, |
| "learning_rate": 9.234911914373702e-05, |
| "loss": 1.9331, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.1892316391279764, |
| "grad_norm": 0.3198854327201843, |
| "learning_rate": 9.23278516440725e-05, |
| "loss": 1.9211, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.18948294276559652, |
| "grad_norm": 0.4320249855518341, |
| "learning_rate": 9.230655708228328e-05, |
| "loss": 1.9932, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.18973424640321668, |
| "grad_norm": 0.34588703513145447, |
| "learning_rate": 9.228523547198393e-05, |
| "loss": 1.6818, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.18998555004083684, |
| "grad_norm": 0.17924979329109192, |
| "learning_rate": 9.226388682680633e-05, |
| "loss": 1.2715, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.190236853678457, |
| "grad_norm": 0.3479664921760559, |
| "learning_rate": 9.224251116039965e-05, |
| "loss": 2.6595, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.19048815731607716, |
| "grad_norm": 0.9396395087242126, |
| "learning_rate": 9.222110848643035e-05, |
| "loss": 2.1373, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.19073946095369732, |
| "grad_norm": 0.423880934715271, |
| "learning_rate": 9.219967881858209e-05, |
| "loss": 2.0013, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.19099076459131745, |
| "grad_norm": 0.18442866206169128, |
| "learning_rate": 9.217822217055586e-05, |
| "loss": 1.1016, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1912420682289376, |
| "grad_norm": 0.33031755685806274, |
| "learning_rate": 9.215673855606986e-05, |
| "loss": 2.208, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.19149337186655777, |
| "grad_norm": 0.5207613706588745, |
| "learning_rate": 9.213522798885956e-05, |
| "loss": 2.0212, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.19174467550417792, |
| "grad_norm": 0.29409703612327576, |
| "learning_rate": 9.211369048267764e-05, |
| "loss": 2.5577, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.19199597914179808, |
| "grad_norm": 0.44755882024765015, |
| "learning_rate": 9.2092126051294e-05, |
| "loss": 2.1216, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.19224728277941824, |
| "grad_norm": 0.33680227398872375, |
| "learning_rate": 9.207053470849576e-05, |
| "loss": 2.5058, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.19249858641703838, |
| "grad_norm": 0.41669735312461853, |
| "learning_rate": 9.204891646808726e-05, |
| "loss": 2.5137, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.19274989005465853, |
| "grad_norm": 0.4869091808795929, |
| "learning_rate": 9.202727134389004e-05, |
| "loss": 2.2094, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.1930011936922787, |
| "grad_norm": 0.3771580159664154, |
| "learning_rate": 9.20055993497428e-05, |
| "loss": 2.5748, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.19325249732989885, |
| "grad_norm": 0.4663945734500885, |
| "learning_rate": 9.198390049950143e-05, |
| "loss": 2.6845, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.193503800967519, |
| "grad_norm": 0.6000380516052246, |
| "learning_rate": 9.196217480703899e-05, |
| "loss": 2.4598, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.19375510460513917, |
| "grad_norm": 0.4322783946990967, |
| "learning_rate": 9.194042228624572e-05, |
| "loss": 2.5049, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.1940064082427593, |
| "grad_norm": 0.194077730178833, |
| "learning_rate": 9.191864295102899e-05, |
| "loss": 1.5018, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.19425771188037946, |
| "grad_norm": 0.28692805767059326, |
| "learning_rate": 9.189683681531333e-05, |
| "loss": 1.8483, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.19450901551799962, |
| "grad_norm": 0.8345639109611511, |
| "learning_rate": 9.187500389304037e-05, |
| "loss": 1.8403, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.19476031915561978, |
| "grad_norm": 0.3533509373664856, |
| "learning_rate": 9.185314419816892e-05, |
| "loss": 2.4375, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.19501162279323994, |
| "grad_norm": 0.40252941846847534, |
| "learning_rate": 9.18312577446749e-05, |
| "loss": 2.1452, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.19526292643086007, |
| "grad_norm": 0.4904803931713104, |
| "learning_rate": 9.180934454655126e-05, |
| "loss": 2.2475, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.19551423006848023, |
| "grad_norm": 0.4086427688598633, |
| "learning_rate": 9.178740461780812e-05, |
| "loss": 1.9234, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.1957655337061004, |
| "grad_norm": 0.32106295228004456, |
| "learning_rate": 9.176543797247271e-05, |
| "loss": 2.3433, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.19601683734372055, |
| "grad_norm": 0.45663875341415405, |
| "learning_rate": 9.17434446245893e-05, |
| "loss": 2.4102, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.1962681409813407, |
| "grad_norm": 0.16669417917728424, |
| "learning_rate": 9.17214245882192e-05, |
| "loss": 0.9148, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.19651944461896087, |
| "grad_norm": 0.21185293793678284, |
| "learning_rate": 9.169937787744088e-05, |
| "loss": 2.032, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.196770748256581, |
| "grad_norm": 0.28057172894477844, |
| "learning_rate": 9.167730450634975e-05, |
| "loss": 2.3357, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.19702205189420116, |
| "grad_norm": 0.30073508620262146, |
| "learning_rate": 9.165520448905835e-05, |
| "loss": 1.9842, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.19727335553182132, |
| "grad_norm": 0.5807662606239319, |
| "learning_rate": 9.163307783969624e-05, |
| "loss": 2.1852, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.19752465916944148, |
| "grad_norm": 0.43151628971099854, |
| "learning_rate": 9.161092457240999e-05, |
| "loss": 2.3249, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.19777596280706164, |
| "grad_norm": 0.31615135073661804, |
| "learning_rate": 9.158874470136319e-05, |
| "loss": 2.3183, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.1980272664446818, |
| "grad_norm": 0.4318180978298187, |
| "learning_rate": 9.156653824073642e-05, |
| "loss": 2.0189, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.19827857008230193, |
| "grad_norm": 0.6335446834564209, |
| "learning_rate": 9.154430520472731e-05, |
| "loss": 1.8264, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.1985298737199221, |
| "grad_norm": 0.1641846001148224, |
| "learning_rate": 9.152204560755045e-05, |
| "loss": 1.3867, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.19878117735754225, |
| "grad_norm": 0.44742926955223083, |
| "learning_rate": 9.149975946343741e-05, |
| "loss": 1.9269, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.1990324809951624, |
| "grad_norm": 0.438804566860199, |
| "learning_rate": 9.147744678663672e-05, |
| "loss": 1.8561, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.19928378463278257, |
| "grad_norm": 0.6063904166221619, |
| "learning_rate": 9.145510759141393e-05, |
| "loss": 2.1038, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.19953508827040273, |
| "grad_norm": 0.3686808943748474, |
| "learning_rate": 9.143274189205147e-05, |
| "loss": 2.811, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.19978639190802286, |
| "grad_norm": 0.45831429958343506, |
| "learning_rate": 9.141034970284877e-05, |
| "loss": 2.1029, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.20003769554564302, |
| "grad_norm": 0.4418196678161621, |
| "learning_rate": 9.138793103812218e-05, |
| "loss": 1.9126, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.20028899918326318, |
| "grad_norm": 0.23358654975891113, |
| "learning_rate": 9.136548591220495e-05, |
| "loss": 2.0087, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.20054030282088334, |
| "grad_norm": 0.5014088749885559, |
| "learning_rate": 9.134301433944731e-05, |
| "loss": 2.1698, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.2007916064585035, |
| "grad_norm": 0.48934677243232727, |
| "learning_rate": 9.132051633421632e-05, |
| "loss": 1.9628, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.20104291009612363, |
| "grad_norm": 0.46975913643836975, |
| "learning_rate": 9.129799191089601e-05, |
| "loss": 2.1432, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2012942137337438, |
| "grad_norm": 0.33380022644996643, |
| "learning_rate": 9.127544108388725e-05, |
| "loss": 1.7332, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.20154551737136395, |
| "grad_norm": 0.6146292090415955, |
| "learning_rate": 9.125286386760785e-05, |
| "loss": 2.2721, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.2017968210089841, |
| "grad_norm": 0.14950276911258698, |
| "learning_rate": 9.12302602764924e-05, |
| "loss": 1.2602, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.20204812464660427, |
| "grad_norm": 0.4614298641681671, |
| "learning_rate": 9.120763032499242e-05, |
| "loss": 2.2327, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.20229942828422443, |
| "grad_norm": 0.249393031001091, |
| "learning_rate": 9.118497402757631e-05, |
| "loss": 1.8148, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.20255073192184456, |
| "grad_norm": 0.41946738958358765, |
| "learning_rate": 9.116229139872922e-05, |
| "loss": 2.5221, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.20280203555946472, |
| "grad_norm": 0.4390411078929901, |
| "learning_rate": 9.113958245295321e-05, |
| "loss": 2.1989, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.20305333919708488, |
| "grad_norm": 0.5600268244743347, |
| "learning_rate": 9.111684720476717e-05, |
| "loss": 2.6773, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.20330464283470503, |
| "grad_norm": 0.2843821346759796, |
| "learning_rate": 9.109408566870673e-05, |
| "loss": 1.9472, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.2035559464723252, |
| "grad_norm": 0.3715212643146515, |
| "learning_rate": 9.107129785932443e-05, |
| "loss": 2.1466, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.20380725010994535, |
| "grad_norm": 0.8551393151283264, |
| "learning_rate": 9.10484837911895e-05, |
| "loss": 2.199, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.20405855374756549, |
| "grad_norm": 0.5052684545516968, |
| "learning_rate": 9.102564347888806e-05, |
| "loss": 1.9972, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.20430985738518564, |
| "grad_norm": 0.24479907751083374, |
| "learning_rate": 9.100277693702294e-05, |
| "loss": 2.3708, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.2045611610228058, |
| "grad_norm": 0.42139294743537903, |
| "learning_rate": 9.097988418021377e-05, |
| "loss": 1.9225, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.20481246466042596, |
| "grad_norm": 0.342489629983902, |
| "learning_rate": 9.095696522309693e-05, |
| "loss": 2.7236, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.20506376829804612, |
| "grad_norm": 0.47755831480026245, |
| "learning_rate": 9.093402008032554e-05, |
| "loss": 2.2168, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.20531507193566628, |
| "grad_norm": 0.32807457447052, |
| "learning_rate": 9.09110487665695e-05, |
| "loss": 2.4625, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.20556637557328641, |
| "grad_norm": 0.3581337034702301, |
| "learning_rate": 9.088805129651542e-05, |
| "loss": 2.6607, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.20581767921090657, |
| "grad_norm": 0.24006719887256622, |
| "learning_rate": 9.08650276848666e-05, |
| "loss": 2.0933, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.20606898284852673, |
| "grad_norm": 0.22163568437099457, |
| "learning_rate": 9.084197794634312e-05, |
| "loss": 2.0709, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2063202864861469, |
| "grad_norm": 0.20495516061782837, |
| "learning_rate": 9.081890209568169e-05, |
| "loss": 1.8137, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.20657159012376705, |
| "grad_norm": 0.33006590604782104, |
| "learning_rate": 9.079580014763579e-05, |
| "loss": 2.242, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.20682289376138718, |
| "grad_norm": 0.31708237528800964, |
| "learning_rate": 9.077267211697554e-05, |
| "loss": 2.5707, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.20707419739900734, |
| "grad_norm": 0.3039303719997406, |
| "learning_rate": 9.07495180184877e-05, |
| "loss": 2.4013, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.2073255010366275, |
| "grad_norm": 0.3243713974952698, |
| "learning_rate": 9.072633786697581e-05, |
| "loss": 1.9324, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.20757680467424766, |
| "grad_norm": 0.376941055059433, |
| "learning_rate": 9.070313167725995e-05, |
| "loss": 1.9856, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.20782810831186782, |
| "grad_norm": 0.4256725013256073, |
| "learning_rate": 9.06798994641769e-05, |
| "loss": 1.9451, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.20807941194948798, |
| "grad_norm": 0.21601825952529907, |
| "learning_rate": 9.06566412425801e-05, |
| "loss": 0.9348, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.2083307155871081, |
| "grad_norm": 0.5165765881538391, |
| "learning_rate": 9.063335702733958e-05, |
| "loss": 2.3604, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.20858201922472827, |
| "grad_norm": 0.4365144670009613, |
| "learning_rate": 9.061004683334196e-05, |
| "loss": 2.0167, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.20883332286234843, |
| "grad_norm": 0.2237500250339508, |
| "learning_rate": 9.058671067549056e-05, |
| "loss": 1.7844, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.2090846264999686, |
| "grad_norm": 0.26887792348861694, |
| "learning_rate": 9.056334856870522e-05, |
| "loss": 2.3547, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.20933593013758875, |
| "grad_norm": 0.21619755029678345, |
| "learning_rate": 9.053996052792244e-05, |
| "loss": 2.0557, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.2095872337752089, |
| "grad_norm": 0.5002549886703491, |
| "learning_rate": 9.051654656809521e-05, |
| "loss": 1.962, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.20983853741282904, |
| "grad_norm": 0.3360225260257721, |
| "learning_rate": 9.049310670419316e-05, |
| "loss": 1.9531, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.2100898410504492, |
| "grad_norm": 0.24657025933265686, |
| "learning_rate": 9.046964095120248e-05, |
| "loss": 1.2244, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.21034114468806936, |
| "grad_norm": 0.29951533675193787, |
| "learning_rate": 9.044614932412587e-05, |
| "loss": 1.9471, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.21059244832568952, |
| "grad_norm": 0.3678789734840393, |
| "learning_rate": 9.04226318379826e-05, |
| "loss": 2.3963, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.21084375196330968, |
| "grad_norm": 0.45650580525398254, |
| "learning_rate": 9.03990885078085e-05, |
| "loss": 2.342, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.21109505560092984, |
| "grad_norm": 0.4444562792778015, |
| "learning_rate": 9.037551934865587e-05, |
| "loss": 1.9851, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.21134635923854997, |
| "grad_norm": 0.2063484787940979, |
| "learning_rate": 9.035192437559354e-05, |
| "loss": 2.2532, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.21159766287617013, |
| "grad_norm": 0.3520076274871826, |
| "learning_rate": 9.032830360370688e-05, |
| "loss": 1.6042, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.2118489665137903, |
| "grad_norm": 0.4106435477733612, |
| "learning_rate": 9.03046570480977e-05, |
| "loss": 2.0197, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.21210027015141045, |
| "grad_norm": 0.37212634086608887, |
| "learning_rate": 9.028098472388433e-05, |
| "loss": 2.1224, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2123515737890306, |
| "grad_norm": 0.36991527676582336, |
| "learning_rate": 9.025728664620157e-05, |
| "loss": 2.5759, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.21260287742665074, |
| "grad_norm": 0.3646388649940491, |
| "learning_rate": 9.023356283020067e-05, |
| "loss": 2.2962, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.2128541810642709, |
| "grad_norm": 0.21754935383796692, |
| "learning_rate": 9.020981329104936e-05, |
| "loss": 1.7198, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.21310548470189106, |
| "grad_norm": 0.24025185406208038, |
| "learning_rate": 9.01860380439318e-05, |
| "loss": 1.271, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.21335678833951122, |
| "grad_norm": 0.42848479747772217, |
| "learning_rate": 9.016223710404856e-05, |
| "loss": 2.3559, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.21360809197713138, |
| "grad_norm": 0.4237200915813446, |
| "learning_rate": 9.013841048661673e-05, |
| "loss": 2.2002, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.21385939561475154, |
| "grad_norm": 0.31660404801368713, |
| "learning_rate": 9.01145582068697e-05, |
| "loss": 2.441, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.21411069925237167, |
| "grad_norm": 0.4812658131122589, |
| "learning_rate": 9.009068028005732e-05, |
| "loss": 2.2861, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.21436200288999183, |
| "grad_norm": 0.37174031138420105, |
| "learning_rate": 9.006677672144586e-05, |
| "loss": 2.4933, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.21461330652761199, |
| "grad_norm": 0.5248540639877319, |
| "learning_rate": 9.004284754631793e-05, |
| "loss": 1.9397, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.21486461016523214, |
| "grad_norm": 0.2802974283695221, |
| "learning_rate": 9.001889276997258e-05, |
| "loss": 2.2688, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.2151159138028523, |
| "grad_norm": 0.4122345745563507, |
| "learning_rate": 8.999491240772516e-05, |
| "loss": 2.1688, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.21536721744047246, |
| "grad_norm": 0.49358898401260376, |
| "learning_rate": 8.99709064749074e-05, |
| "loss": 2.0717, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.2156185210780926, |
| "grad_norm": 0.415002703666687, |
| "learning_rate": 8.994687498686742e-05, |
| "loss": 2.4572, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.21586982471571275, |
| "grad_norm": 0.3565453290939331, |
| "learning_rate": 8.992281795896962e-05, |
| "loss": 2.2275, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.21612112835333291, |
| "grad_norm": 0.25147542357444763, |
| "learning_rate": 8.989873540659476e-05, |
| "loss": 1.6368, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.21637243199095307, |
| "grad_norm": 0.330954372882843, |
| "learning_rate": 8.987462734513993e-05, |
| "loss": 1.6743, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.21662373562857323, |
| "grad_norm": 0.3177106976509094, |
| "learning_rate": 8.985049379001849e-05, |
| "loss": 2.373, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.21687503926619336, |
| "grad_norm": 0.4051658511161804, |
| "learning_rate": 8.982633475666014e-05, |
| "loss": 2.504, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.21712634290381352, |
| "grad_norm": 0.1954205185174942, |
| "learning_rate": 8.980215026051083e-05, |
| "loss": 1.8698, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.21737764654143368, |
| "grad_norm": 0.31402403116226196, |
| "learning_rate": 8.977794031703282e-05, |
| "loss": 2.2363, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.21762895017905384, |
| "grad_norm": 0.28115931153297424, |
| "learning_rate": 8.975370494170463e-05, |
| "loss": 2.3547, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.217880253816674, |
| "grad_norm": 0.4603864252567291, |
| "learning_rate": 8.972944415002105e-05, |
| "loss": 2.3678, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.21813155745429416, |
| "grad_norm": 0.21994365751743317, |
| "learning_rate": 8.97051579574931e-05, |
| "loss": 2.2028, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.2183828610919143, |
| "grad_norm": 0.3067527711391449, |
| "learning_rate": 8.968084637964804e-05, |
| "loss": 2.6017, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.21863416472953445, |
| "grad_norm": 0.33018767833709717, |
| "learning_rate": 8.96565094320294e-05, |
| "loss": 2.3301, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2188854683671546, |
| "grad_norm": 0.6793331503868103, |
| "learning_rate": 8.963214713019687e-05, |
| "loss": 2.7803, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.21913677200477477, |
| "grad_norm": 0.3478843569755554, |
| "learning_rate": 8.96077594897264e-05, |
| "loss": 2.2915, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.21938807564239493, |
| "grad_norm": 0.400673508644104, |
| "learning_rate": 8.95833465262101e-05, |
| "loss": 1.2611, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.2196393792800151, |
| "grad_norm": 0.27117395401000977, |
| "learning_rate": 8.955890825525631e-05, |
| "loss": 2.6418, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.21989068291763522, |
| "grad_norm": 0.41636621952056885, |
| "learning_rate": 8.953444469248952e-05, |
| "loss": 2.0555, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.22014198655525538, |
| "grad_norm": 0.5339227318763733, |
| "learning_rate": 8.95099558535504e-05, |
| "loss": 2.4363, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.22039329019287554, |
| "grad_norm": 0.4329914450645447, |
| "learning_rate": 8.948544175409579e-05, |
| "loss": 2.198, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.2206445938304957, |
| "grad_norm": 0.377668559551239, |
| "learning_rate": 8.946090240979865e-05, |
| "loss": 2.2962, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.22089589746811586, |
| "grad_norm": 0.3951661288738251, |
| "learning_rate": 8.943633783634813e-05, |
| "loss": 2.1264, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.22114720110573602, |
| "grad_norm": 0.36469566822052, |
| "learning_rate": 8.941174804944948e-05, |
| "loss": 2.6947, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.22139850474335615, |
| "grad_norm": 0.21884319186210632, |
| "learning_rate": 8.938713306482403e-05, |
| "loss": 1.9526, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.2216498083809763, |
| "grad_norm": 0.38323378562927246, |
| "learning_rate": 8.936249289820931e-05, |
| "loss": 2.1726, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.22190111201859647, |
| "grad_norm": 0.653200089931488, |
| "learning_rate": 8.933782756535887e-05, |
| "loss": 2.505, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.22215241565621663, |
| "grad_norm": 0.41666847467422485, |
| "learning_rate": 8.931313708204239e-05, |
| "loss": 2.218, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.2224037192938368, |
| "grad_norm": 0.3992173373699188, |
| "learning_rate": 8.928842146404562e-05, |
| "loss": 2.4002, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.22265502293145692, |
| "grad_norm": 0.5349919199943542, |
| "learning_rate": 8.92636807271704e-05, |
| "loss": 1.5443, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.22290632656907708, |
| "grad_norm": 0.3590314984321594, |
| "learning_rate": 8.923891488723459e-05, |
| "loss": 2.3424, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.22315763020669724, |
| "grad_norm": 0.4399639666080475, |
| "learning_rate": 8.921412396007212e-05, |
| "loss": 2.3039, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.2234089338443174, |
| "grad_norm": 0.476307213306427, |
| "learning_rate": 8.918930796153297e-05, |
| "loss": 2.0807, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.22366023748193756, |
| "grad_norm": 0.40012243390083313, |
| "learning_rate": 8.916446690748315e-05, |
| "loss": 2.2535, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.22391154111955772, |
| "grad_norm": 0.4795278012752533, |
| "learning_rate": 8.913960081380465e-05, |
| "loss": 1.9693, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.22416284475717785, |
| "grad_norm": 0.1798836588859558, |
| "learning_rate": 8.911470969639551e-05, |
| "loss": 0.6201, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.224414148394798, |
| "grad_norm": 0.8967116475105286, |
| "learning_rate": 8.908979357116976e-05, |
| "loss": 2.3321, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.22466545203241817, |
| "grad_norm": 0.363643616437912, |
| "learning_rate": 8.90648524540574e-05, |
| "loss": 2.2787, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.22491675567003833, |
| "grad_norm": 0.46721211075782776, |
| "learning_rate": 8.903988636100445e-05, |
| "loss": 2.05, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.22516805930765849, |
| "grad_norm": 0.41919320821762085, |
| "learning_rate": 8.901489530797282e-05, |
| "loss": 2.2821, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.22541936294527865, |
| "grad_norm": 0.29892247915267944, |
| "learning_rate": 8.898987931094049e-05, |
| "loss": 2.3336, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.22567066658289878, |
| "grad_norm": 0.41926395893096924, |
| "learning_rate": 8.896483838590131e-05, |
| "loss": 2.1726, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.22592197022051894, |
| "grad_norm": 0.3506767451763153, |
| "learning_rate": 8.893977254886505e-05, |
| "loss": 1.8011, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.2261732738581391, |
| "grad_norm": 0.39030522108078003, |
| "learning_rate": 8.891468181585747e-05, |
| "loss": 1.962, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.22642457749575925, |
| "grad_norm": 0.40221068263053894, |
| "learning_rate": 8.888956620292022e-05, |
| "loss": 2.2385, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.22667588113337941, |
| "grad_norm": 0.312210351228714, |
| "learning_rate": 8.886442572611087e-05, |
| "loss": 2.3336, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.22692718477099957, |
| "grad_norm": 0.1972939372062683, |
| "learning_rate": 8.883926040150283e-05, |
| "loss": 1.8672, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.2271784884086197, |
| "grad_norm": 0.30837222933769226, |
| "learning_rate": 8.881407024518548e-05, |
| "loss": 2.1923, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.22742979204623986, |
| "grad_norm": 0.32954657077789307, |
| "learning_rate": 8.8788855273264e-05, |
| "loss": 2.53, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.22768109568386002, |
| "grad_norm": 0.4983203113079071, |
| "learning_rate": 8.87636155018595e-05, |
| "loss": 1.7686, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.22793239932148018, |
| "grad_norm": 0.29157817363739014, |
| "learning_rate": 8.873835094710891e-05, |
| "loss": 2.0444, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.22818370295910034, |
| "grad_norm": 0.7063765525817871, |
| "learning_rate": 8.8713061625165e-05, |
| "loss": 2.3444, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.22843500659672047, |
| "grad_norm": 0.2752906382083893, |
| "learning_rate": 8.868774755219641e-05, |
| "loss": 2.2363, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.22868631023434063, |
| "grad_norm": 0.4020974338054657, |
| "learning_rate": 8.866240874438755e-05, |
| "loss": 2.2792, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2289376138719608, |
| "grad_norm": 0.3203171193599701, |
| "learning_rate": 8.863704521793869e-05, |
| "loss": 2.1895, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.22918891750958095, |
| "grad_norm": 0.7605288028717041, |
| "learning_rate": 8.861165698906589e-05, |
| "loss": 2.2526, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.2294402211472011, |
| "grad_norm": 0.4583891034126282, |
| "learning_rate": 8.8586244074001e-05, |
| "loss": 2.5682, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.22969152478482127, |
| "grad_norm": 0.29976171255111694, |
| "learning_rate": 8.856080648899163e-05, |
| "loss": 1.8839, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.2299428284224414, |
| "grad_norm": 0.4007084369659424, |
| "learning_rate": 8.853534425030123e-05, |
| "loss": 2.2063, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.23019413206006156, |
| "grad_norm": 0.37610113620758057, |
| "learning_rate": 8.850985737420896e-05, |
| "loss": 2.5636, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.23044543569768172, |
| "grad_norm": 0.3937523663043976, |
| "learning_rate": 8.84843458770097e-05, |
| "loss": 2.2865, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.23069673933530188, |
| "grad_norm": 0.44338393211364746, |
| "learning_rate": 8.845880977501419e-05, |
| "loss": 2.2287, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.23094804297292204, |
| "grad_norm": 0.4235187768936157, |
| "learning_rate": 8.843324908454875e-05, |
| "loss": 2.3839, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.2311993466105422, |
| "grad_norm": 0.5745797157287598, |
| "learning_rate": 8.840766382195553e-05, |
| "loss": 1.9735, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.23145065024816233, |
| "grad_norm": 0.35707423090934753, |
| "learning_rate": 8.838205400359234e-05, |
| "loss": 1.8084, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.2317019538857825, |
| "grad_norm": 0.323047935962677, |
| "learning_rate": 8.835641964583272e-05, |
| "loss": 2.422, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.23195325752340265, |
| "grad_norm": 0.4362463355064392, |
| "learning_rate": 8.833076076506588e-05, |
| "loss": 2.5153, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.2322045611610228, |
| "grad_norm": 0.3632015883922577, |
| "learning_rate": 8.830507737769669e-05, |
| "loss": 2.7776, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.23245586479864297, |
| "grad_norm": 0.48653021454811096, |
| "learning_rate": 8.827936950014573e-05, |
| "loss": 2.3679, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.23270716843626313, |
| "grad_norm": 0.37184858322143555, |
| "learning_rate": 8.825363714884922e-05, |
| "loss": 1.9735, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.23295847207388326, |
| "grad_norm": 0.38167500495910645, |
| "learning_rate": 8.822788034025903e-05, |
| "loss": 2.8957, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.23320977571150342, |
| "grad_norm": 0.4858744144439697, |
| "learning_rate": 8.820209909084265e-05, |
| "loss": 2.4265, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.23346107934912358, |
| "grad_norm": 0.4078523516654968, |
| "learning_rate": 8.81762934170832e-05, |
| "loss": 2.0574, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.23371238298674374, |
| "grad_norm": 0.3559398353099823, |
| "learning_rate": 8.815046333547943e-05, |
| "loss": 2.2669, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2339636866243639, |
| "grad_norm": 0.4299301207065582, |
| "learning_rate": 8.81246088625457e-05, |
| "loss": 2.1053, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.23421499026198403, |
| "grad_norm": 0.40756756067276, |
| "learning_rate": 8.809873001481193e-05, |
| "loss": 2.3635, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.2344662938996042, |
| "grad_norm": 0.37873560190200806, |
| "learning_rate": 8.807282680882367e-05, |
| "loss": 1.0149, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.23471759753722435, |
| "grad_norm": 0.40375521779060364, |
| "learning_rate": 8.8046899261142e-05, |
| "loss": 2.5615, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.2349689011748445, |
| "grad_norm": 0.4414771497249603, |
| "learning_rate": 8.802094738834361e-05, |
| "loss": 2.513, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.23522020481246467, |
| "grad_norm": 0.40930548310279846, |
| "learning_rate": 8.799497120702069e-05, |
| "loss": 1.5781, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.23547150845008483, |
| "grad_norm": 0.4570627808570862, |
| "learning_rate": 8.7968970733781e-05, |
| "loss": 2.2293, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.23572281208770496, |
| "grad_norm": 0.38974353671073914, |
| "learning_rate": 8.794294598524784e-05, |
| "loss": 1.959, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.23597411572532512, |
| "grad_norm": 0.5479612946510315, |
| "learning_rate": 8.791689697806e-05, |
| "loss": 2.0345, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.23622541936294528, |
| "grad_norm": 0.35705479979515076, |
| "learning_rate": 8.789082372887183e-05, |
| "loss": 2.2542, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.23647672300056544, |
| "grad_norm": 0.4570086896419525, |
| "learning_rate": 8.786472625435311e-05, |
| "loss": 2.4306, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.2367280266381856, |
| "grad_norm": 1.2224934101104736, |
| "learning_rate": 8.783860457118918e-05, |
| "loss": 2.031, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.23697933027580576, |
| "grad_norm": 0.47019338607788086, |
| "learning_rate": 8.781245869608077e-05, |
| "loss": 2.5845, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.2372306339134259, |
| "grad_norm": 0.4843159019947052, |
| "learning_rate": 8.778628864574419e-05, |
| "loss": 2.2976, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.23748193755104605, |
| "grad_norm": 0.44156232476234436, |
| "learning_rate": 8.776009443691109e-05, |
| "loss": 1.7753, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.2377332411886662, |
| "grad_norm": 0.4849649667739868, |
| "learning_rate": 8.773387608632867e-05, |
| "loss": 2.4643, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.23798454482628636, |
| "grad_norm": 0.47490194439888, |
| "learning_rate": 8.770763361075949e-05, |
| "loss": 1.9425, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.23823584846390652, |
| "grad_norm": 0.42135703563690186, |
| "learning_rate": 8.768136702698158e-05, |
| "loss": 2.2653, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.23848715210152666, |
| "grad_norm": 0.3938981294631958, |
| "learning_rate": 8.765507635178832e-05, |
| "loss": 1.8336, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.23873845573914682, |
| "grad_norm": 0.3590041399002075, |
| "learning_rate": 8.762876160198858e-05, |
| "loss": 2.083, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.23898975937676697, |
| "grad_norm": 10.042701721191406, |
| "learning_rate": 8.760242279440657e-05, |
| "loss": 1.9433, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.23924106301438713, |
| "grad_norm": 0.4117979407310486, |
| "learning_rate": 8.75760599458819e-05, |
| "loss": 1.9981, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.2394923666520073, |
| "grad_norm": 0.3976224660873413, |
| "learning_rate": 8.754967307326951e-05, |
| "loss": 1.9221, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.23974367028962745, |
| "grad_norm": 0.4007420241832733, |
| "learning_rate": 8.752326219343977e-05, |
| "loss": 2.1583, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.23999497392724758, |
| "grad_norm": 0.43828290700912476, |
| "learning_rate": 8.74968273232783e-05, |
| "loss": 2.0958, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.24024627756486774, |
| "grad_norm": 0.3099430501461029, |
| "learning_rate": 8.747036847968618e-05, |
| "loss": 2.0985, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.2404975812024879, |
| "grad_norm": 0.2864762842655182, |
| "learning_rate": 8.744388567957971e-05, |
| "loss": 1.9034, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.24074888484010806, |
| "grad_norm": 0.38019657135009766, |
| "learning_rate": 8.741737893989058e-05, |
| "loss": 2.2289, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.24100018847772822, |
| "grad_norm": 0.506572425365448, |
| "learning_rate": 8.739084827756575e-05, |
| "loss": 2.3025, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.24125149211534838, |
| "grad_norm": 0.4378896653652191, |
| "learning_rate": 8.736429370956746e-05, |
| "loss": 1.9396, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2415027957529685, |
| "grad_norm": 0.36668267846107483, |
| "learning_rate": 8.733771525287331e-05, |
| "loss": 2.7244, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.24175409939058867, |
| "grad_norm": 0.3157023787498474, |
| "learning_rate": 8.731111292447605e-05, |
| "loss": 2.2407, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.24200540302820883, |
| "grad_norm": 0.2563331127166748, |
| "learning_rate": 8.728448674138381e-05, |
| "loss": 2.4275, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.242256706665829, |
| "grad_norm": 0.49870792031288147, |
| "learning_rate": 8.72578367206199e-05, |
| "loss": 2.0975, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.24250801030344915, |
| "grad_norm": 0.328143835067749, |
| "learning_rate": 8.723116287922288e-05, |
| "loss": 2.0371, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.2427593139410693, |
| "grad_norm": 0.40886473655700684, |
| "learning_rate": 8.72044652342466e-05, |
| "loss": 2.294, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.24301061757868944, |
| "grad_norm": 0.39602747559547424, |
| "learning_rate": 8.717774380276002e-05, |
| "loss": 2.067, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.2432619212163096, |
| "grad_norm": 0.33269646763801575, |
| "learning_rate": 8.715099860184743e-05, |
| "loss": 2.2003, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.24351322485392976, |
| "grad_norm": 0.4260612726211548, |
| "learning_rate": 8.712422964860822e-05, |
| "loss": 2.4808, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.24376452849154992, |
| "grad_norm": 0.3412139415740967, |
| "learning_rate": 8.7097436960157e-05, |
| "loss": 2.3457, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.24401583212917008, |
| "grad_norm": 0.402170866727829, |
| "learning_rate": 8.707062055362359e-05, |
| "loss": 2.4638, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.2442671357667902, |
| "grad_norm": 0.2597676217556, |
| "learning_rate": 8.70437804461529e-05, |
| "loss": 1.7648, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.24451843940441037, |
| "grad_norm": 0.424844354391098, |
| "learning_rate": 8.701691665490504e-05, |
| "loss": 2.276, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.24476974304203053, |
| "grad_norm": 0.1249924823641777, |
| "learning_rate": 8.699002919705527e-05, |
| "loss": 0.5754, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.2450210466796507, |
| "grad_norm": 0.14425934851169586, |
| "learning_rate": 8.696311808979397e-05, |
| "loss": 0.9444, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.24527235031727085, |
| "grad_norm": 0.5581234693527222, |
| "learning_rate": 8.693618335032663e-05, |
| "loss": 2.2831, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.245523653954891, |
| "grad_norm": 0.17069493234157562, |
| "learning_rate": 8.690922499587387e-05, |
| "loss": 1.0646, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.24577495759251114, |
| "grad_norm": 0.3384755551815033, |
| "learning_rate": 8.688224304367137e-05, |
| "loss": 1.8558, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.2460262612301313, |
| "grad_norm": 0.3919682204723358, |
| "learning_rate": 8.685523751096994e-05, |
| "loss": 2.5674, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.24627756486775146, |
| "grad_norm": 0.25237560272216797, |
| "learning_rate": 8.682820841503542e-05, |
| "loss": 1.7601, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.24652886850537162, |
| "grad_norm": 0.3378541171550751, |
| "learning_rate": 8.680115577314877e-05, |
| "loss": 2.5141, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.24678017214299178, |
| "grad_norm": 0.4153686761856079, |
| "learning_rate": 8.6774079602606e-05, |
| "loss": 1.1121, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.24703147578061194, |
| "grad_norm": 0.3883324861526489, |
| "learning_rate": 8.67469799207181e-05, |
| "loss": 1.8358, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.24728277941823207, |
| "grad_norm": 0.35025763511657715, |
| "learning_rate": 8.671985674481113e-05, |
| "loss": 1.7407, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.24753408305585223, |
| "grad_norm": 0.1769835203886032, |
| "learning_rate": 8.66927100922262e-05, |
| "loss": 1.1858, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.2477853866934724, |
| "grad_norm": 0.4593118131160736, |
| "learning_rate": 8.66655399803194e-05, |
| "loss": 2.4193, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.24803669033109255, |
| "grad_norm": 0.24635930359363556, |
| "learning_rate": 8.663834642646178e-05, |
| "loss": 2.3587, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.2482879939687127, |
| "grad_norm": 0.3709847927093506, |
| "learning_rate": 8.661112944803946e-05, |
| "loss": 2.5285, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.24853929760633287, |
| "grad_norm": 0.3426840603351593, |
| "learning_rate": 8.65838890624535e-05, |
| "loss": 1.9006, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.248790601243953, |
| "grad_norm": 0.29686790704727173, |
| "learning_rate": 8.655662528711987e-05, |
| "loss": 1.9789, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.24904190488157316, |
| "grad_norm": 0.15335550904273987, |
| "learning_rate": 8.65293381394696e-05, |
| "loss": 1.379, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.24929320851919332, |
| "grad_norm": 0.31200841069221497, |
| "learning_rate": 8.650202763694856e-05, |
| "loss": 2.6586, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.24954451215681347, |
| "grad_norm": 0.28087717294692993, |
| "learning_rate": 8.647469379701765e-05, |
| "loss": 2.1206, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.24979581579443363, |
| "grad_norm": 0.4623563289642334, |
| "learning_rate": 8.64473366371526e-05, |
| "loss": 2.3116, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.25004711943205377, |
| "grad_norm": 0.3620995879173279, |
| "learning_rate": 8.641995617484411e-05, |
| "loss": 2.1746, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.2502984230696739, |
| "grad_norm": 0.42614445090293884, |
| "learning_rate": 8.639255242759773e-05, |
| "loss": 2.1065, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.2505497267072941, |
| "grad_norm": 0.5489148497581482, |
| "learning_rate": 8.636512541293396e-05, |
| "loss": 1.6889, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.25080103034491424, |
| "grad_norm": 0.5294693112373352, |
| "learning_rate": 8.633767514838811e-05, |
| "loss": 1.9634, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.2510523339825344, |
| "grad_norm": 0.2602441906929016, |
| "learning_rate": 8.631020165151041e-05, |
| "loss": 2.0264, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.25130363762015456, |
| "grad_norm": 0.5113153457641602, |
| "learning_rate": 8.62827049398659e-05, |
| "loss": 2.47, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2515549412577747, |
| "grad_norm": 0.21645587682724, |
| "learning_rate": 8.62551850310345e-05, |
| "loss": 1.525, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.2518062448953949, |
| "grad_norm": 0.48258715867996216, |
| "learning_rate": 8.622764194261093e-05, |
| "loss": 2.5367, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.25205754853301504, |
| "grad_norm": 0.4978300929069519, |
| "learning_rate": 8.620007569220472e-05, |
| "loss": 2.3648, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.25230885217063515, |
| "grad_norm": 0.3442172110080719, |
| "learning_rate": 8.617248629744028e-05, |
| "loss": 2.1073, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.2525601558082553, |
| "grad_norm": 0.2076217383146286, |
| "learning_rate": 8.614487377595672e-05, |
| "loss": 1.2976, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.25281145944587546, |
| "grad_norm": 0.41670069098472595, |
| "learning_rate": 8.611723814540802e-05, |
| "loss": 2.4169, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.2530627630834956, |
| "grad_norm": 0.1605192869901657, |
| "learning_rate": 8.608957942346287e-05, |
| "loss": 1.2566, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.2533140667211158, |
| "grad_norm": 0.3367489278316498, |
| "learning_rate": 8.606189762780478e-05, |
| "loss": 2.5501, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.25356537035873594, |
| "grad_norm": 0.5993397235870361, |
| "learning_rate": 8.603419277613195e-05, |
| "loss": 2.5038, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.2538166739963561, |
| "grad_norm": 0.4803079068660736, |
| "learning_rate": 8.600646488615738e-05, |
| "loss": 2.5279, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.25406797763397626, |
| "grad_norm": 0.22795291244983673, |
| "learning_rate": 8.597871397560877e-05, |
| "loss": 2.4772, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.2543192812715964, |
| "grad_norm": 0.43239662051200867, |
| "learning_rate": 8.595094006222853e-05, |
| "loss": 2.4182, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.2545705849092166, |
| "grad_norm": 0.5313870310783386, |
| "learning_rate": 8.592314316377379e-05, |
| "loss": 1.6524, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.25482188854683674, |
| "grad_norm": 0.30473679304122925, |
| "learning_rate": 8.589532329801639e-05, |
| "loss": 1.8479, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.25507319218445684, |
| "grad_norm": 0.5765677690505981, |
| "learning_rate": 8.586748048274281e-05, |
| "loss": 2.4291, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.255324495822077, |
| "grad_norm": 0.5507373809814453, |
| "learning_rate": 8.583961473575426e-05, |
| "loss": 1.8522, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.25557579945969716, |
| "grad_norm": 0.2905975878238678, |
| "learning_rate": 8.581172607486658e-05, |
| "loss": 2.0781, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.2558271030973173, |
| "grad_norm": 0.27408596873283386, |
| "learning_rate": 8.578381451791025e-05, |
| "loss": 2.203, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.2560784067349375, |
| "grad_norm": 0.3338567316532135, |
| "learning_rate": 8.575588008273039e-05, |
| "loss": 2.1768, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.25632971037255764, |
| "grad_norm": 0.44413185119628906, |
| "learning_rate": 8.572792278718678e-05, |
| "loss": 2.2131, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.2565810140101778, |
| "grad_norm": 0.3354492783546448, |
| "learning_rate": 8.569994264915378e-05, |
| "loss": 2.367, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.25683231764779796, |
| "grad_norm": 0.4711921513080597, |
| "learning_rate": 8.567193968652037e-05, |
| "loss": 2.0846, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.2570836212854181, |
| "grad_norm": 0.4673987925052643, |
| "learning_rate": 8.564391391719013e-05, |
| "loss": 2.2828, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.2573349249230383, |
| "grad_norm": 0.3377786874771118, |
| "learning_rate": 8.56158653590812e-05, |
| "loss": 1.8866, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.25758622856065844, |
| "grad_norm": 0.3204606771469116, |
| "learning_rate": 8.558779403012631e-05, |
| "loss": 2.3215, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2578375321982786, |
| "grad_norm": 0.2833389341831207, |
| "learning_rate": 8.555969994827272e-05, |
| "loss": 1.8964, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.2580888358358987, |
| "grad_norm": 0.33112481236457825, |
| "learning_rate": 8.553158313148229e-05, |
| "loss": 2.3466, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.25834013947351886, |
| "grad_norm": 0.3505350351333618, |
| "learning_rate": 8.550344359773135e-05, |
| "loss": 2.2383, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.258591443111139, |
| "grad_norm": 0.39331895112991333, |
| "learning_rate": 8.547528136501079e-05, |
| "loss": 1.9421, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.2588427467487592, |
| "grad_norm": 0.40246421098709106, |
| "learning_rate": 8.544709645132605e-05, |
| "loss": 2.3665, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.25909405038637934, |
| "grad_norm": 0.3660884499549866, |
| "learning_rate": 8.541888887469698e-05, |
| "loss": 2.3929, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.2593453540239995, |
| "grad_norm": 0.47951841354370117, |
| "learning_rate": 8.5390658653158e-05, |
| "loss": 2.4356, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.25959665766161966, |
| "grad_norm": 0.42761266231536865, |
| "learning_rate": 8.536240580475795e-05, |
| "loss": 2.3366, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.2598479612992398, |
| "grad_norm": 0.3438764214515686, |
| "learning_rate": 8.53341303475602e-05, |
| "loss": 2.2531, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.26009926493686, |
| "grad_norm": 0.6126356720924377, |
| "learning_rate": 8.530583229964249e-05, |
| "loss": 2.0537, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.26035056857448013, |
| "grad_norm": 0.24006953835487366, |
| "learning_rate": 8.527751167909711e-05, |
| "loss": 2.0906, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.2606018722121003, |
| "grad_norm": 0.5417637825012207, |
| "learning_rate": 8.524916850403068e-05, |
| "loss": 2.1112, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.2608531758497204, |
| "grad_norm": 0.2629411220550537, |
| "learning_rate": 8.52208027925643e-05, |
| "loss": 1.0985, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.26110447948734056, |
| "grad_norm": 0.36217573285102844, |
| "learning_rate": 8.519241456283345e-05, |
| "loss": 1.8339, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.2613557831249607, |
| "grad_norm": 0.398267537355423, |
| "learning_rate": 8.516400383298803e-05, |
| "loss": 2.3536, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2616070867625809, |
| "grad_norm": 0.4537069499492645, |
| "learning_rate": 8.513557062119232e-05, |
| "loss": 1.8612, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.26185839040020104, |
| "grad_norm": 0.39333221316337585, |
| "learning_rate": 8.510711494562496e-05, |
| "loss": 2.1242, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.2621096940378212, |
| "grad_norm": 0.3150976300239563, |
| "learning_rate": 8.507863682447896e-05, |
| "loss": 2.1419, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.26236099767544135, |
| "grad_norm": 0.33974120020866394, |
| "learning_rate": 8.505013627596167e-05, |
| "loss": 2.7756, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.2626123013130615, |
| "grad_norm": 0.23866431415081024, |
| "learning_rate": 8.502161331829483e-05, |
| "loss": 2.24, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2628636049506817, |
| "grad_norm": 0.3774257004261017, |
| "learning_rate": 8.499306796971443e-05, |
| "loss": 2.6075, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.26311490858830183, |
| "grad_norm": 0.3210359215736389, |
| "learning_rate": 8.49645002484708e-05, |
| "loss": 2.1907, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.263366212225922, |
| "grad_norm": 0.5776761174201965, |
| "learning_rate": 8.493591017282862e-05, |
| "loss": 1.9049, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.26361751586354215, |
| "grad_norm": 0.3872734606266022, |
| "learning_rate": 8.490729776106681e-05, |
| "loss": 1.8222, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.26386881950116226, |
| "grad_norm": 0.36327892541885376, |
| "learning_rate": 8.48786630314786e-05, |
| "loss": 2.2852, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2641201231387824, |
| "grad_norm": 0.5938968658447266, |
| "learning_rate": 8.485000600237147e-05, |
| "loss": 2.2302, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.2643714267764026, |
| "grad_norm": 0.23401345312595367, |
| "learning_rate": 8.482132669206718e-05, |
| "loss": 2.2253, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.26462273041402273, |
| "grad_norm": 0.36279526352882385, |
| "learning_rate": 8.479262511890168e-05, |
| "loss": 2.5141, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.2648740340516429, |
| "grad_norm": 0.362393319606781, |
| "learning_rate": 8.476390130122524e-05, |
| "loss": 2.4776, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.26512533768926305, |
| "grad_norm": 0.42770296335220337, |
| "learning_rate": 8.473515525740228e-05, |
| "loss": 2.5201, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.2653766413268832, |
| "grad_norm": 0.3319082260131836, |
| "learning_rate": 8.470638700581146e-05, |
| "loss": 2.2149, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.26562794496450337, |
| "grad_norm": 0.6831150054931641, |
| "learning_rate": 8.467759656484563e-05, |
| "loss": 2.0079, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.26587924860212353, |
| "grad_norm": 0.3941812813282013, |
| "learning_rate": 8.464878395291186e-05, |
| "loss": 2.2493, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.2661305522397437, |
| "grad_norm": 0.5003316402435303, |
| "learning_rate": 8.461994918843133e-05, |
| "loss": 2.0187, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.26638185587736385, |
| "grad_norm": 0.7166873216629028, |
| "learning_rate": 8.459109228983943e-05, |
| "loss": 2.4745, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.26663315951498395, |
| "grad_norm": 0.5578575134277344, |
| "learning_rate": 8.456221327558572e-05, |
| "loss": 2.4646, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.2668844631526041, |
| "grad_norm": 1.8631904125213623, |
| "learning_rate": 8.45333121641338e-05, |
| "loss": 2.7934, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.26713576679022427, |
| "grad_norm": 0.30865031480789185, |
| "learning_rate": 8.450438897396154e-05, |
| "loss": 1.8842, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.26738707042784443, |
| "grad_norm": 0.3725493848323822, |
| "learning_rate": 8.447544372356082e-05, |
| "loss": 1.9359, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.2676383740654646, |
| "grad_norm": 0.5875294804573059, |
| "learning_rate": 8.444647643143766e-05, |
| "loss": 2.611, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.26788967770308475, |
| "grad_norm": 0.20553795993328094, |
| "learning_rate": 8.441748711611216e-05, |
| "loss": 2.3654, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.2681409813407049, |
| "grad_norm": 0.814967691898346, |
| "learning_rate": 8.438847579611855e-05, |
| "loss": 1.9915, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.26839228497832507, |
| "grad_norm": 0.19453756511211395, |
| "learning_rate": 8.435944249000504e-05, |
| "loss": 1.5404, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.26864358861594523, |
| "grad_norm": 0.2923291027545929, |
| "learning_rate": 8.433038721633399e-05, |
| "loss": 2.6746, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.2688948922535654, |
| "grad_norm": 0.4880603551864624, |
| "learning_rate": 8.430130999368173e-05, |
| "loss": 2.8785, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.26914619589118555, |
| "grad_norm": 0.3680141270160675, |
| "learning_rate": 8.427221084063866e-05, |
| "loss": 2.2196, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.2693974995288057, |
| "grad_norm": 0.3895852863788605, |
| "learning_rate": 8.42430897758092e-05, |
| "loss": 2.1979, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.2696488031664258, |
| "grad_norm": 0.1814226508140564, |
| "learning_rate": 8.421394681781176e-05, |
| "loss": 0.8641, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.26990010680404597, |
| "grad_norm": 0.5926142334938049, |
| "learning_rate": 8.418478198527878e-05, |
| "loss": 2.3769, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.27015141044166613, |
| "grad_norm": 0.47387734055519104, |
| "learning_rate": 8.415559529685666e-05, |
| "loss": 2.4868, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.2704027140792863, |
| "grad_norm": 0.33305391669273376, |
| "learning_rate": 8.412638677120577e-05, |
| "loss": 1.9719, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.27065401771690645, |
| "grad_norm": 0.40535521507263184, |
| "learning_rate": 8.409715642700048e-05, |
| "loss": 2.0834, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.2709053213545266, |
| "grad_norm": 0.2667929530143738, |
| "learning_rate": 8.406790428292906e-05, |
| "loss": 2.2652, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.27115662499214677, |
| "grad_norm": 0.35209354758262634, |
| "learning_rate": 8.403863035769372e-05, |
| "loss": 1.9958, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.2714079286297669, |
| "grad_norm": 0.4001673758029938, |
| "learning_rate": 8.400933467001066e-05, |
| "loss": 2.2901, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2716592322673871, |
| "grad_norm": 0.397296667098999, |
| "learning_rate": 8.39800172386099e-05, |
| "loss": 2.3905, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.27191053590500724, |
| "grad_norm": 0.40791282057762146, |
| "learning_rate": 8.395067808223547e-05, |
| "loss": 2.0415, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.2721618395426274, |
| "grad_norm": 0.42828860878944397, |
| "learning_rate": 8.392131721964516e-05, |
| "loss": 2.4725, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.2724131431802475, |
| "grad_norm": 0.2626012861728668, |
| "learning_rate": 8.389193466961076e-05, |
| "loss": 2.4184, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.27266444681786767, |
| "grad_norm": 0.5925227999687195, |
| "learning_rate": 8.386253045091785e-05, |
| "loss": 2.4812, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.2729157504554878, |
| "grad_norm": 0.43262800574302673, |
| "learning_rate": 8.383310458236589e-05, |
| "loss": 2.0934, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.273167054093108, |
| "grad_norm": 0.3726051151752472, |
| "learning_rate": 8.380365708276818e-05, |
| "loss": 2.1072, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.27341835773072815, |
| "grad_norm": 0.2803446352481842, |
| "learning_rate": 8.377418797095186e-05, |
| "loss": 1.6965, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.2736696613683483, |
| "grad_norm": 0.4271281361579895, |
| "learning_rate": 8.374469726575786e-05, |
| "loss": 2.499, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.27392096500596846, |
| "grad_norm": 0.4066520929336548, |
| "learning_rate": 8.371518498604094e-05, |
| "loss": 2.5189, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2741722686435886, |
| "grad_norm": 0.6859603524208069, |
| "learning_rate": 8.368565115066963e-05, |
| "loss": 2.4845, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.2744235722812088, |
| "grad_norm": 0.5193429589271545, |
| "learning_rate": 8.365609577852629e-05, |
| "loss": 2.2581, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.27467487591882894, |
| "grad_norm": 0.2996985912322998, |
| "learning_rate": 8.362651888850699e-05, |
| "loss": 2.7202, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.2749261795564491, |
| "grad_norm": 0.4352916181087494, |
| "learning_rate": 8.359692049952158e-05, |
| "loss": 1.9127, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.27517748319406926, |
| "grad_norm": 0.5546665191650391, |
| "learning_rate": 8.356730063049368e-05, |
| "loss": 1.5597, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.27542878683168937, |
| "grad_norm": 0.45468562841415405, |
| "learning_rate": 8.35376593003606e-05, |
| "loss": 2.2546, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.2756800904693095, |
| "grad_norm": 0.5346075892448425, |
| "learning_rate": 8.350799652807342e-05, |
| "loss": 2.1757, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.2759313941069297, |
| "grad_norm": 0.4020247161388397, |
| "learning_rate": 8.347831233259688e-05, |
| "loss": 2.5377, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.27618269774454984, |
| "grad_norm": 0.44145265221595764, |
| "learning_rate": 8.344860673290946e-05, |
| "loss": 2.526, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.27643400138217, |
| "grad_norm": 0.3393920361995697, |
| "learning_rate": 8.341887974800328e-05, |
| "loss": 2.4143, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.27668530501979016, |
| "grad_norm": 3.1971209049224854, |
| "learning_rate": 8.338913139688416e-05, |
| "loss": 2.1105, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.2769366086574103, |
| "grad_norm": 0.28271710872650146, |
| "learning_rate": 8.335936169857159e-05, |
| "loss": 1.4886, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.2771879122950305, |
| "grad_norm": 0.4334240257740021, |
| "learning_rate": 8.332957067209869e-05, |
| "loss": 2.3919, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.27743921593265064, |
| "grad_norm": 0.31101804971694946, |
| "learning_rate": 8.329975833651221e-05, |
| "loss": 2.5608, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.2776905195702708, |
| "grad_norm": 0.5559032559394836, |
| "learning_rate": 8.326992471087256e-05, |
| "loss": 2.0525, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.27794182320789096, |
| "grad_norm": 0.2627129852771759, |
| "learning_rate": 8.324006981425371e-05, |
| "loss": 2.4173, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.27819312684551106, |
| "grad_norm": 0.5957119464874268, |
| "learning_rate": 8.321019366574326e-05, |
| "loss": 2.3493, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.2784444304831312, |
| "grad_norm": 0.4542389512062073, |
| "learning_rate": 8.318029628444241e-05, |
| "loss": 2.2662, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.2786957341207514, |
| "grad_norm": 0.6782068014144897, |
| "learning_rate": 8.315037768946591e-05, |
| "loss": 2.0514, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.27894703775837154, |
| "grad_norm": 0.17118225991725922, |
| "learning_rate": 8.312043789994209e-05, |
| "loss": 0.6076, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.2791983413959917, |
| "grad_norm": 0.598452091217041, |
| "learning_rate": 8.309047693501278e-05, |
| "loss": 1.9878, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.27944964503361186, |
| "grad_norm": 0.4604327976703644, |
| "learning_rate": 8.306049481383344e-05, |
| "loss": 2.0652, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.279700948671232, |
| "grad_norm": 0.3509052097797394, |
| "learning_rate": 8.3030491555573e-05, |
| "loss": 2.3199, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.2799522523088522, |
| "grad_norm": 0.32564830780029297, |
| "learning_rate": 8.300046717941387e-05, |
| "loss": 2.5692, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.28020355594647234, |
| "grad_norm": 0.4420936107635498, |
| "learning_rate": 8.297042170455207e-05, |
| "loss": 2.1994, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.2804548595840925, |
| "grad_norm": 0.19294236600399017, |
| "learning_rate": 8.294035515019699e-05, |
| "loss": 1.8342, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.28070616322171266, |
| "grad_norm": 0.28902173042297363, |
| "learning_rate": 8.291026753557154e-05, |
| "loss": 1.9677, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.2809574668593328, |
| "grad_norm": 0.5199251770973206, |
| "learning_rate": 8.288015887991214e-05, |
| "loss": 2.2992, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.2812087704969529, |
| "grad_norm": 0.445320725440979, |
| "learning_rate": 8.285002920246862e-05, |
| "loss": 2.0663, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.2814600741345731, |
| "grad_norm": 0.3564877510070801, |
| "learning_rate": 8.281987852250424e-05, |
| "loss": 2.447, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.28171137777219324, |
| "grad_norm": 0.4224308133125305, |
| "learning_rate": 8.27897068592957e-05, |
| "loss": 2.228, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.2819626814098134, |
| "grad_norm": 0.40947914123535156, |
| "learning_rate": 8.275951423213312e-05, |
| "loss": 2.3052, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.28221398504743356, |
| "grad_norm": 0.38864952325820923, |
| "learning_rate": 8.272930066032006e-05, |
| "loss": 2.1341, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.2824652886850537, |
| "grad_norm": 0.5043284893035889, |
| "learning_rate": 8.26990661631734e-05, |
| "loss": 1.8143, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.2827165923226739, |
| "grad_norm": 0.3912737965583801, |
| "learning_rate": 8.266881076002344e-05, |
| "loss": 2.1044, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.28296789596029404, |
| "grad_norm": 0.39619266986846924, |
| "learning_rate": 8.263853447021382e-05, |
| "loss": 2.2082, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.2832191995979142, |
| "grad_norm": 0.2806078791618347, |
| "learning_rate": 8.260823731310159e-05, |
| "loss": 2.2696, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.28347050323553435, |
| "grad_norm": 0.4805527627468109, |
| "learning_rate": 8.257791930805707e-05, |
| "loss": 2.0736, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.2837218068731545, |
| "grad_norm": 0.37303459644317627, |
| "learning_rate": 8.254758047446394e-05, |
| "loss": 2.1587, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.2839731105107746, |
| "grad_norm": 0.4668956398963928, |
| "learning_rate": 8.251722083171923e-05, |
| "loss": 1.9901, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2842244141483948, |
| "grad_norm": 0.2532447278499603, |
| "learning_rate": 8.248684039923322e-05, |
| "loss": 2.2625, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.28447571778601494, |
| "grad_norm": 0.5377678871154785, |
| "learning_rate": 8.245643919642951e-05, |
| "loss": 2.0567, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.2847270214236351, |
| "grad_norm": 0.4332278370857239, |
| "learning_rate": 8.242601724274498e-05, |
| "loss": 2.323, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.28497832506125526, |
| "grad_norm": 0.284900039434433, |
| "learning_rate": 8.239557455762976e-05, |
| "loss": 2.0663, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.2852296286988754, |
| "grad_norm": 0.385785847902298, |
| "learning_rate": 8.236511116054725e-05, |
| "loss": 2.2765, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.2854809323364956, |
| "grad_norm": 0.3278866410255432, |
| "learning_rate": 8.233462707097413e-05, |
| "loss": 2.3421, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.28573223597411573, |
| "grad_norm": 0.31879276037216187, |
| "learning_rate": 8.230412230840019e-05, |
| "loss": 2.6483, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.2859835396117359, |
| "grad_norm": 0.41390541195869446, |
| "learning_rate": 8.227359689232856e-05, |
| "loss": 2.5977, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.28623484324935605, |
| "grad_norm": 0.657051682472229, |
| "learning_rate": 8.224305084227553e-05, |
| "loss": 2.1319, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.2864861468869762, |
| "grad_norm": 0.47445768117904663, |
| "learning_rate": 8.221248417777057e-05, |
| "loss": 2.4098, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.28673745052459637, |
| "grad_norm": 0.3382059931755066, |
| "learning_rate": 8.218189691835636e-05, |
| "loss": 2.4012, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.2869887541622165, |
| "grad_norm": 0.23410090804100037, |
| "learning_rate": 8.215128908358871e-05, |
| "loss": 0.9557, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.28724005779983663, |
| "grad_norm": 0.3863859176635742, |
| "learning_rate": 8.212066069303661e-05, |
| "loss": 2.5172, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.2874913614374568, |
| "grad_norm": 0.4687202572822571, |
| "learning_rate": 8.209001176628218e-05, |
| "loss": 2.2719, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.28774266507507695, |
| "grad_norm": 0.3292360007762909, |
| "learning_rate": 8.20593423229207e-05, |
| "loss": 2.4493, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.2879939687126971, |
| "grad_norm": 0.3099132180213928, |
| "learning_rate": 8.20286523825605e-05, |
| "loss": 2.2859, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.28824527235031727, |
| "grad_norm": 0.4771600663661957, |
| "learning_rate": 8.19979419648231e-05, |
| "loss": 2.1732, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.28849657598793743, |
| "grad_norm": 0.23781466484069824, |
| "learning_rate": 8.196721108934305e-05, |
| "loss": 2.0926, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.2887478796255576, |
| "grad_norm": 0.267600953578949, |
| "learning_rate": 8.1936459775768e-05, |
| "loss": 2.3248, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.28899918326317775, |
| "grad_norm": 0.28264153003692627, |
| "learning_rate": 8.190568804375867e-05, |
| "loss": 1.7394, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2892504869007979, |
| "grad_norm": 0.47913792729377747, |
| "learning_rate": 8.187489591298883e-05, |
| "loss": 2.2019, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.28950179053841807, |
| "grad_norm": 0.46330931782722473, |
| "learning_rate": 8.184408340314528e-05, |
| "loss": 2.0046, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.2897530941760382, |
| "grad_norm": 0.46758121252059937, |
| "learning_rate": 8.181325053392788e-05, |
| "loss": 2.7781, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.29000439781365833, |
| "grad_norm": 0.4416895806789398, |
| "learning_rate": 8.178239732504949e-05, |
| "loss": 1.9636, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.2902557014512785, |
| "grad_norm": 0.4437691867351532, |
| "learning_rate": 8.175152379623597e-05, |
| "loss": 2.0821, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.29050700508889865, |
| "grad_norm": 0.41940081119537354, |
| "learning_rate": 8.172062996722615e-05, |
| "loss": 2.164, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.2907583087265188, |
| "grad_norm": 0.34322845935821533, |
| "learning_rate": 8.168971585777189e-05, |
| "loss": 2.3625, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.29100961236413897, |
| "grad_norm": 0.349292516708374, |
| "learning_rate": 8.165878148763797e-05, |
| "loss": 2.1895, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.29126091600175913, |
| "grad_norm": 0.4713262915611267, |
| "learning_rate": 8.162782687660218e-05, |
| "loss": 2.1822, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.2915122196393793, |
| "grad_norm": 0.498413622379303, |
| "learning_rate": 8.159685204445517e-05, |
| "loss": 1.8382, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.29176352327699945, |
| "grad_norm": 0.2707071602344513, |
| "learning_rate": 8.156585701100057e-05, |
| "loss": 1.8682, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.2920148269146196, |
| "grad_norm": 0.16994576156139374, |
| "learning_rate": 8.153484179605494e-05, |
| "loss": 1.4461, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.29226613055223977, |
| "grad_norm": 0.46193161606788635, |
| "learning_rate": 8.150380641944771e-05, |
| "loss": 2.2424, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.29251743418985987, |
| "grad_norm": 0.4411792457103729, |
| "learning_rate": 8.14727509010212e-05, |
| "loss": 2.4993, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.29276873782748003, |
| "grad_norm": 0.49905550479888916, |
| "learning_rate": 8.144167526063062e-05, |
| "loss": 2.0086, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.2930200414651002, |
| "grad_norm": 0.3959316909313202, |
| "learning_rate": 8.141057951814405e-05, |
| "loss": 2.1823, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.29327134510272035, |
| "grad_norm": 0.4969998002052307, |
| "learning_rate": 8.137946369344243e-05, |
| "loss": 1.7895, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.2935226487403405, |
| "grad_norm": 0.3000521659851074, |
| "learning_rate": 8.13483278064195e-05, |
| "loss": 2.1703, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.29377395237796067, |
| "grad_norm": 0.3654926121234894, |
| "learning_rate": 8.131717187698185e-05, |
| "loss": 2.311, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.2940252560155808, |
| "grad_norm": 0.4902850091457367, |
| "learning_rate": 8.128599592504891e-05, |
| "loss": 2.4557, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.294276559653201, |
| "grad_norm": 0.3724076449871063, |
| "learning_rate": 8.125479997055286e-05, |
| "loss": 2.0032, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.29452786329082115, |
| "grad_norm": 0.4023280739784241, |
| "learning_rate": 8.122358403343872e-05, |
| "loss": 2.2439, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.2947791669284413, |
| "grad_norm": 0.46174219250679016, |
| "learning_rate": 8.119234813366425e-05, |
| "loss": 2.0743, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.29503047056606146, |
| "grad_norm": 0.30935728549957275, |
| "learning_rate": 8.11610922912e-05, |
| "loss": 2.7031, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.2952817742036816, |
| "grad_norm": 0.3213157653808594, |
| "learning_rate": 8.11298165260292e-05, |
| "loss": 2.5725, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.29553307784130173, |
| "grad_norm": 0.5222627520561218, |
| "learning_rate": 8.109852085814796e-05, |
| "loss": 2.2112, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.2957843814789219, |
| "grad_norm": 0.3197146952152252, |
| "learning_rate": 8.106720530756495e-05, |
| "loss": 2.4653, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.29603568511654205, |
| "grad_norm": 0.34864383935928345, |
| "learning_rate": 8.103586989430165e-05, |
| "loss": 2.395, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.2962869887541622, |
| "grad_norm": 0.5509148240089417, |
| "learning_rate": 8.100451463839223e-05, |
| "loss": 2.0561, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.29653829239178237, |
| "grad_norm": 0.5259515643119812, |
| "learning_rate": 8.097313955988355e-05, |
| "loss": 2.1252, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2967895960294025, |
| "grad_norm": 0.2766565978527069, |
| "learning_rate": 8.094174467883509e-05, |
| "loss": 1.5991, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.2970408996670227, |
| "grad_norm": 0.3643835484981537, |
| "learning_rate": 8.091033001531905e-05, |
| "loss": 1.9686, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.29729220330464284, |
| "grad_norm": 0.45411252975463867, |
| "learning_rate": 8.087889558942023e-05, |
| "loss": 2.2172, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.297543506942263, |
| "grad_norm": 0.47071462869644165, |
| "learning_rate": 8.084744142123613e-05, |
| "loss": 1.9389, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.29779481057988316, |
| "grad_norm": 0.3977017402648926, |
| "learning_rate": 8.081596753087682e-05, |
| "loss": 2.1713, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.2980461142175033, |
| "grad_norm": 0.6268634796142578, |
| "learning_rate": 8.0784473938465e-05, |
| "loss": 2.168, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.2982974178551234, |
| "grad_norm": 0.31418439745903015, |
| "learning_rate": 8.075296066413593e-05, |
| "loss": 2.0036, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.2985487214927436, |
| "grad_norm": 1.348002314567566, |
| "learning_rate": 8.07214277280375e-05, |
| "loss": 2.6528, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.29880002513036374, |
| "grad_norm": 0.2204774022102356, |
| "learning_rate": 8.068987515033015e-05, |
| "loss": 2.1439, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.2990513287679839, |
| "grad_norm": 0.34849774837493896, |
| "learning_rate": 8.06583029511869e-05, |
| "loss": 2.1119, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.29930263240560406, |
| "grad_norm": 0.3371882438659668, |
| "learning_rate": 8.062671115079325e-05, |
| "loss": 2.3992, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.2995539360432242, |
| "grad_norm": 0.5298404097557068, |
| "learning_rate": 8.059509976934733e-05, |
| "loss": 1.9743, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.2998052396808444, |
| "grad_norm": 0.33625608682632446, |
| "learning_rate": 8.056346882705972e-05, |
| "loss": 1.9329, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.30005654331846454, |
| "grad_norm": 0.3414923846721649, |
| "learning_rate": 8.053181834415351e-05, |
| "loss": 2.3464, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.3003078469560847, |
| "grad_norm": 0.49444150924682617, |
| "learning_rate": 8.05001483408643e-05, |
| "loss": 2.5747, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.30055915059370486, |
| "grad_norm": 0.4675874412059784, |
| "learning_rate": 8.046845883744016e-05, |
| "loss": 2.0696, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.300810454231325, |
| "grad_norm": 0.1863304227590561, |
| "learning_rate": 8.043674985414167e-05, |
| "loss": 1.0368, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.3010617578689452, |
| "grad_norm": 0.5033738613128662, |
| "learning_rate": 8.040502141124178e-05, |
| "loss": 2.0936, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.3013130615065653, |
| "grad_norm": 0.36377644538879395, |
| "learning_rate": 8.037327352902598e-05, |
| "loss": 1.9558, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.30156436514418544, |
| "grad_norm": 0.48326364159584045, |
| "learning_rate": 8.034150622779209e-05, |
| "loss": 2.2018, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3018156687818056, |
| "grad_norm": 0.5234176516532898, |
| "learning_rate": 8.030971952785041e-05, |
| "loss": 2.0583, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.30206697241942576, |
| "grad_norm": 0.4377163052558899, |
| "learning_rate": 8.027791344952363e-05, |
| "loss": 2.1404, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.3023182760570459, |
| "grad_norm": 0.25891783833503723, |
| "learning_rate": 8.024608801314684e-05, |
| "loss": 1.7364, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.3025695796946661, |
| "grad_norm": 0.508296549320221, |
| "learning_rate": 8.021424323906745e-05, |
| "loss": 2.5818, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.30282088333228624, |
| "grad_norm": 0.4298109710216522, |
| "learning_rate": 8.018237914764533e-05, |
| "loss": 2.0584, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.3030721869699064, |
| "grad_norm": 0.40173277258872986, |
| "learning_rate": 8.01504957592526e-05, |
| "loss": 1.7585, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.30332349060752656, |
| "grad_norm": 0.5122236013412476, |
| "learning_rate": 8.011859309427379e-05, |
| "loss": 1.9794, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.3035747942451467, |
| "grad_norm": 0.624552309513092, |
| "learning_rate": 8.008667117310572e-05, |
| "loss": 1.8687, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.3038260978827669, |
| "grad_norm": 0.4688674807548523, |
| "learning_rate": 8.005473001615753e-05, |
| "loss": 2.1042, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.304077401520387, |
| "grad_norm": 0.4452720284461975, |
| "learning_rate": 8.002276964385065e-05, |
| "loss": 2.0988, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.30432870515800714, |
| "grad_norm": 0.4090367257595062, |
| "learning_rate": 7.99907900766188e-05, |
| "loss": 2.4699, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.3045800087956273, |
| "grad_norm": 0.5209043025970459, |
| "learning_rate": 7.9958791334908e-05, |
| "loss": 2.0238, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.30483131243324746, |
| "grad_norm": 0.2394983172416687, |
| "learning_rate": 7.992677343917648e-05, |
| "loss": 1.0293, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.3050826160708676, |
| "grad_norm": 0.38234421610832214, |
| "learning_rate": 7.989473640989475e-05, |
| "loss": 2.5856, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.3053339197084878, |
| "grad_norm": 0.3305385112762451, |
| "learning_rate": 7.986268026754554e-05, |
| "loss": 2.4238, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.30558522334610794, |
| "grad_norm": 0.3514966368675232, |
| "learning_rate": 7.98306050326238e-05, |
| "loss": 2.1778, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.3058365269837281, |
| "grad_norm": 0.3517964780330658, |
| "learning_rate": 7.979851072563669e-05, |
| "loss": 1.7162, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.30608783062134826, |
| "grad_norm": 0.3453415036201477, |
| "learning_rate": 7.976639736710357e-05, |
| "loss": 2.5491, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.3063391342589684, |
| "grad_norm": 0.4221291244029999, |
| "learning_rate": 7.973426497755599e-05, |
| "loss": 2.4673, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.3065904378965886, |
| "grad_norm": 0.4376787543296814, |
| "learning_rate": 7.970211357753761e-05, |
| "loss": 2.6587, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.30684174153420873, |
| "grad_norm": 0.31638938188552856, |
| "learning_rate": 7.96699431876043e-05, |
| "loss": 2.3988, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.30709304517182884, |
| "grad_norm": 0.4708680510520935, |
| "learning_rate": 7.963775382832407e-05, |
| "loss": 2.3329, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.307344348809449, |
| "grad_norm": 0.3276142179965973, |
| "learning_rate": 7.960554552027703e-05, |
| "loss": 2.2746, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.30759565244706916, |
| "grad_norm": 0.23761656880378723, |
| "learning_rate": 7.957331828405543e-05, |
| "loss": 2.0233, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.3078469560846893, |
| "grad_norm": 0.4292398691177368, |
| "learning_rate": 7.95410721402636e-05, |
| "loss": 1.866, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.3080982597223095, |
| "grad_norm": 0.24796533584594727, |
| "learning_rate": 7.9508807109518e-05, |
| "loss": 2.4157, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.30834956335992963, |
| "grad_norm": 0.4541628360748291, |
| "learning_rate": 7.947652321244709e-05, |
| "loss": 2.5871, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.3086008669975498, |
| "grad_norm": 3.6204922199249268, |
| "learning_rate": 7.944422046969146e-05, |
| "loss": 2.5856, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.30885217063516995, |
| "grad_norm": 0.1898091733455658, |
| "learning_rate": 7.941189890190375e-05, |
| "loss": 1.8819, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.3091034742727901, |
| "grad_norm": 0.4697364866733551, |
| "learning_rate": 7.937955852974856e-05, |
| "loss": 2.7554, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3093547779104103, |
| "grad_norm": 0.2671608030796051, |
| "learning_rate": 7.934719937390258e-05, |
| "loss": 2.0734, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.30960608154803043, |
| "grad_norm": 0.3299066424369812, |
| "learning_rate": 7.931482145505452e-05, |
| "loss": 2.3875, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.30985738518565054, |
| "grad_norm": 0.45493796467781067, |
| "learning_rate": 7.928242479390504e-05, |
| "loss": 2.1111, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.3101086888232707, |
| "grad_norm": 0.4255753755569458, |
| "learning_rate": 7.925000941116681e-05, |
| "loss": 1.9511, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.31035999246089085, |
| "grad_norm": 0.4670938551425934, |
| "learning_rate": 7.921757532756444e-05, |
| "loss": 1.9518, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.310611296098511, |
| "grad_norm": 0.3174899220466614, |
| "learning_rate": 7.918512256383452e-05, |
| "loss": 1.8118, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.3108625997361312, |
| "grad_norm": 0.35116392374038696, |
| "learning_rate": 7.915265114072562e-05, |
| "loss": 2.0331, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.31111390337375133, |
| "grad_norm": 0.30320197343826294, |
| "learning_rate": 7.912016107899818e-05, |
| "loss": 2.3592, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.3113652070113715, |
| "grad_norm": 0.3480928838253021, |
| "learning_rate": 7.908765239942455e-05, |
| "loss": 2.5963, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.31161651064899165, |
| "grad_norm": 0.3407917618751526, |
| "learning_rate": 7.905512512278904e-05, |
| "loss": 2.2058, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.3118678142866118, |
| "grad_norm": 0.3371085226535797, |
| "learning_rate": 7.902257926988781e-05, |
| "loss": 2.2821, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.31211911792423197, |
| "grad_norm": 0.5407741069793701, |
| "learning_rate": 7.89900148615289e-05, |
| "loss": 2.5554, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.31237042156185213, |
| "grad_norm": 0.5080079436302185, |
| "learning_rate": 7.895743191853224e-05, |
| "loss": 2.5177, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.3126217251994723, |
| "grad_norm": 0.4085712432861328, |
| "learning_rate": 7.892483046172958e-05, |
| "loss": 2.4434, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.3128730288370924, |
| "grad_norm": 0.727549135684967, |
| "learning_rate": 7.889221051196451e-05, |
| "loss": 2.0379, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.31312433247471255, |
| "grad_norm": 0.4746238887310028, |
| "learning_rate": 7.885957209009247e-05, |
| "loss": 2.1352, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.3133756361123327, |
| "grad_norm": 0.370112806558609, |
| "learning_rate": 7.882691521698069e-05, |
| "loss": 2.5383, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.31362693974995287, |
| "grad_norm": 0.4031289219856262, |
| "learning_rate": 7.879423991350817e-05, |
| "loss": 1.896, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.31387824338757303, |
| "grad_norm": 0.6527341604232788, |
| "learning_rate": 7.876154620056573e-05, |
| "loss": 2.2678, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.3141295470251932, |
| "grad_norm": 0.3396470546722412, |
| "learning_rate": 7.872883409905599e-05, |
| "loss": 2.593, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.31438085066281335, |
| "grad_norm": 0.4700802266597748, |
| "learning_rate": 7.869610362989322e-05, |
| "loss": 2.459, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.3146321543004335, |
| "grad_norm": 0.4480900466442108, |
| "learning_rate": 7.866335481400355e-05, |
| "loss": 1.8601, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.31488345793805367, |
| "grad_norm": 0.43752118945121765, |
| "learning_rate": 7.863058767232477e-05, |
| "loss": 2.5687, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.3151347615756738, |
| "grad_norm": 0.4294682741165161, |
| "learning_rate": 7.859780222580642e-05, |
| "loss": 1.6714, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.315386065213294, |
| "grad_norm": 0.4277234673500061, |
| "learning_rate": 7.856499849540973e-05, |
| "loss": 1.7993, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.3156373688509141, |
| "grad_norm": 0.2997349202632904, |
| "learning_rate": 7.85321765021076e-05, |
| "loss": 2.388, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.31588867248853425, |
| "grad_norm": 0.39641648530960083, |
| "learning_rate": 7.849933626688464e-05, |
| "loss": 2.3478, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.3161399761261544, |
| "grad_norm": 0.42108651995658875, |
| "learning_rate": 7.846647781073712e-05, |
| "loss": 2.4717, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.31639127976377457, |
| "grad_norm": 0.3705635964870453, |
| "learning_rate": 7.843360115467293e-05, |
| "loss": 2.0972, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.31664258340139473, |
| "grad_norm": 0.6227510571479797, |
| "learning_rate": 7.840070631971163e-05, |
| "loss": 1.7682, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3168938870390149, |
| "grad_norm": 0.35013604164123535, |
| "learning_rate": 7.836779332688436e-05, |
| "loss": 2.371, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.31714519067663505, |
| "grad_norm": 0.6708528995513916, |
| "learning_rate": 7.833486219723394e-05, |
| "loss": 2.0226, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.3173964943142552, |
| "grad_norm": 0.3249523639678955, |
| "learning_rate": 7.83019129518147e-05, |
| "loss": 1.9312, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.31764779795187537, |
| "grad_norm": 0.3649659752845764, |
| "learning_rate": 7.826894561169262e-05, |
| "loss": 2.6408, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.3178991015894955, |
| "grad_norm": 0.5461932420730591, |
| "learning_rate": 7.823596019794521e-05, |
| "loss": 2.23, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.3181504052271157, |
| "grad_norm": 0.48531582951545715, |
| "learning_rate": 7.820295673166155e-05, |
| "loss": 2.0363, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.31840170886473584, |
| "grad_norm": 0.3750603199005127, |
| "learning_rate": 7.81699352339423e-05, |
| "loss": 2.4194, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.31865301250235595, |
| "grad_norm": 0.6479251384735107, |
| "learning_rate": 7.813689572589952e-05, |
| "loss": 2.3267, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.3189043161399761, |
| "grad_norm": 0.3576231896877289, |
| "learning_rate": 7.810383822865697e-05, |
| "loss": 2.2102, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.31915561977759627, |
| "grad_norm": 0.3646045923233032, |
| "learning_rate": 7.807076276334973e-05, |
| "loss": 1.9242, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3194069234152164, |
| "grad_norm": 0.6005945205688477, |
| "learning_rate": 7.803766935112452e-05, |
| "loss": 1.7235, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.3196582270528366, |
| "grad_norm": 0.4535173773765564, |
| "learning_rate": 7.800455801313943e-05, |
| "loss": 1.8387, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.31990953069045674, |
| "grad_norm": 0.30238354206085205, |
| "learning_rate": 7.797142877056406e-05, |
| "loss": 1.626, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.3201608343280769, |
| "grad_norm": 0.4098001718521118, |
| "learning_rate": 7.793828164457944e-05, |
| "loss": 1.9611, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.32041213796569706, |
| "grad_norm": 0.3639359772205353, |
| "learning_rate": 7.790511665637803e-05, |
| "loss": 2.1905, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.3206634416033172, |
| "grad_norm": 0.3771366477012634, |
| "learning_rate": 7.787193382716374e-05, |
| "loss": 2.0498, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.3209147452409374, |
| "grad_norm": 0.22171245515346527, |
| "learning_rate": 7.783873317815184e-05, |
| "loss": 1.7027, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.32116604887855754, |
| "grad_norm": 0.5142925381660461, |
| "learning_rate": 7.780551473056904e-05, |
| "loss": 2.4564, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.32141735251617765, |
| "grad_norm": 0.3985360264778137, |
| "learning_rate": 7.777227850565341e-05, |
| "loss": 2.3719, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.3216686561537978, |
| "grad_norm": 0.48328259587287903, |
| "learning_rate": 7.773902452465436e-05, |
| "loss": 2.405, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.32191995979141796, |
| "grad_norm": 0.455669641494751, |
| "learning_rate": 7.77057528088327e-05, |
| "loss": 2.2332, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.3221712634290381, |
| "grad_norm": 0.2635050415992737, |
| "learning_rate": 7.767246337946053e-05, |
| "loss": 2.0382, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.3224225670666583, |
| "grad_norm": 0.46467408537864685, |
| "learning_rate": 7.763915625782132e-05, |
| "loss": 2.1012, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.32267387070427844, |
| "grad_norm": 0.44746190309524536, |
| "learning_rate": 7.76058314652098e-05, |
| "loss": 1.9779, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.3229251743418986, |
| "grad_norm": 0.4189201295375824, |
| "learning_rate": 7.757248902293209e-05, |
| "loss": 2.7604, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.32317647797951876, |
| "grad_norm": 0.35619938373565674, |
| "learning_rate": 7.753912895230546e-05, |
| "loss": 1.6188, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.3234277816171389, |
| "grad_norm": 0.31852611899375916, |
| "learning_rate": 7.750575127465858e-05, |
| "loss": 2.2916, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.3236790852547591, |
| "grad_norm": 0.427613765001297, |
| "learning_rate": 7.747235601133129e-05, |
| "loss": 1.7733, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.32393038889237924, |
| "grad_norm": 0.4661354720592499, |
| "learning_rate": 7.74389431836747e-05, |
| "loss": 2.4934, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.3241816925299994, |
| "grad_norm": 0.2827777862548828, |
| "learning_rate": 7.740551281305119e-05, |
| "loss": 1.927, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.3244329961676195, |
| "grad_norm": 0.37848183512687683, |
| "learning_rate": 7.737206492083428e-05, |
| "loss": 2.2857, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.32468429980523966, |
| "grad_norm": 0.4958799183368683, |
| "learning_rate": 7.733859952840875e-05, |
| "loss": 2.1101, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.3249356034428598, |
| "grad_norm": 0.27771416306495667, |
| "learning_rate": 7.730511665717056e-05, |
| "loss": 2.6824, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.32518690708048, |
| "grad_norm": 0.27138715982437134, |
| "learning_rate": 7.72716163285268e-05, |
| "loss": 1.9027, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.32543821071810014, |
| "grad_norm": 0.40234288573265076, |
| "learning_rate": 7.723809856389578e-05, |
| "loss": 1.9377, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.3256895143557203, |
| "grad_norm": 0.45054858922958374, |
| "learning_rate": 7.720456338470696e-05, |
| "loss": 2.0891, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.32594081799334046, |
| "grad_norm": 0.28506696224212646, |
| "learning_rate": 7.717101081240087e-05, |
| "loss": 2.0881, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.3261921216309606, |
| "grad_norm": 0.621242880821228, |
| "learning_rate": 7.713744086842921e-05, |
| "loss": 2.3001, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.3264434252685808, |
| "grad_norm": 0.3025634288787842, |
| "learning_rate": 7.710385357425479e-05, |
| "loss": 2.4073, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.32669472890620094, |
| "grad_norm": 0.4620816707611084, |
| "learning_rate": 7.707024895135147e-05, |
| "loss": 2.1442, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3269460325438211, |
| "grad_norm": 0.47035887837409973, |
| "learning_rate": 7.703662702120424e-05, |
| "loss": 2.2738, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.3271973361814412, |
| "grad_norm": 0.4486905038356781, |
| "learning_rate": 7.700298780530913e-05, |
| "loss": 2.5139, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.32744863981906136, |
| "grad_norm": 0.459060400724411, |
| "learning_rate": 7.69693313251732e-05, |
| "loss": 2.2898, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.3276999434566815, |
| "grad_norm": 0.3948078155517578, |
| "learning_rate": 7.693565760231461e-05, |
| "loss": 2.0639, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.3279512470943017, |
| "grad_norm": 0.45571649074554443, |
| "learning_rate": 7.690196665826247e-05, |
| "loss": 2.3439, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.32820255073192184, |
| "grad_norm": 0.45678454637527466, |
| "learning_rate": 7.686825851455695e-05, |
| "loss": 2.479, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.328453854369542, |
| "grad_norm": 0.3192417025566101, |
| "learning_rate": 7.683453319274922e-05, |
| "loss": 2.2833, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.32870515800716216, |
| "grad_norm": 0.3500188887119293, |
| "learning_rate": 7.68007907144014e-05, |
| "loss": 2.1342, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.3289564616447823, |
| "grad_norm": 0.38774073123931885, |
| "learning_rate": 7.676703110108658e-05, |
| "loss": 2.2442, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.3292077652824025, |
| "grad_norm": 0.4461459219455719, |
| "learning_rate": 7.673325437438885e-05, |
| "loss": 2.4629, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.32945906892002264, |
| "grad_norm": 0.42307528853416443, |
| "learning_rate": 7.669946055590318e-05, |
| "loss": 2.6043, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.3297103725576428, |
| "grad_norm": 0.3208043575286865, |
| "learning_rate": 7.666564966723552e-05, |
| "loss": 2.4378, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.32996167619526295, |
| "grad_norm": 0.42787060141563416, |
| "learning_rate": 7.663182173000269e-05, |
| "loss": 2.2689, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.33021297983288306, |
| "grad_norm": 0.42350679636001587, |
| "learning_rate": 7.659797676583247e-05, |
| "loss": 2.142, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.3304642834705032, |
| "grad_norm": 0.3012787699699402, |
| "learning_rate": 7.656411479636345e-05, |
| "loss": 2.1637, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.3307155871081234, |
| "grad_norm": 0.4277016222476959, |
| "learning_rate": 7.653023584324516e-05, |
| "loss": 2.0439, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.33096689074574354, |
| "grad_norm": 0.1811779886484146, |
| "learning_rate": 7.649633992813793e-05, |
| "loss": 0.9751, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.3312181943833637, |
| "grad_norm": 0.4230121672153473, |
| "learning_rate": 7.646242707271298e-05, |
| "loss": 2.6928, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.33146949802098385, |
| "grad_norm": 0.3608384132385254, |
| "learning_rate": 7.642849729865235e-05, |
| "loss": 2.3808, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.331720801658604, |
| "grad_norm": 0.4780106842517853, |
| "learning_rate": 7.639455062764888e-05, |
| "loss": 1.6927, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.3319721052962242, |
| "grad_norm": 0.3628620505332947, |
| "learning_rate": 7.636058708140622e-05, |
| "loss": 2.1814, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.33222340893384433, |
| "grad_norm": 0.28604358434677124, |
| "learning_rate": 7.632660668163882e-05, |
| "loss": 2.0401, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.3324747125714645, |
| "grad_norm": 0.29259243607521057, |
| "learning_rate": 7.62926094500719e-05, |
| "loss": 2.0084, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.33272601620908465, |
| "grad_norm": 0.38667431473731995, |
| "learning_rate": 7.625859540844144e-05, |
| "loss": 2.2576, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.33297731984670476, |
| "grad_norm": 0.5796361565589905, |
| "learning_rate": 7.622456457849417e-05, |
| "loss": 2.3307, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.3332286234843249, |
| "grad_norm": 0.37588635087013245, |
| "learning_rate": 7.619051698198752e-05, |
| "loss": 2.2745, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.3334799271219451, |
| "grad_norm": 0.15837115049362183, |
| "learning_rate": 7.615645264068971e-05, |
| "loss": 0.8862, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.33373123075956523, |
| "grad_norm": 0.6717337965965271, |
| "learning_rate": 7.612237157637962e-05, |
| "loss": 2.4251, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.3339825343971854, |
| "grad_norm": 0.2916143834590912, |
| "learning_rate": 7.608827381084682e-05, |
| "loss": 2.4426, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.33423383803480555, |
| "grad_norm": 0.4805827736854553, |
| "learning_rate": 7.605415936589155e-05, |
| "loss": 1.997, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.3344851416724257, |
| "grad_norm": 0.3782505989074707, |
| "learning_rate": 7.602002826332475e-05, |
| "loss": 1.9675, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.33473644531004587, |
| "grad_norm": 0.38300177454948425, |
| "learning_rate": 7.598588052496799e-05, |
| "loss": 2.6077, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.33498774894766603, |
| "grad_norm": 0.845617949962616, |
| "learning_rate": 7.595171617265347e-05, |
| "loss": 2.7052, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.3352390525852862, |
| "grad_norm": 0.4692726135253906, |
| "learning_rate": 7.5917535228224e-05, |
| "loss": 1.9976, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.33549035622290635, |
| "grad_norm": 0.42371514439582825, |
| "learning_rate": 7.588333771353305e-05, |
| "loss": 1.9521, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.33574165986052645, |
| "grad_norm": 0.34298497438430786, |
| "learning_rate": 7.584912365044463e-05, |
| "loss": 2.3475, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.3359929634981466, |
| "grad_norm": 0.4288291931152344, |
| "learning_rate": 7.581489306083334e-05, |
| "loss": 2.4001, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.3362442671357668, |
| "grad_norm": 0.371548056602478, |
| "learning_rate": 7.578064596658438e-05, |
| "loss": 2.0676, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.33649557077338693, |
| "grad_norm": 0.18411099910736084, |
| "learning_rate": 7.574638238959345e-05, |
| "loss": 1.4833, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.3367468744110071, |
| "grad_norm": 0.4732882082462311, |
| "learning_rate": 7.571210235176684e-05, |
| "loss": 2.3069, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.33699817804862725, |
| "grad_norm": 0.38196468353271484, |
| "learning_rate": 7.567780587502134e-05, |
| "loss": 2.331, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.3372494816862474, |
| "grad_norm": 0.41077175736427307, |
| "learning_rate": 7.564349298128423e-05, |
| "loss": 2.0708, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.33750078532386757, |
| "grad_norm": 0.21946145594120026, |
| "learning_rate": 7.560916369249333e-05, |
| "loss": 1.8817, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.33775208896148773, |
| "grad_norm": 0.2860282361507416, |
| "learning_rate": 7.557481803059692e-05, |
| "loss": 2.4348, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.3380033925991079, |
| "grad_norm": 0.8802721500396729, |
| "learning_rate": 7.554045601755371e-05, |
| "loss": 2.3791, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.33825469623672805, |
| "grad_norm": 0.3701392412185669, |
| "learning_rate": 7.550607767533294e-05, |
| "loss": 2.6149, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.3385059998743482, |
| "grad_norm": 0.3857259154319763, |
| "learning_rate": 7.547168302591424e-05, |
| "loss": 2.5165, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.3387573035119683, |
| "grad_norm": 0.3372226357460022, |
| "learning_rate": 7.543727209128768e-05, |
| "loss": 1.8363, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.33900860714958847, |
| "grad_norm": 0.27592962980270386, |
| "learning_rate": 7.540284489345372e-05, |
| "loss": 2.3205, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.33925991078720863, |
| "grad_norm": 0.3359019160270691, |
| "learning_rate": 7.536840145442327e-05, |
| "loss": 1.7028, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3395112144248288, |
| "grad_norm": 0.21417422592639923, |
| "learning_rate": 7.533394179621756e-05, |
| "loss": 1.8902, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.33976251806244895, |
| "grad_norm": 0.37822896242141724, |
| "learning_rate": 7.529946594086823e-05, |
| "loss": 2.5567, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.3400138217000691, |
| "grad_norm": 0.3702054023742676, |
| "learning_rate": 7.526497391041727e-05, |
| "loss": 1.854, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.34026512533768927, |
| "grad_norm": 0.2901824414730072, |
| "learning_rate": 7.523046572691699e-05, |
| "loss": 2.1113, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.3405164289753094, |
| "grad_norm": 0.4038754403591156, |
| "learning_rate": 7.519594141243007e-05, |
| "loss": 2.4576, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.3407677326129296, |
| "grad_norm": 0.5765335559844971, |
| "learning_rate": 7.516140098902946e-05, |
| "loss": 2.6652, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.34101903625054975, |
| "grad_norm": 0.3066706657409668, |
| "learning_rate": 7.512684447879845e-05, |
| "loss": 2.2157, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.3412703398881699, |
| "grad_norm": 0.4212114214897156, |
| "learning_rate": 7.509227190383057e-05, |
| "loss": 2.747, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.34152164352579, |
| "grad_norm": 0.5156689882278442, |
| "learning_rate": 7.505768328622964e-05, |
| "loss": 2.0679, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.34177294716341017, |
| "grad_norm": 0.43117931485176086, |
| "learning_rate": 7.502307864810973e-05, |
| "loss": 2.1347, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3420242508010303, |
| "grad_norm": 0.5635091662406921, |
| "learning_rate": 7.498845801159521e-05, |
| "loss": 2.6811, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.3422755544386505, |
| "grad_norm": 0.3573296070098877, |
| "learning_rate": 7.49538213988206e-05, |
| "loss": 2.2536, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.34252685807627065, |
| "grad_norm": 0.4179854094982147, |
| "learning_rate": 7.491916883193065e-05, |
| "loss": 1.9621, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.3427781617138908, |
| "grad_norm": 0.2795206308364868, |
| "learning_rate": 7.488450033308034e-05, |
| "loss": 1.8347, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.34302946535151096, |
| "grad_norm": 0.48083072900772095, |
| "learning_rate": 7.484981592443483e-05, |
| "loss": 1.8598, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.3432807689891311, |
| "grad_norm": 0.4337339401245117, |
| "learning_rate": 7.481511562816943e-05, |
| "loss": 2.1042, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.3435320726267513, |
| "grad_norm": 0.3307173550128937, |
| "learning_rate": 7.478039946646963e-05, |
| "loss": 2.6296, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.34378337626437144, |
| "grad_norm": 0.39121687412261963, |
| "learning_rate": 7.474566746153105e-05, |
| "loss": 2.0115, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.3440346799019916, |
| "grad_norm": 0.3068203628063202, |
| "learning_rate": 7.471091963555946e-05, |
| "loss": 2.3176, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.34428598353961176, |
| "grad_norm": 0.42959433794021606, |
| "learning_rate": 7.467615601077071e-05, |
| "loss": 2.2989, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.34453728717723187, |
| "grad_norm": 0.45218074321746826, |
| "learning_rate": 7.464137660939076e-05, |
| "loss": 2.1487, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.344788590814852, |
| "grad_norm": 0.3469322919845581, |
| "learning_rate": 7.460658145365566e-05, |
| "loss": 2.48, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.3450398944524722, |
| "grad_norm": 0.28552716970443726, |
| "learning_rate": 7.45717705658116e-05, |
| "loss": 2.0998, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.34529119809009234, |
| "grad_norm": 0.5254274010658264, |
| "learning_rate": 7.453694396811469e-05, |
| "loss": 1.8048, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.3455425017277125, |
| "grad_norm": 0.41990652680397034, |
| "learning_rate": 7.450210168283122e-05, |
| "loss": 1.6255, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.34579380536533266, |
| "grad_norm": 0.353322833776474, |
| "learning_rate": 7.446724373223743e-05, |
| "loss": 1.9041, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.3460451090029528, |
| "grad_norm": 0.347417414188385, |
| "learning_rate": 7.443237013861958e-05, |
| "loss": 2.1782, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.346296412640573, |
| "grad_norm": 0.2713114619255066, |
| "learning_rate": 7.439748092427397e-05, |
| "loss": 2.09, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.34654771627819314, |
| "grad_norm": 0.5424685478210449, |
| "learning_rate": 7.436257611150685e-05, |
| "loss": 2.2003, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.3467990199158133, |
| "grad_norm": 0.17719517648220062, |
| "learning_rate": 7.432765572263447e-05, |
| "loss": 1.0122, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.34705032355343346, |
| "grad_norm": 0.33825281262397766, |
| "learning_rate": 7.429271977998303e-05, |
| "loss": 2.3135, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.34730162719105356, |
| "grad_norm": 0.5347048044204712, |
| "learning_rate": 7.425776830588864e-05, |
| "loss": 2.0274, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.3475529308286737, |
| "grad_norm": 0.5033213496208191, |
| "learning_rate": 7.422280132269741e-05, |
| "loss": 2.328, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.3478042344662939, |
| "grad_norm": 0.5650802850723267, |
| "learning_rate": 7.41878188527653e-05, |
| "loss": 2.6224, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.34805553810391404, |
| "grad_norm": 0.2062671184539795, |
| "learning_rate": 7.415282091845822e-05, |
| "loss": 0.8484, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.3483068417415342, |
| "grad_norm": 0.3416356146335602, |
| "learning_rate": 7.411780754215189e-05, |
| "loss": 2.2545, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.34855814537915436, |
| "grad_norm": 0.3872370719909668, |
| "learning_rate": 7.408277874623202e-05, |
| "loss": 2.5386, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.3488094490167745, |
| "grad_norm": 0.42746737599372864, |
| "learning_rate": 7.404773455309411e-05, |
| "loss": 2.4253, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.3490607526543947, |
| "grad_norm": 0.36025798320770264, |
| "learning_rate": 7.401267498514347e-05, |
| "loss": 2.7584, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.34931205629201484, |
| "grad_norm": 0.374305784702301, |
| "learning_rate": 7.397760006479531e-05, |
| "loss": 2.4407, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.349563359929635, |
| "grad_norm": 2.065537929534912, |
| "learning_rate": 7.394250981447462e-05, |
| "loss": 2.1375, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.34981466356725516, |
| "grad_norm": 0.2785825729370117, |
| "learning_rate": 7.390740425661619e-05, |
| "loss": 2.6172, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.3500659672048753, |
| "grad_norm": 0.45189452171325684, |
| "learning_rate": 7.387228341366461e-05, |
| "loss": 2.7076, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.3503172708424954, |
| "grad_norm": 0.4751068949699402, |
| "learning_rate": 7.383714730807423e-05, |
| "loss": 1.9378, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.3505685744801156, |
| "grad_norm": 0.4904008209705353, |
| "learning_rate": 7.380199596230916e-05, |
| "loss": 2.4684, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.35081987811773574, |
| "grad_norm": 0.36860087513923645, |
| "learning_rate": 7.376682939884328e-05, |
| "loss": 2.3451, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.3510711817553559, |
| "grad_norm": 0.4491627514362335, |
| "learning_rate": 7.373164764016014e-05, |
| "loss": 2.1021, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.35132248539297606, |
| "grad_norm": 0.5599209666252136, |
| "learning_rate": 7.369645070875307e-05, |
| "loss": 2.2324, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.3515737890305962, |
| "grad_norm": 0.3067469298839569, |
| "learning_rate": 7.366123862712506e-05, |
| "loss": 2.4792, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.3518250926682164, |
| "grad_norm": 0.41154590249061584, |
| "learning_rate": 7.362601141778881e-05, |
| "loss": 2.3062, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.35207639630583654, |
| "grad_norm": 0.46394750475883484, |
| "learning_rate": 7.359076910326667e-05, |
| "loss": 1.8677, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.3523276999434567, |
| "grad_norm": 0.3576370179653168, |
| "learning_rate": 7.355551170609066e-05, |
| "loss": 2.0856, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.35257900358107686, |
| "grad_norm": 0.3749783933162689, |
| "learning_rate": 7.352023924880245e-05, |
| "loss": 2.2229, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.352830307218697, |
| "grad_norm": 0.47458767890930176, |
| "learning_rate": 7.348495175395331e-05, |
| "loss": 2.0969, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.3530816108563171, |
| "grad_norm": 0.5009230375289917, |
| "learning_rate": 7.344964924410415e-05, |
| "loss": 2.104, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.3533329144939373, |
| "grad_norm": 0.3183805048465729, |
| "learning_rate": 7.341433174182551e-05, |
| "loss": 2.2145, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.35358421813155744, |
| "grad_norm": 0.30304932594299316, |
| "learning_rate": 7.337899926969741e-05, |
| "loss": 2.36, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.3538355217691776, |
| "grad_norm": 0.402339905500412, |
| "learning_rate": 7.334365185030958e-05, |
| "loss": 2.8444, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.35408682540679776, |
| "grad_norm": 0.3983165919780731, |
| "learning_rate": 7.330828950626118e-05, |
| "loss": 2.0626, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.3543381290444179, |
| "grad_norm": 0.36163392663002014, |
| "learning_rate": 7.3272912260161e-05, |
| "loss": 2.5673, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3545894326820381, |
| "grad_norm": 0.28790605068206787, |
| "learning_rate": 7.323752013462731e-05, |
| "loss": 2.0637, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.35484073631965823, |
| "grad_norm": 0.3295561373233795, |
| "learning_rate": 7.320211315228794e-05, |
| "loss": 2.2175, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.3550920399572784, |
| "grad_norm": 0.2871148884296417, |
| "learning_rate": 7.316669133578014e-05, |
| "loss": 1.4072, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.35534334359489855, |
| "grad_norm": 0.4259054958820343, |
| "learning_rate": 7.313125470775072e-05, |
| "loss": 2.1157, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.3555946472325187, |
| "grad_norm": 0.3132629692554474, |
| "learning_rate": 7.309580329085593e-05, |
| "loss": 2.3555, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.35584595087013887, |
| "grad_norm": 0.5399345755577087, |
| "learning_rate": 7.306033710776147e-05, |
| "loss": 2.1951, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.356097254507759, |
| "grad_norm": 0.22800098359584808, |
| "learning_rate": 7.30248561811425e-05, |
| "loss": 1.8473, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.35634855814537914, |
| "grad_norm": 0.30099812150001526, |
| "learning_rate": 7.298936053368358e-05, |
| "loss": 2.2015, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.3565998617829993, |
| "grad_norm": 0.41763341426849365, |
| "learning_rate": 7.29538501880787e-05, |
| "loss": 1.8453, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.35685116542061945, |
| "grad_norm": 0.381164014339447, |
| "learning_rate": 7.291832516703125e-05, |
| "loss": 2.1346, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3571024690582396, |
| "grad_norm": 0.44966503977775574, |
| "learning_rate": 7.288278549325398e-05, |
| "loss": 2.288, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.3573537726958598, |
| "grad_norm": 0.41181033849716187, |
| "learning_rate": 7.284723118946902e-05, |
| "loss": 2.0504, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.35760507633347993, |
| "grad_norm": 0.1352885365486145, |
| "learning_rate": 7.281166227840787e-05, |
| "loss": 0.9655, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.3578563799711001, |
| "grad_norm": 0.4056221544742584, |
| "learning_rate": 7.277607878281138e-05, |
| "loss": 2.4637, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.35810768360872025, |
| "grad_norm": 0.7632689476013184, |
| "learning_rate": 7.274048072542965e-05, |
| "loss": 2.2485, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.3583589872463404, |
| "grad_norm": 0.42599251866340637, |
| "learning_rate": 7.270486812902216e-05, |
| "loss": 2.4106, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.35861029088396057, |
| "grad_norm": 0.3249692916870117, |
| "learning_rate": 7.266924101635767e-05, |
| "loss": 2.173, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.3588615945215807, |
| "grad_norm": 0.3229914605617523, |
| "learning_rate": 7.263359941021423e-05, |
| "loss": 2.5058, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.35911289815920083, |
| "grad_norm": 0.5926707983016968, |
| "learning_rate": 7.259794333337914e-05, |
| "loss": 2.0133, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.359364201796821, |
| "grad_norm": 0.3878900408744812, |
| "learning_rate": 7.256227280864892e-05, |
| "loss": 1.943, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.35961550543444115, |
| "grad_norm": 0.34182676672935486, |
| "learning_rate": 7.252658785882942e-05, |
| "loss": 2.4174, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.3598668090720613, |
| "grad_norm": 0.3920558989048004, |
| "learning_rate": 7.24908885067356e-05, |
| "loss": 2.5771, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.36011811270968147, |
| "grad_norm": 0.26655280590057373, |
| "learning_rate": 7.24551747751917e-05, |
| "loss": 2.4015, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.36036941634730163, |
| "grad_norm": 0.6898893713951111, |
| "learning_rate": 7.241944668703114e-05, |
| "loss": 2.2238, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.3606207199849218, |
| "grad_norm": 0.2271176278591156, |
| "learning_rate": 7.238370426509653e-05, |
| "loss": 1.8, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.36087202362254195, |
| "grad_norm": 0.5071490406990051, |
| "learning_rate": 7.23479475322396e-05, |
| "loss": 2.2468, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.3611233272601621, |
| "grad_norm": 0.4647441804409027, |
| "learning_rate": 7.231217651132129e-05, |
| "loss": 2.2595, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.36137463089778227, |
| "grad_norm": 0.6888497471809387, |
| "learning_rate": 7.227639122521162e-05, |
| "loss": 1.8527, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.3616259345354024, |
| "grad_norm": 0.31782910227775574, |
| "learning_rate": 7.224059169678976e-05, |
| "loss": 2.5325, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.36187723817302253, |
| "grad_norm": 0.39101526141166687, |
| "learning_rate": 7.2204777948944e-05, |
| "loss": 2.0058, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3621285418106427, |
| "grad_norm": 0.21498848497867584, |
| "learning_rate": 7.216895000457166e-05, |
| "loss": 2.092, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.36237984544826285, |
| "grad_norm": 0.3530738949775696, |
| "learning_rate": 7.213310788657922e-05, |
| "loss": 1.957, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.362631149085883, |
| "grad_norm": 0.2858533263206482, |
| "learning_rate": 7.209725161788217e-05, |
| "loss": 2.4624, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.36288245272350317, |
| "grad_norm": 0.512286365032196, |
| "learning_rate": 7.206138122140503e-05, |
| "loss": 2.2276, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.3631337563611233, |
| "grad_norm": 0.3804178833961487, |
| "learning_rate": 7.202549672008141e-05, |
| "loss": 2.1131, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.3633850599987435, |
| "grad_norm": 0.5225626230239868, |
| "learning_rate": 7.198959813685388e-05, |
| "loss": 2.1261, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.43813782930374146, |
| "learning_rate": 7.195368549467407e-05, |
| "loss": 1.9948, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.3638876672739838, |
| "grad_norm": 0.37537580728530884, |
| "learning_rate": 7.191775881650256e-05, |
| "loss": 2.3365, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.36413897091160397, |
| "grad_norm": 0.20629999041557312, |
| "learning_rate": 7.188181812530888e-05, |
| "loss": 1.0852, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.3643902745492241, |
| "grad_norm": 0.36247143149375916, |
| "learning_rate": 7.18458634440716e-05, |
| "loss": 2.2337, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.36464157818684423, |
| "grad_norm": 0.30473005771636963, |
| "learning_rate": 7.180989479577817e-05, |
| "loss": 1.8102, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.3648928818244644, |
| "grad_norm": 0.4480074942111969, |
| "learning_rate": 7.177391220342497e-05, |
| "loss": 2.5071, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.36514418546208455, |
| "grad_norm": 0.40253522992134094, |
| "learning_rate": 7.173791569001734e-05, |
| "loss": 2.6847, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.3653954890997047, |
| "grad_norm": 0.42434608936309814, |
| "learning_rate": 7.170190527856944e-05, |
| "loss": 2.1038, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.36564679273732487, |
| "grad_norm": 0.3165349066257477, |
| "learning_rate": 7.166588099210442e-05, |
| "loss": 2.7536, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.365898096374945, |
| "grad_norm": 0.36183032393455505, |
| "learning_rate": 7.162984285365424e-05, |
| "loss": 2.3763, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.3661494000125652, |
| "grad_norm": 0.17733515799045563, |
| "learning_rate": 7.15937908862597e-05, |
| "loss": 1.2717, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.36640070365018534, |
| "grad_norm": 0.49144411087036133, |
| "learning_rate": 7.155772511297051e-05, |
| "loss": 2.0034, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.3666520072878055, |
| "grad_norm": 0.31911566853523254, |
| "learning_rate": 7.15216455568451e-05, |
| "loss": 1.8858, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.36690331092542566, |
| "grad_norm": 0.35006940364837646, |
| "learning_rate": 7.148555224095083e-05, |
| "loss": 2.4235, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.3671546145630458, |
| "grad_norm": 0.21370449662208557, |
| "learning_rate": 7.144944518836377e-05, |
| "loss": 1.9213, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.367405918200666, |
| "grad_norm": 0.5415109395980835, |
| "learning_rate": 7.14133244221688e-05, |
| "loss": 2.3867, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.3676572218382861, |
| "grad_norm": 0.3929027318954468, |
| "learning_rate": 7.13771899654596e-05, |
| "loss": 2.3617, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.36790852547590625, |
| "grad_norm": 0.5242950320243835, |
| "learning_rate": 7.134104184133856e-05, |
| "loss": 2.0933, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.3681598291135264, |
| "grad_norm": 0.3808387517929077, |
| "learning_rate": 7.130488007291683e-05, |
| "loss": 1.905, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.36841113275114656, |
| "grad_norm": 0.33748942613601685, |
| "learning_rate": 7.126870468331426e-05, |
| "loss": 2.2044, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.3686624363887667, |
| "grad_norm": 0.37744027376174927, |
| "learning_rate": 7.123251569565943e-05, |
| "loss": 1.9647, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.3689137400263869, |
| "grad_norm": 0.43732622265815735, |
| "learning_rate": 7.119631313308964e-05, |
| "loss": 2.2956, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.36916504366400704, |
| "grad_norm": 0.2148832082748413, |
| "learning_rate": 7.116009701875078e-05, |
| "loss": 2.0276, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.3694163473016272, |
| "grad_norm": 0.3715471029281616, |
| "learning_rate": 7.112386737579752e-05, |
| "loss": 2.5056, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.36966765093924736, |
| "grad_norm": 0.34628719091415405, |
| "learning_rate": 7.108762422739309e-05, |
| "loss": 2.0201, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.3699189545768675, |
| "grad_norm": 0.40477246046066284, |
| "learning_rate": 7.10513675967094e-05, |
| "loss": 2.1746, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.3701702582144877, |
| "grad_norm": 0.46029016375541687, |
| "learning_rate": 7.101509750692695e-05, |
| "loss": 2.0847, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.3704215618521078, |
| "grad_norm": 0.45498889684677124, |
| "learning_rate": 7.097881398123488e-05, |
| "loss": 1.9687, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.37067286548972794, |
| "grad_norm": 0.46583226323127747, |
| "learning_rate": 7.094251704283089e-05, |
| "loss": 2.5303, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.3709241691273481, |
| "grad_norm": 0.2552844285964966, |
| "learning_rate": 7.090620671492128e-05, |
| "loss": 2.5519, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.37117547276496826, |
| "grad_norm": 0.14888420701026917, |
| "learning_rate": 7.086988302072089e-05, |
| "loss": 0.7764, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.3714267764025884, |
| "grad_norm": 0.3378463685512543, |
| "learning_rate": 7.08335459834531e-05, |
| "loss": 2.0823, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.3716780800402086, |
| "grad_norm": 0.46584033966064453, |
| "learning_rate": 7.079719562634986e-05, |
| "loss": 2.3412, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.37192938367782874, |
| "grad_norm": 0.34758704900741577, |
| "learning_rate": 7.076083197265157e-05, |
| "loss": 2.1768, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3721806873154489, |
| "grad_norm": 0.40054845809936523, |
| "learning_rate": 7.072445504560722e-05, |
| "loss": 1.9939, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.37243199095306906, |
| "grad_norm": 0.29733559489250183, |
| "learning_rate": 7.068806486847421e-05, |
| "loss": 1.9872, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.3726832945906892, |
| "grad_norm": 0.3448224365711212, |
| "learning_rate": 7.065166146451844e-05, |
| "loss": 1.9621, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.3729345982283094, |
| "grad_norm": 0.3913850784301758, |
| "learning_rate": 7.061524485701428e-05, |
| "loss": 2.0991, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.37318590186592954, |
| "grad_norm": 0.34898319840431213, |
| "learning_rate": 7.057881506924448e-05, |
| "loss": 2.3239, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.37343720550354964, |
| "grad_norm": 0.40906834602355957, |
| "learning_rate": 7.054237212450034e-05, |
| "loss": 2.214, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.3736885091411698, |
| "grad_norm": 0.2804180979728699, |
| "learning_rate": 7.050591604608143e-05, |
| "loss": 2.3497, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.37393981277878996, |
| "grad_norm": 0.5161413550376892, |
| "learning_rate": 7.046944685729581e-05, |
| "loss": 2.0113, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.3741911164164101, |
| "grad_norm": 0.42594465613365173, |
| "learning_rate": 7.04329645814599e-05, |
| "loss": 2.2473, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.3744424200540303, |
| "grad_norm": 0.28369376063346863, |
| "learning_rate": 7.039646924189848e-05, |
| "loss": 2.5277, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.37469372369165044, |
| "grad_norm": 0.3760066330432892, |
| "learning_rate": 7.035996086194467e-05, |
| "loss": 2.5726, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.3749450273292706, |
| "grad_norm": 0.8721727132797241, |
| "learning_rate": 7.032343946493993e-05, |
| "loss": 2.1684, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.37519633096689076, |
| "grad_norm": 0.3655981123447418, |
| "learning_rate": 7.02869050742341e-05, |
| "loss": 2.2741, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.3754476346045109, |
| "grad_norm": 0.2971070408821106, |
| "learning_rate": 7.025035771318524e-05, |
| "loss": 2.0898, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.3756989382421311, |
| "grad_norm": 0.25091758370399475, |
| "learning_rate": 7.021379740515977e-05, |
| "loss": 1.8725, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.37595024187975123, |
| "grad_norm": 0.49658530950546265, |
| "learning_rate": 7.017722417353233e-05, |
| "loss": 2.4947, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.37620154551737134, |
| "grad_norm": 0.607513427734375, |
| "learning_rate": 7.014063804168587e-05, |
| "loss": 1.5828, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.3764528491549915, |
| "grad_norm": 0.22139927744865417, |
| "learning_rate": 7.010403903301158e-05, |
| "loss": 2.1792, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.37670415279261166, |
| "grad_norm": 0.670466959476471, |
| "learning_rate": 7.006742717090887e-05, |
| "loss": 2.6152, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.3769554564302318, |
| "grad_norm": 0.27696824073791504, |
| "learning_rate": 7.003080247878537e-05, |
| "loss": 2.2613, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.377206760067852, |
| "grad_norm": 0.5103288888931274, |
| "learning_rate": 6.99941649800569e-05, |
| "loss": 2.3938, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.37745806370547214, |
| "grad_norm": 0.38460054993629456, |
| "learning_rate": 6.995751469814751e-05, |
| "loss": 2.5157, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.3777093673430923, |
| "grad_norm": 0.46227124333381653, |
| "learning_rate": 6.992085165648939e-05, |
| "loss": 2.2615, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.37796067098071245, |
| "grad_norm": 0.4528365433216095, |
| "learning_rate": 6.988417587852287e-05, |
| "loss": 2.4324, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.3782119746183326, |
| "grad_norm": 0.172061488032341, |
| "learning_rate": 6.984748738769647e-05, |
| "loss": 1.8319, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.3784632782559528, |
| "grad_norm": 0.3058612048625946, |
| "learning_rate": 6.98107862074668e-05, |
| "loss": 2.1932, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.37871458189357293, |
| "grad_norm": 0.279033362865448, |
| "learning_rate": 6.977407236129862e-05, |
| "loss": 2.5174, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.37896588553119304, |
| "grad_norm": 0.31402352452278137, |
| "learning_rate": 6.973734587266474e-05, |
| "loss": 2.623, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.3792171891688132, |
| "grad_norm": 0.18773365020751953, |
| "learning_rate": 6.970060676504608e-05, |
| "loss": 1.8999, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.37946849280643336, |
| "grad_norm": 0.39459121227264404, |
| "learning_rate": 6.966385506193165e-05, |
| "loss": 2.1866, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3797197964440535, |
| "grad_norm": 0.4056554138660431, |
| "learning_rate": 6.962709078681845e-05, |
| "loss": 2.512, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.3799711000816737, |
| "grad_norm": 0.3101893961429596, |
| "learning_rate": 6.95903139632116e-05, |
| "loss": 2.4238, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.38022240371929383, |
| "grad_norm": 0.4230501353740692, |
| "learning_rate": 6.955352461462418e-05, |
| "loss": 2.1624, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.380473707356914, |
| "grad_norm": 0.4053939878940582, |
| "learning_rate": 6.951672276457728e-05, |
| "loss": 2.0436, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.38072501099453415, |
| "grad_norm": 0.3917832672595978, |
| "learning_rate": 6.947990843660002e-05, |
| "loss": 2.6129, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.3809763146321543, |
| "grad_norm": 0.3876727819442749, |
| "learning_rate": 6.944308165422949e-05, |
| "loss": 2.2111, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.38122761826977447, |
| "grad_norm": 0.49222511053085327, |
| "learning_rate": 6.94062424410107e-05, |
| "loss": 2.0696, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.38147892190739463, |
| "grad_norm": 0.7491002678871155, |
| "learning_rate": 6.936939082049664e-05, |
| "loss": 1.8861, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.3817302255450148, |
| "grad_norm": 0.3003447949886322, |
| "learning_rate": 6.933252681624825e-05, |
| "loss": 2.344, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.3819815291826349, |
| "grad_norm": 0.4402701258659363, |
| "learning_rate": 6.929565045183438e-05, |
| "loss": 2.1229, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.38223283282025505, |
| "grad_norm": 0.2871621251106262, |
| "learning_rate": 6.925876175083174e-05, |
| "loss": 1.8697, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.3824841364578752, |
| "grad_norm": 0.3502472937107086, |
| "learning_rate": 6.922186073682496e-05, |
| "loss": 2.2823, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.38273544009549537, |
| "grad_norm": 0.13275161385536194, |
| "learning_rate": 6.918494743340656e-05, |
| "loss": 0.4535, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.38298674373311553, |
| "grad_norm": 0.18874838948249817, |
| "learning_rate": 6.914802186417689e-05, |
| "loss": 2.1222, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.3832380473707357, |
| "grad_norm": 0.2585754096508026, |
| "learning_rate": 6.911108405274412e-05, |
| "loss": 2.2653, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.38348935100835585, |
| "grad_norm": 0.36807018518447876, |
| "learning_rate": 6.907413402272431e-05, |
| "loss": 2.3976, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.383740654645976, |
| "grad_norm": 0.5914536118507385, |
| "learning_rate": 6.90371717977413e-05, |
| "loss": 1.9575, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.38399195828359617, |
| "grad_norm": 0.4083138108253479, |
| "learning_rate": 6.900019740142672e-05, |
| "loss": 2.2095, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.38424326192121633, |
| "grad_norm": 0.33133891224861145, |
| "learning_rate": 6.896321085741997e-05, |
| "loss": 2.2339, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.3844945655588365, |
| "grad_norm": 0.26224732398986816, |
| "learning_rate": 6.892621218936825e-05, |
| "loss": 2.3765, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.3847458691964566, |
| "grad_norm": 0.3711860179901123, |
| "learning_rate": 6.888920142092647e-05, |
| "loss": 2.4986, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.38499717283407675, |
| "grad_norm": 0.1522902250289917, |
| "learning_rate": 6.885217857575734e-05, |
| "loss": 0.7433, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.3852484764716969, |
| "grad_norm": 0.2632433772087097, |
| "learning_rate": 6.881514367753124e-05, |
| "loss": 1.6683, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.38549978010931707, |
| "grad_norm": 0.27296924591064453, |
| "learning_rate": 6.877809674992625e-05, |
| "loss": 2.2339, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.38575108374693723, |
| "grad_norm": 0.3882145285606384, |
| "learning_rate": 6.874103781662818e-05, |
| "loss": 2.2982, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.3860023873845574, |
| "grad_norm": 0.35082605481147766, |
| "learning_rate": 6.870396690133047e-05, |
| "loss": 2.2364, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.38625369102217755, |
| "grad_norm": 0.4729946255683899, |
| "learning_rate": 6.866688402773427e-05, |
| "loss": 2.3253, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.3865049946597977, |
| "grad_norm": 0.42705488204956055, |
| "learning_rate": 6.862978921954835e-05, |
| "loss": 2.4093, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.38675629829741787, |
| "grad_norm": 0.17129021883010864, |
| "learning_rate": 6.859268250048909e-05, |
| "loss": 0.8209, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.387007601935038, |
| "grad_norm": 0.30329659581184387, |
| "learning_rate": 6.855556389428052e-05, |
| "loss": 2.3676, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3872589055726582, |
| "grad_norm": 0.26523396372795105, |
| "learning_rate": 6.851843342465428e-05, |
| "loss": 2.3319, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.38751020921027834, |
| "grad_norm": 0.32253608107566833, |
| "learning_rate": 6.848129111534953e-05, |
| "loss": 2.4377, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.38776151284789845, |
| "grad_norm": 0.4546685516834259, |
| "learning_rate": 6.844413699011306e-05, |
| "loss": 2.1457, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.3880128164855186, |
| "grad_norm": 0.7805173993110657, |
| "learning_rate": 6.84069710726992e-05, |
| "loss": 2.0873, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.38826412012313877, |
| "grad_norm": 0.46392345428466797, |
| "learning_rate": 6.836979338686981e-05, |
| "loss": 1.9017, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.3885154237607589, |
| "grad_norm": 0.39228349924087524, |
| "learning_rate": 6.833260395639429e-05, |
| "loss": 2.2843, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.3887667273983791, |
| "grad_norm": 0.42995691299438477, |
| "learning_rate": 6.829540280504951e-05, |
| "loss": 2.8123, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.38901803103599925, |
| "grad_norm": 0.4211295247077942, |
| "learning_rate": 6.82581899566199e-05, |
| "loss": 1.995, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.3892693346736194, |
| "grad_norm": 0.43150249123573303, |
| "learning_rate": 6.822096543489729e-05, |
| "loss": 2.0724, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.38952063831123956, |
| "grad_norm": 0.35338032245635986, |
| "learning_rate": 6.818372926368104e-05, |
| "loss": 2.0811, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3897719419488597, |
| "grad_norm": 0.41222333908081055, |
| "learning_rate": 6.814648146677793e-05, |
| "loss": 2.3013, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.3900232455864799, |
| "grad_norm": 0.4232124984264374, |
| "learning_rate": 6.810922206800215e-05, |
| "loss": 2.6888, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.39027454922410004, |
| "grad_norm": 0.40635040402412415, |
| "learning_rate": 6.807195109117537e-05, |
| "loss": 2.409, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.39052585286172015, |
| "grad_norm": 0.5073560476303101, |
| "learning_rate": 6.80346685601266e-05, |
| "loss": 2.5831, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.3907771564993403, |
| "grad_norm": 0.3237619400024414, |
| "learning_rate": 6.799737449869226e-05, |
| "loss": 1.837, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.39102846013696047, |
| "grad_norm": 0.31155863404273987, |
| "learning_rate": 6.796006893071615e-05, |
| "loss": 2.1885, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.3912797637745806, |
| "grad_norm": 0.5250048637390137, |
| "learning_rate": 6.792275188004942e-05, |
| "loss": 2.3971, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.3915310674122008, |
| "grad_norm": 0.3368058502674103, |
| "learning_rate": 6.788542337055055e-05, |
| "loss": 2.3448, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.39178237104982094, |
| "grad_norm": 0.40310317277908325, |
| "learning_rate": 6.784808342608537e-05, |
| "loss": 2.1963, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.3920336746874411, |
| "grad_norm": 0.34813496470451355, |
| "learning_rate": 6.781073207052702e-05, |
| "loss": 2.0816, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.39228497832506126, |
| "grad_norm": 0.4611975848674774, |
| "learning_rate": 6.777336932775588e-05, |
| "loss": 2.4709, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.3925362819626814, |
| "grad_norm": 0.45631229877471924, |
| "learning_rate": 6.773599522165971e-05, |
| "loss": 2.2422, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.3927875856003016, |
| "grad_norm": 0.3720869719982147, |
| "learning_rate": 6.769860977613345e-05, |
| "loss": 1.9893, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.39303888923792174, |
| "grad_norm": 0.36420589685440063, |
| "learning_rate": 6.76612130150793e-05, |
| "loss": 2.4629, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.3932901928755419, |
| "grad_norm": 0.4226488173007965, |
| "learning_rate": 6.762380496240675e-05, |
| "loss": 2.4334, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.393541496513162, |
| "grad_norm": 0.4091019928455353, |
| "learning_rate": 6.758638564203245e-05, |
| "loss": 2.3821, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.39379280015078216, |
| "grad_norm": 0.24210424721240997, |
| "learning_rate": 6.75489550778803e-05, |
| "loss": 0.9144, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.3940441037884023, |
| "grad_norm": 0.23522338271141052, |
| "learning_rate": 6.751151329388136e-05, |
| "loss": 1.7405, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.3942954074260225, |
| "grad_norm": 0.15203450620174408, |
| "learning_rate": 6.747406031397384e-05, |
| "loss": 0.7777, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.39454671106364264, |
| "grad_norm": 0.22364236414432526, |
| "learning_rate": 6.743659616210316e-05, |
| "loss": 2.0508, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.3947980147012628, |
| "grad_norm": 0.3944762945175171, |
| "learning_rate": 6.739912086222187e-05, |
| "loss": 1.9432, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.39504931833888296, |
| "grad_norm": 0.46476662158966064, |
| "learning_rate": 6.736163443828962e-05, |
| "loss": 1.9567, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.3953006219765031, |
| "grad_norm": 0.4436105489730835, |
| "learning_rate": 6.732413691427318e-05, |
| "loss": 1.9792, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.3955519256141233, |
| "grad_norm": 0.3982658088207245, |
| "learning_rate": 6.728662831414646e-05, |
| "loss": 1.8666, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.39580322925174344, |
| "grad_norm": 0.30122610926628113, |
| "learning_rate": 6.724910866189038e-05, |
| "loss": 2.4439, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.3960545328893636, |
| "grad_norm": 0.3222323954105377, |
| "learning_rate": 6.7211577981493e-05, |
| "loss": 2.4668, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.3963058365269837, |
| "grad_norm": 0.5659331679344177, |
| "learning_rate": 6.717403629694936e-05, |
| "loss": 2.482, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.39655714016460386, |
| "grad_norm": 0.2780524492263794, |
| "learning_rate": 6.713648363226159e-05, |
| "loss": 1.9637, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.396808443802224, |
| "grad_norm": 0.34126976132392883, |
| "learning_rate": 6.709892001143882e-05, |
| "loss": 0.7672, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.3970597474398442, |
| "grad_norm": 0.4123145639896393, |
| "learning_rate": 6.706134545849718e-05, |
| "loss": 2.1787, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.39731105107746434, |
| "grad_norm": 0.45804792642593384, |
| "learning_rate": 6.702375999745979e-05, |
| "loss": 2.5832, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.3975623547150845, |
| "grad_norm": 0.3806130886077881, |
| "learning_rate": 6.698616365235676e-05, |
| "loss": 2.0183, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.39781365835270466, |
| "grad_norm": 0.4113427996635437, |
| "learning_rate": 6.694855644722513e-05, |
| "loss": 2.3072, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.3980649619903248, |
| "grad_norm": 0.43049007654190063, |
| "learning_rate": 6.691093840610892e-05, |
| "loss": 2.1318, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.398316265627945, |
| "grad_norm": 0.5058295726776123, |
| "learning_rate": 6.687330955305904e-05, |
| "loss": 2.3739, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.39856756926556514, |
| "grad_norm": 0.3820212781429291, |
| "learning_rate": 6.683566991213334e-05, |
| "loss": 2.4582, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.3988188729031853, |
| "grad_norm": 0.6381146907806396, |
| "learning_rate": 6.679801950739655e-05, |
| "loss": 2.6437, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.39907017654080545, |
| "grad_norm": 0.26425233483314514, |
| "learning_rate": 6.67603583629203e-05, |
| "loss": 1.7483, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.39932148017842556, |
| "grad_norm": 0.1781735122203827, |
| "learning_rate": 6.672268650278308e-05, |
| "loss": 0.692, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.3995727838160457, |
| "grad_norm": 0.22695676982402802, |
| "learning_rate": 6.668500395107023e-05, |
| "loss": 1.9322, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.3998240874536659, |
| "grad_norm": 0.4311312735080719, |
| "learning_rate": 6.66473107318739e-05, |
| "loss": 2.4121, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.40007539109128604, |
| "grad_norm": 0.3456994593143463, |
| "learning_rate": 6.660960686929308e-05, |
| "loss": 2.0809, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.4003266947289062, |
| "grad_norm": 0.3440932333469391, |
| "learning_rate": 6.65718923874336e-05, |
| "loss": 2.5492, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.40057799836652636, |
| "grad_norm": 0.486544668674469, |
| "learning_rate": 6.653416731040802e-05, |
| "loss": 2.3515, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.4008293020041465, |
| "grad_norm": 0.48320162296295166, |
| "learning_rate": 6.649643166233573e-05, |
| "loss": 1.9959, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.4010806056417667, |
| "grad_norm": 0.5600380897521973, |
| "learning_rate": 6.645868546734282e-05, |
| "loss": 1.6978, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.40133190927938683, |
| "grad_norm": 0.41357421875, |
| "learning_rate": 6.642092874956217e-05, |
| "loss": 2.1349, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.401583212917007, |
| "grad_norm": 0.4483415186405182, |
| "learning_rate": 6.638316153313335e-05, |
| "loss": 2.0338, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.40183451655462715, |
| "grad_norm": 0.24149690568447113, |
| "learning_rate": 6.634538384220268e-05, |
| "loss": 2.2433, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.40208582019224726, |
| "grad_norm": 0.3381589949131012, |
| "learning_rate": 6.630759570092317e-05, |
| "loss": 2.0402, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.4023371238298674, |
| "grad_norm": 0.25250038504600525, |
| "learning_rate": 6.626979713345448e-05, |
| "loss": 2.2019, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.4025884274674876, |
| "grad_norm": 0.31296542286872864, |
| "learning_rate": 6.623198816396297e-05, |
| "loss": 2.4589, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.40283973110510773, |
| "grad_norm": 0.36123234033584595, |
| "learning_rate": 6.619416881662162e-05, |
| "loss": 2.0428, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.4030910347427279, |
| "grad_norm": 0.3003954589366913, |
| "learning_rate": 6.615633911561007e-05, |
| "loss": 2.1773, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.40334233838034805, |
| "grad_norm": 0.4871181845664978, |
| "learning_rate": 6.61184990851146e-05, |
| "loss": 2.0408, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.4035936420179682, |
| "grad_norm": 0.4325210452079773, |
| "learning_rate": 6.608064874932804e-05, |
| "loss": 2.5123, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.40384494565558837, |
| "grad_norm": 0.3475637137889862, |
| "learning_rate": 6.604278813244982e-05, |
| "loss": 2.1961, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.40409624929320853, |
| "grad_norm": 0.3930399715900421, |
| "learning_rate": 6.600491725868599e-05, |
| "loss": 1.8388, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.4043475529308287, |
| "grad_norm": 0.25549179315567017, |
| "learning_rate": 6.59670361522491e-05, |
| "loss": 1.1064, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.40459885656844885, |
| "grad_norm": 0.3450826406478882, |
| "learning_rate": 6.592914483735828e-05, |
| "loss": 2.1311, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.404850160206069, |
| "grad_norm": 0.32824480533599854, |
| "learning_rate": 6.589124333823918e-05, |
| "loss": 2.0151, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.4051014638436891, |
| "grad_norm": 0.5576545596122742, |
| "learning_rate": 6.585333167912394e-05, |
| "loss": 1.882, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.4053527674813093, |
| "grad_norm": 0.43908435106277466, |
| "learning_rate": 6.581540988425123e-05, |
| "loss": 1.7746, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.40560407111892943, |
| "grad_norm": 0.5368839502334595, |
| "learning_rate": 6.577747797786617e-05, |
| "loss": 2.3111, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.4058553747565496, |
| "grad_norm": 0.32215848565101624, |
| "learning_rate": 6.573953598422036e-05, |
| "loss": 2.2492, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.40610667839416975, |
| "grad_norm": 0.18906524777412415, |
| "learning_rate": 6.570158392757184e-05, |
| "loss": 1.7845, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.4063579820317899, |
| "grad_norm": 0.522850751876831, |
| "learning_rate": 6.566362183218511e-05, |
| "loss": 1.9706, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.40660928566941007, |
| "grad_norm": 0.5646048784255981, |
| "learning_rate": 6.562564972233103e-05, |
| "loss": 2.1804, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.40686058930703023, |
| "grad_norm": 0.41810548305511475, |
| "learning_rate": 6.558766762228693e-05, |
| "loss": 2.2449, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.4071118929446504, |
| "grad_norm": 0.4327705204486847, |
| "learning_rate": 6.554967555633649e-05, |
| "loss": 2.6681, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.40736319658227055, |
| "grad_norm": 0.3351784944534302, |
| "learning_rate": 6.551167354876977e-05, |
| "loss": 2.2619, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.4076145002198907, |
| "grad_norm": 0.31251177191734314, |
| "learning_rate": 6.547366162388319e-05, |
| "loss": 2.1284, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.4078658038575108, |
| "grad_norm": 0.20730093121528625, |
| "learning_rate": 6.543563980597949e-05, |
| "loss": 1.2999, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.40811710749513097, |
| "grad_norm": 0.4779922664165497, |
| "learning_rate": 6.539760811936777e-05, |
| "loss": 2.5214, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.40836841113275113, |
| "grad_norm": 0.38662630319595337, |
| "learning_rate": 6.535956658836341e-05, |
| "loss": 2.46, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.4086197147703713, |
| "grad_norm": 0.2927972674369812, |
| "learning_rate": 6.53215152372881e-05, |
| "loss": 2.3501, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.40887101840799145, |
| "grad_norm": 0.4462030231952667, |
| "learning_rate": 6.52834540904698e-05, |
| "loss": 2.0303, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.4091223220456116, |
| "grad_norm": 0.3924959599971771, |
| "learning_rate": 6.524538317224273e-05, |
| "loss": 2.2702, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.40937362568323177, |
| "grad_norm": 0.34013834595680237, |
| "learning_rate": 6.520730250694738e-05, |
| "loss": 1.6836, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.4096249293208519, |
| "grad_norm": 0.42472806572914124, |
| "learning_rate": 6.516921211893041e-05, |
| "loss": 2.0513, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.4098762329584721, |
| "grad_norm": 0.4425092935562134, |
| "learning_rate": 6.51311120325448e-05, |
| "loss": 2.092, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.41012753659609225, |
| "grad_norm": 0.41965746879577637, |
| "learning_rate": 6.509300227214965e-05, |
| "loss": 2.5082, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.4103788402337124, |
| "grad_norm": 0.41122114658355713, |
| "learning_rate": 6.505488286211027e-05, |
| "loss": 2.2883, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.41063014387133256, |
| "grad_norm": 0.2704785466194153, |
| "learning_rate": 6.501675382679812e-05, |
| "loss": 2.2976, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.41088144750895267, |
| "grad_norm": 0.4489680230617523, |
| "learning_rate": 6.497861519059084e-05, |
| "loss": 2.1877, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.41113275114657283, |
| "grad_norm": 0.4025135934352875, |
| "learning_rate": 6.49404669778722e-05, |
| "loss": 2.1432, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.411384054784193, |
| "grad_norm": 0.31426647305488586, |
| "learning_rate": 6.490230921303209e-05, |
| "loss": 2.3581, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.41163535842181315, |
| "grad_norm": 0.4263187348842621, |
| "learning_rate": 6.48641419204665e-05, |
| "loss": 2.0292, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.4118866620594333, |
| "grad_norm": 0.19534143805503845, |
| "learning_rate": 6.482596512457754e-05, |
| "loss": 1.0616, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.41213796569705347, |
| "grad_norm": 0.1499776840209961, |
| "learning_rate": 6.478777884977335e-05, |
| "loss": 0.5066, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.4123892693346736, |
| "grad_norm": 0.2703084647655487, |
| "learning_rate": 6.474958312046817e-05, |
| "loss": 2.4124, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.4126405729722938, |
| "grad_norm": 0.48154720664024353, |
| "learning_rate": 6.471137796108227e-05, |
| "loss": 2.4563, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.41289187660991394, |
| "grad_norm": 0.6079049706459045, |
| "learning_rate": 6.467316339604197e-05, |
| "loss": 2.4903, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.4131431802475341, |
| "grad_norm": 0.7089735865592957, |
| "learning_rate": 6.463493944977954e-05, |
| "loss": 2.7061, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.41339448388515426, |
| "grad_norm": 0.48337823152542114, |
| "learning_rate": 6.459670614673332e-05, |
| "loss": 1.8887, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.41364578752277437, |
| "grad_norm": 0.2442682683467865, |
| "learning_rate": 6.455846351134759e-05, |
| "loss": 2.4887, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.4138970911603945, |
| "grad_norm": 0.42603641748428345, |
| "learning_rate": 6.452021156807262e-05, |
| "loss": 2.0719, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.4141483947980147, |
| "grad_norm": 0.2770818769931793, |
| "learning_rate": 6.448195034136461e-05, |
| "loss": 1.7527, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.41439969843563484, |
| "grad_norm": 0.3582463562488556, |
| "learning_rate": 6.444367985568571e-05, |
| "loss": 2.3942, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.414651002073255, |
| "grad_norm": 0.31034761667251587, |
| "learning_rate": 6.4405400135504e-05, |
| "loss": 2.3178, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.41490230571087516, |
| "grad_norm": 0.19327189028263092, |
| "learning_rate": 6.436711120529343e-05, |
| "loss": 1.4941, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.4151536093484953, |
| "grad_norm": 0.47710877656936646, |
| "learning_rate": 6.432881308953386e-05, |
| "loss": 1.9341, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.4154049129861155, |
| "grad_norm": 0.44465112686157227, |
| "learning_rate": 6.429050581271105e-05, |
| "loss": 2.5935, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.41565621662373564, |
| "grad_norm": 0.5776760578155518, |
| "learning_rate": 6.425218939931654e-05, |
| "loss": 2.3612, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.4159075202613558, |
| "grad_norm": 0.14499108493328094, |
| "learning_rate": 6.42138638738478e-05, |
| "loss": 1.0138, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.41615882389897596, |
| "grad_norm": 0.4578950107097626, |
| "learning_rate": 6.417552926080806e-05, |
| "loss": 2.4257, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.4164101275365961, |
| "grad_norm": 0.4383941888809204, |
| "learning_rate": 6.413718558470639e-05, |
| "loss": 2.303, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.4166614311742162, |
| "grad_norm": 0.24252034723758698, |
| "learning_rate": 6.409883287005767e-05, |
| "loss": 2.3268, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.4169127348118364, |
| "grad_norm": 0.6126996874809265, |
| "learning_rate": 6.406047114138252e-05, |
| "loss": 2.0916, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.41716403844945654, |
| "grad_norm": 0.49406373500823975, |
| "learning_rate": 6.402210042320733e-05, |
| "loss": 1.8098, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.4174153420870767, |
| "grad_norm": 0.4942653179168701, |
| "learning_rate": 6.398372074006428e-05, |
| "loss": 1.6953, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.41766664572469686, |
| "grad_norm": 0.2597447335720062, |
| "learning_rate": 6.394533211649122e-05, |
| "loss": 2.0503, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.417917949362317, |
| "grad_norm": 0.4600328207015991, |
| "learning_rate": 6.390693457703177e-05, |
| "loss": 2.0682, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.4181692529999372, |
| "grad_norm": 0.3954939842224121, |
| "learning_rate": 6.386852814623522e-05, |
| "loss": 2.239, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.41842055663755734, |
| "grad_norm": 0.36097919940948486, |
| "learning_rate": 6.383011284865654e-05, |
| "loss": 2.4497, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.4186718602751775, |
| "grad_norm": 0.40833091735839844, |
| "learning_rate": 6.37916887088564e-05, |
| "loss": 2.2907, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.41892316391279766, |
| "grad_norm": 0.41030532121658325, |
| "learning_rate": 6.375325575140108e-05, |
| "loss": 2.2066, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.4191744675504178, |
| "grad_norm": 0.43152961134910583, |
| "learning_rate": 6.371481400086254e-05, |
| "loss": 2.0852, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.4194257711880379, |
| "grad_norm": 0.3646620810031891, |
| "learning_rate": 6.367636348181835e-05, |
| "loss": 1.9575, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.4196770748256581, |
| "grad_norm": 0.39278149604797363, |
| "learning_rate": 6.363790421885165e-05, |
| "loss": 2.0296, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.41992837846327824, |
| "grad_norm": 0.26738765835762024, |
| "learning_rate": 6.359943623655122e-05, |
| "loss": 2.2653, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.4201796821008984, |
| "grad_norm": 0.41789373755455017, |
| "learning_rate": 6.35609595595114e-05, |
| "loss": 2.3384, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.42043098573851856, |
| "grad_norm": 0.4324728846549988, |
| "learning_rate": 6.352247421233207e-05, |
| "loss": 1.8498, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.4206822893761387, |
| "grad_norm": 0.40609726309776306, |
| "learning_rate": 6.348398021961868e-05, |
| "loss": 2.4486, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.4209335930137589, |
| "grad_norm": 0.36156487464904785, |
| "learning_rate": 6.344547760598217e-05, |
| "loss": 2.2026, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.42118489665137904, |
| "grad_norm": 0.4976545572280884, |
| "learning_rate": 6.340696639603905e-05, |
| "loss": 2.1005, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.4214362002889992, |
| "grad_norm": 0.25209322571754456, |
| "learning_rate": 6.336844661441126e-05, |
| "loss": 1.9217, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.42168750392661936, |
| "grad_norm": 0.22021417319774628, |
| "learning_rate": 6.332991828572627e-05, |
| "loss": 1.8579, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.4219388075642395, |
| "grad_norm": 0.24054262042045593, |
| "learning_rate": 6.329138143461698e-05, |
| "loss": 1.9387, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.4221901112018597, |
| "grad_norm": 0.19225017726421356, |
| "learning_rate": 6.325283608572178e-05, |
| "loss": 1.4229, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.4224414148394798, |
| "grad_norm": 0.3207547962665558, |
| "learning_rate": 6.321428226368444e-05, |
| "loss": 2.6563, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.42269271847709994, |
| "grad_norm": 0.6084730625152588, |
| "learning_rate": 6.31757199931542e-05, |
| "loss": 2.1161, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.4229440221147201, |
| "grad_norm": 0.4410717487335205, |
| "learning_rate": 6.313714929878566e-05, |
| "loss": 2.3267, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.42319532575234026, |
| "grad_norm": 0.36312195658683777, |
| "learning_rate": 6.309857020523884e-05, |
| "loss": 2.1917, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.4234466293899604, |
| "grad_norm": 0.3438815176486969, |
| "learning_rate": 6.305998273717909e-05, |
| "loss": 2.3029, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.4236979330275806, |
| "grad_norm": 0.39790135622024536, |
| "learning_rate": 6.302138691927715e-05, |
| "loss": 2.2402, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.42394923666520073, |
| "grad_norm": 0.6604608297348022, |
| "learning_rate": 6.29827827762091e-05, |
| "loss": 2.0682, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.4242005403028209, |
| "grad_norm": 0.39813148975372314, |
| "learning_rate": 6.29441703326563e-05, |
| "loss": 2.1918, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.42445184394044105, |
| "grad_norm": 0.2750075161457062, |
| "learning_rate": 6.290554961330546e-05, |
| "loss": 2.3667, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.4247031475780612, |
| "grad_norm": 0.7254701852798462, |
| "learning_rate": 6.286692064284858e-05, |
| "loss": 2.1079, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4249544512156814, |
| "grad_norm": 0.3623849153518677, |
| "learning_rate": 6.282828344598289e-05, |
| "loss": 2.2025, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.4252057548533015, |
| "grad_norm": 0.498189240694046, |
| "learning_rate": 6.278963804741092e-05, |
| "loss": 1.854, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.42545705849092164, |
| "grad_norm": 0.4776882231235504, |
| "learning_rate": 6.275098447184045e-05, |
| "loss": 1.931, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.4257083621285418, |
| "grad_norm": 0.3936712145805359, |
| "learning_rate": 6.271232274398447e-05, |
| "loss": 2.1005, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.42595966576616195, |
| "grad_norm": 0.3415871262550354, |
| "learning_rate": 6.267365288856117e-05, |
| "loss": 2.1024, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.4262109694037821, |
| "grad_norm": 0.40847861766815186, |
| "learning_rate": 6.263497493029396e-05, |
| "loss": 2.3391, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.4264622730414023, |
| "grad_norm": 0.38105782866477966, |
| "learning_rate": 6.259628889391142e-05, |
| "loss": 2.3127, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.42671357667902243, |
| "grad_norm": 0.5018170475959778, |
| "learning_rate": 6.255759480414732e-05, |
| "loss": 2.2537, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.4269648803166426, |
| "grad_norm": 0.2998669445514679, |
| "learning_rate": 6.25188926857405e-05, |
| "loss": 2.0985, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.42721618395426275, |
| "grad_norm": 0.17855164408683777, |
| "learning_rate": 6.248018256343504e-05, |
| "loss": 0.5861, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4274674875918829, |
| "grad_norm": 0.3114035725593567, |
| "learning_rate": 6.244146446198006e-05, |
| "loss": 2.144, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.42771879122950307, |
| "grad_norm": 0.3799639940261841, |
| "learning_rate": 6.240273840612981e-05, |
| "loss": 2.4188, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.4279700948671232, |
| "grad_norm": 0.5280464291572571, |
| "learning_rate": 6.236400442064363e-05, |
| "loss": 2.1123, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.42822139850474333, |
| "grad_norm": 0.237023264169693, |
| "learning_rate": 6.232526253028593e-05, |
| "loss": 1.9309, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.4284727021423635, |
| "grad_norm": 0.3898535668849945, |
| "learning_rate": 6.228651275982614e-05, |
| "loss": 1.7865, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.42872400577998365, |
| "grad_norm": 2.0331013202667236, |
| "learning_rate": 6.224775513403877e-05, |
| "loss": 1.8481, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.4289753094176038, |
| "grad_norm": 0.3832615613937378, |
| "learning_rate": 6.220898967770334e-05, |
| "loss": 2.2707, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.42922661305522397, |
| "grad_norm": 0.3408937454223633, |
| "learning_rate": 6.217021641560436e-05, |
| "loss": 2.4584, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.42947791669284413, |
| "grad_norm": 0.49640515446662903, |
| "learning_rate": 6.213143537253134e-05, |
| "loss": 2.2002, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.4297292203304643, |
| "grad_norm": 0.33837461471557617, |
| "learning_rate": 6.20926465732788e-05, |
| "loss": 2.3568, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.42998052396808445, |
| "grad_norm": 0.5988655686378479, |
| "learning_rate": 6.205385004264616e-05, |
| "loss": 2.2734, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.4302318276057046, |
| "grad_norm": 0.5065860152244568, |
| "learning_rate": 6.201504580543782e-05, |
| "loss": 2.4239, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.43048313124332477, |
| "grad_norm": 0.5038727521896362, |
| "learning_rate": 6.197623388646308e-05, |
| "loss": 1.9147, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.4307344348809449, |
| "grad_norm": 0.3687746822834015, |
| "learning_rate": 6.19374143105362e-05, |
| "loss": 2.6965, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.43098573851856503, |
| "grad_norm": 0.39210835099220276, |
| "learning_rate": 6.189858710247629e-05, |
| "loss": 2.1168, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.4312370421561852, |
| "grad_norm": 0.5022109746932983, |
| "learning_rate": 6.185975228710734e-05, |
| "loss": 2.3949, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.43148834579380535, |
| "grad_norm": 0.43233370780944824, |
| "learning_rate": 6.182090988925823e-05, |
| "loss": 2.0664, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.4317396494314255, |
| "grad_norm": 0.2932543456554413, |
| "learning_rate": 6.178205993376268e-05, |
| "loss": 1.8359, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.43199095306904567, |
| "grad_norm": 0.2603437602519989, |
| "learning_rate": 6.174320244545922e-05, |
| "loss": 2.1517, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.43224225670666583, |
| "grad_norm": 0.398425817489624, |
| "learning_rate": 6.170433744919123e-05, |
| "loss": 1.9729, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.432493560344286, |
| "grad_norm": 0.44565537571907043, |
| "learning_rate": 6.166546496980688e-05, |
| "loss": 1.9102, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.43274486398190615, |
| "grad_norm": 0.38102757930755615, |
| "learning_rate": 6.16265850321591e-05, |
| "loss": 2.265, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.4329961676195263, |
| "grad_norm": 0.40929126739501953, |
| "learning_rate": 6.158769766110561e-05, |
| "loss": 2.4499, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.43324747125714647, |
| "grad_norm": 0.4028054475784302, |
| "learning_rate": 6.154880288150888e-05, |
| "loss": 2.4045, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.4334987748947666, |
| "grad_norm": 0.514401912689209, |
| "learning_rate": 6.15099007182361e-05, |
| "loss": 2.2548, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.43375007853238673, |
| "grad_norm": 0.42896950244903564, |
| "learning_rate": 6.147099119615923e-05, |
| "loss": 2.1909, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.4340013821700069, |
| "grad_norm": 0.3507866859436035, |
| "learning_rate": 6.143207434015487e-05, |
| "loss": 2.3633, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.43425268580762705, |
| "grad_norm": 0.26029667258262634, |
| "learning_rate": 6.139315017510437e-05, |
| "loss": 1.9713, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.4345039894452472, |
| "grad_norm": 0.5305449962615967, |
| "learning_rate": 6.135421872589369e-05, |
| "loss": 2.1603, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.43475529308286737, |
| "grad_norm": 0.39215922355651855, |
| "learning_rate": 6.13152800174135e-05, |
| "loss": 2.0211, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4350065967204875, |
| "grad_norm": 0.4130334258079529, |
| "learning_rate": 6.127633407455909e-05, |
| "loss": 2.0349, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.4352579003581077, |
| "grad_norm": 0.4265572428703308, |
| "learning_rate": 6.123738092223036e-05, |
| "loss": 1.9254, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.43550920399572784, |
| "grad_norm": 0.43121498823165894, |
| "learning_rate": 6.119842058533185e-05, |
| "loss": 2.0005, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.435760507633348, |
| "grad_norm": 0.4759058952331543, |
| "learning_rate": 6.115945308877269e-05, |
| "loss": 2.1132, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.43601181127096816, |
| "grad_norm": 0.278901606798172, |
| "learning_rate": 6.112047845746654e-05, |
| "loss": 2.3716, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.4362631149085883, |
| "grad_norm": 0.2603948414325714, |
| "learning_rate": 6.10814967163317e-05, |
| "loss": 1.7418, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.4365144185462085, |
| "grad_norm": 0.4486573338508606, |
| "learning_rate": 6.104250789029096e-05, |
| "loss": 2.3385, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.4367657221838286, |
| "grad_norm": 0.23672929406166077, |
| "learning_rate": 6.1003512004271634e-05, |
| "loss": 2.1366, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.43701702582144875, |
| "grad_norm": 0.31534305214881897, |
| "learning_rate": 6.09645090832056e-05, |
| "loss": 1.7805, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.4372683294590689, |
| "grad_norm": 0.34337756037712097, |
| "learning_rate": 6.0925499152029175e-05, |
| "loss": 2.5157, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.43751963309668906, |
| "grad_norm": 0.4734466075897217, |
| "learning_rate": 6.0886482235683195e-05, |
| "loss": 1.9865, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.4377709367343092, |
| "grad_norm": 0.49610087275505066, |
| "learning_rate": 6.084745835911296e-05, |
| "loss": 1.8706, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.4380222403719294, |
| "grad_norm": 0.2000686675310135, |
| "learning_rate": 6.080842754726821e-05, |
| "loss": 2.1542, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.43827354400954954, |
| "grad_norm": 0.3884282410144806, |
| "learning_rate": 6.076938982510312e-05, |
| "loss": 2.4855, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.4385248476471697, |
| "grad_norm": 0.2191421538591385, |
| "learning_rate": 6.0730345217576276e-05, |
| "loss": 1.9066, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.43877615128478986, |
| "grad_norm": 0.37026357650756836, |
| "learning_rate": 6.06912937496507e-05, |
| "loss": 2.5894, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.43902745492241, |
| "grad_norm": 0.32741832733154297, |
| "learning_rate": 6.065223544629375e-05, |
| "loss": 2.3304, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.4392787585600302, |
| "grad_norm": 0.46646368503570557, |
| "learning_rate": 6.0613170332477185e-05, |
| "loss": 2.392, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.4395300621976503, |
| "grad_norm": 0.2375309318304062, |
| "learning_rate": 6.057409843317713e-05, |
| "loss": 1.1848, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.43978136583527044, |
| "grad_norm": 0.6087144613265991, |
| "learning_rate": 6.0535019773374014e-05, |
| "loss": 2.0747, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.4400326694728906, |
| "grad_norm": 0.33546391129493713, |
| "learning_rate": 6.04959343780526e-05, |
| "loss": 2.0869, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.44028397311051076, |
| "grad_norm": 0.4496229887008667, |
| "learning_rate": 6.0456842272201974e-05, |
| "loss": 2.2199, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.4405352767481309, |
| "grad_norm": 0.3253905773162842, |
| "learning_rate": 6.04177434808155e-05, |
| "loss": 2.2447, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.4407865803857511, |
| "grad_norm": 0.3203786313533783, |
| "learning_rate": 6.037863802889082e-05, |
| "loss": 2.1416, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.44103788402337124, |
| "grad_norm": 0.38746264576911926, |
| "learning_rate": 6.033952594142983e-05, |
| "loss": 2.1872, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.4412891876609914, |
| "grad_norm": 0.4595598578453064, |
| "learning_rate": 6.030040724343866e-05, |
| "loss": 2.002, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.44154049129861156, |
| "grad_norm": 0.28612473607063293, |
| "learning_rate": 6.0261281959927694e-05, |
| "loss": 2.1546, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.4417917949362317, |
| "grad_norm": 4.791689872741699, |
| "learning_rate": 6.022215011591148e-05, |
| "loss": 2.2476, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.4420430985738519, |
| "grad_norm": 0.5441824793815613, |
| "learning_rate": 6.0183011736408825e-05, |
| "loss": 2.5157, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.44229440221147204, |
| "grad_norm": 0.42228463292121887, |
| "learning_rate": 6.014386684644265e-05, |
| "loss": 2.3466, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.44254570584909214, |
| "grad_norm": 0.3516453504562378, |
| "learning_rate": 6.01047154710401e-05, |
| "loss": 2.0042, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.4427970094867123, |
| "grad_norm": 0.34274590015411377, |
| "learning_rate": 6.006555763523239e-05, |
| "loss": 2.2938, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.44304831312433246, |
| "grad_norm": 0.36631983518600464, |
| "learning_rate": 6.0026393364054957e-05, |
| "loss": 2.131, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.4432996167619526, |
| "grad_norm": 0.3376319110393524, |
| "learning_rate": 5.998722268254726e-05, |
| "loss": 1.9654, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.4435509203995728, |
| "grad_norm": 0.27467769384384155, |
| "learning_rate": 5.994804561575294e-05, |
| "loss": 1.5135, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.44380222403719294, |
| "grad_norm": 0.5476408004760742, |
| "learning_rate": 5.990886218871965e-05, |
| "loss": 2.0412, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.4440535276748131, |
| "grad_norm": 0.4842160940170288, |
| "learning_rate": 5.986967242649916e-05, |
| "loss": 2.4164, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.44430483131243326, |
| "grad_norm": 0.2067529559135437, |
| "learning_rate": 5.983047635414726e-05, |
| "loss": 0.7425, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.4445561349500534, |
| "grad_norm": 0.4682113826274872, |
| "learning_rate": 5.9791273996723785e-05, |
| "loss": 2.1776, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.4448074385876736, |
| "grad_norm": 0.5939032435417175, |
| "learning_rate": 5.975206537929259e-05, |
| "loss": 2.251, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.44505874222529374, |
| "grad_norm": 0.4370846748352051, |
| "learning_rate": 5.971285052692155e-05, |
| "loss": 2.518, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.44531004586291384, |
| "grad_norm": 0.42708754539489746, |
| "learning_rate": 5.967362946468248e-05, |
| "loss": 1.8735, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.445561349500534, |
| "grad_norm": 0.3979972302913666, |
| "learning_rate": 5.96344022176512e-05, |
| "loss": 2.3454, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.44581265313815416, |
| "grad_norm": 0.33092716336250305, |
| "learning_rate": 5.95951688109075e-05, |
| "loss": 2.4123, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.4460639567757743, |
| "grad_norm": 0.4420959949493408, |
| "learning_rate": 5.955592926953505e-05, |
| "loss": 1.9745, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.4463152604133945, |
| "grad_norm": 0.21554313600063324, |
| "learning_rate": 5.951668361862149e-05, |
| "loss": 0.8182, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.44656656405101464, |
| "grad_norm": 0.4098570644855499, |
| "learning_rate": 5.947743188325837e-05, |
| "loss": 1.7592, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.4468178676886348, |
| "grad_norm": 0.2512660324573517, |
| "learning_rate": 5.94381740885411e-05, |
| "loss": 1.8342, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.44706917132625495, |
| "grad_norm": 0.49591559171676636, |
| "learning_rate": 5.939891025956896e-05, |
| "loss": 2.1854, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.4473204749638751, |
| "grad_norm": 0.5562049746513367, |
| "learning_rate": 5.935964042144515e-05, |
| "loss": 2.0548, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.4475717786014953, |
| "grad_norm": 0.2882930338382721, |
| "learning_rate": 5.932036459927662e-05, |
| "loss": 1.7693, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.44782308223911543, |
| "grad_norm": 0.4819808304309845, |
| "learning_rate": 5.928108281817422e-05, |
| "loss": 2.1611, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.4480743858767356, |
| "grad_norm": 0.4501156210899353, |
| "learning_rate": 5.924179510325258e-05, |
| "loss": 1.9628, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.4483256895143557, |
| "grad_norm": 0.43177562952041626, |
| "learning_rate": 5.920250147963013e-05, |
| "loss": 1.9167, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.44857699315197586, |
| "grad_norm": 0.398242712020874, |
| "learning_rate": 5.916320197242905e-05, |
| "loss": 2.2221, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.448828296789596, |
| "grad_norm": 1.2034392356872559, |
| "learning_rate": 5.912389660677533e-05, |
| "loss": 2.3411, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.4490796004272162, |
| "grad_norm": 0.48686063289642334, |
| "learning_rate": 5.908458540779868e-05, |
| "loss": 1.9525, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.44933090406483633, |
| "grad_norm": 0.22618024051189423, |
| "learning_rate": 5.9045268400632524e-05, |
| "loss": 1.9921, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.4495822077024565, |
| "grad_norm": 0.45006734132766724, |
| "learning_rate": 5.9005945610414036e-05, |
| "loss": 2.3134, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.44983351134007665, |
| "grad_norm": 0.22294217348098755, |
| "learning_rate": 5.8966617062284066e-05, |
| "loss": 2.1551, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.4500848149776968, |
| "grad_norm": 0.29651448130607605, |
| "learning_rate": 5.8927282781387147e-05, |
| "loss": 2.3817, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.45033611861531697, |
| "grad_norm": 0.23336410522460938, |
| "learning_rate": 5.888794279287146e-05, |
| "loss": 2.0842, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.45058742225293713, |
| "grad_norm": 0.5561927556991577, |
| "learning_rate": 5.884859712188887e-05, |
| "loss": 2.1477, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.4508387258905573, |
| "grad_norm": 0.537323534488678, |
| "learning_rate": 5.880924579359486e-05, |
| "loss": 2.0926, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.4510900295281774, |
| "grad_norm": 0.47504621744155884, |
| "learning_rate": 5.8769888833148514e-05, |
| "loss": 2.1737, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.45134133316579755, |
| "grad_norm": 0.3701205551624298, |
| "learning_rate": 5.8730526265712535e-05, |
| "loss": 2.5034, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.4515926368034177, |
| "grad_norm": 0.2942044138908386, |
| "learning_rate": 5.869115811645322e-05, |
| "loss": 2.1007, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.4518439404410379, |
| "grad_norm": 0.3349366784095764, |
| "learning_rate": 5.86517844105404e-05, |
| "loss": 1.9833, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.45209524407865803, |
| "grad_norm": 0.41249701380729675, |
| "learning_rate": 5.861240517314749e-05, |
| "loss": 2.287, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.4523465477162782, |
| "grad_norm": 0.2055690586566925, |
| "learning_rate": 5.8573020429451455e-05, |
| "loss": 2.4701, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.45259785135389835, |
| "grad_norm": 0.400979220867157, |
| "learning_rate": 5.8533630204632704e-05, |
| "loss": 2.6295, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.4528491549915185, |
| "grad_norm": 0.45950108766555786, |
| "learning_rate": 5.8494234523875234e-05, |
| "loss": 2.2317, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.45310045862913867, |
| "grad_norm": 0.3457741439342499, |
| "learning_rate": 5.84548334123665e-05, |
| "loss": 2.4107, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.45335176226675883, |
| "grad_norm": 0.5182722806930542, |
| "learning_rate": 5.8415426895297434e-05, |
| "loss": 2.4685, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.453603065904379, |
| "grad_norm": 0.4126606583595276, |
| "learning_rate": 5.837601499786239e-05, |
| "loss": 2.6861, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.45385436954199915, |
| "grad_norm": 0.4424208998680115, |
| "learning_rate": 5.8336597745259226e-05, |
| "loss": 2.191, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.45410567317961925, |
| "grad_norm": 0.5643180012702942, |
| "learning_rate": 5.8297175162689164e-05, |
| "loss": 2.5079, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.4543569768172394, |
| "grad_norm": 0.351386159658432, |
| "learning_rate": 5.825774727535688e-05, |
| "loss": 2.2525, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.45460828045485957, |
| "grad_norm": 0.5042213201522827, |
| "learning_rate": 5.8218314108470385e-05, |
| "loss": 2.1878, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.45485958409247973, |
| "grad_norm": 0.4025678038597107, |
| "learning_rate": 5.817887568724113e-05, |
| "loss": 2.2556, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4551108877300999, |
| "grad_norm": 0.5732985138893127, |
| "learning_rate": 5.8139432036883875e-05, |
| "loss": 1.8569, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.45536219136772005, |
| "grad_norm": 0.35006287693977356, |
| "learning_rate": 5.809998318261677e-05, |
| "loss": 1.04, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.4556134950053402, |
| "grad_norm": 0.4501281678676605, |
| "learning_rate": 5.806052914966124e-05, |
| "loss": 2.293, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.45586479864296037, |
| "grad_norm": 0.3089803457260132, |
| "learning_rate": 5.802106996324206e-05, |
| "loss": 2.4766, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.4561161022805805, |
| "grad_norm": 0.38950660824775696, |
| "learning_rate": 5.7981605648587264e-05, |
| "loss": 1.977, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.4563674059182007, |
| "grad_norm": 0.3673931062221527, |
| "learning_rate": 5.7942136230928226e-05, |
| "loss": 2.0008, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.45661870955582085, |
| "grad_norm": 0.378292441368103, |
| "learning_rate": 5.790266173549951e-05, |
| "loss": 2.3145, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.45687001319344095, |
| "grad_norm": 0.1496533304452896, |
| "learning_rate": 5.786318218753898e-05, |
| "loss": 1.2677, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.4571213168310611, |
| "grad_norm": 0.1584922969341278, |
| "learning_rate": 5.78236976122877e-05, |
| "loss": 0.6397, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.45737262046868127, |
| "grad_norm": 0.37663987278938293, |
| "learning_rate": 5.778420803498995e-05, |
| "loss": 2.0862, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.4576239241063014, |
| "grad_norm": 0.34862029552459717, |
| "learning_rate": 5.774471348089323e-05, |
| "loss": 1.7093, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.4578752277439216, |
| "grad_norm": 0.3889473080635071, |
| "learning_rate": 5.7705213975248216e-05, |
| "loss": 2.127, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.45812653138154175, |
| "grad_norm": 0.5065658092498779, |
| "learning_rate": 5.766570954330872e-05, |
| "loss": 2.1475, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.4583778350191619, |
| "grad_norm": 0.3527318239212036, |
| "learning_rate": 5.7626200210331746e-05, |
| "loss": 2.1492, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.45862913865678206, |
| "grad_norm": 0.39478063583374023, |
| "learning_rate": 5.75866860015774e-05, |
| "loss": 2.1778, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.4588804422944022, |
| "grad_norm": 0.5078315734863281, |
| "learning_rate": 5.7547166942308925e-05, |
| "loss": 2.2873, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.4591317459320224, |
| "grad_norm": 0.46022409200668335, |
| "learning_rate": 5.750764305779265e-05, |
| "loss": 2.1011, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.45938304956964254, |
| "grad_norm": 0.4178464114665985, |
| "learning_rate": 5.7468114373297995e-05, |
| "loss": 2.2731, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.4596343532072627, |
| "grad_norm": 0.4607785940170288, |
| "learning_rate": 5.7428580914097465e-05, |
| "loss": 2.2106, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.4598856568448828, |
| "grad_norm": 0.518441915512085, |
| "learning_rate": 5.738904270546658e-05, |
| "loss": 2.5474, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.46013696048250297, |
| "grad_norm": 0.39153853058815, |
| "learning_rate": 5.7349499772683927e-05, |
| "loss": 2.2149, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.4603882641201231, |
| "grad_norm": 0.3753896653652191, |
| "learning_rate": 5.7309952141031095e-05, |
| "loss": 2.1979, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.4606395677577433, |
| "grad_norm": 0.4839867949485779, |
| "learning_rate": 5.727039983579271e-05, |
| "loss": 2.1297, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.46089087139536344, |
| "grad_norm": 0.4048977196216583, |
| "learning_rate": 5.7230842882256354e-05, |
| "loss": 2.5347, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.4611421750329836, |
| "grad_norm": 0.3666813373565674, |
| "learning_rate": 5.719128130571257e-05, |
| "loss": 2.4136, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.46139347867060376, |
| "grad_norm": 0.3380883038043976, |
| "learning_rate": 5.715171513145492e-05, |
| "loss": 1.8449, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.4616447823082239, |
| "grad_norm": 0.28998619318008423, |
| "learning_rate": 5.711214438477982e-05, |
| "loss": 2.1513, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.4618960859458441, |
| "grad_norm": 0.45006147027015686, |
| "learning_rate": 5.7072569090986675e-05, |
| "loss": 1.8252, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.46214738958346424, |
| "grad_norm": 0.39759132266044617, |
| "learning_rate": 5.703298927537777e-05, |
| "loss": 1.5816, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.4623986932210844, |
| "grad_norm": 0.3186738193035126, |
| "learning_rate": 5.699340496325828e-05, |
| "loss": 2.3996, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4626499968587045, |
| "grad_norm": 0.426439106464386, |
| "learning_rate": 5.695381617993626e-05, |
| "loss": 2.0965, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.46290130049632466, |
| "grad_norm": 0.291789710521698, |
| "learning_rate": 5.6914222950722626e-05, |
| "loss": 2.4023, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.4631526041339448, |
| "grad_norm": 0.4969780743122101, |
| "learning_rate": 5.687462530093115e-05, |
| "loss": 1.6636, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.463403907771565, |
| "grad_norm": 0.3465602993965149, |
| "learning_rate": 5.6835023255878384e-05, |
| "loss": 2.7812, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.46365521140918514, |
| "grad_norm": 0.30250221490859985, |
| "learning_rate": 5.6795416840883744e-05, |
| "loss": 2.0632, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.4639065150468053, |
| "grad_norm": 0.1709776073694229, |
| "learning_rate": 5.675580608126939e-05, |
| "loss": 1.5832, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.46415781868442546, |
| "grad_norm": 0.47186335921287537, |
| "learning_rate": 5.671619100236032e-05, |
| "loss": 2.7179, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.4644091223220456, |
| "grad_norm": 0.3553767204284668, |
| "learning_rate": 5.6676571629484244e-05, |
| "loss": 2.5521, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.4646604259596658, |
| "grad_norm": 0.41556620597839355, |
| "learning_rate": 5.663694798797162e-05, |
| "loss": 1.9523, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.46491172959728594, |
| "grad_norm": 0.4675963819026947, |
| "learning_rate": 5.659732010315567e-05, |
| "loss": 2.045, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4651630332349061, |
| "grad_norm": 0.41482940316200256, |
| "learning_rate": 5.655768800037229e-05, |
| "loss": 2.0328, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.46541433687252626, |
| "grad_norm": 0.4568209648132324, |
| "learning_rate": 5.6518051704960094e-05, |
| "loss": 1.8168, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.46566564051014636, |
| "grad_norm": 0.5529626607894897, |
| "learning_rate": 5.6478411242260376e-05, |
| "loss": 1.9009, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.4659169441477665, |
| "grad_norm": 0.5022344589233398, |
| "learning_rate": 5.643876663761709e-05, |
| "loss": 2.038, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.4661682477853867, |
| "grad_norm": 0.32788538932800293, |
| "learning_rate": 5.6399117916376844e-05, |
| "loss": 2.186, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.46641955142300684, |
| "grad_norm": 0.38160184025764465, |
| "learning_rate": 5.6359465103888854e-05, |
| "loss": 1.8572, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.466670855060627, |
| "grad_norm": 0.17872734367847443, |
| "learning_rate": 5.631980822550499e-05, |
| "loss": 0.8302, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.46692215869824716, |
| "grad_norm": 0.4570051431655884, |
| "learning_rate": 5.628014730657969e-05, |
| "loss": 1.8124, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.4671734623358673, |
| "grad_norm": 0.36893993616104126, |
| "learning_rate": 5.6240482372469995e-05, |
| "loss": 2.4174, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.4674247659734875, |
| "grad_norm": 0.4820000231266022, |
| "learning_rate": 5.62008134485355e-05, |
| "loss": 1.9411, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.46767606961110764, |
| "grad_norm": 0.2347617745399475, |
| "learning_rate": 5.616114056013835e-05, |
| "loss": 1.9671, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.4679273732487278, |
| "grad_norm": 0.381150484085083, |
| "learning_rate": 5.6121463732643255e-05, |
| "loss": 2.0366, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.46817867688634796, |
| "grad_norm": 0.6411362290382385, |
| "learning_rate": 5.6081782991417384e-05, |
| "loss": 2.3673, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.46842998052396806, |
| "grad_norm": 0.36828893423080444, |
| "learning_rate": 5.6042098361830464e-05, |
| "loss": 2.8204, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.4686812841615882, |
| "grad_norm": 0.31962066888809204, |
| "learning_rate": 5.600240986925469e-05, |
| "loss": 2.2475, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.4689325877992084, |
| "grad_norm": 0.3448081910610199, |
| "learning_rate": 5.5962717539064716e-05, |
| "loss": 2.1665, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.46918389143682854, |
| "grad_norm": 0.5330995917320251, |
| "learning_rate": 5.5923021396637656e-05, |
| "loss": 2.2013, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.4694351950744487, |
| "grad_norm": 0.4247521460056305, |
| "learning_rate": 5.588332146735307e-05, |
| "loss": 2.2438, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.46968649871206886, |
| "grad_norm": 0.4937816262245178, |
| "learning_rate": 5.584361777659293e-05, |
| "loss": 2.1838, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.469937802349689, |
| "grad_norm": 0.3027281165122986, |
| "learning_rate": 5.5803910349741615e-05, |
| "loss": 2.1061, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.4701891059873092, |
| "grad_norm": 0.40652328729629517, |
| "learning_rate": 5.5764199212185895e-05, |
| "loss": 1.9048, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.47044040962492933, |
| "grad_norm": 0.4994272291660309, |
| "learning_rate": 5.5724484389314904e-05, |
| "loss": 2.3435, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.4706917132625495, |
| "grad_norm": 0.38275086879730225, |
| "learning_rate": 5.568476590652014e-05, |
| "loss": 2.264, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.47094301690016965, |
| "grad_norm": 0.33458179235458374, |
| "learning_rate": 5.564504378919544e-05, |
| "loss": 2.2379, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.47119432053778976, |
| "grad_norm": 0.4976329207420349, |
| "learning_rate": 5.560531806273697e-05, |
| "loss": 2.2247, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.4714456241754099, |
| "grad_norm": 0.34130215644836426, |
| "learning_rate": 5.55655887525432e-05, |
| "loss": 2.3974, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.4716969278130301, |
| "grad_norm": 0.4960635304450989, |
| "learning_rate": 5.5525855884014867e-05, |
| "loss": 2.3234, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.47194823145065024, |
| "grad_norm": 0.31311216950416565, |
| "learning_rate": 5.548611948255502e-05, |
| "loss": 2.1189, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.4721995350882704, |
| "grad_norm": 0.3007585406303406, |
| "learning_rate": 5.5446379573568964e-05, |
| "loss": 2.2485, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.47245083872589055, |
| "grad_norm": 0.4978335499763489, |
| "learning_rate": 5.54066361824642e-05, |
| "loss": 2.2486, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4727021423635107, |
| "grad_norm": 0.3356104791164398, |
| "learning_rate": 5.536688933465053e-05, |
| "loss": 2.4258, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.4729534460011309, |
| "grad_norm": 0.3924019932746887, |
| "learning_rate": 5.532713905553989e-05, |
| "loss": 2.2866, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.47320474963875103, |
| "grad_norm": 0.5366889238357544, |
| "learning_rate": 5.5287385370546476e-05, |
| "loss": 2.3925, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.4734560532763712, |
| "grad_norm": 0.37187063694000244, |
| "learning_rate": 5.5247628305086596e-05, |
| "loss": 2.3975, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.47370735691399135, |
| "grad_norm": 0.5848695635795593, |
| "learning_rate": 5.520786788457879e-05, |
| "loss": 2.2758, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.4739586605516115, |
| "grad_norm": 0.359260231256485, |
| "learning_rate": 5.5168104134443674e-05, |
| "loss": 2.2616, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.4742099641892316, |
| "grad_norm": 0.25694742798805237, |
| "learning_rate": 5.512833708010404e-05, |
| "loss": 2.4823, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.4744612678268518, |
| "grad_norm": 0.4734945595264435, |
| "learning_rate": 5.508856674698479e-05, |
| "loss": 2.8068, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.47471257146447193, |
| "grad_norm": 0.2672096788883209, |
| "learning_rate": 5.5048793160512914e-05, |
| "loss": 2.4088, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.4749638751020921, |
| "grad_norm": 0.3386220932006836, |
| "learning_rate": 5.500901634611745e-05, |
| "loss": 2.0906, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.47521517873971225, |
| "grad_norm": 0.41815564036369324, |
| "learning_rate": 5.4969236329229544e-05, |
| "loss": 1.9785, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.4754664823773324, |
| "grad_norm": 0.5156980752944946, |
| "learning_rate": 5.492945313528237e-05, |
| "loss": 1.9482, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.47571778601495257, |
| "grad_norm": 0.5490496754646301, |
| "learning_rate": 5.488966678971115e-05, |
| "loss": 2.1706, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.47596908965257273, |
| "grad_norm": 0.47855961322784424, |
| "learning_rate": 5.484987731795309e-05, |
| "loss": 2.2599, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.4762203932901929, |
| "grad_norm": 0.31765252351760864, |
| "learning_rate": 5.481008474544742e-05, |
| "loss": 1.9829, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.47647169692781305, |
| "grad_norm": 0.3568671941757202, |
| "learning_rate": 5.477028909763535e-05, |
| "loss": 2.6977, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.4767230005654332, |
| "grad_norm": 0.6139686703681946, |
| "learning_rate": 5.473049039996005e-05, |
| "loss": 1.8471, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.4769743042030533, |
| "grad_norm": 0.31178557872772217, |
| "learning_rate": 5.469068867786663e-05, |
| "loss": 2.2934, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.47722560784067347, |
| "grad_norm": 0.20065036416053772, |
| "learning_rate": 5.465088395680216e-05, |
| "loss": 1.1206, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.47747691147829363, |
| "grad_norm": 0.5498302578926086, |
| "learning_rate": 5.461107626221561e-05, |
| "loss": 1.8837, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4777282151159138, |
| "grad_norm": 0.34776148200035095, |
| "learning_rate": 5.4571265619557846e-05, |
| "loss": 2.091, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.47797951875353395, |
| "grad_norm": 0.4100053906440735, |
| "learning_rate": 5.453145205428163e-05, |
| "loss": 2.5989, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.4782308223911541, |
| "grad_norm": 0.47446209192276, |
| "learning_rate": 5.4491635591841586e-05, |
| "loss": 2.3832, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.47848212602877427, |
| "grad_norm": 0.44983819127082825, |
| "learning_rate": 5.4451816257694185e-05, |
| "loss": 2.5454, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.4787334296663944, |
| "grad_norm": 0.3998521566390991, |
| "learning_rate": 5.441199407729777e-05, |
| "loss": 2.5369, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4789847333040146, |
| "grad_norm": 0.5979151725769043, |
| "learning_rate": 5.437216907611246e-05, |
| "loss": 2.1094, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.47923603694163475, |
| "grad_norm": 0.5320187211036682, |
| "learning_rate": 5.433234127960018e-05, |
| "loss": 2.1983, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.4794873405792549, |
| "grad_norm": 0.3964744210243225, |
| "learning_rate": 5.4292510713224676e-05, |
| "loss": 2.4393, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.47973864421687507, |
| "grad_norm": 0.271665096282959, |
| "learning_rate": 5.4252677402451435e-05, |
| "loss": 2.0589, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.47998994785449517, |
| "grad_norm": 0.40751466155052185, |
| "learning_rate": 5.42128413727477e-05, |
| "loss": 1.7317, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.48024125149211533, |
| "grad_norm": 0.28429216146469116, |
| "learning_rate": 5.417300264958248e-05, |
| "loss": 2.6466, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.4804925551297355, |
| "grad_norm": 0.5931762456893921, |
| "learning_rate": 5.413316125842647e-05, |
| "loss": 2.1482, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.48074385876735565, |
| "grad_norm": 0.3537997305393219, |
| "learning_rate": 5.409331722475211e-05, |
| "loss": 2.1424, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.4809951624049758, |
| "grad_norm": 0.3427460789680481, |
| "learning_rate": 5.405347057403346e-05, |
| "loss": 2.3135, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.48124646604259597, |
| "grad_norm": 0.542516827583313, |
| "learning_rate": 5.401362133174635e-05, |
| "loss": 2.2244, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.4814977696802161, |
| "grad_norm": 0.32623621821403503, |
| "learning_rate": 5.397376952336819e-05, |
| "loss": 1.7302, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.4817490733178363, |
| "grad_norm": 0.334738552570343, |
| "learning_rate": 5.3933915174378066e-05, |
| "loss": 2.1778, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.48200037695545644, |
| "grad_norm": 0.4472411572933197, |
| "learning_rate": 5.389405831025668e-05, |
| "loss": 2.1097, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.4822516805930766, |
| "grad_norm": 0.43921470642089844, |
| "learning_rate": 5.385419895648633e-05, |
| "loss": 2.196, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.48250298423069676, |
| "grad_norm": 0.4858810007572174, |
| "learning_rate": 5.381433713855094e-05, |
| "loss": 2.0108, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.48275428786831687, |
| "grad_norm": 0.5167921781539917, |
| "learning_rate": 5.3774472881935965e-05, |
| "loss": 2.342, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.483005591505937, |
| "grad_norm": 0.27940118312835693, |
| "learning_rate": 5.3734606212128446e-05, |
| "loss": 2.4511, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.4832568951435572, |
| "grad_norm": 0.2919957935810089, |
| "learning_rate": 5.369473715461698e-05, |
| "loss": 2.1458, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.48350819878117735, |
| "grad_norm": 0.43019670248031616, |
| "learning_rate": 5.365486573489166e-05, |
| "loss": 2.2083, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.4837595024187975, |
| "grad_norm": 0.39743998646736145, |
| "learning_rate": 5.361499197844409e-05, |
| "loss": 2.1078, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.48401080605641766, |
| "grad_norm": 0.33505332469940186, |
| "learning_rate": 5.3575115910767395e-05, |
| "loss": 2.1952, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.4842621096940378, |
| "grad_norm": 0.2203834056854248, |
| "learning_rate": 5.3535237557356146e-05, |
| "loss": 2.282, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.484513413331658, |
| "grad_norm": 0.7342590093612671, |
| "learning_rate": 5.349535694370641e-05, |
| "loss": 1.9875, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.48476471696927814, |
| "grad_norm": 0.3641369640827179, |
| "learning_rate": 5.345547409531568e-05, |
| "loss": 2.1203, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.4850160206068983, |
| "grad_norm": 2.032423973083496, |
| "learning_rate": 5.341558903768287e-05, |
| "loss": 2.0247, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.48526732424451846, |
| "grad_norm": 0.6103079319000244, |
| "learning_rate": 5.3375701796308305e-05, |
| "loss": 2.4254, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.4855186278821386, |
| "grad_norm": 0.40158334374427795, |
| "learning_rate": 5.3335812396693744e-05, |
| "loss": 2.0793, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.4857699315197587, |
| "grad_norm": 0.5189076066017151, |
| "learning_rate": 5.329592086434228e-05, |
| "loss": 1.7984, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.4860212351573789, |
| "grad_norm": 0.3564301133155823, |
| "learning_rate": 5.32560272247584e-05, |
| "loss": 2.0899, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.48627253879499904, |
| "grad_norm": 0.3177907168865204, |
| "learning_rate": 5.3216131503447895e-05, |
| "loss": 2.6728, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.4865238424326192, |
| "grad_norm": 0.5341473817825317, |
| "learning_rate": 5.3176233725917954e-05, |
| "loss": 2.4917, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.48677514607023936, |
| "grad_norm": 1.1832932233810425, |
| "learning_rate": 5.313633391767704e-05, |
| "loss": 1.5361, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.4870264497078595, |
| "grad_norm": 0.4320741295814514, |
| "learning_rate": 5.3096432104234895e-05, |
| "loss": 2.0366, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.4872777533454797, |
| "grad_norm": 0.347154825925827, |
| "learning_rate": 5.305652831110259e-05, |
| "loss": 1.8777, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.48752905698309984, |
| "grad_norm": 0.5214780569076538, |
| "learning_rate": 5.301662256379243e-05, |
| "loss": 1.953, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.48778036062072, |
| "grad_norm": 0.3790840804576874, |
| "learning_rate": 5.2976714887817965e-05, |
| "loss": 2.1505, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.48803166425834016, |
| "grad_norm": 0.4315270781517029, |
| "learning_rate": 5.293680530869403e-05, |
| "loss": 2.4113, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.4882829678959603, |
| "grad_norm": 0.48924243450164795, |
| "learning_rate": 5.289689385193659e-05, |
| "loss": 2.4325, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.4885342715335804, |
| "grad_norm": 0.3082104027271271, |
| "learning_rate": 5.285698054306289e-05, |
| "loss": 2.4431, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.4887855751712006, |
| "grad_norm": 0.24867354333400726, |
| "learning_rate": 5.2817065407591316e-05, |
| "loss": 1.9332, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.48903687880882074, |
| "grad_norm": 0.3941580653190613, |
| "learning_rate": 5.277714847104144e-05, |
| "loss": 2.3426, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.4892881824464409, |
| "grad_norm": 0.31942319869995117, |
| "learning_rate": 5.273722975893397e-05, |
| "loss": 2.0446, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.48953948608406106, |
| "grad_norm": 0.4005180299282074, |
| "learning_rate": 5.269730929679076e-05, |
| "loss": 2.8318, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.4897907897216812, |
| "grad_norm": 0.33907705545425415, |
| "learning_rate": 5.265738711013478e-05, |
| "loss": 2.6326, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.4900420933593014, |
| "grad_norm": 0.30913516879081726, |
| "learning_rate": 5.261746322449012e-05, |
| "loss": 2.0023, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.49029339699692154, |
| "grad_norm": 0.5829331874847412, |
| "learning_rate": 5.257753766538192e-05, |
| "loss": 2.2518, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.4905447006345417, |
| "grad_norm": 0.420886367559433, |
| "learning_rate": 5.25376104583364e-05, |
| "loss": 1.9362, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.49079600427216186, |
| "grad_norm": 0.3854474127292633, |
| "learning_rate": 5.249768162888088e-05, |
| "loss": 2.3414, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.491047307909782, |
| "grad_norm": 0.36064305901527405, |
| "learning_rate": 5.245775120254365e-05, |
| "loss": 2.0693, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.4912986115474022, |
| "grad_norm": 0.15027983486652374, |
| "learning_rate": 5.241781920485407e-05, |
| "loss": 0.7658, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.4915499151850223, |
| "grad_norm": 0.22711443901062012, |
| "learning_rate": 5.2377885661342466e-05, |
| "loss": 2.4119, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.49180121882264244, |
| "grad_norm": 0.525962769985199, |
| "learning_rate": 5.233795059754019e-05, |
| "loss": 2.5089, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.4920525224602626, |
| "grad_norm": 0.2939525544643402, |
| "learning_rate": 5.229801403897954e-05, |
| "loss": 2.0917, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.49230382609788276, |
| "grad_norm": 0.2676595151424408, |
| "learning_rate": 5.2258076011193765e-05, |
| "loss": 2.0566, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.4925551297355029, |
| "grad_norm": 0.38721850514411926, |
| "learning_rate": 5.22181365397171e-05, |
| "loss": 2.4625, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.4928064333731231, |
| "grad_norm": 0.4662513732910156, |
| "learning_rate": 5.217819565008465e-05, |
| "loss": 2.3879, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.49305773701074324, |
| "grad_norm": 0.3855404555797577, |
| "learning_rate": 5.2138253367832446e-05, |
| "loss": 1.8247, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.4933090406483634, |
| "grad_norm": 0.38118478655815125, |
| "learning_rate": 5.2098309718497405e-05, |
| "loss": 2.5317, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.49356034428598355, |
| "grad_norm": 0.483946293592453, |
| "learning_rate": 5.2058364727617325e-05, |
| "loss": 1.9472, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.4938116479236037, |
| "grad_norm": 0.32675084471702576, |
| "learning_rate": 5.2018418420730866e-05, |
| "loss": 2.0327, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.4940629515612239, |
| "grad_norm": 0.43735966086387634, |
| "learning_rate": 5.197847082337752e-05, |
| "loss": 2.3672, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.494314255198844, |
| "grad_norm": 0.43739423155784607, |
| "learning_rate": 5.193852196109761e-05, |
| "loss": 2.1745, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.49456555883646414, |
| "grad_norm": 0.24707311391830444, |
| "learning_rate": 5.189857185943224e-05, |
| "loss": 2.4274, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.4948168624740843, |
| "grad_norm": 0.2463517189025879, |
| "learning_rate": 5.185862054392337e-05, |
| "loss": 2.0382, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.49506816611170446, |
| "grad_norm": 0.38449257612228394, |
| "learning_rate": 5.181866804011367e-05, |
| "loss": 2.0812, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4953194697493246, |
| "grad_norm": 0.3080759644508362, |
| "learning_rate": 5.17787143735466e-05, |
| "loss": 1.702, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.4955707733869448, |
| "grad_norm": 0.5761748552322388, |
| "learning_rate": 5.173875956976637e-05, |
| "loss": 2.3456, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.49582207702456493, |
| "grad_norm": 0.5399059653282166, |
| "learning_rate": 5.1698803654317904e-05, |
| "loss": 2.0914, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.4960733806621851, |
| "grad_norm": 0.317590594291687, |
| "learning_rate": 5.165884665274683e-05, |
| "loss": 2.6376, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.49632468429980525, |
| "grad_norm": 0.2823307514190674, |
| "learning_rate": 5.161888859059949e-05, |
| "loss": 1.6927, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.4965759879374254, |
| "grad_norm": 0.45718204975128174, |
| "learning_rate": 5.157892949342289e-05, |
| "loss": 2.3115, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.49682729157504557, |
| "grad_norm": 0.42325839400291443, |
| "learning_rate": 5.15389693867647e-05, |
| "loss": 2.0753, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.49707859521266573, |
| "grad_norm": 0.34479397535324097, |
| "learning_rate": 5.149900829617325e-05, |
| "loss": 2.3803, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.49732989885028583, |
| "grad_norm": 0.40837740898132324, |
| "learning_rate": 5.145904624719746e-05, |
| "loss": 1.6998, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.497581202487906, |
| "grad_norm": 0.5791382193565369, |
| "learning_rate": 5.141908326538691e-05, |
| "loss": 2.0878, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.49783250612552615, |
| "grad_norm": 0.4563978612422943, |
| "learning_rate": 5.137911937629176e-05, |
| "loss": 1.9045, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.4980838097631463, |
| "grad_norm": 0.6332088708877563, |
| "learning_rate": 5.1339154605462734e-05, |
| "loss": 2.5065, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.49833511340076647, |
| "grad_norm": 0.3342919945716858, |
| "learning_rate": 5.129918897845114e-05, |
| "loss": 2.4884, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.49858641703838663, |
| "grad_norm": 0.24111764132976532, |
| "learning_rate": 5.125922252080881e-05, |
| "loss": 2.1311, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.4988377206760068, |
| "grad_norm": 0.39446836709976196, |
| "learning_rate": 5.121925525808815e-05, |
| "loss": 2.2891, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.49908902431362695, |
| "grad_norm": 0.3553325831890106, |
| "learning_rate": 5.117928721584202e-05, |
| "loss": 1.9434, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.4993403279512471, |
| "grad_norm": 0.20506922900676727, |
| "learning_rate": 5.1139318419623825e-05, |
| "loss": 0.9382, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.49959163158886727, |
| "grad_norm": 0.2444799542427063, |
| "learning_rate": 5.1099348894987445e-05, |
| "loss": 2.1564, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.49984293522648743, |
| "grad_norm": 0.25949880480766296, |
| "learning_rate": 5.105937866748721e-05, |
| "loss": 2.593, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.5000942388641075, |
| "grad_norm": 0.3917735517024994, |
| "learning_rate": 5.1019407762677906e-05, |
| "loss": 2.3575, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5003455425017277, |
| "grad_norm": 0.49749505519866943, |
| "learning_rate": 5.097943620611476e-05, |
| "loss": 1.9251, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.5005968461393479, |
| "grad_norm": 0.3024258017539978, |
| "learning_rate": 5.0939464023353414e-05, |
| "loss": 2.1926, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.500848149776968, |
| "grad_norm": 0.5735132694244385, |
| "learning_rate": 5.089949123994989e-05, |
| "loss": 2.2119, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.5010994534145882, |
| "grad_norm": 0.3834688365459442, |
| "learning_rate": 5.0859517881460625e-05, |
| "loss": 2.7961, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.5013507570522083, |
| "grad_norm": 0.3446560800075531, |
| "learning_rate": 5.0819543973442404e-05, |
| "loss": 2.3877, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.5016020606898285, |
| "grad_norm": 0.4088892638683319, |
| "learning_rate": 5.0779569541452374e-05, |
| "loss": 2.0748, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.5018533643274486, |
| "grad_norm": 0.4269651770591736, |
| "learning_rate": 5.0739594611048004e-05, |
| "loss": 2.0912, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.5021046679650688, |
| "grad_norm": 0.27764561772346497, |
| "learning_rate": 5.0699619207787086e-05, |
| "loss": 2.3711, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.502355971602689, |
| "grad_norm": 0.3662841320037842, |
| "learning_rate": 5.0659643357227726e-05, |
| "loss": 2.3661, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.5026072752403091, |
| "grad_norm": 0.3839915990829468, |
| "learning_rate": 5.06196670849283e-05, |
| "loss": 2.2995, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 3979, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.970888917349206e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|