{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1358,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014727540500736377,
      "grad_norm": 4.94458948599831,
      "learning_rate": 9.999986620495792e-06,
      "loss": 0.3378,
      "step": 1
    },
    {
      "epoch": 0.0029455081001472753,
      "grad_norm": 4.166056018399312,
      "learning_rate": 9.999946482054771e-06,
      "loss": 0.3049,
      "step": 2
    },
    {
      "epoch": 0.004418262150220913,
      "grad_norm": 3.840790925563368,
      "learning_rate": 9.999879584891754e-06,
      "loss": 0.3079,
      "step": 3
    },
    {
      "epoch": 0.005891016200294551,
      "grad_norm": 3.4823512343952325,
      "learning_rate": 9.999785929364756e-06,
      "loss": 0.278,
      "step": 4
    },
    {
      "epoch": 0.007363770250368188,
      "grad_norm": 3.6374829783923586,
      "learning_rate": 9.999665515975005e-06,
      "loss": 0.2717,
      "step": 5
    },
    {
      "epoch": 0.008836524300441826,
      "grad_norm": 3.8692984807597726,
      "learning_rate": 9.999518345366933e-06,
      "loss": 0.3032,
      "step": 6
    },
    {
      "epoch": 0.010309278350515464,
      "grad_norm": 3.246353468882525,
      "learning_rate": 9.999344418328161e-06,
      "loss": 0.2181,
      "step": 7
    },
    {
      "epoch": 0.011782032400589101,
      "grad_norm": 3.3637880240853746,
      "learning_rate": 9.999143735789518e-06,
      "loss": 0.2504,
      "step": 8
    },
    {
      "epoch": 0.013254786450662739,
      "grad_norm": 2.84021111636833,
      "learning_rate": 9.998916298825015e-06,
      "loss": 0.2165,
      "step": 9
    },
    {
      "epoch": 0.014727540500736377,
      "grad_norm": 2.9675485045739736,
      "learning_rate": 9.99866210865185e-06,
      "loss": 0.2166,
      "step": 10
    },
    {
      "epoch": 0.016200294550810016,
      "grad_norm": 3.673938624977175,
      "learning_rate": 9.998381166630395e-06,
      "loss": 0.257,
      "step": 11
    },
    {
      "epoch": 0.017673048600883652,
      "grad_norm": 3.386111654135163,
      "learning_rate": 9.9980734742642e-06,
      "loss": 0.2725,
      "step": 12
    },
    {
      "epoch": 0.01914580265095729,
      "grad_norm": 3.1832497014707113,
      "learning_rate": 9.997739033199974e-06,
      "loss": 0.2325,
      "step": 13
    },
    {
      "epoch": 0.020618556701030927,
      "grad_norm": 2.437336524322532,
      "learning_rate": 9.997377845227577e-06,
      "loss": 0.183,
      "step": 14
    },
    {
      "epoch": 0.022091310751104567,
      "grad_norm": 2.7443979256988675,
      "learning_rate": 9.996989912280015e-06,
      "loss": 0.2183,
      "step": 15
    },
    {
      "epoch": 0.023564064801178203,
      "grad_norm": 2.759798152580142,
      "learning_rate": 9.996575236433428e-06,
      "loss": 0.1976,
      "step": 16
    },
    {
      "epoch": 0.025036818851251842,
      "grad_norm": 3.712743145586044,
      "learning_rate": 9.996133819907082e-06,
      "loss": 0.3022,
      "step": 17
    },
    {
      "epoch": 0.026509572901325478,
      "grad_norm": 3.515452198275458,
      "learning_rate": 9.995665665063349e-06,
      "loss": 0.2521,
      "step": 18
    },
    {
      "epoch": 0.027982326951399118,
      "grad_norm": 3.2896891836927065,
      "learning_rate": 9.9951707744077e-06,
      "loss": 0.2439,
      "step": 19
    },
    {
      "epoch": 0.029455081001472753,
      "grad_norm": 3.1762823618397507,
      "learning_rate": 9.994649150588694e-06,
      "loss": 0.2565,
      "step": 20
    },
    {
      "epoch": 0.030927835051546393,
      "grad_norm": 2.8582894091949735,
      "learning_rate": 9.994100796397954e-06,
      "loss": 0.201,
      "step": 21
    },
    {
      "epoch": 0.03240058910162003,
      "grad_norm": 3.180040249667404,
      "learning_rate": 9.993525714770167e-06,
      "loss": 0.2983,
      "step": 22
    },
    {
      "epoch": 0.033873343151693665,
      "grad_norm": 3.798894109532273,
      "learning_rate": 9.992923908783054e-06,
      "loss": 0.2578,
      "step": 23
    },
    {
      "epoch": 0.035346097201767304,
      "grad_norm": 3.3577639769979917,
      "learning_rate": 9.992295381657361e-06,
      "loss": 0.2551,
      "step": 24
    },
    {
      "epoch": 0.036818851251840944,
      "grad_norm": 3.027011475769264,
      "learning_rate": 9.991640136756843e-06,
      "loss": 0.2394,
      "step": 25
    },
    {
      "epoch": 0.03829160530191458,
      "grad_norm": 3.803089146754714,
      "learning_rate": 9.990958177588236e-06,
      "loss": 0.2749,
      "step": 26
    },
    {
      "epoch": 0.039764359351988215,
      "grad_norm": 2.714001785750428,
      "learning_rate": 9.990249507801257e-06,
      "loss": 0.2391,
      "step": 27
    },
    {
      "epoch": 0.041237113402061855,
      "grad_norm": 3.6859042183737447,
      "learning_rate": 9.98951413118856e-06,
      "loss": 0.2897,
      "step": 28
    },
    {
      "epoch": 0.042709867452135494,
      "grad_norm": 3.45627286944561,
      "learning_rate": 9.988752051685736e-06,
      "loss": 0.2453,
      "step": 29
    },
    {
      "epoch": 0.044182621502209134,
      "grad_norm": 3.5275255658767093,
      "learning_rate": 9.987963273371287e-06,
      "loss": 0.2442,
      "step": 30
    },
    {
      "epoch": 0.045655375552282766,
      "grad_norm": 2.8285444922182967,
      "learning_rate": 9.987147800466593e-06,
      "loss": 0.2494,
      "step": 31
    },
    {
      "epoch": 0.047128129602356406,
      "grad_norm": 2.945285023713762,
      "learning_rate": 9.986305637335907e-06,
      "loss": 0.2292,
      "step": 32
    },
    {
      "epoch": 0.048600883652430045,
      "grad_norm": 2.5322659450463547,
      "learning_rate": 9.985436788486317e-06,
      "loss": 0.1992,
      "step": 33
    },
    {
      "epoch": 0.050073637702503684,
      "grad_norm": 3.264319463862271,
      "learning_rate": 9.984541258567732e-06,
      "loss": 0.2289,
      "step": 34
    },
    {
      "epoch": 0.05154639175257732,
      "grad_norm": 3.231647823081557,
      "learning_rate": 9.983619052372847e-06,
      "loss": 0.2446,
      "step": 35
    },
    {
      "epoch": 0.053019145802650956,
      "grad_norm": 2.5793360921976465,
      "learning_rate": 9.982670174837131e-06,
      "loss": 0.2099,
      "step": 36
    },
    {
      "epoch": 0.054491899852724596,
      "grad_norm": 4.189833287500822,
      "learning_rate": 9.981694631038785e-06,
      "loss": 0.2302,
      "step": 37
    },
    {
      "epoch": 0.055964653902798235,
      "grad_norm": 2.822296706691291,
      "learning_rate": 9.980692426198728e-06,
      "loss": 0.1954,
      "step": 38
    },
    {
      "epoch": 0.05743740795287187,
      "grad_norm": 3.517010072729644,
      "learning_rate": 9.97966356568056e-06,
      "loss": 0.282,
      "step": 39
    },
    {
      "epoch": 0.05891016200294551,
      "grad_norm": 2.9747996462515527,
      "learning_rate": 9.97860805499054e-06,
      "loss": 0.2311,
      "step": 40
    },
    {
      "epoch": 0.060382916053019146,
      "grad_norm": 3.571132661443482,
      "learning_rate": 9.977525899777549e-06,
      "loss": 0.2048,
      "step": 41
    },
    {
      "epoch": 0.061855670103092786,
      "grad_norm": 2.8914392164923757,
      "learning_rate": 9.97641710583307e-06,
      "loss": 0.228,
      "step": 42
    },
    {
      "epoch": 0.06332842415316642,
      "grad_norm": 3.342268753640644,
      "learning_rate": 9.97528167909115e-06,
      "loss": 0.2652,
      "step": 43
    },
    {
      "epoch": 0.06480117820324006,
      "grad_norm": 2.733501107730972,
      "learning_rate": 9.97411962562836e-06,
      "loss": 0.1933,
      "step": 44
    },
    {
      "epoch": 0.0662739322533137,
      "grad_norm": 3.077042960095706,
      "learning_rate": 9.97293095166379e-06,
      "loss": 0.1986,
      "step": 45
    },
    {
      "epoch": 0.06774668630338733,
      "grad_norm": 2.7641800941458365,
      "learning_rate": 9.971715663558978e-06,
      "loss": 0.2019,
      "step": 46
    },
    {
      "epoch": 0.06921944035346098,
      "grad_norm": 3.2668699260976894,
      "learning_rate": 9.970473767817913e-06,
      "loss": 0.2039,
      "step": 47
    },
    {
      "epoch": 0.07069219440353461,
      "grad_norm": 3.3534010470167703,
      "learning_rate": 9.969205271086969e-06,
      "loss": 0.2109,
      "step": 48
    },
    {
      "epoch": 0.07216494845360824,
      "grad_norm": 3.2183136865151187,
      "learning_rate": 9.96791018015489e-06,
      "loss": 0.3102,
      "step": 49
    },
    {
      "epoch": 0.07363770250368189,
      "grad_norm": 2.9261416367269426,
      "learning_rate": 9.966588501952747e-06,
      "loss": 0.1933,
      "step": 50
    },
    {
      "epoch": 0.07511045655375552,
      "grad_norm": 3.1755105009061326,
      "learning_rate": 9.965240243553899e-06,
      "loss": 0.2289,
      "step": 51
    },
    {
      "epoch": 0.07658321060382917,
      "grad_norm": 3.190180430758934,
      "learning_rate": 9.963865412173958e-06,
      "loss": 0.2387,
      "step": 52
    },
    {
      "epoch": 0.0780559646539028,
      "grad_norm": 2.752399278611623,
      "learning_rate": 9.962464015170748e-06,
      "loss": 0.2387,
      "step": 53
    },
    {
      "epoch": 0.07952871870397643,
      "grad_norm": 3.5795903714534654,
      "learning_rate": 9.961036060044268e-06,
      "loss": 0.1661,
      "step": 54
    },
    {
      "epoch": 0.08100147275405008,
      "grad_norm": 2.9565589967050423,
      "learning_rate": 9.959581554436654e-06,
      "loss": 0.2617,
      "step": 55
    },
    {
      "epoch": 0.08247422680412371,
      "grad_norm": 3.0373689834666755,
      "learning_rate": 9.958100506132127e-06,
      "loss": 0.1963,
      "step": 56
    },
    {
      "epoch": 0.08394698085419734,
      "grad_norm": 2.5982563190319525,
      "learning_rate": 9.956592923056965e-06,
      "loss": 0.1582,
      "step": 57
    },
    {
      "epoch": 0.08541973490427099,
      "grad_norm": 3.3173710787447406,
      "learning_rate": 9.955058813279454e-06,
      "loss": 0.2197,
      "step": 58
    },
    {
      "epoch": 0.08689248895434462,
      "grad_norm": 3.3097321090388334,
      "learning_rate": 9.953498185009846e-06,
      "loss": 0.2425,
      "step": 59
    },
    {
      "epoch": 0.08836524300441827,
      "grad_norm": 2.9050353404780687,
      "learning_rate": 9.951911046600313e-06,
      "loss": 0.1833,
      "step": 60
    },
    {
      "epoch": 0.0898379970544919,
      "grad_norm": 3.0423904876869603,
      "learning_rate": 9.950297406544907e-06,
      "loss": 0.1826,
      "step": 61
    },
    {
      "epoch": 0.09131075110456553,
      "grad_norm": 3.17541167048525,
      "learning_rate": 9.948657273479508e-06,
      "loss": 0.211,
      "step": 62
    },
    {
      "epoch": 0.09278350515463918,
      "grad_norm": 2.962504763876762,
      "learning_rate": 9.946990656181782e-06,
      "loss": 0.2213,
      "step": 63
    },
    {
      "epoch": 0.09425625920471281,
      "grad_norm": 3.54508058393369,
      "learning_rate": 9.945297563571135e-06,
      "loss": 0.218,
      "step": 64
    },
    {
      "epoch": 0.09572901325478646,
      "grad_norm": 2.59575497239498,
      "learning_rate": 9.943578004708664e-06,
      "loss": 0.201,
      "step": 65
    },
    {
      "epoch": 0.09720176730486009,
      "grad_norm": 3.190873302956337,
      "learning_rate": 9.941831988797104e-06,
      "loss": 0.2394,
      "step": 66
    },
    {
      "epoch": 0.09867452135493372,
      "grad_norm": 3.126965019920143,
      "learning_rate": 9.940059525180788e-06,
      "loss": 0.2143,
      "step": 67
    },
    {
      "epoch": 0.10014727540500737,
      "grad_norm": 2.9885431966842484,
      "learning_rate": 9.938260623345591e-06,
      "loss": 0.2248,
      "step": 68
    },
    {
      "epoch": 0.101620029455081,
      "grad_norm": 2.9461738168513083,
      "learning_rate": 9.936435292918878e-06,
      "loss": 0.1865,
      "step": 69
    },
    {
      "epoch": 0.10309278350515463,
      "grad_norm": 2.649205090590615,
      "learning_rate": 9.934583543669454e-06,
      "loss": 0.1835,
      "step": 70
    },
    {
      "epoch": 0.10456553755522828,
      "grad_norm": 4.042700652793221,
      "learning_rate": 9.932705385507514e-06,
      "loss": 0.2352,
      "step": 71
    },
    {
      "epoch": 0.10603829160530191,
      "grad_norm": 3.387539024131993,
      "learning_rate": 9.930800828484593e-06,
      "loss": 0.2051,
      "step": 72
    },
    {
      "epoch": 0.10751104565537556,
      "grad_norm": 3.7913024480425865,
      "learning_rate": 9.928869882793495e-06,
      "loss": 0.2465,
      "step": 73
    },
    {
      "epoch": 0.10898379970544919,
      "grad_norm": 3.34902917263494,
      "learning_rate": 9.926912558768261e-06,
      "loss": 0.2331,
      "step": 74
    },
    {
      "epoch": 0.11045655375552282,
      "grad_norm": 2.4043773466283302,
      "learning_rate": 9.924928866884103e-06,
      "loss": 0.1843,
      "step": 75
    },
    {
      "epoch": 0.11192930780559647,
      "grad_norm": 2.5357530889863247,
      "learning_rate": 9.922918817757346e-06,
      "loss": 0.1752,
      "step": 76
    },
    {
      "epoch": 0.1134020618556701,
      "grad_norm": 3.2637634737226056,
      "learning_rate": 9.920882422145372e-06,
      "loss": 0.2313,
      "step": 77
    },
    {
      "epoch": 0.11487481590574374,
      "grad_norm": 3.731080966458299,
      "learning_rate": 9.918819690946568e-06,
      "loss": 0.2456,
      "step": 78
    },
    {
      "epoch": 0.11634756995581738,
      "grad_norm": 3.047609400176811,
      "learning_rate": 9.916730635200265e-06,
      "loss": 0.2012,
      "step": 79
    },
    {
      "epoch": 0.11782032400589101,
      "grad_norm": 2.9217369450670705,
      "learning_rate": 9.914615266086668e-06,
      "loss": 0.2083,
      "step": 80
    },
    {
      "epoch": 0.11929307805596466,
      "grad_norm": 4.36023190619276,
      "learning_rate": 9.912473594926821e-06,
      "loss": 0.1813,
      "step": 81
    },
    {
      "epoch": 0.12076583210603829,
      "grad_norm": 3.258283279415663,
      "learning_rate": 9.910305633182518e-06,
      "loss": 0.2091,
      "step": 82
    },
    {
      "epoch": 0.12223858615611193,
      "grad_norm": 3.1772623956205663,
      "learning_rate": 9.908111392456263e-06,
      "loss": 0.2122,
      "step": 83
    },
    {
      "epoch": 0.12371134020618557,
      "grad_norm": 2.956141700380078,
      "learning_rate": 9.905890884491196e-06,
      "loss": 0.2089,
      "step": 84
    },
    {
      "epoch": 0.1251840942562592,
      "grad_norm": 3.496221823148631,
      "learning_rate": 9.903644121171036e-06,
      "loss": 0.2418,
      "step": 85
    },
    {
      "epoch": 0.12665684830633284,
      "grad_norm": 3.75719996287404,
      "learning_rate": 9.901371114520014e-06,
      "loss": 0.2046,
      "step": 86
    },
    {
      "epoch": 0.12812960235640647,
      "grad_norm": 3.191941100664362,
      "learning_rate": 9.89907187670281e-06,
      "loss": 0.217,
      "step": 87
    },
    {
      "epoch": 0.12960235640648013,
      "grad_norm": 4.028688439687238,
      "learning_rate": 9.89674642002449e-06,
      "loss": 0.2247,
      "step": 88
    },
    {
      "epoch": 0.13107511045655376,
      "grad_norm": 3.013392301649945,
      "learning_rate": 9.894394756930437e-06,
      "loss": 0.2055,
      "step": 89
    },
    {
      "epoch": 0.1325478645066274,
      "grad_norm": 3.3003674751402143,
      "learning_rate": 9.892016900006284e-06,
      "loss": 0.2457,
      "step": 90
    },
    {
      "epoch": 0.13402061855670103,
      "grad_norm": 2.688227321715556,
      "learning_rate": 9.889612861977855e-06,
      "loss": 0.1833,
      "step": 91
    },
    {
      "epoch": 0.13549337260677466,
      "grad_norm": 3.5359724671571535,
      "learning_rate": 9.887182655711078e-06,
      "loss": 0.1991,
      "step": 92
    },
    {
      "epoch": 0.13696612665684832,
      "grad_norm": 3.3331679118887747,
      "learning_rate": 9.884726294211937e-06,
      "loss": 0.1987,
      "step": 93
    },
    {
      "epoch": 0.13843888070692195,
      "grad_norm": 2.3015340391465204,
      "learning_rate": 9.882243790626393e-06,
      "loss": 0.1772,
      "step": 94
    },
    {
      "epoch": 0.13991163475699558,
      "grad_norm": 3.581411392511822,
      "learning_rate": 9.879735158240314e-06,
      "loss": 0.262,
      "step": 95
    },
    {
      "epoch": 0.14138438880706922,
      "grad_norm": 3.3862669869305124,
      "learning_rate": 9.877200410479399e-06,
      "loss": 0.2068,
      "step": 96
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 3.351128575072124,
      "learning_rate": 9.874639560909118e-06,
      "loss": 0.2399,
      "step": 97
    },
    {
      "epoch": 0.14432989690721648,
      "grad_norm": 3.032326655318766,
      "learning_rate": 9.872052623234632e-06,
      "loss": 0.2081,
      "step": 98
    },
    {
      "epoch": 0.14580265095729014,
      "grad_norm": 3.1356576751257768,
      "learning_rate": 9.869439611300712e-06,
      "loss": 0.2146,
      "step": 99
    },
    {
      "epoch": 0.14727540500736377,
      "grad_norm": 2.9170663043693073,
      "learning_rate": 9.866800539091688e-06,
      "loss": 0.1936,
      "step": 100
    },
    {
      "epoch": 0.1487481590574374,
      "grad_norm": 3.1762356343512477,
      "learning_rate": 9.864135420731345e-06,
      "loss": 0.2488,
      "step": 101
    },
    {
      "epoch": 0.15022091310751104,
      "grad_norm": 3.331979025526921,
      "learning_rate": 9.861444270482869e-06,
      "loss": 0.2102,
      "step": 102
    },
    {
      "epoch": 0.15169366715758467,
      "grad_norm": 2.5999400966061357,
      "learning_rate": 9.858727102748762e-06,
      "loss": 0.1745,
      "step": 103
    },
    {
      "epoch": 0.15316642120765833,
      "grad_norm": 3.0703393900110902,
      "learning_rate": 9.855983932070771e-06,
      "loss": 0.1969,
      "step": 104
    },
    {
      "epoch": 0.15463917525773196,
      "grad_norm": 3.3019426748278256,
      "learning_rate": 9.853214773129796e-06,
      "loss": 0.2471,
      "step": 105
    },
    {
      "epoch": 0.1561119293078056,
      "grad_norm": 2.8316407741625222,
      "learning_rate": 9.85041964074583e-06,
      "loss": 0.1926,
      "step": 106
    },
    {
      "epoch": 0.15758468335787923,
      "grad_norm": 2.3805838783625792,
      "learning_rate": 9.847598549877867e-06,
      "loss": 0.1631,
      "step": 107
    },
    {
      "epoch": 0.15905743740795286,
      "grad_norm": 2.8943998454183517,
      "learning_rate": 9.844751515623824e-06,
      "loss": 0.2498,
      "step": 108
    },
    {
      "epoch": 0.16053019145802652,
      "grad_norm": 3.379043461001327,
      "learning_rate": 9.841878553220465e-06,
      "loss": 0.2663,
      "step": 109
    },
    {
      "epoch": 0.16200294550810015,
      "grad_norm": 2.71005551644853,
      "learning_rate": 9.838979678043314e-06,
      "loss": 0.1811,
      "step": 110
    },
    {
      "epoch": 0.1634756995581738,
      "grad_norm": 2.8109475835050968,
      "learning_rate": 9.836054905606578e-06,
      "loss": 0.2296,
      "step": 111
    },
    {
      "epoch": 0.16494845360824742,
      "grad_norm": 3.2874015483825256,
      "learning_rate": 9.833104251563058e-06,
      "loss": 0.2377,
      "step": 112
    },
    {
      "epoch": 0.16642120765832105,
      "grad_norm": 2.9367556118169533,
      "learning_rate": 9.830127731704067e-06,
      "loss": 0.2109,
      "step": 113
    },
    {
      "epoch": 0.16789396170839468,
      "grad_norm": 3.0606889648156863,
      "learning_rate": 9.827125361959353e-06,
      "loss": 0.2027,
      "step": 114
    },
    {
      "epoch": 0.16936671575846834,
      "grad_norm": 3.397990076888804,
      "learning_rate": 9.824097158397e-06,
      "loss": 0.2227,
      "step": 115
    },
    {
      "epoch": 0.17083946980854198,
      "grad_norm": 6.011726589982756,
      "learning_rate": 9.821043137223356e-06,
      "loss": 0.2235,
      "step": 116
    },
    {
      "epoch": 0.1723122238586156,
      "grad_norm": 3.1315937708088524,
      "learning_rate": 9.817963314782934e-06,
      "loss": 0.1889,
      "step": 117
    },
    {
      "epoch": 0.17378497790868924,
      "grad_norm": 2.972045976596521,
      "learning_rate": 9.814857707558334e-06,
      "loss": 0.1899,
      "step": 118
    },
    {
      "epoch": 0.17525773195876287,
      "grad_norm": 3.024575764721706,
      "learning_rate": 9.811726332170153e-06,
      "loss": 0.1888,
      "step": 119
    },
    {
      "epoch": 0.17673048600883653,
      "grad_norm": 2.862267267477572,
      "learning_rate": 9.808569205376885e-06,
      "loss": 0.1977,
      "step": 120
    },
    {
      "epoch": 0.17820324005891017,
      "grad_norm": 3.1490402592480184,
      "learning_rate": 9.80538634407485e-06,
      "loss": 0.2022,
      "step": 121
    },
    {
      "epoch": 0.1796759941089838,
      "grad_norm": 2.703112681626916,
      "learning_rate": 9.802177765298091e-06,
      "loss": 0.2172,
      "step": 122
    },
    {
      "epoch": 0.18114874815905743,
      "grad_norm": 2.8427204160110517,
      "learning_rate": 9.798943486218284e-06,
      "loss": 0.1733,
      "step": 123
    },
    {
      "epoch": 0.18262150220913106,
      "grad_norm": 3.2810082739191415,
      "learning_rate": 9.795683524144649e-06,
      "loss": 0.2001,
      "step": 124
    },
    {
      "epoch": 0.18409425625920472,
      "grad_norm": 3.171988135686974,
      "learning_rate": 9.792397896523857e-06,
      "loss": 0.2304,
      "step": 125
    },
    {
      "epoch": 0.18556701030927836,
      "grad_norm": 3.613990085160166,
      "learning_rate": 9.789086620939936e-06,
      "loss": 0.2841,
      "step": 126
    },
    {
      "epoch": 0.187039764359352,
      "grad_norm": 3.436471960168319,
      "learning_rate": 9.785749715114177e-06,
      "loss": 0.2289,
      "step": 127
    },
    {
      "epoch": 0.18851251840942562,
      "grad_norm": 2.66634702118146,
      "learning_rate": 9.782387196905034e-06,
      "loss": 0.1724,
      "step": 128
    },
    {
      "epoch": 0.18998527245949925,
      "grad_norm": 3.1813963633826683,
      "learning_rate": 9.778999084308043e-06,
      "loss": 0.2295,
      "step": 129
    },
    {
      "epoch": 0.19145802650957292,
      "grad_norm": 3.3659137081706367,
      "learning_rate": 9.775585395455708e-06,
      "loss": 0.2157,
      "step": 130
    },
    {
      "epoch": 0.19293078055964655,
      "grad_norm": 3.4836322324211513,
      "learning_rate": 9.772146148617414e-06,
      "loss": 0.2214,
      "step": 131
    },
    {
      "epoch": 0.19440353460972018,
      "grad_norm": 3.292266923964018,
      "learning_rate": 9.76868136219933e-06,
      "loss": 0.2089,
      "step": 132
    },
    {
      "epoch": 0.1958762886597938,
      "grad_norm": 2.9068554190412046,
      "learning_rate": 9.765191054744305e-06,
      "loss": 0.1876,
      "step": 133
    },
    {
      "epoch": 0.19734904270986744,
      "grad_norm": 2.777681310150956,
      "learning_rate": 9.761675244931772e-06,
      "loss": 0.1981,
      "step": 134
    },
    {
      "epoch": 0.19882179675994108,
      "grad_norm": 3.391020475699768,
      "learning_rate": 9.75813395157765e-06,
      "loss": 0.2165,
      "step": 135
    },
    {
      "epoch": 0.20029455081001474,
      "grad_norm": 3.1826923463080923,
      "learning_rate": 9.754567193634232e-06,
      "loss": 0.2107,
      "step": 136
    },
    {
      "epoch": 0.20176730486008837,
      "grad_norm": 2.6839128866173563,
      "learning_rate": 9.750974990190107e-06,
      "loss": 0.2013,
      "step": 137
    },
    {
      "epoch": 0.203240058910162,
      "grad_norm": 4.2008156135049814,
      "learning_rate": 9.747357360470033e-06,
      "loss": 0.2419,
      "step": 138
    },
    {
      "epoch": 0.20471281296023564,
      "grad_norm": 4.027928958269575,
      "learning_rate": 9.743714323834844e-06,
      "loss": 0.2175,
      "step": 139
    },
    {
      "epoch": 0.20618556701030927,
      "grad_norm": 3.115166800077054,
      "learning_rate": 9.740045899781353e-06,
      "loss": 0.221,
      "step": 140
    },
    {
      "epoch": 0.20765832106038293,
      "grad_norm": 4.0874344767769974,
      "learning_rate": 9.736352107942237e-06,
      "loss": 0.2442,
      "step": 141
    },
    {
      "epoch": 0.20913107511045656,
      "grad_norm": 3.565915108388009,
      "learning_rate": 9.732632968085937e-06,
      "loss": 0.1765,
      "step": 142
    },
    {
      "epoch": 0.2106038291605302,
      "grad_norm": 2.565264174585471,
      "learning_rate": 9.728888500116551e-06,
      "loss": 0.1833,
      "step": 143
    },
    {
      "epoch": 0.21207658321060383,
      "grad_norm": 3.2970463396097145,
      "learning_rate": 9.725118724073732e-06,
      "loss": 0.1681,
      "step": 144
    },
    {
      "epoch": 0.21354933726067746,
      "grad_norm": 3.985651307847386,
      "learning_rate": 9.721323660132572e-06,
      "loss": 0.2852,
      "step": 145
    },
    {
      "epoch": 0.21502209131075112,
      "grad_norm": 2.6390893883353037,
      "learning_rate": 9.717503328603499e-06,
      "loss": 0.2032,
      "step": 146
    },
    {
      "epoch": 0.21649484536082475,
      "grad_norm": 2.8556278741462475,
      "learning_rate": 9.713657749932172e-06,
      "loss": 0.2305,
      "step": 147
    },
    {
      "epoch": 0.21796759941089838,
      "grad_norm": 3.3789042172013737,
      "learning_rate": 9.709786944699364e-06,
      "loss": 0.2362,
      "step": 148
    },
    {
      "epoch": 0.21944035346097202,
      "grad_norm": 3.5664134680045967,
      "learning_rate": 9.705890933620859e-06,
      "loss": 0.2556,
      "step": 149
    },
    {
      "epoch": 0.22091310751104565,
      "grad_norm": 2.6115979526806226,
      "learning_rate": 9.701969737547332e-06,
      "loss": 0.2149,
      "step": 150
    },
    {
      "epoch": 0.22238586156111928,
      "grad_norm": 3.199894028609453,
      "learning_rate": 9.69802337746425e-06,
      "loss": 0.1842,
      "step": 151
    },
    {
      "epoch": 0.22385861561119294,
      "grad_norm": 2.9713680676755643,
      "learning_rate": 9.694051874491748e-06,
      "loss": 0.2037,
      "step": 152
    },
    {
      "epoch": 0.22533136966126657,
      "grad_norm": 3.371703034689337,
      "learning_rate": 9.690055249884524e-06,
      "loss": 0.2674,
      "step": 153
    },
    {
      "epoch": 0.2268041237113402,
      "grad_norm": 2.9536771126823784,
      "learning_rate": 9.68603352503172e-06,
      "loss": 0.2223,
      "step": 154
    },
    {
      "epoch": 0.22827687776141384,
      "grad_norm": 2.7924914660332316,
      "learning_rate": 9.681986721456806e-06,
      "loss": 0.1998,
      "step": 155
    },
    {
      "epoch": 0.22974963181148747,
      "grad_norm": 2.9038543623288446,
      "learning_rate": 9.677914860817476e-06,
      "loss": 0.2138,
      "step": 156
    },
    {
      "epoch": 0.23122238586156113,
      "grad_norm": 3.2780366704955313,
      "learning_rate": 9.67381796490552e-06,
      "loss": 0.1881,
      "step": 157
    },
    {
      "epoch": 0.23269513991163476,
      "grad_norm": 2.8392198609551795,
      "learning_rate": 9.669696055646713e-06,
      "loss": 0.1791,
      "step": 158
    },
    {
      "epoch": 0.2341678939617084,
      "grad_norm": 2.519298030175099,
      "learning_rate": 9.665549155100696e-06,
      "loss": 0.1713,
      "step": 159
    },
    {
      "epoch": 0.23564064801178203,
      "grad_norm": 2.944710796481666,
      "learning_rate": 9.661377285460856e-06,
      "loss": 0.2036,
      "step": 160
    },
    {
      "epoch": 0.23711340206185566,
      "grad_norm": 3.7896683971691925,
      "learning_rate": 9.657180469054213e-06,
      "loss": 0.2603,
      "step": 161
    },
    {
      "epoch": 0.23858615611192932,
      "grad_norm": 2.9596722312146784,
      "learning_rate": 9.652958728341296e-06,
      "loss": 0.1781,
      "step": 162
    },
    {
      "epoch": 0.24005891016200295,
      "grad_norm": 3.214060280061799,
      "learning_rate": 9.648712085916025e-06,
      "loss": 0.1915,
      "step": 163
    },
    {
      "epoch": 0.24153166421207659,
      "grad_norm": 3.187133469928152,
      "learning_rate": 9.644440564505589e-06,
      "loss": 0.1834,
      "step": 164
    },
    {
      "epoch": 0.24300441826215022,
      "grad_norm": 3.681229252431609,
      "learning_rate": 9.640144186970319e-06,
      "loss": 0.2262,
      "step": 165
    },
    {
      "epoch": 0.24447717231222385,
      "grad_norm": 3.2016122797845363,
      "learning_rate": 9.635822976303582e-06,
      "loss": 0.1706,
      "step": 166
    },
    {
      "epoch": 0.24594992636229748,
      "grad_norm": 3.730689931811761,
      "learning_rate": 9.631476955631636e-06,
      "loss": 0.2484,
      "step": 167
    },
    {
      "epoch": 0.24742268041237114,
      "grad_norm": 3.494482628690699,
      "learning_rate": 9.627106148213521e-06,
      "loss": 0.2417,
      "step": 168
    },
    {
      "epoch": 0.24889543446244478,
      "grad_norm": 2.7046561661674176,
      "learning_rate": 9.622710577440936e-06,
      "loss": 0.1682,
      "step": 169
    },
    {
      "epoch": 0.2503681885125184,
      "grad_norm": 3.5016680527035198,
      "learning_rate": 9.6182902668381e-06,
      "loss": 0.2391,
      "step": 170
    },
    {
      "epoch": 0.25184094256259204,
      "grad_norm": 2.106385345129964,
      "learning_rate": 9.613845240061642e-06,
      "loss": 0.1651,
      "step": 171
    },
    {
      "epoch": 0.2533136966126657,
      "grad_norm": 3.5843407537461576,
      "learning_rate": 9.60937552090046e-06,
      "loss": 0.2173,
      "step": 172
    },
    {
      "epoch": 0.2547864506627393,
      "grad_norm": 2.6949160731602086,
      "learning_rate": 9.604881133275606e-06,
      "loss": 0.1804,
      "step": 173
    },
    {
      "epoch": 0.25625920471281294,
      "grad_norm": 2.698160920515062,
      "learning_rate": 9.600362101240153e-06,
      "loss": 0.179,
      "step": 174
    },
    {
      "epoch": 0.25773195876288657,
      "grad_norm": 2.8058773510654564,
      "learning_rate": 9.595818448979061e-06,
      "loss": 0.215,
      "step": 175
    },
    {
      "epoch": 0.25920471281296026,
      "grad_norm": 2.645103914035817,
      "learning_rate": 9.591250200809061e-06,
      "loss": 0.167,
      "step": 176
    },
    {
      "epoch": 0.2606774668630339,
      "grad_norm": 2.5985448999120946,
      "learning_rate": 9.586657381178506e-06,
      "loss": 0.174,
      "step": 177
    },
    {
      "epoch": 0.2621502209131075,
      "grad_norm": 2.3143387309459844,
      "learning_rate": 9.582040014667258e-06,
      "loss": 0.1872,
      "step": 178
    },
    {
      "epoch": 0.26362297496318116,
      "grad_norm": 3.438390359114222,
      "learning_rate": 9.577398125986546e-06,
      "loss": 0.2256,
      "step": 179
    },
    {
      "epoch": 0.2650957290132548,
      "grad_norm": 3.0621291109544932,
      "learning_rate": 9.57273173997884e-06,
      "loss": 0.2013,
      "step": 180
    },
    {
      "epoch": 0.2665684830633284,
      "grad_norm": 2.589989781920295,
      "learning_rate": 9.56804088161771e-06,
      "loss": 0.2082,
      "step": 181
    },
    {
      "epoch": 0.26804123711340205,
      "grad_norm": 3.1290423588543517,
      "learning_rate": 9.563325576007702e-06,
      "loss": 0.2381,
      "step": 182
    },
    {
      "epoch": 0.2695139911634757,
      "grad_norm": 2.5850784305510026,
      "learning_rate": 9.558585848384194e-06,
      "loss": 0.1492,
      "step": 183
    },
    {
      "epoch": 0.2709867452135493,
      "grad_norm": 3.197169330104788,
      "learning_rate": 9.553821724113268e-06,
      "loss": 0.2513,
      "step": 184
    },
    {
      "epoch": 0.27245949926362295,
      "grad_norm": 2.7955706520257455,
      "learning_rate": 9.549033228691576e-06,
      "loss": 0.1996,
      "step": 185
    },
    {
      "epoch": 0.27393225331369664,
      "grad_norm": 2.7908291417631053,
      "learning_rate": 9.544220387746193e-06,
      "loss": 0.2033,
      "step": 186
    },
    {
      "epoch": 0.27540500736377027,
      "grad_norm": 2.741273817653423,
      "learning_rate": 9.539383227034489e-06,
      "loss": 0.1994,
      "step": 187
    },
    {
      "epoch": 0.2768777614138439,
      "grad_norm": 3.1305777720744628,
      "learning_rate": 9.534521772443989e-06,
      "loss": 0.1924,
      "step": 188
    },
    {
      "epoch": 0.27835051546391754,
      "grad_norm": 3.7426036500166684,
      "learning_rate": 9.529636049992235e-06,
      "loss": 0.2504,
      "step": 189
    },
    {
      "epoch": 0.27982326951399117,
      "grad_norm": 2.0789109823416196,
      "learning_rate": 9.524726085826645e-06,
      "loss": 0.1498,
      "step": 190
    },
    {
      "epoch": 0.2812960235640648,
      "grad_norm": 2.0879395036515085,
      "learning_rate": 9.519791906224372e-06,
      "loss": 0.1742,
      "step": 191
    },
    {
      "epoch": 0.28276877761413843,
      "grad_norm": 3.6332083539027122,
      "learning_rate": 9.514833537592167e-06,
      "loss": 0.2477,
      "step": 192
    },
    {
      "epoch": 0.28424153166421207,
      "grad_norm": 3.4011295429243953,
      "learning_rate": 9.509851006466235e-06,
      "loss": 0.2181,
      "step": 193
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 3.2524842525432214,
      "learning_rate": 9.504844339512096e-06,
      "loss": 0.1913,
      "step": 194
    },
    {
      "epoch": 0.28718703976435933,
      "grad_norm": 3.2393209593702026,
      "learning_rate": 9.499813563524439e-06,
      "loss": 0.2564,
      "step": 195
    },
    {
      "epoch": 0.28865979381443296,
      "grad_norm": 2.7259868505734883,
      "learning_rate": 9.494758705426978e-06,
      "loss": 0.2362,
      "step": 196
    },
    {
      "epoch": 0.29013254786450665,
      "grad_norm": 3.2730541713875954,
      "learning_rate": 9.48967979227231e-06,
      "loss": 0.2708,
      "step": 197
    },
    {
      "epoch": 0.2916053019145803,
      "grad_norm": 3.6942590779013837,
      "learning_rate": 9.484576851241774e-06,
      "loss": 0.2598,
      "step": 198
    },
    {
      "epoch": 0.2930780559646539,
      "grad_norm": 3.4221617953713044,
      "learning_rate": 9.479449909645296e-06,
      "loss": 0.2186,
      "step": 199
    },
    {
      "epoch": 0.29455081001472755,
      "grad_norm": 2.9048709806411153,
      "learning_rate": 9.474298994921252e-06,
      "loss": 0.1949,
      "step": 200
    },
    {
      "epoch": 0.2960235640648012,
      "grad_norm": 2.7375080374295315,
      "learning_rate": 9.469124134636317e-06,
      "loss": 0.2169,
      "step": 201
    },
    {
      "epoch": 0.2974963181148748,
      "grad_norm": 3.2624468680574483,
      "learning_rate": 9.463925356485313e-06,
      "loss": 0.2005,
      "step": 202
    },
    {
      "epoch": 0.29896907216494845,
      "grad_norm": 3.357905644483348,
      "learning_rate": 9.458702688291072e-06,
      "loss": 0.221,
      "step": 203
    },
    {
      "epoch": 0.3004418262150221,
      "grad_norm": 2.7139450967157686,
      "learning_rate": 9.45345615800428e-06,
      "loss": 0.2043,
      "step": 204
    },
    {
      "epoch": 0.3019145802650957,
      "grad_norm": 2.1826977262886436,
      "learning_rate": 9.448185793703325e-06,
      "loss": 0.1382,
      "step": 205
    },
    {
      "epoch": 0.30338733431516934,
      "grad_norm": 3.142821649729342,
      "learning_rate": 9.442891623594153e-06,
      "loss": 0.2361,
      "step": 206
    },
    {
      "epoch": 0.30486008836524303,
      "grad_norm": 2.4836224577160446,
      "learning_rate": 9.43757367601011e-06,
      "loss": 0.2259,
      "step": 207
    },
    {
      "epoch": 0.30633284241531666,
      "grad_norm": 2.775002424298524,
      "learning_rate": 9.432231979411799e-06,
      "loss": 0.1841,
      "step": 208
    },
    {
      "epoch": 0.3078055964653903,
      "grad_norm": 3.0215163458905767,
      "learning_rate": 9.426866562386919e-06,
      "loss": 0.2017,
      "step": 209
    },
    {
      "epoch": 0.30927835051546393,
      "grad_norm": 3.075012833268593,
      "learning_rate": 9.421477453650118e-06,
      "loss": 0.2564,
      "step": 210
    },
    {
      "epoch": 0.31075110456553756,
      "grad_norm": 2.1033703952542298,
      "learning_rate": 9.41606468204284e-06,
      "loss": 0.148,
      "step": 211
    },
    {
      "epoch": 0.3122238586156112,
      "grad_norm": 3.43331899798626,
      "learning_rate": 9.410628276533163e-06,
      "loss": 0.2211,
      "step": 212
    },
    {
      "epoch": 0.3136966126656848,
      "grad_norm": 2.85824244857793,
      "learning_rate": 9.40516826621565e-06,
      "loss": 0.2331,
      "step": 213
    },
    {
      "epoch": 0.31516936671575846,
      "grad_norm": 2.590034132267709,
      "learning_rate": 9.399684680311197e-06,
      "loss": 0.1818,
      "step": 214
    },
    {
      "epoch": 0.3166421207658321,
      "grad_norm": 2.9268108365833183,
      "learning_rate": 9.394177548166865e-06,
      "loss": 0.2232,
      "step": 215
    },
    {
      "epoch": 0.3181148748159057,
      "grad_norm": 2.506123086913095,
      "learning_rate": 9.388646899255733e-06,
      "loss": 0.163,
      "step": 216
    },
    {
      "epoch": 0.31958762886597936,
      "grad_norm": 2.9103473622929688,
      "learning_rate": 9.38309276317674e-06,
      "loss": 0.1602,
      "step": 217
    },
    {
      "epoch": 0.32106038291605304,
      "grad_norm": 3.3616843450895004,
      "learning_rate": 9.377515169654518e-06,
      "loss": 0.198,
      "step": 218
    },
    {
      "epoch": 0.3225331369661267,
      "grad_norm": 2.832377450626078,
      "learning_rate": 9.371914148539242e-06,
      "loss": 0.1925,
      "step": 219
    },
    {
      "epoch": 0.3240058910162003,
      "grad_norm": 2.430717934544939,
      "learning_rate": 9.366289729806468e-06,
      "loss": 0.1723,
      "step": 220
    },
    {
      "epoch": 0.32547864506627394,
      "grad_norm": 2.8065095246039506,
      "learning_rate": 9.36064194355697e-06,
      "loss": 0.1835,
      "step": 221
    },
    {
      "epoch": 0.3269513991163476,
      "grad_norm": 3.285856798309481,
      "learning_rate": 9.354970820016576e-06,
      "loss": 0.2428,
      "step": 222
    },
    {
      "epoch": 0.3284241531664212,
      "grad_norm": 3.292186036973222,
      "learning_rate": 9.349276389536017e-06,
      "loss": 0.211,
      "step": 223
    },
    {
      "epoch": 0.32989690721649484,
      "grad_norm": 3.0051382018089488,
      "learning_rate": 9.343558682590757e-06,
      "loss": 0.1826,
      "step": 224
    },
    {
      "epoch": 0.33136966126656847,
      "grad_norm": 3.8927356666818795,
      "learning_rate": 9.337817729780826e-06,
      "loss": 0.244,
      "step": 225
    },
    {
      "epoch": 0.3328424153166421,
      "grad_norm": 2.6379798191774175,
      "learning_rate": 9.332053561830669e-06,
      "loss": 0.1853,
      "step": 226
    },
    {
      "epoch": 0.33431516936671574,
      "grad_norm": 3.6279970290742147,
      "learning_rate": 9.326266209588966e-06,
      "loss": 0.2242,
      "step": 227
    },
    {
      "epoch": 0.33578792341678937,
      "grad_norm": 3.380122497482897,
      "learning_rate": 9.320455704028482e-06,
      "loss": 0.2192,
      "step": 228
    },
    {
      "epoch": 0.33726067746686306,
      "grad_norm": 3.2219948474501305,
      "learning_rate": 9.314622076245887e-06,
      "loss": 0.2326,
      "step": 229
    },
    {
      "epoch": 0.3387334315169367,
      "grad_norm": 3.0404471997377907,
      "learning_rate": 9.308765357461604e-06,
      "loss": 0.1872,
      "step": 230
    },
    {
      "epoch": 0.3402061855670103,
      "grad_norm": 3.0157736275712406,
      "learning_rate": 9.302885579019626e-06,
      "loss": 0.2216,
      "step": 231
    },
    {
      "epoch": 0.34167893961708395,
      "grad_norm": 2.716589993960879,
      "learning_rate": 9.296982772387366e-06,
      "loss": 0.2167,
      "step": 232
    },
    {
      "epoch": 0.3431516936671576,
      "grad_norm": 3.061532521108386,
      "learning_rate": 9.29105696915547e-06,
      "loss": 0.2633,
      "step": 233
    },
    {
      "epoch": 0.3446244477172312,
      "grad_norm": 3.1220356470518773,
      "learning_rate": 9.285108201037663e-06,
      "loss": 0.2128,
      "step": 234
    },
    {
      "epoch": 0.34609720176730485,
      "grad_norm": 2.7156005801814964,
      "learning_rate": 9.279136499870574e-06,
      "loss": 0.1562,
      "step": 235
    },
    {
      "epoch": 0.3475699558173785,
      "grad_norm": 2.5161864643115837,
      "learning_rate": 9.27314189761356e-06,
      "loss": 0.208,
      "step": 236
    },
    {
      "epoch": 0.3490427098674521,
      "grad_norm": 2.81380932528038,
      "learning_rate": 9.267124426348549e-06,
      "loss": 0.206,
      "step": 237
    },
    {
      "epoch": 0.35051546391752575,
      "grad_norm": 2.7182189987352454,
      "learning_rate": 9.261084118279846e-06,
      "loss": 0.1862,
      "step": 238
    },
    {
      "epoch": 0.35198821796759944,
      "grad_norm": 3.051952841047558,
      "learning_rate": 9.255021005733989e-06,
      "loss": 0.2654,
      "step": 239
    },
    {
      "epoch": 0.35346097201767307,
      "grad_norm": 3.2184875593429854,
      "learning_rate": 9.248935121159552e-06,
      "loss": 0.1956,
      "step": 240
    },
    {
      "epoch": 0.3549337260677467,
      "grad_norm": 2.955006281633986,
      "learning_rate": 9.24282649712698e-06,
      "loss": 0.2764,
      "step": 241
    },
    {
      "epoch": 0.35640648011782033,
      "grad_norm": 2.8157353776869534,
      "learning_rate": 9.23669516632842e-06,
      "loss": 0.1844,
      "step": 242
    },
    {
      "epoch": 0.35787923416789397,
      "grad_norm": 2.1637523528646896,
      "learning_rate": 9.230541161577535e-06,
      "loss": 0.1573,
      "step": 243
    },
    {
      "epoch": 0.3593519882179676,
      "grad_norm": 2.598383826057999,
      "learning_rate": 9.224364515809344e-06,
      "loss": 0.1814,
      "step": 244
    },
    {
      "epoch": 0.36082474226804123,
      "grad_norm": 2.867443024017231,
      "learning_rate": 9.218165262080024e-06,
      "loss": 0.1963,
      "step": 245
    },
    {
      "epoch": 0.36229749631811486,
      "grad_norm": 3.1621315329847683,
      "learning_rate": 9.211943433566755e-06,
      "loss": 0.2315,
      "step": 246
    },
    {
      "epoch": 0.3637702503681885,
      "grad_norm": 2.7573640644608894,
      "learning_rate": 9.205699063567528e-06,
      "loss": 0.2127,
      "step": 247
    },
    {
      "epoch": 0.36524300441826213,
      "grad_norm": 3.225352548953467,
      "learning_rate": 9.199432185500972e-06,
      "loss": 0.2467,
      "step": 248
    },
    {
      "epoch": 0.36671575846833576,
      "grad_norm": 3.1363371958388484,
      "learning_rate": 9.19314283290618e-06,
      "loss": 0.2458,
      "step": 249
    },
    {
      "epoch": 0.36818851251840945,
      "grad_norm": 2.704895240551704,
      "learning_rate": 9.186831039442514e-06,
      "loss": 0.1929,
      "step": 250
    },
    {
      "epoch": 0.3696612665684831,
      "grad_norm": 3.4728725502728715,
      "learning_rate": 9.180496838889446e-06,
      "loss": 0.2527,
      "step": 251
    },
    {
      "epoch": 0.3711340206185567,
      "grad_norm": 3.1470649429210926,
      "learning_rate": 9.174140265146356e-06,
      "loss": 0.2134,
      "step": 252
    },
    {
      "epoch": 0.37260677466863035,
      "grad_norm": 3.28728525676738,
      "learning_rate": 9.167761352232372e-06,
      "loss": 0.2366,
      "step": 253
    },
    {
      "epoch": 0.374079528718704,
      "grad_norm": 2.69310735559955,
      "learning_rate": 9.161360134286166e-06,
      "loss": 0.2066,
      "step": 254
    },
    {
      "epoch": 0.3755522827687776,
      "grad_norm": 2.79351498094659,
      "learning_rate": 9.154936645565788e-06,
      "loss": 0.1749,
      "step": 255
    },
    {
      "epoch": 0.37702503681885124,
      "grad_norm": 2.5160724004293806,
      "learning_rate": 9.148490920448476e-06,
      "loss": 0.2123,
      "step": 256
    },
    {
      "epoch": 0.3784977908689249,
      "grad_norm": 2.6875394507380137,
      "learning_rate": 9.142022993430475e-06,
      "loss": 0.1964,
      "step": 257
    },
    {
      "epoch": 0.3799705449189985,
      "grad_norm": 2.662291621699446,
      "learning_rate": 9.135532899126844e-06,
      "loss": 0.2049,
      "step": 258
    },
    {
      "epoch": 0.38144329896907214,
      "grad_norm": 2.810464023609334,
      "learning_rate": 9.129020672271283e-06,
      "loss": 0.1889,
      "step": 259
    },
    {
      "epoch": 0.38291605301914583,
      "grad_norm": 2.263563107768252,
      "learning_rate": 9.122486347715937e-06,
      "loss": 0.1627,
      "step": 260
    },
    {
      "epoch": 0.38438880706921946,
      "grad_norm": 2.7689382781268033,
      "learning_rate": 9.115929960431217e-06,
      "loss": 0.2142,
      "step": 261
    },
    {
      "epoch": 0.3858615611192931,
      "grad_norm": 3.524805623776474,
      "learning_rate": 9.109351545505607e-06,
      "loss": 0.3073,
      "step": 262
    },
    {
      "epoch": 0.3873343151693667,
      "grad_norm": 2.9769637254549166,
      "learning_rate": 9.10275113814548e-06,
      "loss": 0.2254,
      "step": 263
    },
    {
      "epoch": 0.38880706921944036,
      "grad_norm": 2.674094413353048,
      "learning_rate": 9.096128773674902e-06,
      "loss": 0.1779,
      "step": 264
    },
    {
      "epoch": 0.390279823269514,
      "grad_norm": 3.1259498837476634,
      "learning_rate": 9.08948448753546e-06,
      "loss": 0.1791,
      "step": 265
    },
    {
      "epoch": 0.3917525773195876,
      "grad_norm": 2.73101491316821,
      "learning_rate": 9.082818315286054e-06,
      "loss": 0.1905,
      "step": 266
    },
    {
      "epoch": 0.39322533136966126,
      "grad_norm": 3.392701650405703,
      "learning_rate": 9.076130292602717e-06,
      "loss": 0.2023,
      "step": 267
    },
    {
      "epoch": 0.3946980854197349,
      "grad_norm": 2.7586772830435424,
      "learning_rate": 9.069420455278418e-06,
      "loss": 0.2096,
      "step": 268
    },
    {
      "epoch": 0.3961708394698085,
      "grad_norm": 2.6884641588849028,
      "learning_rate": 9.062688839222878e-06,
      "loss": 0.1844,
      "step": 269
    },
    {
      "epoch": 0.39764359351988215,
      "grad_norm": 2.851457009221402,
      "learning_rate": 9.055935480462366e-06,
      "loss": 0.2127,
      "step": 270
    },
    {
      "epoch": 0.39911634756995584,
      "grad_norm": 3.200602534780458,
      "learning_rate": 9.049160415139525e-06,
      "loss": 0.224,
      "step": 271
    },
    {
      "epoch": 0.4005891016200295,
      "grad_norm": 2.9975515825389323,
      "learning_rate": 9.042363679513158e-06,
      "loss": 0.1574,
      "step": 272
    },
    {
      "epoch": 0.4020618556701031,
      "grad_norm": 2.778235077186209,
      "learning_rate": 9.035545309958048e-06,
      "loss": 0.1813,
      "step": 273
    },
    {
      "epoch": 0.40353460972017674,
      "grad_norm": 2.1611591561612986,
      "learning_rate": 9.028705342964752e-06,
      "loss": 0.1311,
      "step": 274
    },
    {
      "epoch": 0.4050073637702504,
      "grad_norm": 3.1410123112072252,
      "learning_rate": 9.021843815139424e-06,
      "loss": 0.2018,
      "step": 275
    },
    {
      "epoch": 0.406480117820324,
      "grad_norm": 2.653212394824665,
      "learning_rate": 9.014960763203592e-06,
      "loss": 0.1725,
      "step": 276
    },
    {
      "epoch": 0.40795287187039764,
      "grad_norm": 2.5114048753988327,
      "learning_rate": 9.008056223993993e-06,
      "loss": 0.1587,
      "step": 277
    },
    {
      "epoch": 0.40942562592047127,
      "grad_norm": 2.3310789133305034,
      "learning_rate": 9.001130234462348e-06,
      "loss": 0.1728,
      "step": 278
    },
    {
      "epoch": 0.4108983799705449,
      "grad_norm": 3.0268391081998067,
      "learning_rate": 8.994182831675176e-06,
      "loss": 0.2069,
      "step": 279
    },
    {
      "epoch": 0.41237113402061853,
      "grad_norm": 3.176729678522873,
      "learning_rate": 8.987214052813605e-06,
      "loss": 0.202,
      "step": 280
    },
    {
      "epoch": 0.41384388807069217,
      "grad_norm": 2.900999234971349,
      "learning_rate": 8.980223935173153e-06,
      "loss": 0.2039,
      "step": 281
    },
    {
      "epoch": 0.41531664212076586,
      "grad_norm": 2.958916451457122,
      "learning_rate": 8.973212516163545e-06,
      "loss": 0.223,
      "step": 282
    },
    {
      "epoch": 0.4167893961708395,
      "grad_norm": 3.2062050255647616,
      "learning_rate": 8.966179833308506e-06,
      "loss": 0.2008,
      "step": 283
    },
    {
      "epoch": 0.4182621502209131,
      "grad_norm": 2.3023016729588646,
      "learning_rate": 8.959125924245559e-06,
      "loss": 0.1724,
      "step": 284
    },
    {
      "epoch": 0.41973490427098675,
      "grad_norm": 3.3857256850661788,
      "learning_rate": 8.952050826725827e-06,
      "loss": 0.208,
      "step": 285
    },
    {
      "epoch": 0.4212076583210604,
      "grad_norm": 2.9980500030235273,
      "learning_rate": 8.944954578613826e-06,
      "loss": 0.1935,
      "step": 286
    },
    {
      "epoch": 0.422680412371134,
      "grad_norm": 2.4335052728686826,
      "learning_rate": 8.937837217887273e-06,
      "loss": 0.1385,
      "step": 287
    },
    {
      "epoch": 0.42415316642120765,
      "grad_norm": 2.7372999604088113,
      "learning_rate": 8.930698782636868e-06,
      "loss": 0.1982,
      "step": 288
    },
    {
      "epoch": 0.4256259204712813,
      "grad_norm": 3.3460685534877888,
      "learning_rate": 8.923539311066101e-06,
      "loss": 0.2435,
      "step": 289
    },
    {
      "epoch": 0.4270986745213549,
      "grad_norm": 3.073278464461925,
      "learning_rate": 8.916358841491046e-06,
      "loss": 0.2257,
      "step": 290
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 3.033222631807686,
      "learning_rate": 8.90915741234015e-06,
      "loss": 0.1561,
      "step": 291
    },
    {
      "epoch": 0.43004418262150224,
      "grad_norm": 2.9077878079684014,
      "learning_rate": 8.901935062154035e-06,
      "loss": 0.1688,
      "step": 292
    },
    {
      "epoch": 0.43151693667157587,
      "grad_norm": 2.4746618411647914,
      "learning_rate": 8.894691829585285e-06,
      "loss": 0.1901,
      "step": 293
    },
    {
      "epoch": 0.4329896907216495,
      "grad_norm": 2.664815899368701,
      "learning_rate": 8.887427753398249e-06,
      "loss": 0.1554,
      "step": 294
    },
    {
      "epoch": 0.43446244477172313,
      "grad_norm": 3.0348226117764163,
      "learning_rate": 8.880142872468816e-06,
      "loss": 0.2212,
      "step": 295
    },
    {
      "epoch": 0.43593519882179677,
      "grad_norm": 2.6085379817053425,
      "learning_rate": 8.872837225784227e-06,
      "loss": 0.1719,
      "step": 296
    },
    {
      "epoch": 0.4374079528718704,
      "grad_norm": 3.2317544602456123,
      "learning_rate": 8.865510852442854e-06,
      "loss": 0.2083,
      "step": 297
    },
    {
      "epoch": 0.43888070692194403,
      "grad_norm": 2.741617300142697,
      "learning_rate": 8.858163791653994e-06,
      "loss": 0.1938,
      "step": 298
    },
    {
      "epoch": 0.44035346097201766,
      "grad_norm": 2.8748881811894993,
      "learning_rate": 8.85079608273766e-06,
      "loss": 0.1682,
      "step": 299
    },
    {
      "epoch": 0.4418262150220913,
      "grad_norm": 3.0709289314464274,
      "learning_rate": 8.84340776512437e-06,
      "loss": 0.2368,
      "step": 300
    },
    {
      "epoch": 0.44329896907216493,
      "grad_norm": 3.332976226058547,
      "learning_rate": 8.83599887835493e-06,
      "loss": 0.268,
      "step": 301
    },
    {
      "epoch": 0.44477172312223856,
      "grad_norm": 3.2928336415795676,
      "learning_rate": 8.82856946208024e-06,
      "loss": 0.2426,
      "step": 302
    },
    {
      "epoch": 0.44624447717231225,
      "grad_norm": 3.0918828260694644,
      "learning_rate": 8.821119556061054e-06,
      "loss": 0.2047,
      "step": 303
    },
    {
      "epoch": 0.4477172312223859,
      "grad_norm": 2.6455271835816316,
      "learning_rate": 8.8136492001678e-06,
      "loss": 0.2242,
      "step": 304
    },
    {
      "epoch": 0.4491899852724595,
      "grad_norm": 2.367999960305818,
      "learning_rate": 8.806158434380334e-06,
      "loss": 0.1498,
      "step": 305
    },
    {
      "epoch": 0.45066273932253315,
      "grad_norm": 3.1564104973218474,
      "learning_rate": 8.798647298787754e-06,
      "loss": 0.2197,
      "step": 306
    },
    {
      "epoch": 0.4521354933726068,
      "grad_norm": 2.7830911081835263,
      "learning_rate": 8.791115833588165e-06,
      "loss": 0.2114,
      "step": 307
    },
    {
      "epoch": 0.4536082474226804,
      "grad_norm": 2.339228369282024,
      "learning_rate": 8.783564079088478e-06,
      "loss": 0.194,
      "step": 308
    },
    {
      "epoch": 0.45508100147275404,
      "grad_norm": 3.191493300938616,
      "learning_rate": 8.775992075704181e-06,
      "loss": 0.203,
      "step": 309
    },
    {
      "epoch": 0.4565537555228277,
      "grad_norm": 3.1503597728255475,
      "learning_rate": 8.76839986395914e-06,
      "loss": 0.1917,
      "step": 310
    },
    {
      "epoch": 0.4580265095729013,
      "grad_norm": 2.5760890814393615,
      "learning_rate": 8.760787484485362e-06,
      "loss": 0.1699,
      "step": 311
    },
    {
      "epoch": 0.45949926362297494,
      "grad_norm": 2.508820257174523,
      "learning_rate": 8.753154978022795e-06,
      "loss": 0.1699,
      "step": 312
    },
    {
      "epoch": 0.46097201767304863,
      "grad_norm": 2.815681439486473,
      "learning_rate": 8.7455023854191e-06,
      "loss": 0.2175,
      "step": 313
    },
    {
      "epoch": 0.46244477172312226,
      "grad_norm": 2.3474709844903843,
      "learning_rate": 8.737829747629432e-06,
      "loss": 0.1524,
      "step": 314
    },
    {
      "epoch": 0.4639175257731959,
      "grad_norm": 2.6627577805253533,
      "learning_rate": 8.730137105716231e-06,
      "loss": 0.2368,
      "step": 315
    },
    {
      "epoch": 0.4653902798232695,
      "grad_norm": 3.6483303897239963,
      "learning_rate": 8.722424500848988e-06,
      "loss": 0.1976,
      "step": 316
    },
    {
      "epoch": 0.46686303387334316,
      "grad_norm": 2.935242259870657,
      "learning_rate": 8.714691974304035e-06,
      "loss": 0.2103,
      "step": 317
    },
    {
      "epoch": 0.4683357879234168,
      "grad_norm": 2.8226822109484724,
      "learning_rate": 8.706939567464322e-06,
      "loss": 0.2348,
      "step": 318
    },
    {
      "epoch": 0.4698085419734904,
      "grad_norm": 3.703059329119747,
      "learning_rate": 8.69916732181919e-06,
      "loss": 0.2174,
      "step": 319
    },
    {
      "epoch": 0.47128129602356406,
      "grad_norm": 2.9480735349519307,
      "learning_rate": 8.691375278964161e-06,
      "loss": 0.2008,
      "step": 320
    },
    {
      "epoch": 0.4727540500736377,
      "grad_norm": 3.091324162542489,
      "learning_rate": 8.6835634806007e-06,
      "loss": 0.2477,
      "step": 321
    },
    {
      "epoch": 0.4742268041237113,
      "grad_norm": 3.5902022399029834,
      "learning_rate": 8.675731968536004e-06,
      "loss": 0.2442,
      "step": 322
    },
    {
      "epoch": 0.47569955817378495,
      "grad_norm": 2.939683038888228,
      "learning_rate": 8.66788078468277e-06,
      "loss": 0.2198,
      "step": 323
    },
    {
      "epoch": 0.47717231222385864,
      "grad_norm": 2.8812334528236376,
      "learning_rate": 8.660009971058977e-06,
      "loss": 0.1993,
      "step": 324
    },
    {
      "epoch": 0.4786450662739323,
      "grad_norm": 3.6057168421211787,
      "learning_rate": 8.652119569787663e-06,
      "loss": 0.2258,
      "step": 325
    },
    {
      "epoch": 0.4801178203240059,
      "grad_norm": 3.597288920583859,
      "learning_rate": 8.644209623096686e-06,
      "loss": 0.2545,
      "step": 326
    },
    {
      "epoch": 0.48159057437407954,
      "grad_norm": 3.4390171712068756,
      "learning_rate": 8.636280173318517e-06,
      "loss": 0.1918,
      "step": 327
    },
    {
      "epoch": 0.48306332842415317,
      "grad_norm": 3.032127662830995,
      "learning_rate": 8.628331262889992e-06,
      "loss": 0.2046,
      "step": 328
    },
    {
      "epoch": 0.4845360824742268,
      "grad_norm": 2.676901174668902,
      "learning_rate": 8.620362934352109e-06,
      "loss": 0.1815,
      "step": 329
    },
    {
      "epoch": 0.48600883652430044,
      "grad_norm": 3.830805699602978,
      "learning_rate": 8.612375230349779e-06,
      "loss": 0.2411,
      "step": 330
    },
    {
      "epoch": 0.48748159057437407,
      "grad_norm": 2.4906900864812305,
      "learning_rate": 8.60436819363161e-06,
      "loss": 0.1684,
      "step": 331
    },
    {
      "epoch": 0.4889543446244477,
      "grad_norm": 2.4444113482232126,
      "learning_rate": 8.596341867049677e-06,
      "loss": 0.1467,
      "step": 332
    },
    {
      "epoch": 0.49042709867452133,
      "grad_norm": 3.4954288127618365,
      "learning_rate": 8.588296293559286e-06,
      "loss": 0.2419,
      "step": 333
    },
    {
      "epoch": 0.49189985272459497,
      "grad_norm": 2.5438367926199135,
      "learning_rate": 8.58023151621875e-06,
      "loss": 0.1703,
      "step": 334
    },
    {
      "epoch": 0.49337260677466865,
      "grad_norm": 3.038027697084942,
      "learning_rate": 8.57214757818916e-06,
      "loss": 0.228,
      "step": 335
    },
    {
      "epoch": 0.4948453608247423,
      "grad_norm": 3.7548333153148867,
      "learning_rate": 8.564044522734147e-06,
      "loss": 0.257,
      "step": 336
    },
    {
      "epoch": 0.4963181148748159,
      "grad_norm": 2.7157802194600382,
      "learning_rate": 8.55592239321966e-06,
      "loss": 0.1518,
      "step": 337
    },
    {
      "epoch": 0.49779086892488955,
      "grad_norm": 2.19265493343489,
      "learning_rate": 8.54778123311372e-06,
      "loss": 0.1607,
      "step": 338
    },
    {
      "epoch": 0.4992636229749632,
      "grad_norm": 2.6816534246794634,
      "learning_rate": 8.539621085986209e-06,
      "loss": 0.1868,
      "step": 339
    },
    {
      "epoch": 0.5007363770250368,
      "grad_norm": 3.87477423163916,
      "learning_rate": 8.531441995508609e-06,
      "loss": 0.2445,
      "step": 340
    },
    {
      "epoch": 0.5022091310751104,
      "grad_norm": 3.1386210194757274,
      "learning_rate": 8.523244005453795e-06,
      "loss": 0.2235,
      "step": 341
    },
    {
      "epoch": 0.5036818851251841,
      "grad_norm": 3.0575398322579868,
      "learning_rate": 8.515027159695781e-06,
      "loss": 0.195,
      "step": 342
    },
    {
      "epoch": 0.5051546391752577,
      "grad_norm": 3.259828431228313,
      "learning_rate": 8.506791502209497e-06,
      "loss": 0.1942,
      "step": 343
    },
    {
      "epoch": 0.5066273932253313,
      "grad_norm": 2.8199139166564,
      "learning_rate": 8.498537077070548e-06,
      "loss": 0.1506,
      "step": 344
    },
    {
      "epoch": 0.508100147275405,
      "grad_norm": 3.0673180182256643,
      "learning_rate": 8.490263928454983e-06,
      "loss": 0.1812,
      "step": 345
    },
    {
      "epoch": 0.5095729013254786,
      "grad_norm": 2.7003191008577185,
      "learning_rate": 8.481972100639049e-06,
      "loss": 0.1724,
      "step": 346
    },
    {
      "epoch": 0.5110456553755522,
      "grad_norm": 3.3788019563020666,
      "learning_rate": 8.473661637998966e-06,
      "loss": 0.1898,
      "step": 347
    },
    {
      "epoch": 0.5125184094256259,
      "grad_norm": 2.2754214416579783,
      "learning_rate": 8.465332585010682e-06,
      "loss": 0.1631,
      "step": 348
    },
    {
      "epoch": 0.5139911634756995,
      "grad_norm": 2.578239855031428,
      "learning_rate": 8.456984986249636e-06,
      "loss": 0.1824,
      "step": 349
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 2.5873653173219266,
      "learning_rate": 8.448618886390523e-06,
      "loss": 0.203,
      "step": 350
    },
    {
      "epoch": 0.5169366715758469,
      "grad_norm": 3.945703180252543,
      "learning_rate": 8.440234330207047e-06,
      "loss": 0.1796,
      "step": 351
    },
    {
      "epoch": 0.5184094256259205,
      "grad_norm": 3.3592392251092846,
      "learning_rate": 8.431831362571692e-06,
      "loss": 0.2058,
      "step": 352
    },
    {
      "epoch": 0.5198821796759941,
      "grad_norm": 3.283241191967628,
      "learning_rate": 8.423410028455474e-06,
      "loss": 0.2041,
      "step": 353
    },
    {
      "epoch": 0.5213549337260678,
      "grad_norm": 2.5399001423784227,
      "learning_rate": 8.414970372927705e-06,
      "loss": 0.1664,
      "step": 354
    },
    {
      "epoch": 0.5228276877761414,
      "grad_norm": 3.338954536109113,
      "learning_rate": 8.406512441155746e-06,
      "loss": 0.2712,
      "step": 355
    },
    {
      "epoch": 0.524300441826215,
      "grad_norm": 2.5050371298566056,
      "learning_rate": 8.398036278404768e-06,
      "loss": 0.1839,
      "step": 356
    },
    {
      "epoch": 0.5257731958762887,
      "grad_norm": 2.1797030450497594,
      "learning_rate": 8.389541930037516e-06,
      "loss": 0.1578,
      "step": 357
    },
    {
      "epoch": 0.5272459499263623,
      "grad_norm": 2.479492098411274,
      "learning_rate": 8.38102944151406e-06,
      "loss": 0.1836,
      "step": 358
    },
    {
      "epoch": 0.5287187039764359,
      "grad_norm": 3.2045132083900225,
      "learning_rate": 8.372498858391545e-06,
      "loss": 0.2813,
      "step": 359
    },
    {
      "epoch": 0.5301914580265096,
      "grad_norm": 2.6722264401815323,
      "learning_rate": 8.363950226323963e-06,
      "loss": 0.1736,
      "step": 360
    },
    {
      "epoch": 0.5316642120765832,
      "grad_norm": 2.69073149052516,
      "learning_rate": 8.355383591061898e-06,
      "loss": 0.1817,
      "step": 361
    },
    {
      "epoch": 0.5331369661266568,
      "grad_norm": 3.065347755873716,
      "learning_rate": 8.346798998452283e-06,
      "loss": 0.1754,
      "step": 362
    },
    {
      "epoch": 0.5346097201767305,
      "grad_norm": 3.0548564778815,
      "learning_rate": 8.338196494438153e-06,
      "loss": 0.2002,
      "step": 363
    },
    {
      "epoch": 0.5360824742268041,
      "grad_norm": 2.9790505648872436,
      "learning_rate": 8.329576125058406e-06,
      "loss": 0.2348,
      "step": 364
    },
    {
      "epoch": 0.5375552282768777,
      "grad_norm": 4.563506545157317,
      "learning_rate": 8.320937936447549e-06,
      "loss": 0.2441,
      "step": 365
    },
    {
      "epoch": 0.5390279823269514,
      "grad_norm": 3.37684954182675,
      "learning_rate": 8.312281974835452e-06,
      "loss": 0.2578,
      "step": 366
    },
    {
      "epoch": 0.540500736377025,
      "grad_norm": 3.3361858569689415,
      "learning_rate": 8.303608286547109e-06,
      "loss": 0.2456,
      "step": 367
    },
    {
      "epoch": 0.5419734904270986,
      "grad_norm": 2.3890129851813424,
      "learning_rate": 8.294916918002377e-06,
      "loss": 0.1602,
      "step": 368
    },
    {
      "epoch": 0.5434462444771723,
      "grad_norm": 3.0258627442553983,
      "learning_rate": 8.286207915715733e-06,
      "loss": 0.2154,
      "step": 369
    },
    {
      "epoch": 0.5449189985272459,
      "grad_norm": 2.9412210960720238,
      "learning_rate": 8.277481326296039e-06,
      "loss": 0.2335,
      "step": 370
    },
    {
      "epoch": 0.5463917525773195,
| "grad_norm": 3.602454180004041, |
| "learning_rate": 8.268737196446264e-06, |
| "loss": 0.2525, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5478645066273933, |
| "grad_norm": 2.9183607705714922, |
| "learning_rate": 8.259975572963257e-06, |
| "loss": 0.2667, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5493372606774669, |
| "grad_norm": 2.6116150881709195, |
| "learning_rate": 8.251196502737496e-06, |
| "loss": 0.2216, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5508100147275405, |
| "grad_norm": 3.3432845876969988, |
| "learning_rate": 8.242400032752813e-06, |
| "loss": 0.2595, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5522827687776142, |
| "grad_norm": 2.583209817811413, |
| "learning_rate": 8.233586210086182e-06, |
| "loss": 0.1627, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.5537555228276878, |
| "grad_norm": 3.234454619219617, |
| "learning_rate": 8.224755081907427e-06, |
| "loss": 0.2192, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5552282768777614, |
| "grad_norm": 3.314191004035807, |
| "learning_rate": 8.215906695478997e-06, |
| "loss": 0.1975, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.5567010309278351, |
| "grad_norm": 3.749827270518619, |
| "learning_rate": 8.207041098155701e-06, |
| "loss": 0.2513, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5581737849779087, |
| "grad_norm": 2.7173241405482083, |
| "learning_rate": 8.198158337384457e-06, |
| "loss": 0.1781, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.5596465390279823, |
| "grad_norm": 2.481104647636881, |
| "learning_rate": 8.189258460704039e-06, |
| "loss": 0.1881, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.561119293078056, |
| "grad_norm": 2.709815508885945, |
| "learning_rate": 8.180341515744823e-06, |
| "loss": 0.2051, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.5625920471281296, |
| "grad_norm": 2.3543635541046917, |
| "learning_rate": 8.171407550228532e-06, |
| "loss": 0.1798, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5640648011782032, |
| "grad_norm": 2.681937385638091, |
| "learning_rate": 8.162456611967972e-06, |
| "loss": 0.1937, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.5655375552282769, |
| "grad_norm": 3.0109379848386633, |
| "learning_rate": 8.153488748866795e-06, |
| "loss": 0.2047, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5670103092783505, |
| "grad_norm": 3.2994675192690464, |
| "learning_rate": 8.144504008919224e-06, |
| "loss": 0.1708, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.5684830633284241, |
| "grad_norm": 2.63707536509823, |
| "learning_rate": 8.135502440209803e-06, |
| "loss": 0.1706, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5699558173784978, |
| "grad_norm": 2.5778988818118593, |
| "learning_rate": 8.126484090913148e-06, |
| "loss": 0.1613, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 2.9880786102250383, |
| "learning_rate": 8.117449009293668e-06, |
| "loss": 0.2124, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.572901325478645, |
| "grad_norm": 2.7339186904426422, |
| "learning_rate": 8.108397243705335e-06, |
| "loss": 0.1767, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.5743740795287187, |
| "grad_norm": 2.920927383070506, |
| "learning_rate": 8.0993288425914e-06, |
| "loss": 0.1948, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5758468335787923, |
| "grad_norm": 2.9959486658471417, |
| "learning_rate": 8.09024385448415e-06, |
| "loss": 0.1987, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.5773195876288659, |
| "grad_norm": 2.6078343603636114, |
| "learning_rate": 8.081142328004638e-06, |
| "loss": 0.1777, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5787923416789397, |
| "grad_norm": 2.5604795545389205, |
| "learning_rate": 8.072024311862426e-06, |
| "loss": 0.1508, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.5802650957290133, |
| "grad_norm": 3.648602445911234, |
| "learning_rate": 8.062889854855334e-06, |
| "loss": 0.2459, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5817378497790869, |
| "grad_norm": 2.677809870070787, |
| "learning_rate": 8.053739005869158e-06, |
| "loss": 0.1722, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5832106038291606, |
| "grad_norm": 3.3192707460526325, |
| "learning_rate": 8.044571813877431e-06, |
| "loss": 0.2267, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.5846833578792342, |
| "grad_norm": 2.5799305333056837, |
| "learning_rate": 8.035388327941147e-06, |
| "loss": 0.1277, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.5861561119293078, |
| "grad_norm": 3.117270413994588, |
| "learning_rate": 8.0261885972085e-06, |
| "loss": 0.2293, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5876288659793815, |
| "grad_norm": 2.9003896598861867, |
| "learning_rate": 8.016972670914624e-06, |
| "loss": 0.1986, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.5891016200294551, |
| "grad_norm": 2.9415851694090525, |
| "learning_rate": 8.007740598381329e-06, |
| "loss": 0.2323, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5905743740795287, |
| "grad_norm": 2.5157737935406477, |
| "learning_rate": 7.998492429016837e-06, |
| "loss": 0.1968, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.5920471281296024, |
| "grad_norm": 3.499617736336555, |
| "learning_rate": 7.989228212315516e-06, |
| "loss": 0.2127, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.593519882179676, |
| "grad_norm": 3.174124598925698, |
| "learning_rate": 7.979947997857617e-06, |
| "loss": 0.219, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.5949926362297496, |
| "grad_norm": 2.5812414659778278, |
| "learning_rate": 7.970651835309009e-06, |
| "loss": 0.1796, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5964653902798233, |
| "grad_norm": 2.4138046810747236, |
| "learning_rate": 7.961339774420907e-06, |
| "loss": 0.1878, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5979381443298969, |
| "grad_norm": 2.8540565839576555, |
| "learning_rate": 7.952011865029614e-06, |
| "loss": 0.1945, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5994108983799705, |
| "grad_norm": 2.531347080366607, |
| "learning_rate": 7.942668157056255e-06, |
| "loss": 0.1378, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6008836524300442, |
| "grad_norm": 3.154901934995226, |
| "learning_rate": 7.933308700506497e-06, |
| "loss": 0.1616, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6023564064801178, |
| "grad_norm": 3.053713830746646, |
| "learning_rate": 7.923933545470301e-06, |
| "loss": 0.2038, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.6038291605301914, |
| "grad_norm": 3.5319559062347174, |
| "learning_rate": 7.914542742121632e-06, |
| "loss": 0.2189, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.605301914580265, |
| "grad_norm": 2.929920661597212, |
| "learning_rate": 7.905136340718212e-06, |
| "loss": 0.1999, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6067746686303387, |
| "grad_norm": 3.20901586200067, |
| "learning_rate": 7.895714391601232e-06, |
| "loss": 0.2332, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6082474226804123, |
| "grad_norm": 2.7241977220902927, |
| "learning_rate": 7.886276945195098e-06, |
| "loss": 0.1715, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6097201767304861, |
| "grad_norm": 2.9678480126718476, |
| "learning_rate": 7.87682405200715e-06, |
| "loss": 0.1969, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6111929307805597, |
| "grad_norm": 3.0479292039013064, |
| "learning_rate": 7.867355762627397e-06, |
| "loss": 0.1538, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6126656848306333, |
| "grad_norm": 2.3888661846984247, |
| "learning_rate": 7.857872127728248e-06, |
| "loss": 0.1739, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.614138438880707, |
| "grad_norm": 3.346527102339922, |
| "learning_rate": 7.848373198064237e-06, |
| "loss": 0.208, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.6156111929307806, |
| "grad_norm": 2.813215877351235, |
| "learning_rate": 7.838859024471747e-06, |
| "loss": 0.1854, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6170839469808542, |
| "grad_norm": 3.009571356055603, |
| "learning_rate": 7.829329657868753e-06, |
| "loss": 0.1925, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.6185567010309279, |
| "grad_norm": 2.4693834346638193, |
| "learning_rate": 7.819785149254534e-06, |
| "loss": 0.1482, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6200294550810015, |
| "grad_norm": 2.823982785483761, |
| "learning_rate": 7.810225549709404e-06, |
| "loss": 0.1986, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6215022091310751, |
| "grad_norm": 2.964260763763868, |
| "learning_rate": 7.80065091039445e-06, |
| "loss": 0.2132, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6229749631811488, |
| "grad_norm": 3.0959584354379572, |
| "learning_rate": 7.791061282551237e-06, |
| "loss": 0.2012, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.6244477172312224, |
| "grad_norm": 2.671725358882168, |
| "learning_rate": 7.781456717501557e-06, |
| "loss": 0.1887, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.625920471281296, |
| "grad_norm": 3.5658509460222985, |
| "learning_rate": 7.77183726664713e-06, |
| "loss": 0.2818, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6273932253313697, |
| "grad_norm": 3.1709737116220174, |
| "learning_rate": 7.762202981469358e-06, |
| "loss": 0.2644, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6288659793814433, |
| "grad_norm": 2.429853910460586, |
| "learning_rate": 7.752553913529019e-06, |
| "loss": 0.17, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6303387334315169, |
| "grad_norm": 3.1028664137976256, |
| "learning_rate": 7.74289011446601e-06, |
| "loss": 0.2063, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6318114874815906, |
| "grad_norm": 2.71916362952065, |
| "learning_rate": 7.733211635999072e-06, |
| "loss": 0.1695, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.6332842415316642, |
| "grad_norm": 2.81679325242666, |
| "learning_rate": 7.7235185299255e-06, |
| "loss": 0.1831, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6347569955817378, |
| "grad_norm": 2.9587096819378056, |
| "learning_rate": 7.713810848120873e-06, |
| "loss": 0.189, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6362297496318114, |
| "grad_norm": 3.259400089947752, |
| "learning_rate": 7.704088642538782e-06, |
| "loss": 0.2296, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6377025036818851, |
| "grad_norm": 2.5365578312254518, |
| "learning_rate": 7.694351965210543e-06, |
| "loss": 0.2145, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6391752577319587, |
| "grad_norm": 2.8276495805494792, |
| "learning_rate": 7.68460086824492e-06, |
| "loss": 0.1797, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6406480117820325, |
| "grad_norm": 2.736677687473085, |
| "learning_rate": 7.674835403827852e-06, |
| "loss": 0.2262, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6421207658321061, |
| "grad_norm": 3.070894838978635, |
| "learning_rate": 7.665055624222166e-06, |
| "loss": 0.2135, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6435935198821797, |
| "grad_norm": 2.816220291246076, |
| "learning_rate": 7.655261581767306e-06, |
| "loss": 0.2038, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.6450662739322534, |
| "grad_norm": 2.5905807406641603, |
| "learning_rate": 7.645453328879042e-06, |
| "loss": 0.1455, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.646539027982327, |
| "grad_norm": 2.1365012260561858, |
| "learning_rate": 7.635630918049202e-06, |
| "loss": 0.1585, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.6480117820324006, |
| "grad_norm": 2.402228516657614, |
| "learning_rate": 7.625794401845376e-06, |
| "loss": 0.176, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6494845360824743, |
| "grad_norm": 2.6298366040596384, |
| "learning_rate": 7.61594383291065e-06, |
| "loss": 0.1691, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.6509572901325479, |
| "grad_norm": 2.7053102309257557, |
| "learning_rate": 7.606079263963318e-06, |
| "loss": 0.1709, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6524300441826215, |
| "grad_norm": 2.6618212391915006, |
| "learning_rate": 7.5962007477965935e-06, |
| "loss": 0.1815, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.6539027982326951, |
| "grad_norm": 3.0920433094488877, |
| "learning_rate": 7.5863083372783365e-06, |
| "loss": 0.208, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6553755522827688, |
| "grad_norm": 2.621502736300709, |
| "learning_rate": 7.576402085350765e-06, |
| "loss": 0.1707, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6568483063328424, |
| "grad_norm": 2.8975906494183543, |
| "learning_rate": 7.566482045030179e-06, |
| "loss": 0.1618, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.658321060382916, |
| "grad_norm": 2.3152886400243133, |
| "learning_rate": 7.556548269406663e-06, |
| "loss": 0.1763, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.6597938144329897, |
| "grad_norm": 3.475707187942825, |
| "learning_rate": 7.546600811643816e-06, |
| "loss": 0.2268, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6612665684830633, |
| "grad_norm": 2.5575251767965503, |
| "learning_rate": 7.536639724978458e-06, |
| "loss": 0.1678, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.6627393225331369, |
| "grad_norm": 3.1984667154519957, |
| "learning_rate": 7.526665062720351e-06, |
| "loss": 0.2474, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6642120765832106, |
| "grad_norm": 3.4829489314963737, |
| "learning_rate": 7.516676878251907e-06, |
| "loss": 0.2525, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.6656848306332842, |
| "grad_norm": 2.597566480163576, |
| "learning_rate": 7.5066752250279104e-06, |
| "loss": 0.1661, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6671575846833578, |
| "grad_norm": 2.6300515588665396, |
| "learning_rate": 7.4966601565752265e-06, |
| "loss": 0.1767, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.6686303387334315, |
| "grad_norm": 2.655827217691609, |
| "learning_rate": 7.486631726492511e-06, |
| "loss": 0.1936, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6701030927835051, |
| "grad_norm": 2.9626391882510945, |
| "learning_rate": 7.476589988449939e-06, |
| "loss": 0.1721, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6715758468335787, |
| "grad_norm": 3.098417760868542, |
| "learning_rate": 7.466534996188897e-06, |
| "loss": 0.1812, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6730486008836525, |
| "grad_norm": 2.8057882042803253, |
| "learning_rate": 7.45646680352171e-06, |
| "loss": 0.1899, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.6745213549337261, |
| "grad_norm": 2.580716315482495, |
| "learning_rate": 7.446385464331349e-06, |
| "loss": 0.1647, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6759941089837997, |
| "grad_norm": 3.069780114418598, |
| "learning_rate": 7.436291032571142e-06, |
| "loss": 0.2297, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.6774668630338734, |
| "grad_norm": 3.1726128225437473, |
| "learning_rate": 7.426183562264487e-06, |
| "loss": 0.1997, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.678939617083947, |
| "grad_norm": 3.9757025531128583, |
| "learning_rate": 7.41606310750456e-06, |
| "loss": 0.2201, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.6804123711340206, |
| "grad_norm": 2.858930358294962, |
| "learning_rate": 7.405929722454026e-06, |
| "loss": 0.1229, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6818851251840943, |
| "grad_norm": 3.7849179138321336, |
| "learning_rate": 7.395783461344755e-06, |
| "loss": 0.1913, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.6833578792341679, |
| "grad_norm": 3.1039636619579865, |
| "learning_rate": 7.385624378477521e-06, |
| "loss": 0.193, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6848306332842415, |
| "grad_norm": 2.5798742938215415, |
| "learning_rate": 7.375452528221722e-06, |
| "loss": 0.1894, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6863033873343152, |
| "grad_norm": 2.903615315853614, |
| "learning_rate": 7.365267965015086e-06, |
| "loss": 0.2063, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6877761413843888, |
| "grad_norm": 3.568623911882026, |
| "learning_rate": 7.355070743363374e-06, |
| "loss": 0.2458, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.6892488954344624, |
| "grad_norm": 3.3873971159699297, |
| "learning_rate": 7.344860917840092e-06, |
| "loss": 0.1918, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6907216494845361, |
| "grad_norm": 2.76053873930466, |
| "learning_rate": 7.334638543086203e-06, |
| "loss": 0.1664, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.6921944035346097, |
| "grad_norm": 2.912498192150158, |
| "learning_rate": 7.324403673809831e-06, |
| "loss": 0.1596, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6936671575846833, |
| "grad_norm": 2.624045527481493, |
| "learning_rate": 7.314156364785963e-06, |
| "loss": 0.1798, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.695139911634757, |
| "grad_norm": 2.859200421062988, |
| "learning_rate": 7.303896670856168e-06, |
| "loss": 0.195, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.6966126656848306, |
| "grad_norm": 2.3406721127592562, |
| "learning_rate": 7.29362464692829e-06, |
| "loss": 0.1871, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.6980854197349042, |
| "grad_norm": 3.004611054015597, |
| "learning_rate": 7.283340347976167e-06, |
| "loss": 0.252, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6995581737849779, |
| "grad_norm": 2.8720492409213607, |
| "learning_rate": 7.273043829039325e-06, |
| "loss": 0.1985, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7010309278350515, |
| "grad_norm": 2.582611133201151, |
| "learning_rate": 7.262735145222696e-06, |
| "loss": 0.1725, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7025036818851251, |
| "grad_norm": 3.0773343754931703, |
| "learning_rate": 7.252414351696305e-06, |
| "loss": 0.1922, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7039764359351989, |
| "grad_norm": 3.1150044677015964, |
| "learning_rate": 7.242081503694996e-06, |
| "loss": 0.2428, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7054491899852725, |
| "grad_norm": 2.63019918456231, |
| "learning_rate": 7.2317366565181204e-06, |
| "loss": 0.1867, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.7069219440353461, |
| "grad_norm": 2.7112622421991457, |
| "learning_rate": 7.221379865529251e-06, |
| "loss": 0.1657, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7083946980854198, |
| "grad_norm": 3.0927886687064126, |
| "learning_rate": 7.211011186155878e-06, |
| "loss": 0.2141, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.7098674521354934, |
| "grad_norm": 2.8647429677291547, |
| "learning_rate": 7.200630673889118e-06, |
| "loss": 0.2296, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.711340206185567, |
| "grad_norm": 3.086955810955279, |
| "learning_rate": 7.190238384283413e-06, |
| "loss": 0.1752, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.7128129602356407, |
| "grad_norm": 2.1480520026160828, |
| "learning_rate": 7.179834372956236e-06, |
| "loss": 0.1512, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 2.526700952050052, |
| "learning_rate": 7.169418695587791e-06, |
| "loss": 0.185, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7157584683357879, |
| "grad_norm": 2.669035941510801, |
| "learning_rate": 7.158991407920721e-06, |
| "loss": 0.1995, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7172312223858616, |
| "grad_norm": 2.987668157649501, |
| "learning_rate": 7.1485525657598e-06, |
| "loss": 0.2005, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7187039764359352, |
| "grad_norm": 2.77911904744156, |
| "learning_rate": 7.13810222497164e-06, |
| "loss": 0.1804, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7201767304860088, |
| "grad_norm": 2.952831553303968, |
| "learning_rate": 7.127640441484393e-06, |
| "loss": 0.2209, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.7216494845360825, |
| "grad_norm": 2.8357399012012396, |
| "learning_rate": 7.117167271287453e-06, |
| "loss": 0.2327, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7231222385861561, |
| "grad_norm": 2.69688716055132, |
| "learning_rate": 7.106682770431144e-06, |
| "loss": 0.1771, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7245949926362297, |
| "grad_norm": 2.5914437926095886, |
| "learning_rate": 7.096186995026439e-06, |
| "loss": 0.1517, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7260677466863034, |
| "grad_norm": 2.595142151080367, |
| "learning_rate": 7.085680001244644e-06, |
| "loss": 0.1847, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.727540500736377, |
| "grad_norm": 2.703732109079409, |
| "learning_rate": 7.07516184531711e-06, |
| "loss": 0.2144, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7290132547864506, |
| "grad_norm": 2.89328239935156, |
| "learning_rate": 7.064632583534918e-06, |
| "loss": 0.2031, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7304860088365243, |
| "grad_norm": 2.641040101207805, |
| "learning_rate": 7.05409227224859e-06, |
| "loss": 0.173, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7319587628865979, |
| "grad_norm": 3.2517271158299246, |
| "learning_rate": 7.043540967867782e-06, |
| "loss": 0.1974, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7334315169366715, |
| "grad_norm": 2.519535779252281, |
| "learning_rate": 7.032978726860981e-06, |
| "loss": 0.1787, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7349042709867453, |
| "grad_norm": 2.864687614972071, |
| "learning_rate": 7.022405605755209e-06, |
| "loss": 0.2058, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.7363770250368189, |
| "grad_norm": 2.5944755868817557, |
| "learning_rate": 7.0118216611357125e-06, |
| "loss": 0.1306, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7363770250368189, |
| "eval_loss": 0.2079160511493683, |
| "eval_runtime": 1.3465, |
| "eval_samples_per_second": 40.846, |
| "eval_steps_per_second": 10.397, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7378497790868925, |
| "grad_norm": 3.177064630921787, |
| "learning_rate": 7.001226949645663e-06, |
| "loss": 0.174, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.7393225331369662, |
| "grad_norm": 2.816317102863076, |
| "learning_rate": 6.990621527985856e-06, |
| "loss": 0.1435, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7407952871870398, |
| "grad_norm": 2.792806952818793, |
| "learning_rate": 6.980005452914404e-06, |
| "loss": 0.1862, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.7422680412371134, |
| "grad_norm": 2.564807443158852, |
| "learning_rate": 6.969378781246436e-06, |
| "loss": 0.1802, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7437407952871871, |
| "grad_norm": 3.2962420837776545, |
| "learning_rate": 6.958741569853793e-06, |
| "loss": 0.249, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7452135493372607, |
| "grad_norm": 3.2860364528743893, |
| "learning_rate": 6.948093875664719e-06, |
| "loss": 0.2022, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7466863033873343, |
| "grad_norm": 2.810461430677853, |
| "learning_rate": 6.937435755663561e-06, |
| "loss": 0.1821, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.748159057437408, |
| "grad_norm": 3.0512682831720994, |
| "learning_rate": 6.926767266890466e-06, |
| "loss": 0.2206, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7496318114874816, |
| "grad_norm": 2.8042132115591323, |
| "learning_rate": 6.916088466441068e-06, |
| "loss": 0.1556, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.7511045655375552, |
| "grad_norm": 3.0371364783700803, |
| "learning_rate": 6.90539941146619e-06, |
| "loss": 0.1739, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7525773195876289, |
| "grad_norm": 2.569775234654742, |
| "learning_rate": 6.894700159171535e-06, |
| "loss": 0.1689, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.7540500736377025, |
| "grad_norm": 2.933321528237316, |
| "learning_rate": 6.883990766817378e-06, |
| "loss": 0.203, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7555228276877761, |
| "grad_norm": 2.954252427120007, |
| "learning_rate": 6.8732712917182645e-06, |
| "loss": 0.1905, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.7569955817378498, |
| "grad_norm": 3.094551675182008, |
| "learning_rate": 6.862541791242698e-06, |
| "loss": 0.1982, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7584683357879234, |
| "grad_norm": 2.589524276912232, |
| "learning_rate": 6.851802322812839e-06, |
| "loss": 0.1524, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.759941089837997, |
| "grad_norm": 2.6756348976293958, |
| "learning_rate": 6.84105294390419e-06, |
| "loss": 0.1643, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7614138438880707, |
| "grad_norm": 2.1674746654767927, |
| "learning_rate": 6.8302937120453e-06, |
| "loss": 0.1753, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.7628865979381443, |
| "grad_norm": 2.340644234009506, |
| "learning_rate": 6.819524684817439e-06, |
| "loss": 0.1601, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7643593519882179, |
| "grad_norm": 2.327340807374574, |
| "learning_rate": 6.808745919854307e-06, |
| "loss": 0.1483, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.7658321060382917, |
| "grad_norm": 3.250904343809451, |
| "learning_rate": 6.797957474841717e-06, |
| "loss": 0.215, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7673048600883653, |
| "grad_norm": 2.5391802727966977, |
| "learning_rate": 6.787159407517285e-06, |
| "loss": 0.1823, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.7687776141384389, |
| "grad_norm": 2.7916115575921485, |
| "learning_rate": 6.776351775670129e-06, |
| "loss": 0.2174, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7702503681885126, |
| "grad_norm": 2.7858618390144767, |
| "learning_rate": 6.765534637140551e-06, |
| "loss": 0.1957, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.7717231222385862, |
| "grad_norm": 3.5767299189193116, |
| "learning_rate": 6.754708049819728e-06, |
| "loss": 0.2414, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7731958762886598, |
| "grad_norm": 2.858342695786206, |
| "learning_rate": 6.743872071649411e-06, |
| "loss": 0.1901, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7746686303387335, |
| "grad_norm": 2.8252337439834108, |
| "learning_rate": 6.733026760621607e-06, |
| "loss": 0.1992, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7761413843888071, |
| "grad_norm": 2.3041203023856793, |
| "learning_rate": 6.722172174778267e-06, |
| "loss": 0.138, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.7776141384388807, |
| "grad_norm": 3.6433507462583883, |
| "learning_rate": 6.711308372210983e-06, |
| "loss": 0.281, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7790868924889544, |
| "grad_norm": 3.826707632819319, |
| "learning_rate": 6.700435411060674e-06, |
| "loss": 0.2379, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.780559646539028, |
| "grad_norm": 2.335468844981763, |
| "learning_rate": 6.689553349517268e-06, |
| "loss": 0.1426, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7820324005891016, |
| "grad_norm": 3.1999212336171348, |
| "learning_rate": 6.678662245819401e-06, |
| "loss": 0.2075, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.7835051546391752, |
| "grad_norm": 2.670072436287972, |
| "learning_rate": 6.667762158254104e-06, |
| "loss": 0.1511, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7849779086892489, |
| "grad_norm": 3.0405487574452588, |
| "learning_rate": 6.65685314515648e-06, |
| "loss": 0.2754, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.7864506627393225, |
| "grad_norm": 2.9059557625759713, |
| "learning_rate": 6.645935264909404e-06, |
| "loss": 0.1894, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.7879234167893961, |
| "grad_norm": 3.1020231365165976, |
| "learning_rate": 6.635008575943208e-06, |
| "loss": 0.2202, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.7893961708394698, |
| "grad_norm": 2.163415012871819, |
| "learning_rate": 6.6240731367353624e-06, |
| "loss": 0.1343, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7908689248895434, |
| "grad_norm": 3.095956057328875, |
| "learning_rate": 6.6131290058101696e-06, |
| "loss": 0.1618, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.792341678939617, |
| "grad_norm": 2.934986128550977, |
| "learning_rate": 6.602176241738449e-06, |
| "loss": 0.227, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7938144329896907, |
| "grad_norm": 2.7997400039218348, |
| "learning_rate": 6.591214903137221e-06, |
| "loss": 0.1882, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.7952871870397643, |
| "grad_norm": 3.12439323702351, |
| "learning_rate": 6.580245048669395e-06, |
| "loss": 0.1969, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7967599410898379, |
| "grad_norm": 3.2306163155596415, |
| "learning_rate": 6.569266737043459e-06, |
| "loss": 0.2152, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.7982326951399117, |
| "grad_norm": 3.4132440767323424, |
| "learning_rate": 6.558280027013155e-06, |
| "loss": 0.2407, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.7997054491899853, |
| "grad_norm": 2.8084491999693264, |
| "learning_rate": 6.547284977377182e-06, |
| "loss": 0.1558, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.801178203240059, |
| "grad_norm": 2.4808253484818907, |
| "learning_rate": 6.536281646978863e-06, |
| "loss": 0.2054, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.8026509572901326, |
| "grad_norm": 3.3744247435198833, |
| "learning_rate": 6.525270094705838e-06, |
| "loss": 0.1971, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8041237113402062, |
| "grad_norm": 3.127584895400644, |
| "learning_rate": 6.514250379489754e-06, |
| "loss": 0.1756, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8055964653902798, |
| "grad_norm": 2.143280586687572, |
| "learning_rate": 6.503222560305941e-06, |
| "loss": 0.1301, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8070692194403535, |
| "grad_norm": 2.778289265850501, |
| "learning_rate": 6.492186696173097e-06, |
| "loss": 0.1746, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8085419734904271, |
| "grad_norm": 2.9826518884459925, |
| "learning_rate": 6.481142846152982e-06, |
| "loss": 0.2291, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.8100147275405007, |
| "grad_norm": 3.6069313230398463, |
| "learning_rate": 6.47009106935009e-06, |
| "loss": 0.2182, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8114874815905744, |
| "grad_norm": 2.9764046649935785, |
| "learning_rate": 6.45903142491134e-06, |
| "loss": 0.1821, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.812960235640648, |
| "grad_norm": 2.7725921223575987, |
| "learning_rate": 6.447963972025752e-06, |
| "loss": 0.218, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8144329896907216, |
| "grad_norm": 2.7051072823188362, |
| "learning_rate": 6.436888769924142e-06, |
| "loss": 0.159, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.8159057437407953, |
| "grad_norm": 3.0114305097475422, |
| "learning_rate": 6.425805877878794e-06, |
| "loss": 0.1807, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8173784977908689, |
| "grad_norm": 2.506441076459785, |
| "learning_rate": 6.414715355203149e-06, |
| "loss": 0.1519, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.8188512518409425, |
| "grad_norm": 2.5973980936466896, |
| "learning_rate": 6.403617261251485e-06, |
| "loss": 0.1879, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8203240058910162, |
| "grad_norm": 2.7762712571831107, |
| "learning_rate": 6.392511655418599e-06, |
| "loss": 0.1525, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.8217967599410898, |
| "grad_norm": 2.828187998348971, |
| "learning_rate": 6.381398597139492e-06, |
| "loss": 0.2317, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8232695139911634, |
| "grad_norm": 2.6972269121488073, |
| "learning_rate": 6.370278145889048e-06, |
| "loss": 0.186, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.8247422680412371, |
| "grad_norm": 3.542127036239562, |
| "learning_rate": 6.3591503611817155e-06, |
| "loss": 0.2108, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8262150220913107, |
| "grad_norm": 2.8818914546163565, |
| "learning_rate": 6.348015302571192e-06, |
| "loss": 0.1946, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.8276877761413843, |
| "grad_norm": 3.2379832338019026, |
| "learning_rate": 6.336873029650104e-06, |
| "loss": 0.2005, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8291605301914581, |
| "grad_norm": 2.834420288726562, |
| "learning_rate": 6.3257236020496845e-06, |
| "loss": 0.1567, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.8306332842415317, |
| "grad_norm": 2.8967220378684586, |
| "learning_rate": 6.3145670794394595e-06, |
| "loss": 0.221, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8321060382916053, |
| "grad_norm": 2.4062803602132066, |
| "learning_rate": 6.303403521526928e-06, |
| "loss": 0.1897, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.833578792341679, |
| "grad_norm": 3.085110345051428, |
| "learning_rate": 6.292232988057235e-06, |
| "loss": 0.2009, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8350515463917526, |
| "grad_norm": 2.7255177270072988, |
| "learning_rate": 6.281055538812861e-06, |
| "loss": 0.1687, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.8365243004418262, |
| "grad_norm": 2.6403632558800387, |
| "learning_rate": 6.2698712336133e-06, |
| "loss": 0.1862, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8379970544918999, |
| "grad_norm": 3.1369325934703514, |
| "learning_rate": 6.2586801323147314e-06, |
| "loss": 0.2388, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.8394698085419735, |
| "grad_norm": 2.610228596413145, |
| "learning_rate": 6.247482294809712e-06, |
| "loss": 0.172, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8409425625920471, |
| "grad_norm": 2.728140959550517, |
| "learning_rate": 6.236277781026849e-06, |
| "loss": 0.1599, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.8424153166421208, |
| "grad_norm": 3.3397424326917933, |
| "learning_rate": 6.225066650930476e-06, |
| "loss": 0.2081, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8438880706921944, |
| "grad_norm": 2.133794920897429, |
| "learning_rate": 6.213848964520338e-06, |
| "loss": 0.1303, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.845360824742268, |
| "grad_norm": 2.8835463225639497, |
| "learning_rate": 6.202624781831269e-06, |
| "loss": 0.1654, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8468335787923417, |
| "grad_norm": 2.367932024564104, |
| "learning_rate": 6.191394162932867e-06, |
| "loss": 0.1774, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.8483063328424153, |
| "grad_norm": 2.7740754042726317, |
| "learning_rate": 6.18015716792918e-06, |
| "loss": 0.2079, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8497790868924889, |
| "grad_norm": 2.4191844169668695, |
| "learning_rate": 6.168913856958374e-06, |
| "loss": 0.1544, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.8512518409425626, |
| "grad_norm": 3.1026655189630326, |
| "learning_rate": 6.157664290192421e-06, |
| "loss": 0.2481, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8527245949926362, |
| "grad_norm": 2.8896911560329723, |
| "learning_rate": 6.146408527836771e-06, |
| "loss": 0.1596, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.8541973490427098, |
| "grad_norm": 2.8646880811489637, |
| "learning_rate": 6.135146630130033e-06, |
| "loss": 0.1955, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8556701030927835, |
| "grad_norm": 3.3949249128943157, |
| "learning_rate": 6.123878657343648e-06, |
| "loss": 0.2115, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.9508305883194987, |
| "learning_rate": 6.112604669781572e-06, |
| "loss": 0.1661, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8586156111929307, |
| "grad_norm": 3.2778977717650495, |
| "learning_rate": 6.101324727779954e-06, |
| "loss": 0.2135, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.8600883652430045, |
| "grad_norm": 2.2927362430201494, |
| "learning_rate": 6.090038891706801e-06, |
| "loss": 0.1652, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8615611192930781, |
| "grad_norm": 3.3248234221138184, |
| "learning_rate": 6.078747221961675e-06, |
| "loss": 0.2145, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.8630338733431517, |
| "grad_norm": 3.200462125587272, |
| "learning_rate": 6.06744977897535e-06, |
| "loss": 0.1932, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8645066273932254, |
| "grad_norm": 2.623581099942445, |
| "learning_rate": 6.0561466232095e-06, |
| "loss": 0.191, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.865979381443299, |
| "grad_norm": 2.825621335260916, |
| "learning_rate": 6.044837815156377e-06, |
| "loss": 0.2154, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8674521354933726, |
| "grad_norm": 2.8020502133871013, |
| "learning_rate": 6.033523415338473e-06, |
| "loss": 0.1903, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.8689248895434463, |
| "grad_norm": 2.858829763733425, |
| "learning_rate": 6.022203484308217e-06, |
| "loss": 0.1733, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8703976435935199, |
| "grad_norm": 2.4940821640154103, |
| "learning_rate": 6.010878082647631e-06, |
| "loss": 0.1587, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.8718703976435935, |
| "grad_norm": 2.5652459900784863, |
| "learning_rate": 5.999547270968024e-06, |
| "loss": 0.1382, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8733431516936672, |
| "grad_norm": 2.6485105297629445, |
| "learning_rate": 5.988211109909647e-06, |
| "loss": 0.1678, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.8748159057437408, |
| "grad_norm": 2.6150060427654487, |
| "learning_rate": 5.976869660141389e-06, |
| "loss": 0.1787, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8762886597938144, |
| "grad_norm": 2.315123548048991, |
| "learning_rate": 5.965522982360441e-06, |
| "loss": 0.1477, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.8777614138438881, |
| "grad_norm": 3.019927103557956, |
| "learning_rate": 5.954171137291968e-06, |
| "loss": 0.1771, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.8792341678939617, |
| "grad_norm": 2.608181929581868, |
| "learning_rate": 5.942814185688799e-06, |
| "loss": 0.1553, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.8807069219440353, |
| "grad_norm": 2.8420856678687567, |
| "learning_rate": 5.931452188331084e-06, |
| "loss": 0.1862, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.882179675994109, |
| "grad_norm": 3.3885149091071187, |
| "learning_rate": 5.920085206025979e-06, |
| "loss": 0.1709, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.8836524300441826, |
| "grad_norm": 2.9530752607122714, |
| "learning_rate": 5.908713299607318e-06, |
| "loss": 0.1717, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8851251840942562, |
| "grad_norm": 3.3443040794874657, |
| "learning_rate": 5.897336529935292e-06, |
| "loss": 0.2336, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.8865979381443299, |
| "grad_norm": 2.803553171269139, |
| "learning_rate": 5.885954957896115e-06, |
| "loss": 0.1811, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.8880706921944035, |
| "grad_norm": 2.462103288691291, |
| "learning_rate": 5.874568644401702e-06, |
| "loss": 0.1657, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.8895434462444771, |
| "grad_norm": 2.97521020574475, |
| "learning_rate": 5.863177650389346e-06, |
| "loss": 0.1821, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8910162002945509, |
| "grad_norm": 2.3362815268370145, |
| "learning_rate": 5.851782036821387e-06, |
| "loss": 0.1177, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.8924889543446245, |
| "grad_norm": 2.697364125595746, |
| "learning_rate": 5.840381864684892e-06, |
| "loss": 0.2245, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8939617083946981, |
| "grad_norm": 3.1676386145436304, |
| "learning_rate": 5.828977194991318e-06, |
| "loss": 0.258, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.8954344624447718, |
| "grad_norm": 2.9439386755156307, |
| "learning_rate": 5.817568088776195e-06, |
| "loss": 0.198, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8969072164948454, |
| "grad_norm": 3.116267212976988, |
| "learning_rate": 5.806154607098799e-06, |
| "loss": 0.2417, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.898379970544919, |
| "grad_norm": 3.0874912391485805, |
| "learning_rate": 5.794736811041821e-06, |
| "loss": 0.1924, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8998527245949927, |
| "grad_norm": 3.121816238099267, |
| "learning_rate": 5.783314761711038e-06, |
| "loss": 0.213, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.9013254786450663, |
| "grad_norm": 2.259615605065723, |
| "learning_rate": 5.771888520234997e-06, |
| "loss": 0.1352, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9027982326951399, |
| "grad_norm": 2.880010684013978, |
| "learning_rate": 5.760458147764673e-06, |
| "loss": 0.161, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.9042709867452136, |
| "grad_norm": 2.281221094404714, |
| "learning_rate": 5.749023705473154e-06, |
| "loss": 0.1812, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.9057437407952872, |
| "grad_norm": 3.7696229509532238, |
| "learning_rate": 5.737585254555307e-06, |
| "loss": 0.1914, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.9072164948453608, |
| "grad_norm": 2.946395578493942, |
| "learning_rate": 5.726142856227453e-06, |
| "loss": 0.1815, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9086892488954345, |
| "grad_norm": 2.9293787170665095, |
| "learning_rate": 5.714696571727037e-06, |
| "loss": 0.1769, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.9101620029455081, |
| "grad_norm": 2.3808186014335573, |
| "learning_rate": 5.703246462312307e-06, |
| "loss": 0.1362, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9116347569955817, |
| "grad_norm": 2.956481152311808, |
| "learning_rate": 5.6917925892619775e-06, |
| "loss": 0.1745, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.9131075110456554, |
| "grad_norm": 3.4406449283229814, |
| "learning_rate": 5.680335013874903e-06, |
| "loss": 0.257, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.914580265095729, |
| "grad_norm": 3.079800184193429, |
| "learning_rate": 5.668873797469756e-06, |
| "loss": 0.167, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.9160530191458026, |
| "grad_norm": 3.253296370089598, |
| "learning_rate": 5.657409001384695e-06, |
| "loss": 0.1757, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9175257731958762, |
| "grad_norm": 2.156710713498128, |
| "learning_rate": 5.645940686977033e-06, |
| "loss": 0.1115, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.9189985272459499, |
| "grad_norm": 2.6144980122779025, |
| "learning_rate": 5.634468915622915e-06, |
| "loss": 0.1427, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9204712812960235, |
| "grad_norm": 2.790011108638482, |
| "learning_rate": 5.622993748716987e-06, |
| "loss": 0.1843, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.9219440353460973, |
| "grad_norm": 2.48624098429388, |
| "learning_rate": 5.611515247672063e-06, |
| "loss": 0.1486, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9234167893961709, |
| "grad_norm": 2.91750565490176, |
| "learning_rate": 5.600033473918811e-06, |
| "loss": 0.1966, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.9248895434462445, |
| "grad_norm": 2.8892768676541767, |
| "learning_rate": 5.588548488905402e-06, |
| "loss": 0.1658, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.9263622974963182, |
| "grad_norm": 2.6350547691066986, |
| "learning_rate": 5.577060354097199e-06, |
| "loss": 0.2129, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.9278350515463918, |
| "grad_norm": 3.1772750636856455, |
| "learning_rate": 5.5655691309764225e-06, |
| "loss": 0.166, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9293078055964654, |
| "grad_norm": 2.975003521974751, |
| "learning_rate": 5.554074881041818e-06, |
| "loss": 0.1699, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.930780559646539, |
| "grad_norm": 2.3653644886736513, |
| "learning_rate": 5.542577665808332e-06, |
| "loss": 0.1725, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.9322533136966127, |
| "grad_norm": 3.221238075347828, |
| "learning_rate": 5.531077546806783e-06, |
| "loss": 0.2137, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.9337260677466863, |
| "grad_norm": 2.8452909275134806, |
| "learning_rate": 5.519574585583523e-06, |
| "loss": 0.1879, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.93519882179676, |
| "grad_norm": 2.9331185445999535, |
| "learning_rate": 5.508068843700121e-06, |
| "loss": 0.2018, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.9366715758468336, |
| "grad_norm": 2.5423046775907276, |
| "learning_rate": 5.496560382733028e-06, |
| "loss": 0.1654, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9381443298969072, |
| "grad_norm": 2.886759957099059, |
| "learning_rate": 5.485049264273241e-06, |
| "loss": 0.198, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.9396170839469808, |
| "grad_norm": 2.4925402691142517, |
| "learning_rate": 5.473535549925986e-06, |
| "loss": 0.1308, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9410898379970545, |
| "grad_norm": 3.041109792137434, |
| "learning_rate": 5.462019301310378e-06, |
| "loss": 0.1956, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.9425625920471281, |
| "grad_norm": 3.1775307076021333, |
| "learning_rate": 5.450500580059095e-06, |
| "loss": 0.2047, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9440353460972017, |
| "grad_norm": 2.6506780197656865, |
| "learning_rate": 5.438979447818049e-06, |
| "loss": 0.1625, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.9455081001472754, |
| "grad_norm": 3.1124656946929234, |
| "learning_rate": 5.427455966246057e-06, |
| "loss": 0.1784, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.946980854197349, |
| "grad_norm": 2.4473149482228402, |
| "learning_rate": 5.415930197014503e-06, |
| "loss": 0.1813, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.9484536082474226, |
| "grad_norm": 2.678785367089142, |
| "learning_rate": 5.404402201807022e-06, |
| "loss": 0.174, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9499263622974963, |
| "grad_norm": 2.2719072811027012, |
| "learning_rate": 5.392872042319155e-06, |
| "loss": 0.1311, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.9513991163475699, |
| "grad_norm": 3.150733238587214, |
| "learning_rate": 5.381339780258034e-06, |
| "loss": 0.2442, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9528718703976435, |
| "grad_norm": 3.5455424017999166, |
| "learning_rate": 5.369805477342032e-06, |
| "loss": 0.2337, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.9543446244477173, |
| "grad_norm": 2.7482737086217135, |
| "learning_rate": 5.358269195300454e-06, |
| "loss": 0.2175, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9558173784977909, |
| "grad_norm": 2.776798925728169, |
| "learning_rate": 5.346730995873194e-06, |
| "loss": 0.1611, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.9572901325478645, |
| "grad_norm": 2.905049948649321, |
| "learning_rate": 5.335190940810407e-06, |
| "loss": 0.1973, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9587628865979382, |
| "grad_norm": 2.9860800230634283, |
| "learning_rate": 5.323649091872179e-06, |
| "loss": 0.2157, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.9602356406480118, |
| "grad_norm": 3.056382593251139, |
| "learning_rate": 5.312105510828196e-06, |
| "loss": 0.1854, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9617083946980854, |
| "grad_norm": 1.8633584294604082, |
| "learning_rate": 5.300560259457414e-06, |
| "loss": 0.1139, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.9631811487481591, |
| "grad_norm": 2.8115313634091956, |
| "learning_rate": 5.289013399547732e-06, |
| "loss": 0.1708, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9646539027982327, |
| "grad_norm": 2.8530294541287256, |
| "learning_rate": 5.27746499289565e-06, |
| "loss": 0.1731, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.9661266568483063, |
| "grad_norm": 2.2259269099856542, |
| "learning_rate": 5.265915101305952e-06, |
| "loss": 0.1111, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.96759941089838, |
| "grad_norm": 3.061910370458147, |
| "learning_rate": 5.254363786591368e-06, |
| "loss": 0.2037, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.9690721649484536, |
| "grad_norm": 2.6756366507310623, |
| "learning_rate": 5.242811110572243e-06, |
| "loss": 0.2021, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9705449189985272, |
| "grad_norm": 3.1881851785691486, |
| "learning_rate": 5.231257135076205e-06, |
| "loss": 0.1759, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.9720176730486009, |
| "grad_norm": 2.6359163663678964, |
| "learning_rate": 5.219701921937845e-06, |
| "loss": 0.1518, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9734904270986745, |
| "grad_norm": 2.51301787921435, |
| "learning_rate": 5.208145532998369e-06, |
| "loss": 0.1483, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.9749631811487481, |
| "grad_norm": 3.025821223156431, |
| "learning_rate": 5.196588030105278e-06, |
| "loss": 0.1493, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.9764359351988218, |
| "grad_norm": 3.185702920503757, |
| "learning_rate": 5.185029475112038e-06, |
| "loss": 0.2023, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.9779086892488954, |
| "grad_norm": 2.096280595044242, |
| "learning_rate": 5.173469929877741e-06, |
| "loss": 0.149, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.979381443298969, |
| "grad_norm": 2.5060157239018617, |
| "learning_rate": 5.161909456266781e-06, |
| "loss": 0.1249, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.9808541973490427, |
| "grad_norm": 3.2185960894603602, |
| "learning_rate": 5.1503481161485206e-06, |
| "loss": 0.2196, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.9823269513991163, |
| "grad_norm": 2.6663010480518428, |
| "learning_rate": 5.138785971396959e-06, |
| "loss": 0.2039, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.9837997054491899, |
| "grad_norm": 2.8699961471842386, |
| "learning_rate": 5.127223083890402e-06, |
| "loss": 0.1581, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.9852724594992637, |
| "grad_norm": 2.7556015653714634, |
| "learning_rate": 5.11565951551113e-06, |
| "loss": 0.1703, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.9867452135493373, |
| "grad_norm": 3.5065573085381447, |
| "learning_rate": 5.104095328145069e-06, |
| "loss": 0.2098, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9882179675994109, |
| "grad_norm": 2.6616463680002824, |
| "learning_rate": 5.0925305836814546e-06, |
| "loss": 0.1487, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.9896907216494846, |
| "grad_norm": 2.8133418788027753, |
| "learning_rate": 5.080965344012509e-06, |
| "loss": 0.1335, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.9911634756995582, |
| "grad_norm": 2.9595738356622103, |
| "learning_rate": 5.069399671033096e-06, |
| "loss": 0.1642, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.9926362297496318, |
| "grad_norm": 2.6728211673881073, |
| "learning_rate": 5.0578336266404085e-06, |
| "loss": 0.1389, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9941089837997055, |
| "grad_norm": 3.080439476388202, |
| "learning_rate": 5.046267272733621e-06, |
| "loss": 0.1965, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.9955817378497791, |
| "grad_norm": 2.4701410992374524, |
| "learning_rate": 5.034700671213565e-06, |
| "loss": 0.1405, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9970544918998527, |
| "grad_norm": 2.84391651358579, |
| "learning_rate": 5.023133883982398e-06, |
| "loss": 0.1691, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.9985272459499264, |
| "grad_norm": 2.8974978669512153, |
| "learning_rate": 5.0115669729432725e-06, |
| "loss": 0.1626, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.6071750843239125, |
| "learning_rate": 5e-06, |
| "loss": 0.171, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.0014727540500736, |
| "grad_norm": 2.0908722198043987, |
| "learning_rate": 4.988433027056729e-06, |
| "loss": 0.0877, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0029455081001473, |
| "grad_norm": 2.3075032162377798, |
| "learning_rate": 4.976866116017604e-06, |
| "loss": 0.0878, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.004418262150221, |
| "grad_norm": 1.8339841498407248, |
| "learning_rate": 4.965299328786437e-06, |
| "loss": 0.0837, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.0058910162002945, |
| "grad_norm": 1.6006550769733303, |
| "learning_rate": 4.95373272726638e-06, |
| "loss": 0.08, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.0073637702503682, |
| "grad_norm": 2.3923089526567223, |
| "learning_rate": 4.942166373359593e-06, |
| "loss": 0.0954, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0088365243004418, |
| "grad_norm": 1.8073001932691344, |
| "learning_rate": 4.930600328966904e-06, |
| "loss": 0.0534, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.0103092783505154, |
| "grad_norm": 2.112034239924799, |
| "learning_rate": 4.919034655987493e-06, |
| "loss": 0.0712, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.011782032400589, |
| "grad_norm": 1.9019850417584385, |
| "learning_rate": 4.907469416318547e-06, |
| "loss": 0.072, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.0132547864506627, |
| "grad_norm": 2.0877136424715577, |
| "learning_rate": 4.895904671854933e-06, |
| "loss": 0.0771, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0147275405007363, |
| "grad_norm": 2.077572121775792, |
| "learning_rate": 4.884340484488872e-06, |
| "loss": 0.0713, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.01620029455081, |
| "grad_norm": 2.7869591073328075, |
| "learning_rate": 4.872776916109601e-06, |
| "loss": 0.1134, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0176730486008836, |
| "grad_norm": 1.8746902344963947, |
| "learning_rate": 4.861214028603044e-06, |
| "loss": 0.0571, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.0191458026509572, |
| "grad_norm": 2.6069603988935297, |
| "learning_rate": 4.849651883851482e-06, |
| "loss": 0.0868, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.0206185567010309, |
| "grad_norm": 2.740480215432774, |
| "learning_rate": 4.838090543733222e-06, |
| "loss": 0.0948, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.0220913107511045, |
| "grad_norm": 3.8793953206281833, |
| "learning_rate": 4.826530070122262e-06, |
| "loss": 0.07, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0235640648011781, |
| "grad_norm": 2.775139749937407, |
| "learning_rate": 4.814970524887965e-06, |
| "loss": 0.0927, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.0250368188512518, |
| "grad_norm": 3.085066313685762, |
| "learning_rate": 4.8034119698947244e-06, |
| "loss": 0.094, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.0265095729013254, |
| "grad_norm": 3.281643571176706, |
| "learning_rate": 4.791854467001634e-06, |
| "loss": 0.0658, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.027982326951399, |
| "grad_norm": 2.4078985119085634, |
| "learning_rate": 4.780298078062157e-06, |
| "loss": 0.0557, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.0294550810014726, |
| "grad_norm": 2.7060207713139923, |
| "learning_rate": 4.768742864923797e-06, |
| "loss": 0.048, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.0309278350515463, |
| "grad_norm": 2.856556717889814, |
| "learning_rate": 4.757188889427761e-06, |
| "loss": 0.06, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.0324005891016201, |
| "grad_norm": 3.159586197772598, |
| "learning_rate": 4.745636213408633e-06, |
| "loss": 0.0682, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.0338733431516938, |
| "grad_norm": 2.932631552062124, |
| "learning_rate": 4.734084898694049e-06, |
| "loss": 0.0803, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0353460972017674, |
| "grad_norm": 3.076181585290304, |
| "learning_rate": 4.72253500710435e-06, |
| "loss": 0.0675, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.036818851251841, |
| "grad_norm": 2.8918203833899003, |
| "learning_rate": 4.710986600452269e-06, |
| "loss": 0.0615, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0382916053019147, |
| "grad_norm": 2.4534677540942407, |
| "learning_rate": 4.699439740542586e-06, |
| "loss": 0.0611, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.0397643593519883, |
| "grad_norm": 3.277146390134441, |
| "learning_rate": 4.687894489171804e-06, |
| "loss": 0.0934, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.041237113402062, |
| "grad_norm": 2.9290800449770926, |
| "learning_rate": 4.6763509081278215e-06, |
| "loss": 0.0877, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.0427098674521356, |
| "grad_norm": 2.451559748967979, |
| "learning_rate": 4.664809059189594e-06, |
| "loss": 0.0655, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0441826215022092, |
| "grad_norm": 3.0014355915899382, |
| "learning_rate": 4.653269004126806e-06, |
| "loss": 0.0984, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.0456553755522828, |
| "grad_norm": 2.9636564956506803, |
| "learning_rate": 4.641730804699547e-06, |
| "loss": 0.0893, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0471281296023565, |
| "grad_norm": 2.668332309680153, |
| "learning_rate": 4.63019452265797e-06, |
| "loss": 0.0704, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.04860088365243, |
| "grad_norm": 2.025496591492349, |
| "learning_rate": 4.618660219741968e-06, |
| "loss": 0.049, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.0500736377025037, |
| "grad_norm": 2.6980749570347387, |
| "learning_rate": 4.607127957680846e-06, |
| "loss": 0.0677, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.0515463917525774, |
| "grad_norm": 3.329232545155469, |
| "learning_rate": 4.59559779819298e-06, |
| "loss": 0.0861, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.053019145802651, |
| "grad_norm": 2.8359873347772635, |
| "learning_rate": 4.584069802985498e-06, |
| "loss": 0.0755, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.0544918998527246, |
| "grad_norm": 2.274924239132417, |
| "learning_rate": 4.572544033753945e-06, |
| "loss": 0.063, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0559646539027983, |
| "grad_norm": 2.854977227907341, |
| "learning_rate": 4.561020552181952e-06, |
| "loss": 0.1293, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.0574374079528719, |
| "grad_norm": 2.8523771548633734, |
| "learning_rate": 4.549499419940906e-06, |
| "loss": 0.0763, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.0589101620029455, |
| "grad_norm": 3.1579164863004436, |
| "learning_rate": 4.537980698689623e-06, |
| "loss": 0.0792, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.0603829160530192, |
| "grad_norm": 2.087311962701497, |
| "learning_rate": 4.526464450074016e-06, |
| "loss": 0.054, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0618556701030928, |
| "grad_norm": 2.7362138039011126, |
| "learning_rate": 4.51495073572676e-06, |
| "loss": 0.0715, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.0633284241531664, |
| "grad_norm": 2.37470137270783, |
| "learning_rate": 4.503439617266974e-06, |
| "loss": 0.0563, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.06480117820324, |
| "grad_norm": 3.2907467519259384, |
| "learning_rate": 4.49193115629988e-06, |
| "loss": 0.095, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.0662739322533137, |
| "grad_norm": 2.4580532091105622, |
| "learning_rate": 4.480425414416479e-06, |
| "loss": 0.072, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.0677466863033873, |
| "grad_norm": 2.430633005701922, |
| "learning_rate": 4.468922453193219e-06, |
| "loss": 0.0701, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.069219440353461, |
| "grad_norm": 2.6208550727093485, |
| "learning_rate": 4.45742233419167e-06, |
| "loss": 0.0714, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.0706921944035346, |
| "grad_norm": 2.1317122254363667, |
| "learning_rate": 4.445925118958184e-06, |
| "loss": 0.0713, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.0721649484536082, |
| "grad_norm": 1.8261168040168063, |
| "learning_rate": 4.434430869023579e-06, |
| "loss": 0.0565, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.0736377025036818, |
| "grad_norm": 2.7129508356879652, |
| "learning_rate": 4.422939645902803e-06, |
| "loss": 0.087, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.0751104565537555, |
| "grad_norm": 2.4057274548479572, |
| "learning_rate": 4.4114515110946e-06, |
| "loss": 0.0655, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.076583210603829, |
| "grad_norm": 2.03399280266938, |
| "learning_rate": 4.39996652608119e-06, |
| "loss": 0.0666, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.0780559646539027, |
| "grad_norm": 2.596095403893847, |
| "learning_rate": 4.3884847523279374e-06, |
| "loss": 0.0826, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.0795287187039764, |
| "grad_norm": 2.171242877696304, |
| "learning_rate": 4.377006251283015e-06, |
| "loss": 0.073, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.08100147275405, |
| "grad_norm": 2.806803218409549, |
| "learning_rate": 4.365531084377087e-06, |
| "loss": 0.0785, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.0824742268041236, |
| "grad_norm": 2.62258755991761, |
| "learning_rate": 4.3540593130229695e-06, |
| "loss": 0.067, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.0839469808541973, |
| "grad_norm": 2.7392273222794046, |
| "learning_rate": 4.342590998615308e-06, |
| "loss": 0.0798, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.085419734904271, |
| "grad_norm": 3.2294373177028115, |
| "learning_rate": 4.331126202530245e-06, |
| "loss": 0.0825, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.0868924889543445, |
| "grad_norm": 2.1665291430967253, |
| "learning_rate": 4.319664986125099e-06, |
| "loss": 0.0774, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.0883652430044182, |
| "grad_norm": 2.686454697277816, |
| "learning_rate": 4.308207410738024e-06, |
| "loss": 0.0837, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.0898379970544918, |
| "grad_norm": 2.7185930478468348, |
| "learning_rate": 4.296753537687694e-06, |
| "loss": 0.0688, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0913107511045654, |
| "grad_norm": 2.154998878381218, |
| "learning_rate": 4.2853034282729644e-06, |
| "loss": 0.0488, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.0927835051546393, |
| "grad_norm": 2.476327579315816, |
| "learning_rate": 4.27385714377255e-06, |
| "loss": 0.0715, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.094256259204713, |
| "grad_norm": 2.475778910547172, |
| "learning_rate": 4.2624147454446945e-06, |
| "loss": 0.0697, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.0957290132547866, |
| "grad_norm": 2.798899972530247, |
| "learning_rate": 4.250976294526847e-06, |
| "loss": 0.074, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.0972017673048602, |
| "grad_norm": 2.777019451392523, |
| "learning_rate": 4.239541852235327e-06, |
| "loss": 0.0681, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.0986745213549338, |
| "grad_norm": 2.174088157331811, |
| "learning_rate": 4.228111479765004e-06, |
| "loss": 0.0672, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.1001472754050075, |
| "grad_norm": 2.7695424502767283, |
| "learning_rate": 4.216685238288962e-06, |
| "loss": 0.0855, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.101620029455081, |
| "grad_norm": 2.99079466026466, |
| "learning_rate": 4.20526318895818e-06, |
| "loss": 0.0961, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.1030927835051547, |
| "grad_norm": 2.7732745665790275, |
| "learning_rate": 4.1938453929012014e-06, |
| "loss": 0.0772, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.1045655375552283, |
| "grad_norm": 1.9954883422550331, |
| "learning_rate": 4.182431911223806e-06, |
| "loss": 0.0707, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.106038291605302, |
| "grad_norm": 2.3261727870576516, |
| "learning_rate": 4.171022805008683e-06, |
| "loss": 0.0725, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.1075110456553756, |
| "grad_norm": 2.3142670589612306, |
| "learning_rate": 4.159618135315109e-06, |
| "loss": 0.0755, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1089837997054492, |
| "grad_norm": 2.7210466709496894, |
| "learning_rate": 4.1482179631786126e-06, |
| "loss": 0.0751, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.1104565537555229, |
| "grad_norm": 2.8409582892005694, |
| "learning_rate": 4.1368223496106544e-06, |
| "loss": 0.0982, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1119293078055965, |
| "grad_norm": 2.5561913135130014, |
| "learning_rate": 4.125431355598299e-06, |
| "loss": 0.0942, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.1134020618556701, |
| "grad_norm": 3.1615551599035103, |
| "learning_rate": 4.1140450421038865e-06, |
| "loss": 0.1051, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1148748159057438, |
| "grad_norm": 1.8911758166555228, |
| "learning_rate": 4.102663470064709e-06, |
| "loss": 0.0598, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.1163475699558174, |
| "grad_norm": 2.6027868423883183, |
| "learning_rate": 4.091286700392683e-06, |
| "loss": 0.08, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.117820324005891, |
| "grad_norm": 3.3172593190538486, |
| "learning_rate": 4.079914793974024e-06, |
| "loss": 0.0932, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.1192930780559647, |
| "grad_norm": 2.584014398968938, |
| "learning_rate": 4.068547811668918e-06, |
| "loss": 0.0737, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.1207658321060383, |
| "grad_norm": 2.711235814730717, |
| "learning_rate": 4.057185814311203e-06, |
| "loss": 0.0761, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.122238586156112, |
| "grad_norm": 3.013495034597246, |
| "learning_rate": 4.0458288627080325e-06, |
| "loss": 0.1001, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1237113402061856, |
| "grad_norm": 2.9214594992749325, |
| "learning_rate": 4.034477017639561e-06, |
| "loss": 0.0937, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.1251840942562592, |
| "grad_norm": 2.570753452463597, |
| "learning_rate": 4.0231303398586124e-06, |
| "loss": 0.0892, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1266568483063328, |
| "grad_norm": 2.2587922331126657, |
| "learning_rate": 4.011788890090354e-06, |
| "loss": 0.0819, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.1281296023564065, |
| "grad_norm": 2.850862254910788, |
| "learning_rate": 4.000452729031978e-06, |
| "loss": 0.0746, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.12960235640648, |
| "grad_norm": 2.070636849791486, |
| "learning_rate": 3.98912191735237e-06, |
| "loss": 0.0724, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.1310751104565537, |
| "grad_norm": 2.60117941900419, |
| "learning_rate": 3.977796515691785e-06, |
| "loss": 0.0748, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1325478645066274, |
| "grad_norm": 2.1648271561118144, |
| "learning_rate": 3.966476584661528e-06, |
| "loss": 0.0625, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.134020618556701, |
| "grad_norm": 3.459878540617657, |
| "learning_rate": 3.955162184843625e-06, |
| "loss": 0.0995, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.1354933726067746, |
| "grad_norm": 2.0253389265031645, |
| "learning_rate": 3.943853376790501e-06, |
| "loss": 0.0515, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.1369661266568483, |
| "grad_norm": 2.3346165427095724, |
| "learning_rate": 3.932550221024651e-06, |
| "loss": 0.0944, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.138438880706922, |
| "grad_norm": 2.4437185103047527, |
| "learning_rate": 3.921252778038326e-06, |
| "loss": 0.0784, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.1399116347569955, |
| "grad_norm": 2.2346000371288697, |
| "learning_rate": 3.9099611082932e-06, |
| "loss": 0.0778, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1413843888070692, |
| "grad_norm": 3.126462530347664, |
| "learning_rate": 3.898675272220048e-06, |
| "loss": 0.0901, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 2.6212182691393417, |
| "learning_rate": 3.887395330218429e-06, |
| "loss": 0.067, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1443298969072164, |
| "grad_norm": 2.2041859965816912, |
| "learning_rate": 3.8761213426563546e-06, |
| "loss": 0.0718, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.14580265095729, |
| "grad_norm": 2.8686011497863637, |
| "learning_rate": 3.8648533698699695e-06, |
| "loss": 0.0739, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1472754050073637, |
| "grad_norm": 3.570906083127591, |
| "learning_rate": 3.85359147216323e-06, |
| "loss": 0.0645, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.1487481590574373, |
| "grad_norm": 2.3026385044388946, |
| "learning_rate": 3.842335709807582e-06, |
| "loss": 0.07, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.150220913107511, |
| "grad_norm": 2.3324933414823037, |
| "learning_rate": 3.831086143041628e-06, |
| "loss": 0.0701, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.1516936671575846, |
| "grad_norm": 2.870788539209186, |
| "learning_rate": 3.819842832070822e-06, |
| "loss": 0.0836, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.1531664212076582, |
| "grad_norm": 1.929302265810244, |
| "learning_rate": 3.808605837067135e-06, |
| "loss": 0.0697, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.1546391752577319, |
| "grad_norm": 2.136539690495229, |
| "learning_rate": 3.7973752181687336e-06, |
| "loss": 0.0644, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.1561119293078055, |
| "grad_norm": 2.2728146164935836, |
| "learning_rate": 3.786151035479664e-06, |
| "loss": 0.0834, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.1575846833578791, |
| "grad_norm": 2.3642399727192682, |
| "learning_rate": 3.774933349069524e-06, |
| "loss": 0.0648, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.1590574374079528, |
| "grad_norm": 2.6823533671482678, |
| "learning_rate": 3.76372221897315e-06, |
| "loss": 0.0951, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.1605301914580266, |
| "grad_norm": 2.2935431804364366, |
| "learning_rate": 3.752517705190287e-06, |
| "loss": 0.0683, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.1620029455081002, |
| "grad_norm": 2.4389409096597396, |
| "learning_rate": 3.741319867685268e-06, |
| "loss": 0.0938, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.1634756995581739, |
| "grad_norm": 2.573029251842867, |
| "learning_rate": 3.7301287663867002e-06, |
| "loss": 0.076, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.1649484536082475, |
| "grad_norm": 2.4248690619540736, |
| "learning_rate": 3.7189444611871383e-06, |
| "loss": 0.1034, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.1664212076583211, |
| "grad_norm": 2.1949237372666386, |
| "learning_rate": 3.7077670119427644e-06, |
| "loss": 0.0776, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.1678939617083948, |
| "grad_norm": 2.3811799264526234, |
| "learning_rate": 3.6965964784730717e-06, |
| "loss": 0.068, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.1693667157584684, |
| "grad_norm": 2.1656743920814336, |
| "learning_rate": 3.68543292056054e-06, |
| "loss": 0.0745, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.170839469808542, |
| "grad_norm": 2.5534075805210303, |
| "learning_rate": 3.674276397950316e-06, |
| "loss": 0.0721, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.1723122238586157, |
| "grad_norm": 3.2077906538032117, |
| "learning_rate": 3.6631269703498974e-06, |
| "loss": 0.0979, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.1737849779086893, |
| "grad_norm": 3.4097966300360545, |
| "learning_rate": 3.65198469742881e-06, |
| "loss": 0.0873, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.175257731958763, |
| "grad_norm": 2.5911991361263036, |
| "learning_rate": 3.6408496388182857e-06, |
| "loss": 0.0706, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.1767304860088366, |
| "grad_norm": 2.1867381102958636, |
| "learning_rate": 3.6297218541109537e-06, |
| "loss": 0.0552, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.1782032400589102, |
| "grad_norm": 2.571703668717305, |
| "learning_rate": 3.61860140286051e-06, |
| "loss": 0.0658, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1796759941089838, |
| "grad_norm": 2.8609822870164665, |
| "learning_rate": 3.6074883445814024e-06, |
| "loss": 0.0871, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.1811487481590575, |
| "grad_norm": 2.570340510742732, |
| "learning_rate": 3.596382738748516e-06, |
| "loss": 0.0808, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.182621502209131, |
| "grad_norm": 2.773398165504677, |
| "learning_rate": 3.5852846447968526e-06, |
| "loss": 0.0572, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.1840942562592047, |
| "grad_norm": 2.8682200523341845, |
| "learning_rate": 3.574194122121207e-06, |
| "loss": 0.0938, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.1855670103092784, |
| "grad_norm": 2.1077416494570937, |
| "learning_rate": 3.5631112300758595e-06, |
| "loss": 0.0559, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.187039764359352, |
| "grad_norm": 2.19204957528833, |
| "learning_rate": 3.55203602797425e-06, |
| "loss": 0.086, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.1885125184094256, |
| "grad_norm": 2.491020520284403, |
| "learning_rate": 3.5409685750886624e-06, |
| "loss": 0.0791, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.1899852724594993, |
| "grad_norm": 3.2077684809179625, |
| "learning_rate": 3.52990893064991e-06, |
| "loss": 0.1018, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.1914580265095729, |
| "grad_norm": 2.4858054288275353, |
| "learning_rate": 3.518857153847019e-06, |
| "loss": 0.0642, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.1929307805596465, |
| "grad_norm": 2.65749978557633, |
| "learning_rate": 3.5078133038269034e-06, |
| "loss": 0.0841, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1944035346097202, |
| "grad_norm": 2.8455934007145682, |
| "learning_rate": 3.4967774396940606e-06, |
| "loss": 0.07, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.1958762886597938, |
| "grad_norm": 2.198946509408161, |
| "learning_rate": 3.4857496205102475e-06, |
| "loss": 0.0794, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.1973490427098674, |
| "grad_norm": 2.3707778356340956, |
| "learning_rate": 3.474729905294163e-06, |
| "loss": 0.0525, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.198821796759941, |
| "grad_norm": 3.1478887888758083, |
| "learning_rate": 3.463718353021138e-06, |
| "loss": 0.0825, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.2002945508100147, |
| "grad_norm": 2.5941236487021047, |
| "learning_rate": 3.45271502262282e-06, |
| "loss": 0.0667, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.2017673048600883, |
| "grad_norm": 3.1251883051921485, |
| "learning_rate": 3.441719972986846e-06, |
| "loss": 0.0715, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.203240058910162, |
| "grad_norm": 2.4878882120381776, |
| "learning_rate": 3.430733262956544e-06, |
| "loss": 0.0598, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.2047128129602356, |
| "grad_norm": 3.081778132428439, |
| "learning_rate": 3.4197549513306076e-06, |
| "loss": 0.0813, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.2061855670103092, |
| "grad_norm": 2.9161799323233115, |
| "learning_rate": 3.4087850968627823e-06, |
| "loss": 0.0888, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.2076583210603828, |
| "grad_norm": 3.0579431841201057, |
| "learning_rate": 3.3978237582615535e-06, |
| "loss": 0.0806, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2091310751104565, |
| "grad_norm": 2.4024003584699662, |
| "learning_rate": 3.3868709941898325e-06, |
| "loss": 0.0685, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.21060382916053, |
| "grad_norm": 2.541898615431858, |
| "learning_rate": 3.37592686326464e-06, |
| "loss": 0.0991, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2120765832106037, |
| "grad_norm": 2.522454961599684, |
| "learning_rate": 3.364991424056794e-06, |
| "loss": 0.0693, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.2135493372606774, |
| "grad_norm": 2.55384548126942, |
| "learning_rate": 3.3540647350905985e-06, |
| "loss": 0.06, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.2150220913107512, |
| "grad_norm": 2.620048246050263, |
| "learning_rate": 3.343146854843523e-06, |
| "loss": 0.0829, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.2164948453608249, |
| "grad_norm": 2.271484577946098, |
| "learning_rate": 3.3322378417458985e-06, |
| "loss": 0.0708, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2179675994108985, |
| "grad_norm": 2.5246193069267138, |
| "learning_rate": 3.3213377541805995e-06, |
| "loss": 0.0782, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.2194403534609721, |
| "grad_norm": 2.108031142465364, |
| "learning_rate": 3.3104466504827327e-06, |
| "loss": 0.0732, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2209131075110458, |
| "grad_norm": 3.3236120150572113, |
| "learning_rate": 3.2995645889393278e-06, |
| "loss": 0.0671, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.2223858615611194, |
| "grad_norm": 2.6374669794434404, |
| "learning_rate": 3.288691627789017e-06, |
| "loss": 0.0813, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.223858615611193, |
| "grad_norm": 2.851817066818382, |
| "learning_rate": 3.277827825221733e-06, |
| "loss": 0.0706, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.2253313696612667, |
| "grad_norm": 2.083133515619651, |
| "learning_rate": 3.2669732393783944e-06, |
| "loss": 0.0664, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2268041237113403, |
| "grad_norm": 2.5390230676594863, |
| "learning_rate": 3.2561279283505888e-06, |
| "loss": 0.0824, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.228276877761414, |
| "grad_norm": 2.4416648845051707, |
| "learning_rate": 3.2452919501802714e-06, |
| "loss": 0.0754, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2297496318114876, |
| "grad_norm": 2.8801480217785156, |
| "learning_rate": 3.234465362859451e-06, |
| "loss": 0.0714, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.2312223858615612, |
| "grad_norm": 2.5597515235457005, |
| "learning_rate": 3.223648224329872e-06, |
| "loss": 0.054, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2326951399116348, |
| "grad_norm": 2.4482923554226788, |
| "learning_rate": 3.2128405924827154e-06, |
| "loss": 0.0829, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.2341678939617085, |
| "grad_norm": 2.856207881577955, |
| "learning_rate": 3.202042525158284e-06, |
| "loss": 0.0818, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.235640648011782, |
| "grad_norm": 2.4499839989641465, |
| "learning_rate": 3.191254080145695e-06, |
| "loss": 0.0806, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.2371134020618557, |
| "grad_norm": 2.273801185202566, |
| "learning_rate": 3.180475315182563e-06, |
| "loss": 0.0597, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2385861561119293, |
| "grad_norm": 2.261133708950399, |
| "learning_rate": 3.1697062879547014e-06, |
| "loss": 0.0877, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.240058910162003, |
| "grad_norm": 2.2872555405628248, |
| "learning_rate": 3.1589470560958104e-06, |
| "loss": 0.0795, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.2415316642120766, |
| "grad_norm": 2.295707839329834, |
| "learning_rate": 3.1481976771871627e-06, |
| "loss": 0.0636, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.2430044182621502, |
| "grad_norm": 3.227716605839925, |
| "learning_rate": 3.1374582087573026e-06, |
| "loss": 0.1081, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.2444771723122239, |
| "grad_norm": 2.4225054229446905, |
| "learning_rate": 3.1267287082817376e-06, |
| "loss": 0.09, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.2459499263622975, |
| "grad_norm": 2.8376508149993933, |
| "learning_rate": 3.1160092331826235e-06, |
| "loss": 0.0624, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.2474226804123711, |
| "grad_norm": 2.8282605439311306, |
| "learning_rate": 3.1052998408284664e-06, |
| "loss": 0.081, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.2488954344624448, |
| "grad_norm": 2.947001349052833, |
| "learning_rate": 3.0946005885338116e-06, |
| "loss": 0.0871, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2503681885125184, |
| "grad_norm": 2.778754685623843, |
| "learning_rate": 3.083911533558934e-06, |
| "loss": 0.0745, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.251840942562592, |
| "grad_norm": 2.384050662267025, |
| "learning_rate": 3.073232733109536e-06, |
| "loss": 0.059, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.2533136966126657, |
| "grad_norm": 2.363442330975993, |
| "learning_rate": 3.0625642443364407e-06, |
| "loss": 0.0722, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.2547864506627393, |
| "grad_norm": 2.4499491297154634, |
| "learning_rate": 3.0519061243352833e-06, |
| "loss": 0.0624, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.256259204712813, |
| "grad_norm": 2.3949434799317624, |
| "learning_rate": 3.041258430146208e-06, |
| "loss": 0.0587, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.2577319587628866, |
| "grad_norm": 2.780602647537924, |
| "learning_rate": 3.0306212187535653e-06, |
| "loss": 0.087, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.2592047128129602, |
| "grad_norm": 3.1983788447860344, |
| "learning_rate": 3.0199945470855975e-06, |
| "loss": 0.1062, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.2606774668630338, |
| "grad_norm": 2.3750027999673753, |
| "learning_rate": 3.0093784720141456e-06, |
| "loss": 0.0671, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.2621502209131075, |
| "grad_norm": 2.2615956244581263, |
| "learning_rate": 2.9987730503543387e-06, |
| "loss": 0.0539, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.263622974963181, |
| "grad_norm": 2.48990411000989, |
| "learning_rate": 2.988178338864289e-06, |
| "loss": 0.0673, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.2650957290132547, |
| "grad_norm": 2.2036932772255544, |
| "learning_rate": 2.9775943942447915e-06, |
| "loss": 0.0738, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.2665684830633284, |
| "grad_norm": 2.795202986209916, |
| "learning_rate": 2.9670212731390202e-06, |
| "loss": 0.0757, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.268041237113402, |
| "grad_norm": 2.895103572350162, |
| "learning_rate": 2.9564590321322206e-06, |
| "loss": 0.0822, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.2695139911634756, |
| "grad_norm": 2.747203222079235, |
| "learning_rate": 2.945907727751412e-06, |
| "loss": 0.0628, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.2709867452135493, |
| "grad_norm": 3.0674742113404236, |
| "learning_rate": 2.935367416465085e-06, |
| "loss": 0.0809, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.272459499263623, |
| "grad_norm": 2.3588856890649574, |
| "learning_rate": 2.924838154682893e-06, |
| "loss": 0.053, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.2739322533136965, |
| "grad_norm": 2.261318532560985, |
| "learning_rate": 2.9143199987553574e-06, |
| "loss": 0.0806, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.2754050073637702, |
| "grad_norm": 3.3505626197518934, |
| "learning_rate": 2.9038130049735634e-06, |
| "loss": 0.067, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.2768777614138438, |
| "grad_norm": 2.3336129262324685, |
| "learning_rate": 2.8933172295688576e-06, |
| "loss": 0.0575, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.2783505154639174, |
| "grad_norm": 2.40059539734755, |
| "learning_rate": 2.882832728712551e-06, |
| "loss": 0.0771, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.279823269513991, |
| "grad_norm": 2.7561605556229343, |
| "learning_rate": 2.8723595585156083e-06, |
| "loss": 0.08, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.2812960235640647, |
| "grad_norm": 2.547320563573267, |
| "learning_rate": 2.8618977750283605e-06, |
| "loss": 0.051, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.2827687776141383, |
| "grad_norm": 1.9709609992186161, |
| "learning_rate": 2.8514474342402006e-06, |
| "loss": 0.0631, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.284241531664212, |
| "grad_norm": 2.9809387595001047, |
| "learning_rate": 2.841008592079281e-06, |
| "loss": 0.0789, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 2.86008312832222, |
| "learning_rate": 2.83058130441221e-06, |
| "loss": 0.0888, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.2871870397643592, |
| "grad_norm": 2.4796550101557937, |
| "learning_rate": 2.8201656270437662e-06, |
| "loss": 0.066, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.2886597938144329, |
| "grad_norm": 2.6030800209190668, |
| "learning_rate": 2.8097616157165886e-06, |
| "loss": 0.0662, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.2901325478645067, |
| "grad_norm": 2.553858828851561, |
| "learning_rate": 2.7993693261108823e-06, |
| "loss": 0.07, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.2916053019145803, |
| "grad_norm": 2.1429985168409353, |
| "learning_rate": 2.788988813844121e-06, |
| "loss": 0.0547, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.293078055964654, |
| "grad_norm": 2.306635027448473, |
| "learning_rate": 2.7786201344707487e-06, |
| "loss": 0.0802, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.2945508100147276, |
| "grad_norm": 2.248947781614172, |
| "learning_rate": 2.768263343481881e-06, |
| "loss": 0.08, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.2960235640648012, |
| "grad_norm": 2.958537556220352, |
| "learning_rate": 2.7579184963050056e-06, |
| "loss": 0.0664, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2974963181148749, |
| "grad_norm": 2.4922414343913664, |
| "learning_rate": 2.7475856483036967e-06, |
| "loss": 0.0677, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.2989690721649485, |
| "grad_norm": 2.562197068557909, |
| "learning_rate": 2.7372648547773063e-06, |
| "loss": 0.0728, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.3004418262150221, |
| "grad_norm": 2.1123483743229117, |
| "learning_rate": 2.726956170960674e-06, |
| "loss": 0.0579, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.3019145802650958, |
| "grad_norm": 2.7651251279632363, |
| "learning_rate": 2.716659652023833e-06, |
| "loss": 0.0786, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.3033873343151694, |
| "grad_norm": 3.0311493031355665, |
| "learning_rate": 2.706375353071712e-06, |
| "loss": 0.0729, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.304860088365243, |
| "grad_norm": 2.6379196457688505, |
| "learning_rate": 2.6961033291438343e-06, |
| "loss": 0.0882, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.3063328424153167, |
| "grad_norm": 2.3533767366059606, |
| "learning_rate": 2.685843635214038e-06, |
| "loss": 0.079, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.3078055964653903, |
| "grad_norm": 2.4437114285251904, |
| "learning_rate": 2.6755963261901706e-06, |
| "loss": 0.0612, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.309278350515464, |
| "grad_norm": 2.7288761273981263, |
| "learning_rate": 2.665361456913797e-06, |
| "loss": 0.0987, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.3107511045655376, |
| "grad_norm": 2.0952523055264165, |
| "learning_rate": 2.655139082159908e-06, |
| "loss": 0.0561, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3122238586156112, |
| "grad_norm": 2.769276897165276, |
| "learning_rate": 2.644929256636628e-06, |
| "loss": 0.0963, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.3136966126656848, |
| "grad_norm": 2.2960722586747524, |
| "learning_rate": 2.634732034984915e-06, |
| "loss": 0.0568, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.3151693667157585, |
| "grad_norm": 2.787153179831851, |
| "learning_rate": 2.624547471778278e-06, |
| "loss": 0.0901, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.316642120765832, |
| "grad_norm": 2.861891095875749, |
| "learning_rate": 2.6143756215224803e-06, |
| "loss": 0.1107, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.3181148748159057, |
| "grad_norm": 2.2294971099864336, |
| "learning_rate": 2.604216538655247e-06, |
| "loss": 0.0791, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.3195876288659794, |
| "grad_norm": 2.3877245266570846, |
| "learning_rate": 2.594070277545975e-06, |
| "loss": 0.0725, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.321060382916053, |
| "grad_norm": 2.806049349126312, |
| "learning_rate": 2.5839368924954435e-06, |
| "loss": 0.1136, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.3225331369661266, |
| "grad_norm": 2.8513599503802087, |
| "learning_rate": 2.5738164377355148e-06, |
| "loss": 0.0776, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.3240058910162003, |
| "grad_norm": 3.1114741400011074, |
| "learning_rate": 2.563708967428859e-06, |
| "loss": 0.0658, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.3254786450662739, |
| "grad_norm": 2.2743662613163522, |
| "learning_rate": 2.5536145356686528e-06, |
| "loss": 0.0572, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.3269513991163475, |
| "grad_norm": 2.268767817457225, |
| "learning_rate": 2.5435331964782916e-06, |
| "loss": 0.0746, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.3284241531664212, |
| "grad_norm": 1.9907870732319457, |
| "learning_rate": 2.5334650038111045e-06, |
| "loss": 0.0504, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.3298969072164948, |
| "grad_norm": 2.3485970715966764, |
| "learning_rate": 2.5234100115500643e-06, |
| "loss": 0.054, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.3313696612665684, |
| "grad_norm": 2.7559810357796413, |
| "learning_rate": 2.5133682735074904e-06, |
| "loss": 0.0696, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.332842415316642, |
| "grad_norm": 2.833175599971209, |
| "learning_rate": 2.503339843424777e-06, |
| "loss": 0.0623, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.3343151693667157, |
| "grad_norm": 2.0828464328941103, |
| "learning_rate": 2.4933247749720912e-06, |
| "loss": 0.0584, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3357879234167893, |
| "grad_norm": 2.4533237420565115, |
| "learning_rate": 2.483323121748094e-06, |
| "loss": 0.0693, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.3372606774668632, |
| "grad_norm": 2.646584000152378, |
| "learning_rate": 2.4733349372796506e-06, |
| "loss": 0.086, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3387334315169368, |
| "grad_norm": 2.2230708948262246, |
| "learning_rate": 2.4633602750215447e-06, |
| "loss": 0.0728, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.3402061855670104, |
| "grad_norm": 2.810498628664279, |
| "learning_rate": 2.4533991883561868e-06, |
| "loss": 0.0873, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.341678939617084, |
| "grad_norm": 2.248467777163056, |
| "learning_rate": 2.4434517305933394e-06, |
| "loss": 0.0672, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.3431516936671577, |
| "grad_norm": 2.1367635036683184, |
| "learning_rate": 2.4335179549698233e-06, |
| "loss": 0.0499, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3446244477172313, |
| "grad_norm": 3.2725373522512142, |
| "learning_rate": 2.423597914649234e-06, |
| "loss": 0.1078, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.346097201767305, |
| "grad_norm": 2.867491627460914, |
| "learning_rate": 2.4136916627216656e-06, |
| "loss": 0.0797, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.3475699558173786, |
| "grad_norm": 2.094438599376492, |
| "learning_rate": 2.403799252203408e-06, |
| "loss": 0.0744, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.3490427098674522, |
| "grad_norm": 2.8403655449439413, |
| "learning_rate": 2.393920736036683e-06, |
| "loss": 0.0636, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.3505154639175259, |
| "grad_norm": 2.2641504729859117, |
| "learning_rate": 2.38405616708935e-06, |
| "loss": 0.0759, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.3519882179675995, |
| "grad_norm": 2.6990721664555015, |
| "learning_rate": 2.374205598154624e-06, |
| "loss": 0.0715, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.3534609720176731, |
| "grad_norm": 2.0450429212053645, |
| "learning_rate": 2.3643690819507984e-06, |
| "loss": 0.0687, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.3549337260677468, |
| "grad_norm": 2.815497301963677, |
| "learning_rate": 2.3545466711209585e-06, |
| "loss": 0.0751, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.3564064801178204, |
| "grad_norm": 2.0833620917161517, |
| "learning_rate": 2.3447384182326948e-06, |
| "loss": 0.0606, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.357879234167894, |
| "grad_norm": 2.5027749716472334, |
| "learning_rate": 2.3349443757778346e-06, |
| "loss": 0.09, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.3593519882179677, |
| "grad_norm": 2.381133387963809, |
| "learning_rate": 2.3251645961721494e-06, |
| "loss": 0.0671, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.3608247422680413, |
| "grad_norm": 2.9284274100164565, |
| "learning_rate": 2.315399131755081e-06, |
| "loss": 0.0831, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.362297496318115, |
| "grad_norm": 2.242298453649294, |
| "learning_rate": 2.3056480347894584e-06, |
| "loss": 0.0617, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.3637702503681886, |
| "grad_norm": 3.411313418432218, |
| "learning_rate": 2.2959113574612204e-06, |
| "loss": 0.1254, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.3652430044182622, |
| "grad_norm": 2.2293073196874853, |
| "learning_rate": 2.2861891518791287e-06, |
| "loss": 0.0481, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.3667157584683358, |
| "grad_norm": 2.1743245071883273, |
| "learning_rate": 2.2764814700745025e-06, |
| "loss": 0.068, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.3681885125184094, |
| "grad_norm": 2.373291429434674, |
| "learning_rate": 2.266788364000929e-06, |
| "loss": 0.0452, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.369661266568483, |
| "grad_norm": 2.597864530271633, |
| "learning_rate": 2.25710988553399e-06, |
| "loss": 0.0878, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.3711340206185567, |
| "grad_norm": 3.034163746055045, |
| "learning_rate": 2.2474460864709825e-06, |
| "loss": 0.0779, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.3726067746686303, |
| "grad_norm": 3.5301260309240057, |
| "learning_rate": 2.2377970185306424e-06, |
| "loss": 0.097, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.374079528718704, |
| "grad_norm": 2.4514313697898213, |
| "learning_rate": 2.22816273335287e-06, |
| "loss": 0.0548, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.3755522827687776, |
| "grad_norm": 3.2426641042269413, |
| "learning_rate": 2.2185432824984455e-06, |
| "loss": 0.0735, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.3770250368188512, |
| "grad_norm": 2.2441096467851036, |
| "learning_rate": 2.208938717448763e-06, |
| "loss": 0.0587, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.3784977908689249, |
| "grad_norm": 2.1614600826394508, |
| "learning_rate": 2.1993490896055514e-06, |
| "loss": 0.0593, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.3799705449189985, |
| "grad_norm": 2.246163741793099, |
| "learning_rate": 2.1897744502905955e-06, |
| "loss": 0.0617, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.3814432989690721, |
| "grad_norm": 2.222744117217539, |
| "learning_rate": 2.1802148507454675e-06, |
| "loss": 0.069, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.3829160530191458, |
| "grad_norm": 2.2021966299628555, |
| "learning_rate": 2.170670342131249e-06, |
| "loss": 0.0747, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.3843888070692194, |
| "grad_norm": 2.6432292171263985, |
| "learning_rate": 2.1611409755282542e-06, |
| "loss": 0.0889, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.385861561119293, |
| "grad_norm": 2.3868568237992696, |
| "learning_rate": 2.1516268019357656e-06, |
| "loss": 0.0928, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.3873343151693667, |
| "grad_norm": 2.1533933661208984, |
| "learning_rate": 2.1421278722717524e-06, |
| "loss": 0.0482, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.3888070692194403, |
| "grad_norm": 2.478893589441798, |
| "learning_rate": 2.132644237372603e-06, |
| "loss": 0.0749, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.390279823269514, |
| "grad_norm": 2.4965674075650814, |
| "learning_rate": 2.123175947992851e-06, |
| "loss": 0.0617, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.3917525773195876, |
| "grad_norm": 2.5896583305139353, |
| "learning_rate": 2.1137230548049042e-06, |
| "loss": 0.0783, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.3932253313696612, |
| "grad_norm": 2.5229928189159163, |
| "learning_rate": 2.1042856083987694e-06, |
| "loss": 0.0721, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.3946980854197348, |
| "grad_norm": 2.620841054032406, |
| "learning_rate": 2.09486365928179e-06, |
| "loss": 0.0478, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.3961708394698085, |
| "grad_norm": 2.4635557419644627, |
| "learning_rate": 2.085457257878369e-06, |
| "loss": 0.0665, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.397643593519882, |
| "grad_norm": 2.491834759671326, |
| "learning_rate": 2.076066454529701e-06, |
| "loss": 0.0885, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.3991163475699557, |
| "grad_norm": 2.1247321015069893, |
| "learning_rate": 2.0666912994935034e-06, |
| "loss": 0.0486, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.4005891016200294, |
| "grad_norm": 2.0938641025252127, |
| "learning_rate": 2.0573318429437487e-06, |
| "loss": 0.0588, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.402061855670103, |
| "grad_norm": 2.994410693825716, |
| "learning_rate": 2.0479881349703885e-06, |
| "loss": 0.0778, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.4035346097201766, |
| "grad_norm": 2.6687029575886516, |
| "learning_rate": 2.038660225579096e-06, |
| "loss": 0.0671, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.4050073637702503, |
| "grad_norm": 2.5657403990694547, |
| "learning_rate": 2.0293481646909934e-06, |
| "loss": 0.0703, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.406480117820324, |
| "grad_norm": 2.7104348627571153, |
| "learning_rate": 2.0200520021423813e-06, |
| "loss": 0.0752, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.4079528718703975, |
| "grad_norm": 2.792438454224809, |
| "learning_rate": 2.010771787684484e-06, |
| "loss": 0.0747, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.4094256259204712, |
| "grad_norm": 3.00295539481228, |
| "learning_rate": 2.0015075709831634e-06, |
| "loss": 0.0778, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.4108983799705448, |
| "grad_norm": 2.7396339188862617, |
| "learning_rate": 1.9922594016186716e-06, |
| "loss": 0.0748, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4123711340206184, |
| "grad_norm": 2.2053073159941095, |
| "learning_rate": 1.983027329085377e-06, |
| "loss": 0.0539, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.413843888070692, |
| "grad_norm": 2.534983064810876, |
| "learning_rate": 1.9738114027915007e-06, |
| "loss": 0.0938, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.415316642120766, |
| "grad_norm": 2.4967187142059393, |
| "learning_rate": 1.9646116720588525e-06, |
| "loss": 0.0656, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.4167893961708395, |
| "grad_norm": 2.066492663646761, |
| "learning_rate": 1.9554281861225694e-06, |
| "loss": 0.0424, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.4182621502209132, |
| "grad_norm": 2.771730400835999, |
| "learning_rate": 1.946260994130843e-06, |
| "loss": 0.0573, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.4197349042709868, |
| "grad_norm": 2.8791465865837536, |
| "learning_rate": 1.9371101451446685e-06, |
| "loss": 0.0859, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4212076583210604, |
| "grad_norm": 3.2248308136532047, |
| "learning_rate": 1.9279756881375746e-06, |
| "loss": 0.0805, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.422680412371134, |
| "grad_norm": 2.8777696252816516, |
| "learning_rate": 1.9188576719953635e-06, |
| "loss": 0.0769, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4241531664212077, |
| "grad_norm": 2.5130714611958704, |
| "learning_rate": 1.9097561455158502e-06, |
| "loss": 0.0812, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.4256259204712813, |
| "grad_norm": 2.531692326473217, |
| "learning_rate": 1.9006711574086006e-06, |
| "loss": 0.065, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.427098674521355, |
| "grad_norm": 2.376440428249521, |
| "learning_rate": 1.8916027562946659e-06, |
| "loss": 0.063, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 2.1528998094878276, |
| "learning_rate": 1.8825509907063328e-06, |
| "loss": 0.0688, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4300441826215022, |
| "grad_norm": 2.530258164073908, |
| "learning_rate": 1.873515909086855e-06, |
| "loss": 0.0647, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.4315169366715759, |
| "grad_norm": 2.663885408371276, |
| "learning_rate": 1.8644975597901977e-06, |
| "loss": 0.0633, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.4329896907216495, |
| "grad_norm": 2.608204191111035, |
| "learning_rate": 1.8554959910807773e-06, |
| "loss": 0.0487, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.4344624447717231, |
| "grad_norm": 2.5581762253885616, |
| "learning_rate": 1.8465112511332068e-06, |
| "loss": 0.0654, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.4359351988217968, |
| "grad_norm": 2.1927550988613027, |
| "learning_rate": 1.8375433880320293e-06, |
| "loss": 0.0603, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.4374079528718704, |
| "grad_norm": 2.5356027344331804, |
| "learning_rate": 1.8285924497714702e-06, |
| "loss": 0.0843, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.438880706921944, |
| "grad_norm": 2.126438287431127, |
| "learning_rate": 1.8196584842551772e-06, |
| "loss": 0.0704, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.4403534609720177, |
| "grad_norm": 2.4680758427563054, |
| "learning_rate": 1.8107415392959615e-06, |
| "loss": 0.0481, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4418262150220913, |
| "grad_norm": 2.4116108572599155, |
| "learning_rate": 1.8018416626155443e-06, |
| "loss": 0.0457, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.443298969072165, |
| "grad_norm": 2.475505185016457, |
| "learning_rate": 1.7929589018443016e-06, |
| "loss": 0.0838, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.4447717231222386, |
| "grad_norm": 3.190060647366546, |
| "learning_rate": 1.7840933045210052e-06, |
| "loss": 0.0826, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.4462444771723122, |
| "grad_norm": 1.9808370523468428, |
| "learning_rate": 1.7752449180925746e-06, |
| "loss": 0.0588, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.4477172312223858, |
| "grad_norm": 3.0854014410212383, |
| "learning_rate": 1.7664137899138195e-06, |
| "loss": 0.0621, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.4491899852724595, |
| "grad_norm": 2.0882868636972116, |
| "learning_rate": 1.7575999672471866e-06, |
| "loss": 0.0505, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.450662739322533, |
| "grad_norm": 2.444569753698376, |
| "learning_rate": 1.7488034972625067e-06, |
| "loss": 0.0704, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.4521354933726067, |
| "grad_norm": 2.2628946150746154, |
| "learning_rate": 1.7400244270367429e-06, |
| "loss": 0.0722, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.4536082474226804, |
| "grad_norm": 2.732925846230406, |
| "learning_rate": 1.7312628035537388e-06, |
| "loss": 0.0611, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.455081001472754, |
| "grad_norm": 2.4632091259565057, |
| "learning_rate": 1.7225186737039639e-06, |
| "loss": 0.0819, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.4565537555228276, |
| "grad_norm": 2.989212148661273, |
| "learning_rate": 1.7137920842842675e-06, |
| "loss": 0.0861, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.4580265095729013, |
| "grad_norm": 2.221628195006125, |
| "learning_rate": 1.7050830819976266e-06, |
| "loss": 0.0375, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.4594992636229749, |
| "grad_norm": 2.4962489534632533, |
| "learning_rate": 1.696391713452893e-06, |
| "loss": 0.0953, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.4609720176730487, |
| "grad_norm": 2.428793575695664, |
| "learning_rate": 1.6877180251645487e-06, |
| "loss": 0.071, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.4624447717231224, |
| "grad_norm": 2.171999391444608, |
| "learning_rate": 1.679062063552454e-06, |
| "loss": 0.057, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.463917525773196, |
| "grad_norm": 2.7695998022109984, |
| "learning_rate": 1.6704238749415958e-06, |
| "loss": 0.0772, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.4653902798232696, |
| "grad_norm": 2.821736257619605, |
| "learning_rate": 1.6618035055618486e-06, |
| "loss": 0.0631, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.4668630338733433, |
| "grad_norm": 1.8315536946657645, |
| "learning_rate": 1.653201001547719e-06, |
| "loss": 0.0376, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.468335787923417, |
| "grad_norm": 2.7605414365927627, |
| "learning_rate": 1.6446164089381033e-06, |
| "loss": 0.0802, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.4698085419734905, |
| "grad_norm": 2.373185594157067, |
| "learning_rate": 1.6360497736760383e-06, |
| "loss": 0.073, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.4712812960235642, |
| "grad_norm": 2.7047012920858267, |
| "learning_rate": 1.6275011416084563e-06, |
| "loss": 0.0698, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.4727540500736378, |
| "grad_norm": 2.6421087343127705, |
| "learning_rate": 1.6189705584859422e-06, |
| "loss": 0.0636, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4727540500736378, |
| "eval_loss": 0.1951112598180771, |
| "eval_runtime": 1.2906, |
| "eval_samples_per_second": 42.615, |
| "eval_steps_per_second": 10.847, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4742268041237114, |
| "grad_norm": 2.6145048863030906, |
| "learning_rate": 1.6104580699624839e-06, |
| "loss": 0.0749, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.475699558173785, |
| "grad_norm": 2.0303418482498055, |
| "learning_rate": 1.6019637215952322e-06, |
| "loss": 0.042, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.4771723122238587, |
| "grad_norm": 2.7386176337259247, |
| "learning_rate": 1.593487558844255e-06, |
| "loss": 0.0725, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.4786450662739323, |
| "grad_norm": 3.0396736461273757, |
| "learning_rate": 1.5850296270722965e-06, |
| "loss": 0.0674, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.480117820324006, |
| "grad_norm": 3.3616222085844605, |
| "learning_rate": 1.576589971544526e-06, |
| "loss": 0.0984, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.4815905743740796, |
| "grad_norm": 2.375868239069935, |
| "learning_rate": 1.568168637428309e-06, |
| "loss": 0.0615, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.4830633284241532, |
| "grad_norm": 2.6401883677901425, |
| "learning_rate": 1.559765669792954e-06, |
| "loss": 0.0653, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.4845360824742269, |
| "grad_norm": 1.9618914207468363, |
| "learning_rate": 1.5513811136094786e-06, |
| "loss": 0.0676, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.4860088365243005, |
| "grad_norm": 2.357533639079111, |
| "learning_rate": 1.543015013750364e-06, |
| "loss": 0.0622, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.4874815905743741, |
| "grad_norm": 2.7396984560438122, |
| "learning_rate": 1.5346674149893204e-06, |
| "loss": 0.0723, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4889543446244478, |
| "grad_norm": 1.8910283131839978, |
| "learning_rate": 1.5263383620010359e-06, |
| "loss": 0.0629, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.4904270986745214, |
| "grad_norm": 2.6877543066986322, |
| "learning_rate": 1.5180278993609527e-06, |
| "loss": 0.0628, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.491899852724595, |
| "grad_norm": 2.2672309433321973, |
| "learning_rate": 1.5097360715450187e-06, |
| "loss": 0.0648, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.4933726067746687, |
| "grad_norm": 2.4608356536595952, |
| "learning_rate": 1.5014629229294525e-06, |
| "loss": 0.0826, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.4948453608247423, |
| "grad_norm": 2.359754445643814, |
| "learning_rate": 1.4932084977905043e-06, |
| "loss": 0.0471, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.496318114874816, |
| "grad_norm": 2.027289584732701, |
| "learning_rate": 1.4849728403042213e-06, |
| "loss": 0.0561, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.4977908689248896, |
| "grad_norm": 2.464689524167278, |
| "learning_rate": 1.4767559945462073e-06, |
| "loss": 0.0604, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.4992636229749632, |
| "grad_norm": 2.1349837626514834, |
| "learning_rate": 1.4685580044913921e-06, |
| "loss": 0.0627, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.5007363770250368, |
| "grad_norm": 2.5480519428195256, |
| "learning_rate": 1.460378914013793e-06, |
| "loss": 0.0745, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.5022091310751104, |
| "grad_norm": 2.4685840222085567, |
| "learning_rate": 1.4522187668862797e-06, |
| "loss": 0.0508, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.503681885125184, |
| "grad_norm": 2.384796029124157, |
| "learning_rate": 1.444077606780342e-06, |
| "loss": 0.0815, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.5051546391752577, |
| "grad_norm": 3.0184805492577143, |
| "learning_rate": 1.4359554772658551e-06, |
| "loss": 0.0629, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.5066273932253313, |
| "grad_norm": 2.5109695121506874, |
| "learning_rate": 1.4278524218108424e-06, |
| "loss": 0.061, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.508100147275405, |
| "grad_norm": 2.076656088861386, |
| "learning_rate": 1.419768483781252e-06, |
| "loss": 0.0679, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.5095729013254786, |
| "grad_norm": 2.587031340103411, |
| "learning_rate": 1.4117037064407164e-06, |
| "loss": 0.062, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.5110456553755522, |
| "grad_norm": 2.969930237937353, |
| "learning_rate": 1.4036581329503245e-06, |
| "loss": 0.0715, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5125184094256259, |
| "grad_norm": 2.148007992080645, |
| "learning_rate": 1.3956318063683905e-06, |
| "loss": 0.0567, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.5139911634756995, |
| "grad_norm": 2.217179463177697, |
| "learning_rate": 1.3876247696502238e-06, |
| "loss": 0.0553, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5154639175257731, |
| "grad_norm": 2.7466217897393084, |
| "learning_rate": 1.3796370656478936e-06, |
| "loss": 0.0487, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.5169366715758468, |
| "grad_norm": 2.346631548903953, |
| "learning_rate": 1.3716687371100096e-06, |
| "loss": 0.0581, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5184094256259204, |
| "grad_norm": 2.8877452342792997, |
| "learning_rate": 1.363719826681486e-06, |
| "loss": 0.0841, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.519882179675994, |
| "grad_norm": 2.5602367543535633, |
| "learning_rate": 1.355790376903315e-06, |
| "loss": 0.0825, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.5213549337260677, |
| "grad_norm": 2.707697935594709, |
| "learning_rate": 1.3478804302123382e-06, |
| "loss": 0.0661, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.5228276877761413, |
| "grad_norm": 2.3215844194395316, |
| "learning_rate": 1.3399900289410245e-06, |
| "loss": 0.0714, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.524300441826215, |
| "grad_norm": 2.260662446674949, |
| "learning_rate": 1.332119215317233e-06, |
| "loss": 0.0463, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.5257731958762886, |
| "grad_norm": 2.399278959045141, |
| "learning_rate": 1.3242680314639995e-06, |
| "loss": 0.0561, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5272459499263622, |
| "grad_norm": 2.4713132106462847, |
| "learning_rate": 1.3164365193993017e-06, |
| "loss": 0.0678, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.5287187039764358, |
| "grad_norm": 2.066649377770113, |
| "learning_rate": 1.30862472103584e-06, |
| "loss": 0.0568, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5301914580265095, |
| "grad_norm": 2.853499333421734, |
| "learning_rate": 1.30083267818081e-06, |
| "loss": 0.0683, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.531664212076583, |
| "grad_norm": 2.623260806713472, |
| "learning_rate": 1.2930604325356793e-06, |
| "loss": 0.069, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5331369661266567, |
| "grad_norm": 2.694604045990694, |
| "learning_rate": 1.2853080256959655e-06, |
| "loss": 0.0801, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.5346097201767304, |
| "grad_norm": 2.7935173842568926, |
| "learning_rate": 1.277575499151013e-06, |
| "loss": 0.0883, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.536082474226804, |
| "grad_norm": 2.9683928203439613, |
| "learning_rate": 1.2698628942837698e-06, |
| "loss": 0.0793, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.5375552282768776, |
| "grad_norm": 2.5002622810382085, |
| "learning_rate": 1.2621702523705676e-06, |
| "loss": 0.0642, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.5390279823269513, |
| "grad_norm": 2.0435391117373976, |
| "learning_rate": 1.2544976145809018e-06, |
| "loss": 0.0595, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.540500736377025, |
| "grad_norm": 2.8188490521434906, |
| "learning_rate": 1.2468450219772054e-06, |
| "loss": 0.0782, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.5419734904270985, |
| "grad_norm": 1.9740548073880617, |
| "learning_rate": 1.2392125155146385e-06, |
| "loss": 0.0552, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.5434462444771722, |
| "grad_norm": 2.6523564783225924, |
| "learning_rate": 1.2316001360408614e-06, |
| "loss": 0.0723, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.5449189985272458, |
| "grad_norm": 2.568676844998587, |
| "learning_rate": 1.224007924295819e-06, |
| "loss": 0.0788, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.5463917525773194, |
| "grad_norm": 2.2720349996304616, |
| "learning_rate": 1.2164359209115235e-06, |
| "loss": 0.0491, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.5478645066273933, |
| "grad_norm": 2.620055235482202, |
| "learning_rate": 1.2088841664118367e-06, |
| "loss": 0.1146, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.549337260677467, |
| "grad_norm": 2.6188369554058464, |
| "learning_rate": 1.2013527012122477e-06, |
| "loss": 0.0867, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.5508100147275405, |
| "grad_norm": 3.2625473467121715, |
| "learning_rate": 1.1938415656196673e-06, |
| "loss": 0.0802, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.5522827687776142, |
| "grad_norm": 2.1439253167381938, |
| "learning_rate": 1.1863507998322022e-06, |
| "loss": 0.0593, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.5537555228276878, |
| "grad_norm": 2.7156469251806756, |
| "learning_rate": 1.1788804439389457e-06, |
| "loss": 0.0782, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.5552282768777614, |
| "grad_norm": 2.6537677951105683, |
| "learning_rate": 1.1714305379197616e-06, |
| "loss": 0.0688, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.556701030927835, |
| "grad_norm": 2.5608627638328536, |
| "learning_rate": 1.164001121645069e-06, |
| "loss": 0.0938, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.5581737849779087, |
| "grad_norm": 2.926057413992728, |
| "learning_rate": 1.1565922348756324e-06, |
| "loss": 0.0855, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.5596465390279823, |
| "grad_norm": 1.9123934797486226, |
| "learning_rate": 1.149203917262341e-06, |
| "loss": 0.0407, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.561119293078056, |
| "grad_norm": 3.1997095246827523, |
| "learning_rate": 1.1418362083460067e-06, |
| "loss": 0.0745, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.5625920471281296, |
| "grad_norm": 2.872152786504894, |
| "learning_rate": 1.1344891475571474e-06, |
| "loss": 0.0725, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.5640648011782032, |
| "grad_norm": 2.505626890725191, |
| "learning_rate": 1.1271627742157743e-06, |
| "loss": 0.0864, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.5655375552282769, |
| "grad_norm": 2.635180660318519, |
| "learning_rate": 1.1198571275311859e-06, |
| "loss": 0.0806, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.5670103092783505, |
| "grad_norm": 2.2183669641526853, |
| "learning_rate": 1.1125722466017547e-06, |
| "loss": 0.0548, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.5684830633284241, |
| "grad_norm": 3.0342759269799955, |
| "learning_rate": 1.1053081704147162e-06, |
| "loss": 0.0725, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.5699558173784978, |
| "grad_norm": 2.5683330192524165, |
| "learning_rate": 1.0980649378459668e-06, |
| "loss": 0.0655, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 2.6764766220734577, |
| "learning_rate": 1.0908425876598512e-06, |
| "loss": 0.0399, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.572901325478645, |
| "grad_norm": 2.0877629068062373, |
| "learning_rate": 1.083641158508955e-06, |
| "loss": 0.0609, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.5743740795287187, |
| "grad_norm": 2.7312069426846075, |
| "learning_rate": 1.0764606889338997e-06, |
| "loss": 0.073, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.5758468335787923, |
| "grad_norm": 2.6538924576288005, |
| "learning_rate": 1.0693012173631346e-06, |
| "loss": 0.0571, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.577319587628866, |
| "grad_norm": 2.3418650892508803, |
| "learning_rate": 1.062162782112729e-06, |
| "loss": 0.0635, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.5787923416789398, |
| "grad_norm": 2.9870001943313262, |
| "learning_rate": 1.055045421386175e-06, |
| "loss": 0.0713, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.5802650957290134, |
| "grad_norm": 2.9325987045082753, |
| "learning_rate": 1.0479491732741747e-06, |
| "loss": 0.0553, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.581737849779087, |
| "grad_norm": 2.3181702748888973, |
| "learning_rate": 1.0408740757544416e-06, |
| "loss": 0.0586, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.5832106038291607, |
| "grad_norm": 2.5995727377845337, |
| "learning_rate": 1.0338201666914942e-06, |
| "loss": 0.057, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.5846833578792343, |
| "grad_norm": 2.34412342665819, |
| "learning_rate": 1.0267874838364561e-06, |
| "loss": 0.068, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.586156111929308, |
| "grad_norm": 2.761988559212666, |
| "learning_rate": 1.0197760648268485e-06, |
| "loss": 0.0581, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.5876288659793816, |
| "grad_norm": 3.2149969036078456, |
| "learning_rate": 1.012785947186397e-06, |
| "loss": 0.0795, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.5891016200294552, |
| "grad_norm": 2.34613943148429, |
| "learning_rate": 1.0058171683248246e-06, |
| "loss": 0.058, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.5905743740795288, |
| "grad_norm": 2.065050664858047, |
| "learning_rate": 9.988697655376544e-07, |
| "loss": 0.0592, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5920471281296025, |
| "grad_norm": 2.735250740838054, |
| "learning_rate": 9.919437760060075e-07, |
| "loss": 0.0857, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.593519882179676, |
| "grad_norm": 1.811552401317801, |
| "learning_rate": 9.850392367964085e-07, |
| "loss": 0.0519, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.5949926362297497, |
| "grad_norm": 2.562566270590748, |
| "learning_rate": 9.781561848605775e-07, |
| "loss": 0.0602, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.5964653902798234, |
| "grad_norm": 1.8016132067357058, |
| "learning_rate": 9.71294657035247e-07, |
| "loss": 0.0533, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.597938144329897, |
| "grad_norm": 2.390912745981556, |
| "learning_rate": 9.644546900419533e-07, |
| "loss": 0.0543, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.5994108983799706, |
| "grad_norm": 2.7902763091008422, |
| "learning_rate": 9.576363204868417e-07, |
| "loss": 0.0937, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.6008836524300443, |
| "grad_norm": 2.2988343421638042, |
| "learning_rate": 9.508395848604757e-07, |
| "loss": 0.0524, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.602356406480118, |
| "grad_norm": 2.4934736154800774, |
| "learning_rate": 9.440645195376341e-07, |
| "loss": 0.0491, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.6038291605301915, |
| "grad_norm": 2.6541495173985954, |
| "learning_rate": 9.373111607771241e-07, |
| "loss": 0.0981, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.6053019145802652, |
| "grad_norm": 2.633404350553692, |
| "learning_rate": 9.305795447215827e-07, |
| "loss": 0.0612, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.6067746686303388, |
| "grad_norm": 3.7648499982526973, |
| "learning_rate": 9.23869707397283e-07, |
| "loss": 0.1132, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.6082474226804124, |
| "grad_norm": 1.9284626831978018, |
| "learning_rate": 9.171816847139447e-07, |
| "loss": 0.0398, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.609720176730486, |
| "grad_norm": 2.0042841970888583, |
| "learning_rate": 9.105155124645403e-07, |
| "loss": 0.0408, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.6111929307805597, |
| "grad_norm": 1.9421306323683327, |
| "learning_rate": 9.038712263250982e-07, |
| "loss": 0.0538, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6126656848306333, |
| "grad_norm": 2.377851383188798, |
| "learning_rate": 8.972488618545222e-07, |
| "loss": 0.0679, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.614138438880707, |
| "grad_norm": 2.3273836911909354, |
| "learning_rate": 8.906484544943933e-07, |
| "loss": 0.0723, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.6156111929307806, |
| "grad_norm": 1.9062923841071093, |
| "learning_rate": 8.840700395687824e-07, |
| "loss": 0.0561, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.6170839469808542, |
| "grad_norm": 2.973212015922115, |
| "learning_rate": 8.775136522840622e-07, |
| "loss": 0.075, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6185567010309279, |
| "grad_norm": 2.2735420808341327, |
| "learning_rate": 8.709793277287182e-07, |
| "loss": 0.0801, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.6200294550810015, |
| "grad_norm": 2.388145827824408, |
| "learning_rate": 8.64467100873157e-07, |
| "loss": 0.0623, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6215022091310751, |
| "grad_norm": 2.2114732024014114, |
| "learning_rate": 8.579770065695264e-07, |
| "loss": 0.0723, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.6229749631811488, |
| "grad_norm": 2.067832894100951, |
| "learning_rate": 8.515090795515247e-07, |
| "loss": 0.0563, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.6244477172312224, |
| "grad_norm": 2.464224803989418, |
| "learning_rate": 8.450633544342135e-07, |
| "loss": 0.0916, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.625920471281296, |
| "grad_norm": 2.868289318235728, |
| "learning_rate": 8.386398657138356e-07, |
| "loss": 0.0595, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.6273932253313697, |
| "grad_norm": 2.4491602380510047, |
| "learning_rate": 8.322386477676309e-07, |
| "loss": 0.0615, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.6288659793814433, |
| "grad_norm": 1.9912741641293554, |
| "learning_rate": 8.258597348536452e-07, |
| "loss": 0.0542, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.630338733431517, |
| "grad_norm": 2.6689461386284714, |
| "learning_rate": 8.195031611105564e-07, |
| "loss": 0.0832, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.6318114874815906, |
| "grad_norm": 2.4194950903750367, |
| "learning_rate": 8.131689605574867e-07, |
| "loss": 0.0538, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6332842415316642, |
| "grad_norm": 3.477850491611638, |
| "learning_rate": 8.068571670938219e-07, |
| "loss": 0.0912, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.6347569955817378, |
| "grad_norm": 2.864529604206013, |
| "learning_rate": 8.005678144990281e-07, |
| "loss": 0.065, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.6362297496318114, |
| "grad_norm": 2.9727127439474086, |
| "learning_rate": 7.943009364324733e-07, |
| "loss": 0.0766, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.637702503681885, |
| "grad_norm": 2.788985489850606, |
| "learning_rate": 7.880565664332473e-07, |
| "loss": 0.0754, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.6391752577319587, |
| "grad_norm": 1.9204008432196964, |
| "learning_rate": 7.818347379199781e-07, |
| "loss": 0.0459, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.6406480117820323, |
| "grad_norm": 2.638641405268061, |
| "learning_rate": 7.756354841906582e-07, |
| "loss": 0.0621, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.642120765832106, |
| "grad_norm": 2.373647501644597, |
| "learning_rate": 7.694588384224655e-07, |
| "loss": 0.0613, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.6435935198821796, |
| "grad_norm": 2.464060690229074, |
| "learning_rate": 7.633048336715815e-07, |
| "loss": 0.0643, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.6450662739322532, |
| "grad_norm": 2.173724799716864, |
| "learning_rate": 7.571735028730209e-07, |
| "loss": 0.0539, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.6465390279823269, |
| "grad_norm": 2.549627425368719, |
| "learning_rate": 7.510648788404501e-07, |
| "loss": 0.0751, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.6480117820324005, |
| "grad_norm": 2.0145805351899337, |
| "learning_rate": 7.449789942660119e-07, |
| "loss": 0.0514, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.6494845360824741, |
| "grad_norm": 2.0685293893328485, |
| "learning_rate": 7.389158817201541e-07, |
| "loss": 0.0411, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.6509572901325478, |
| "grad_norm": 2.4246510587478682, |
| "learning_rate": 7.328755736514537e-07, |
| "loss": 0.0515, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.6524300441826214, |
| "grad_norm": 2.3443764090658523, |
| "learning_rate": 7.268581023864402e-07, |
| "loss": 0.0594, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.653902798232695, |
| "grad_norm": 2.4737045558016426, |
| "learning_rate": 7.208635001294278e-07, |
| "loss": 0.055, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.6553755522827687, |
| "grad_norm": 2.510219341123867, |
| "learning_rate": 7.148917989623388e-07, |
| "loss": 0.0619, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.6568483063328423, |
| "grad_norm": 2.294118510364604, |
| "learning_rate": 7.089430308445317e-07, |
| "loss": 0.0561, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.658321060382916, |
| "grad_norm": 2.37841481598977, |
| "learning_rate": 7.030172276126351e-07, |
| "loss": 0.0741, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.6597938144329896, |
| "grad_norm": 2.4737300923136325, |
| "learning_rate": 6.971144209803738e-07, |
| "loss": 0.0713, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.6612665684830632, |
| "grad_norm": 2.4437627355321045, |
| "learning_rate": 6.912346425383964e-07, |
| "loss": 0.0654, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.6627393225331368, |
| "grad_norm": 2.2165199303595564, |
| "learning_rate": 6.85377923754113e-07, |
| "loss": 0.0653, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.6642120765832105, |
| "grad_norm": 2.2014815708680895, |
| "learning_rate": 6.795442959715192e-07, |
| "loss": 0.056, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.665684830633284, |
| "grad_norm": 2.698668279501925, |
| "learning_rate": 6.737337904110341e-07, |
| "loss": 0.0508, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.6671575846833577, |
| "grad_norm": 2.454195130183351, |
| "learning_rate": 6.679464381693324e-07, |
| "loss": 0.0557, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.6686303387334314, |
| "grad_norm": 2.423192522005641, |
| "learning_rate": 6.621822702191744e-07, |
| "loss": 0.0678, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.670103092783505, |
| "grad_norm": 2.0690565021208482, |
| "learning_rate": 6.564413174092443e-07, |
| "loss": 0.0433, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.6715758468335786, |
| "grad_norm": 2.2724584676649866, |
| "learning_rate": 6.507236104639842e-07, |
| "loss": 0.0518, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.6730486008836525, |
| "grad_norm": 2.568231491867872, |
| "learning_rate": 6.450291799834257e-07, |
| "loss": 0.0712, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.6745213549337261, |
| "grad_norm": 2.3899069257751555, |
| "learning_rate": 6.393580564430319e-07, |
| "loss": 0.0521, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.6759941089837997, |
| "grad_norm": 2.7169102576317905, |
| "learning_rate": 6.337102701935322e-07, |
| "loss": 0.0796, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.6774668630338734, |
| "grad_norm": 2.1590618186403026, |
| "learning_rate": 6.28085851460758e-07, |
| "loss": 0.0588, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.678939617083947, |
| "grad_norm": 2.1974111488329946, |
| "learning_rate": 6.224848303454828e-07, |
| "loss": 0.0775, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.6804123711340206, |
| "grad_norm": 2.4856649528523955, |
| "learning_rate": 6.16907236823262e-07, |
| "loss": 0.0653, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.6818851251840943, |
| "grad_norm": 2.4487555816337236, |
| "learning_rate": 6.11353100744268e-07, |
| "loss": 0.0896, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.683357879234168, |
| "grad_norm": 2.318930173137003, |
| "learning_rate": 6.058224518331374e-07, |
| "loss": 0.0779, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.6848306332842415, |
| "grad_norm": 2.4160212230081566, |
| "learning_rate": 6.003153196888045e-07, |
| "loss": 0.0515, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.6863033873343152, |
| "grad_norm": 2.4777587223820334, |
| "learning_rate": 5.948317337843501e-07, |
| "loss": 0.0686, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.6877761413843888, |
| "grad_norm": 2.6254920597873945, |
| "learning_rate": 5.893717234668383e-07, |
| "loss": 0.0876, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.6892488954344624, |
| "grad_norm": 2.4925800987500937, |
| "learning_rate": 5.839353179571617e-07, |
| "loss": 0.0753, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.690721649484536, |
| "grad_norm": 2.5508449281167724, |
| "learning_rate": 5.785225463498828e-07, |
| "loss": 0.0603, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.6921944035346097, |
| "grad_norm": 1.805650361123776, |
| "learning_rate": 5.731334376130826e-07, |
| "loss": 0.0419, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.6936671575846833, |
| "grad_norm": 2.4585168217267785, |
| "learning_rate": 5.67768020588203e-07, |
| "loss": 0.0576, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.695139911634757, |
| "grad_norm": 3.7525272205812903, |
| "learning_rate": 5.624263239898909e-07, |
| "loss": 0.098, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.6966126656848306, |
| "grad_norm": 2.3382149888369628, |
| "learning_rate": 5.571083764058482e-07, |
| "loss": 0.0688, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.6980854197349042, |
| "grad_norm": 2.472002833620757, |
| "learning_rate": 5.518142062966759e-07, |
| "loss": 0.0607, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.6995581737849779, |
| "grad_norm": 2.6624553849748076, |
| "learning_rate": 5.465438419957209e-07, |
| "loss": 0.0868, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.7010309278350515, |
| "grad_norm": 2.330089465817626, |
| "learning_rate": 5.412973117089288e-07, |
| "loss": 0.0703, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.7025036818851251, |
| "grad_norm": 2.487074034850233, |
| "learning_rate": 5.360746435146885e-07, |
| "loss": 0.0628, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.703976435935199, |
| "grad_norm": 2.0361552942415306, |
| "learning_rate": 5.308758653636853e-07, |
| "loss": 0.0414, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.7054491899852726, |
| "grad_norm": 3.1094877949682234, |
| "learning_rate": 5.257010050787487e-07, |
| "loss": 0.0686, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.7069219440353463, |
| "grad_norm": 2.4861661405757474, |
| "learning_rate": 5.20550090354705e-07, |
| "loss": 0.0504, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.7083946980854199, |
| "grad_norm": 2.528562665713088, |
| "learning_rate": 5.154231487582273e-07, |
| "loss": 0.0655, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.7098674521354935, |
| "grad_norm": 2.4106846342395967, |
| "learning_rate": 5.103202077276908e-07, |
| "loss": 0.0726, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.7113402061855671, |
| "grad_norm": 2.0306440917007156, |
| "learning_rate": 5.05241294573024e-07, |
| "loss": 0.0596, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.7128129602356408, |
| "grad_norm": 2.2743774239571604, |
| "learning_rate": 5.001864364755626e-07, |
| "loss": 0.0645, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 2.5250273423806444, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 0.0874, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.715758468335788, |
| "grad_norm": 2.1645897297660928, |
| "learning_rate": 4.901489935337661e-07, |
| "loss": 0.0533, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.7172312223858617, |
| "grad_norm": 3.2693852161381383, |
| "learning_rate": 4.851664624078356e-07, |
| "loss": 0.0956, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.7187039764359353, |
| "grad_norm": 2.0954735704775116, |
| "learning_rate": 4.802080937756304e-07, |
| "loss": 0.0408, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.720176730486009, |
| "grad_norm": 2.4457829307784307, |
| "learning_rate": 4.7527391417335647e-07, |
| "loss": 0.0534, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7216494845360826, |
| "grad_norm": 2.2487291126239612, |
| "learning_rate": 4.7036395000776556e-07, |
| "loss": 0.0691, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.7231222385861562, |
| "grad_norm": 3.295076111621927, |
| "learning_rate": 4.654782275560127e-07, |
| "loss": 0.0747, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.7245949926362298, |
| "grad_norm": 1.6861287640422942, |
| "learning_rate": 4.606167729655131e-07, |
| "loss": 0.0427, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.7260677466863035, |
| "grad_norm": 1.7666774627400288, |
| "learning_rate": 4.5577961225380886e-07, |
| "loss": 0.0512, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.727540500736377, |
| "grad_norm": 2.787550564417676, |
| "learning_rate": 4.5096677130842535e-07, |
| "loss": 0.0511, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.7290132547864507, |
| "grad_norm": 2.8606514719301486, |
| "learning_rate": 4.4617827588673167e-07, |
| "loss": 0.0617, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7304860088365244, |
| "grad_norm": 3.0176679136529696, |
| "learning_rate": 4.4141415161580714e-07, |
| "loss": 0.0627, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.731958762886598, |
| "grad_norm": 2.147973320636422, |
| "learning_rate": 4.3667442399229985e-07, |
| "loss": 0.0577, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.7334315169366716, |
| "grad_norm": 2.4334937107986874, |
| "learning_rate": 4.319591183822902e-07, |
| "loss": 0.0654, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.7349042709867453, |
| "grad_norm": 2.1068870849677186, |
| "learning_rate": 4.272682600211608e-07, |
| "loss": 0.0641, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.736377025036819, |
| "grad_norm": 2.3360669303149795, |
| "learning_rate": 4.226018740134541e-07, |
| "loss": 0.0425, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.7378497790868925, |
| "grad_norm": 2.6420334626364, |
| "learning_rate": 4.179599853327426e-07, |
| "loss": 0.049, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.7393225331369662, |
| "grad_norm": 3.1620216422362097, |
| "learning_rate": 4.1334261882149505e-07, |
| "loss": 0.085, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.7407952871870398, |
| "grad_norm": 1.9512418086161625, |
| "learning_rate": 4.0874979919094004e-07, |
| "loss": 0.0471, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.7422680412371134, |
| "grad_norm": 1.9156501885886787, |
| "learning_rate": 4.041815510209396e-07, |
| "loss": 0.0449, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.743740795287187, |
| "grad_norm": 2.5213922767677106, |
| "learning_rate": 3.996378987598487e-07, |
| "loss": 0.0747, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.7452135493372607, |
| "grad_norm": 2.329334413664428, |
| "learning_rate": 3.9511886672439546e-07, |
| "loss": 0.0409, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.7466863033873343, |
| "grad_norm": 2.0605800429804786, |
| "learning_rate": 3.906244790995423e-07, |
| "loss": 0.0527, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.748159057437408, |
| "grad_norm": 2.0395950012587583, |
| "learning_rate": 3.8615475993836026e-07, |
| "loss": 0.068, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.7496318114874816, |
| "grad_norm": 2.4728396294735604, |
| "learning_rate": 3.8170973316190074e-07, |
| "loss": 0.065, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.7511045655375552, |
| "grad_norm": 2.5630343389473014, |
| "learning_rate": 3.7728942255906565e-07, |
| "loss": 0.0783, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.7525773195876289, |
| "grad_norm": 2.4356317818958217, |
| "learning_rate": 3.728938517864794e-07, |
| "loss": 0.0552, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.7540500736377025, |
| "grad_norm": 2.439624630915906, |
| "learning_rate": 3.6852304436836573e-07, |
| "loss": 0.0768, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.7555228276877761, |
| "grad_norm": 2.254303368671749, |
| "learning_rate": 3.641770236964193e-07, |
| "loss": 0.0906, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.7569955817378498, |
| "grad_norm": 2.617239568329721, |
| "learning_rate": 3.598558130296809e-07, |
| "loss": 0.072, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.7584683357879234, |
| "grad_norm": 2.340193575135741, |
| "learning_rate": 3.555594354944125e-07, |
| "loss": 0.0565, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.759941089837997, |
| "grad_norm": 1.9253376206694792, |
| "learning_rate": 3.5128791408397646e-07, |
| "loss": 0.0398, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.7614138438880707, |
| "grad_norm": 2.155096832471741, |
| "learning_rate": 3.4704127165870514e-07, |
| "loss": 0.0566, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.7628865979381443, |
| "grad_norm": 2.5829208788530145, |
| "learning_rate": 3.4281953094578877e-07, |
| "loss": 0.0608, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.764359351988218, |
| "grad_norm": 2.849311300023252, |
| "learning_rate": 3.386227145391463e-07, |
| "loss": 0.0819, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.7658321060382915, |
| "grad_norm": 2.8626031957772544, |
| "learning_rate": 3.3445084489930613e-07, |
| "loss": 0.076, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.7673048600883652, |
| "grad_norm": 2.1516838804074943, |
| "learning_rate": 3.303039443532874e-07, |
| "loss": 0.0509, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.7687776141384388, |
| "grad_norm": 2.4872295275914786, |
| "learning_rate": 3.2618203509448054e-07, |
| "loss": 0.0632, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.7702503681885124, |
| "grad_norm": 2.3779433828022194, |
| "learning_rate": 3.220851391825247e-07, |
| "loss": 0.0639, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.771723122238586, |
| "grad_norm": 2.133404720057926, |
| "learning_rate": 3.18013278543195e-07, |
| "loss": 0.0449, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.7731958762886597, |
| "grad_norm": 2.1635728674893837, |
| "learning_rate": 3.1396647496828245e-07, |
| "loss": 0.0566, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.7746686303387333, |
| "grad_norm": 1.8474795208949297, |
| "learning_rate": 3.0994475011547675e-07, |
| "loss": 0.0538, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.776141384388807, |
| "grad_norm": 2.6491356589687975, |
| "learning_rate": 3.0594812550825194e-07, |
| "loss": 0.0806, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.7776141384388806, |
| "grad_norm": 1.9201660665459854, |
| "learning_rate": 3.0197662253575123e-07, |
| "loss": 0.0462, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.7790868924889542, |
| "grad_norm": 2.456247251421313, |
| "learning_rate": 2.980302624526693e-07, |
| "loss": 0.0682, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.7805596465390279, |
| "grad_norm": 2.0836624878249435, |
| "learning_rate": 2.941090663791435e-07, |
| "loss": 0.0581, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.7820324005891015, |
| "grad_norm": 2.2164461597934504, |
| "learning_rate": 2.902130553006366e-07, |
| "loss": 0.0693, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.7835051546391751, |
| "grad_norm": 3.134611247374976, |
| "learning_rate": 2.8634225006782867e-07, |
| "loss": 0.0902, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.7849779086892488, |
| "grad_norm": 2.2452464988460465, |
| "learning_rate": 2.8249667139650215e-07, |
| "loss": 0.0542, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.7864506627393224, |
| "grad_norm": 2.733041617005635, |
| "learning_rate": 2.786763398674297e-07, |
| "loss": 0.0471, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.787923416789396, |
| "grad_norm": 2.7893962181248027, |
| "learning_rate": 2.748812759262687e-07, |
| "loss": 0.0832, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.7893961708394697, |
| "grad_norm": 1.9564030920988806, |
| "learning_rate": 2.711114998834485e-07, |
| "loss": 0.0552, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.7908689248895433, |
| "grad_norm": 2.8778504339141393, |
| "learning_rate": 2.6736703191406366e-07, |
| "loss": 0.0642, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.792341678939617, |
| "grad_norm": 2.5297695421363136, |
| "learning_rate": 2.636478920577634e-07, |
| "loss": 0.0529, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.7938144329896906, |
| "grad_norm": 2.3107730882334594, |
| "learning_rate": 2.599541002186479e-07, |
| "loss": 0.06, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.7952871870397642, |
| "grad_norm": 2.5790224501462373, |
| "learning_rate": 2.5628567616515667e-07, |
| "loss": 0.0666, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.7967599410898378, |
| "grad_norm": 2.6469259537969765, |
| "learning_rate": 2.5264263952996915e-07, |
| "loss": 0.074, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.7982326951399117, |
| "grad_norm": 2.9407112954766643, |
| "learning_rate": 2.49025009809894e-07, |
| "loss": 0.0851, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.7997054491899853, |
| "grad_norm": 2.3658889493345203, |
| "learning_rate": 2.4543280636576795e-07, |
| "loss": 0.0769, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.801178203240059, |
| "grad_norm": 2.3869878273402567, |
| "learning_rate": 2.4186604842235285e-07, |
| "loss": 0.0632, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.8026509572901326, |
| "grad_norm": 2.5921093452272777, |
| "learning_rate": 2.3832475506822937e-07, |
| "loss": 0.0607, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.8041237113402062, |
| "grad_norm": 2.460066877289172, |
| "learning_rate": 2.3480894525569564e-07, |
| "loss": 0.0616, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.8055964653902798, |
| "grad_norm": 2.2895807092767524, |
| "learning_rate": 2.3131863780067043e-07, |
| "loss": 0.0469, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.8070692194403535, |
| "grad_norm": 2.21813812283152, |
| "learning_rate": 2.278538513825862e-07, |
| "loss": 0.0581, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.8085419734904271, |
| "grad_norm": 2.40243611426377, |
| "learning_rate": 2.2441460454429298e-07, |
| "loss": 0.0554, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.8100147275405007, |
| "grad_norm": 3.508821815855263, |
| "learning_rate": 2.2100091569195735e-07, |
| "loss": 0.0617, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.8114874815905744, |
| "grad_norm": 2.5524630260490344, |
| "learning_rate": 2.1761280309496645e-07, |
| "loss": 0.0523, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.812960235640648, |
| "grad_norm": 2.8113266089557647, |
| "learning_rate": 2.1425028488582523e-07, |
| "loss": 0.0699, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.8144329896907216, |
| "grad_norm": 2.6452328788944666, |
| "learning_rate": 2.109133790600648e-07, |
| "loss": 0.065, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8159057437407953, |
| "grad_norm": 2.8961481755343708, |
| "learning_rate": 2.0760210347614383e-07, |
| "loss": 0.0672, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.817378497790869, |
| "grad_norm": 2.477715909813392, |
| "learning_rate": 2.043164758553523e-07, |
| "loss": 0.0659, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.8188512518409425, |
| "grad_norm": 2.2219672216441784, |
| "learning_rate": 2.010565137817172e-07, |
| "loss": 0.0642, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.8203240058910162, |
| "grad_norm": 2.807219268334985, |
| "learning_rate": 1.9782223470191043e-07, |
| "loss": 0.0887, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8217967599410898, |
| "grad_norm": 1.7331081934051469, |
| "learning_rate": 1.9461365592515103e-07, |
| "loss": 0.0462, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.8232695139911634, |
| "grad_norm": 2.487498627433016, |
| "learning_rate": 1.9143079462311644e-07, |
| "loss": 0.0553, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.824742268041237, |
| "grad_norm": 2.395541301875943, |
| "learning_rate": 1.8827366782984913e-07, |
| "loss": 0.064, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.8262150220913107, |
| "grad_norm": 2.4821082624397315, |
| "learning_rate": 1.851422924416657e-07, |
| "loss": 0.0588, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8276877761413843, |
| "grad_norm": 1.7953626408649892, |
| "learning_rate": 1.820366852170663e-07, |
| "loss": 0.0572, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.8291605301914582, |
| "grad_norm": 2.263788364815126, |
| "learning_rate": 1.789568627766447e-07, |
| "loss": 0.0564, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.8306332842415318, |
| "grad_norm": 2.548871383114113, |
| "learning_rate": 1.7590284160300065e-07, |
| "loss": 0.0885, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.8321060382916055, |
| "grad_norm": 1.9553559535921305, |
| "learning_rate": 1.7287463804064874e-07, |
| "loss": 0.0515, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.833578792341679, |
| "grad_norm": 2.1413849397508455, |
| "learning_rate": 1.6987226829593417e-07, |
| "loss": 0.0525, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.8350515463917527, |
| "grad_norm": 2.4170731730516435, |
| "learning_rate": 1.6689574843694433e-07, |
| "loss": 0.0536, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.8365243004418264, |
| "grad_norm": 2.26376563536471, |
| "learning_rate": 1.6394509439342343e-07, |
| "loss": 0.064, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.8379970544919, |
| "grad_norm": 2.354566095562083, |
| "learning_rate": 1.6102032195668639e-07, |
| "loss": 0.054, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.8394698085419736, |
| "grad_norm": 2.200305194780377, |
| "learning_rate": 1.5812144677953667e-07, |
| "loss": 0.0597, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.8409425625920472, |
| "grad_norm": 2.8036671730837996, |
| "learning_rate": 1.5524848437617757e-07, |
| "loss": 0.0788, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.8424153166421209, |
| "grad_norm": 2.9640155658781513, |
| "learning_rate": 1.5240145012213438e-07, |
| "loss": 0.0712, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.8438880706921945, |
| "grad_norm": 2.676706527145378, |
| "learning_rate": 1.4958035925417002e-07, |
| "loss": 0.1007, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.8453608247422681, |
| "grad_norm": 2.5173421640989733, |
| "learning_rate": 1.4678522687020414e-07, |
| "loss": 0.0642, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.8468335787923418, |
| "grad_norm": 2.4792807105107726, |
| "learning_rate": 1.4401606792923018e-07, |
| "loss": 0.0733, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.8483063328424154, |
| "grad_norm": 2.590202533627658, |
| "learning_rate": 1.4127289725123783e-07, |
| "loss": 0.0637, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.849779086892489, |
| "grad_norm": 2.4701164236940247, |
| "learning_rate": 1.3855572951713247e-07, |
| "loss": 0.0549, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.8512518409425627, |
| "grad_norm": 2.667366269263811, |
| "learning_rate": 1.3586457926865692e-07, |
| "loss": 0.0563, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.8527245949926363, |
| "grad_norm": 2.534826082762876, |
| "learning_rate": 1.3319946090831372e-07, |
| "loss": 0.0667, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.85419734904271, |
| "grad_norm": 3.184285823956059, |
| "learning_rate": 1.3056038869928732e-07, |
| "loss": 0.0708, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.8556701030927836, |
| "grad_norm": 2.6334408231116644, |
| "learning_rate": 1.2794737676536993e-07, |
| "loss": 0.0679, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 3.705317330764747, |
| "learning_rate": 1.253604390908819e-07, |
| "loss": 0.0684, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.8586156111929308, |
| "grad_norm": 2.6760578559720045, |
| "learning_rate": 1.2279958952060133e-07, |
| "loss": 0.061, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.8600883652430045, |
| "grad_norm": 2.683637739242373, |
| "learning_rate": 1.2026484175968744e-07, |
| "loss": 0.0697, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.861561119293078, |
| "grad_norm": 3.2978111974990347, |
| "learning_rate": 1.1775620937360677e-07, |
| "loss": 0.0753, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.8630338733431517, |
| "grad_norm": 2.4803177953803406, |
| "learning_rate": 1.1527370578806318e-07, |
| "loss": 0.0625, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.8645066273932254, |
| "grad_norm": 2.726915019647307, |
| "learning_rate": 1.128173442889241e-07, |
| "loss": 0.0755, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.865979381443299, |
| "grad_norm": 2.678703270186324, |
| "learning_rate": 1.1038713802214718e-07, |
| "loss": 0.0554, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.8674521354933726, |
| "grad_norm": 1.7752650933266636, |
| "learning_rate": 1.0798309999371537e-07, |
| "loss": 0.0522, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.8689248895434463, |
| "grad_norm": 2.6407719120165236, |
| "learning_rate": 1.0560524306956422e-07, |
| "loss": 0.0697, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.87039764359352, |
| "grad_norm": 2.9033590642091496, |
| "learning_rate": 1.0325357997551133e-07, |
| "loss": 0.0634, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.8718703976435935, |
| "grad_norm": 1.9492796659994043, |
| "learning_rate": 1.0092812329719149e-07, |
| "loss": 0.0489, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.8733431516936672, |
| "grad_norm": 2.4516366920256045, |
| "learning_rate": 9.862888547998828e-08, |
| "loss": 0.0591, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.8748159057437408, |
| "grad_norm": 2.464207142138413, |
| "learning_rate": 9.635587882896591e-08, |
| "loss": 0.0658, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.8762886597938144, |
| "grad_norm": 2.6681818756726314, |
| "learning_rate": 9.410911550880474e-08, |
| "loss": 0.0651, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.877761413843888, |
| "grad_norm": 2.95156310869132, |
| "learning_rate": 9.188860754373751e-08, |
| "loss": 0.0627, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.8792341678939617, |
| "grad_norm": 2.664762503107241, |
| "learning_rate": 8.969436681748211e-08, |
| "loss": 0.0712, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.8807069219440353, |
| "grad_norm": 2.309055318081491, |
| "learning_rate": 8.752640507317944e-08, |
| "loss": 0.0674, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.882179675994109, |
| "grad_norm": 3.571620633943013, |
| "learning_rate": 8.53847339133318e-08, |
| "loss": 0.0989, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.8836524300441826, |
| "grad_norm": 3.4554242703648077, |
| "learning_rate": 8.326936479973735e-08, |
| "loss": 0.1032, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.8851251840942562, |
| "grad_norm": 2.3511892419309226, |
| "learning_rate": 8.118030905343244e-08, |
| "loss": 0.0587, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.8865979381443299, |
| "grad_norm": 2.9875875390382176, |
| "learning_rate": 7.911757785462882e-08, |
| "loss": 0.0771, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.8880706921944035, |
| "grad_norm": 2.667124591832099, |
| "learning_rate": 7.708118224265538e-08, |
| "loss": 0.0726, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.8895434462444771, |
| "grad_norm": 2.808103542460803, |
| "learning_rate": 7.507113311589764e-08, |
| "loss": 0.0725, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.8910162002945508, |
| "grad_norm": 2.138310695092727, |
| "learning_rate": 7.308744123174006e-08, |
| "loss": 0.053, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.8924889543446244, |
| "grad_norm": 3.23592781169601, |
| "learning_rate": 7.113011720650709e-08, |
| "loss": 0.0861, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.893961708394698, |
| "grad_norm": 2.7938972462543603, |
| "learning_rate": 6.919917151540944e-08, |
| "loss": 0.0846, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.8954344624447717, |
| "grad_norm": 2.6676277265873445, |
| "learning_rate": 6.72946144924852e-08, |
| "loss": 0.0582, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.8969072164948453, |
| "grad_norm": 2.292320346160715, |
| "learning_rate": 6.54164563305465e-08, |
| "loss": 0.0683, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.898379970544919, |
| "grad_norm": 2.7181388928578416, |
| "learning_rate": 6.356470708112295e-08, |
| "loss": 0.0684, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.8998527245949925, |
| "grad_norm": 2.42255910784497, |
| "learning_rate": 6.173937665440943e-08, |
| "loss": 0.0863, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.9013254786450662, |
| "grad_norm": 2.0005300068986216, |
| "learning_rate": 5.994047481921283e-08, |
| "loss": 0.052, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.9027982326951398, |
| "grad_norm": 2.0006374023475018, |
| "learning_rate": 5.816801120289761e-08, |
| "loss": 0.0494, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.9042709867452134, |
| "grad_norm": 2.761206395395843, |
| "learning_rate": 5.642199529133918e-08, |
| "loss": 0.0711, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.905743740795287, |
| "grad_norm": 2.7407148804823604, |
| "learning_rate": 5.47024364288673e-08, |
| "loss": 0.0862, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.9072164948453607, |
| "grad_norm": 3.091222103037569, |
| "learning_rate": 5.3009343818219985e-08, |
| "loss": 0.0756, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.9086892488954343, |
| "grad_norm": 2.163303706955241, |
| "learning_rate": 5.13427265204941e-08, |
| "loss": 0.0478, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.910162002945508, |
| "grad_norm": 2.773239322890956, |
| "learning_rate": 4.970259345509376e-08, |
| "loss": 0.0972, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.9116347569955816, |
| "grad_norm": 2.608970295810049, |
| "learning_rate": 4.808895339968644e-08, |
| "loss": 0.0604, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.9131075110456552, |
| "grad_norm": 2.660546445460094, |
| "learning_rate": 4.650181499015416e-08, |
| "loss": 0.0582, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.9145802650957289, |
| "grad_norm": 2.7839755861009947, |
| "learning_rate": 4.4941186720546257e-08, |
| "loss": 0.0592, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9160530191458025, |
| "grad_norm": 2.5610344626678065, |
| "learning_rate": 4.340707694303614e-08, |
| "loss": 0.0818, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.9175257731958761, |
| "grad_norm": 2.1786156029620045, |
| "learning_rate": 4.189949386787462e-08, |
| "loss": 0.0642, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9189985272459498, |
| "grad_norm": 3.1670317662030696, |
| "learning_rate": 4.041844556334717e-08, |
| "loss": 0.0922, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.9204712812960234, |
| "grad_norm": 2.5277508443914942, |
| "learning_rate": 3.896393995573178e-08, |
| "loss": 0.052, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.9219440353460973, |
| "grad_norm": 2.4096715395360824, |
| "learning_rate": 3.75359848292528e-08, |
| "loss": 0.0472, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.923416789396171, |
| "grad_norm": 2.204721916769757, |
| "learning_rate": 3.613458782604329e-08, |
| "loss": 0.0543, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.9248895434462445, |
| "grad_norm": 2.5836879143448597, |
| "learning_rate": 3.475975644610219e-08, |
| "loss": 0.0758, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.9263622974963182, |
| "grad_norm": 2.457982163657938, |
| "learning_rate": 3.341149804725496e-08, |
| "loss": 0.0482, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.9278350515463918, |
| "grad_norm": 2.6231271942883345, |
| "learning_rate": 3.2089819845111946e-08, |
| "loss": 0.0758, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.9293078055964654, |
| "grad_norm": 2.5076609668403718, |
| "learning_rate": 3.079472891303337e-08, |
| "loss": 0.0641, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.930780559646539, |
| "grad_norm": 2.709588772784406, |
| "learning_rate": 2.9526232182088834e-08, |
| "loss": 0.0706, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.9322533136966127, |
| "grad_norm": 1.9492221449254272, |
| "learning_rate": 2.8284336441021797e-08, |
| "loss": 0.0744, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.9337260677466863, |
| "grad_norm": 2.599925072248245, |
| "learning_rate": 2.7069048336211823e-08, |
| "loss": 0.0816, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.93519882179676, |
| "grad_norm": 2.8220847289646156, |
| "learning_rate": 2.5880374371639594e-08, |
| "loss": 0.0563, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.9366715758468336, |
| "grad_norm": 2.27806945669308, |
| "learning_rate": 2.471832090885251e-08, |
| "loss": 0.0703, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.9381443298969072, |
| "grad_norm": 2.153300624329934, |
| "learning_rate": 2.358289416693027e-08, |
| "loss": 0.0673, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.9396170839469808, |
| "grad_norm": 2.3865823334755287, |
| "learning_rate": 2.2474100222451557e-08, |
| "loss": 0.0677, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.9410898379970545, |
| "grad_norm": 2.0839931170659742, |
| "learning_rate": 2.1391945009461844e-08, |
| "loss": 0.0515, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.9425625920471281, |
| "grad_norm": 2.804503515201076, |
| "learning_rate": 2.0336434319440656e-08, |
| "loss": 0.0953, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.9440353460972017, |
| "grad_norm": 2.3508543215655795, |
| "learning_rate": 1.930757380127324e-08, |
| "loss": 0.0465, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.9455081001472754, |
| "grad_norm": 2.2684159030677584, |
| "learning_rate": 1.8305368961215598e-08, |
| "loss": 0.0583, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.946980854197349, |
| "grad_norm": 2.5644986282122537, |
| "learning_rate": 1.7329825162870073e-08, |
| "loss": 0.055, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.9484536082474226, |
| "grad_norm": 2.283413295915947, |
| "learning_rate": 1.6380947627153143e-08, |
| "loss": 0.0695, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.9499263622974963, |
| "grad_norm": 2.069007200708932, |
| "learning_rate": 1.545874143226933e-08, |
| "loss": 0.0518, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.95139911634757, |
| "grad_norm": 2.9711317829064585, |
| "learning_rate": 1.456321151368345e-08, |
| "loss": 0.0611, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.9528718703976435, |
| "grad_norm": 2.1673859380067624, |
| "learning_rate": 1.3694362664094518e-08, |
| "loss": 0.0566, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.9543446244477174, |
| "grad_norm": 2.3741944406331803, |
| "learning_rate": 1.2852199533407994e-08, |
| "loss": 0.0767, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.955817378497791, |
| "grad_norm": 2.667214957330843, |
| "learning_rate": 1.2036726628715245e-08, |
| "loss": 0.056, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.9572901325478647, |
| "grad_norm": 2.268524748294768, |
| "learning_rate": 1.1247948314264678e-08, |
| "loss": 0.0593, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.9587628865979383, |
| "grad_norm": 2.2321912351767708, |
| "learning_rate": 1.0485868811441757e-08, |
| "loss": 0.0611, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.960235640648012, |
| "grad_norm": 2.963136564591045, |
| "learning_rate": 9.750492198744577e-09, |
| "loss": 0.0681, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.9617083946980856, |
| "grad_norm": 2.7342946960131242, |
| "learning_rate": 9.041822411763323e-09, |
| "loss": 0.0667, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.9631811487481592, |
| "grad_norm": 2.171580235749752, |
| "learning_rate": 8.359863243158074e-09, |
| "loss": 0.0624, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.9646539027982328, |
| "grad_norm": 2.2346302144602275, |
| "learning_rate": 7.704618342638804e-09, |
| "loss": 0.0535, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.9661266568483065, |
| "grad_norm": 2.5411814724089994, |
| "learning_rate": 7.076091216946524e-09, |
| "loss": 0.0613, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.96759941089838, |
| "grad_norm": 3.0903346424039713, |
| "learning_rate": 6.4742852298338434e-09, |
| "loss": 0.0601, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.9690721649484537, |
| "grad_norm": 2.468639539072872, |
| "learning_rate": 5.899203602046655e-09, |
| "loss": 0.0748, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.9705449189985274, |
| "grad_norm": 3.1384988040511006, |
| "learning_rate": 5.35084941130748e-09, |
| "loss": 0.0701, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.972017673048601, |
| "grad_norm": 2.9703348546242156, |
| "learning_rate": 4.829225592300479e-09, |
| "loss": 0.0848, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.9734904270986746, |
| "grad_norm": 1.9317883920454113, |
| "learning_rate": 4.334334936652029e-09, |
| "loss": 0.0426, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.9749631811487482, |
| "grad_norm": 2.878576339471186, |
| "learning_rate": 3.8661800929185035e-09, |
| "loss": 0.0737, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.9764359351988219, |
| "grad_norm": 2.354349775359537, |
| "learning_rate": 3.4247635665723977e-09, |
| "loss": 0.0629, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.9779086892488955, |
| "grad_norm": 3.194601011276422, |
| "learning_rate": 3.010087719986787e-09, |
| "loss": 0.0625, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.9793814432989691, |
| "grad_norm": 2.5572029437027273, |
| "learning_rate": 2.6221547724253337e-09, |
| "loss": 0.0585, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.9808541973490428, |
| "grad_norm": 2.460763245695378, |
| "learning_rate": 2.260966800027853e-09, |
| "loss": 0.0627, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.9823269513991164, |
| "grad_norm": 2.0858178223464146, |
| "learning_rate": 1.926525735800877e-09, |
| "loss": 0.0604, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.98379970544919, |
| "grad_norm": 2.60802968225033, |
| "learning_rate": 1.6188333696059988e-09, |
| "loss": 0.0681, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.9852724594992637, |
| "grad_norm": 2.896847869807856, |
| "learning_rate": 1.3378913481526534e-09, |
| "loss": 0.073, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.9867452135493373, |
| "grad_norm": 2.5968745701467513, |
| "learning_rate": 1.0837011749864624e-09, |
| "loss": 0.08, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.988217967599411, |
| "grad_norm": 2.0860913805543837, |
| "learning_rate": 8.562642104831265e-10, |
| "loss": 0.0554, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.9896907216494846, |
| "grad_norm": 2.153075308395945, |
| "learning_rate": 6.555816718389896e-10, |
| "loss": 0.0484, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.9911634756995582, |
| "grad_norm": 2.2798904474088877, |
| "learning_rate": 4.816546330688177e-10, |
| "loss": 0.0453, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.9926362297496318, |
| "grad_norm": 2.595728602874706, |
| "learning_rate": 3.344840249946968e-10, |
| "loss": 0.065, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.9941089837997055, |
| "grad_norm": 2.313139587362148, |
| "learning_rate": 2.1407063524436777e-10, |
| "loss": 0.0757, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.995581737849779, |
| "grad_norm": 2.5156912578544075, |
| "learning_rate": 1.2041510824678525e-10, |
| "loss": 0.0826, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.9970544918998527, |
| "grad_norm": 2.043220452782378, |
| "learning_rate": 5.351794522823195e-11, |
| "loss": 0.0531, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.9985272459499264, |
| "grad_norm": 1.9871202396885614, |
| "learning_rate": 1.3379504207877703e-11, |
| "loss": 0.0434, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.9204232674227737, |
| "learning_rate": 0.0, |
| "loss": 0.0379, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1358, |
| "total_flos": 2955929518080.0, |
| "train_loss": 0.13508925529605598, |
| "train_runtime": 626.3733, |
| "train_samples_per_second": 17.341, |
| "train_steps_per_second": 2.168 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1358, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2955929518080.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |