| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9980092899800929, | |
| "eval_steps": 500, | |
| "global_step": 376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0026542800265428003, | |
| "grad_norm": 6.981875026564461, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.5142, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0053085600530856005, | |
| "grad_norm": 7.3585790096388095, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.5399, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007962840079628402, | |
| "grad_norm": 6.187456707748293, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4331, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.010617120106171201, | |
| "grad_norm": 3.322201654326747, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.4069, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013271400132714002, | |
| "grad_norm": 6.898750176066953, | |
| "learning_rate": 5e-06, | |
| "loss": 1.398, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015925680159256803, | |
| "grad_norm": 9.84996090672252, | |
| "learning_rate": 6e-06, | |
| "loss": 1.4207, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0185799601857996, | |
| "grad_norm": 5.911295227152632, | |
| "learning_rate": 7e-06, | |
| "loss": 1.396, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.021234240212342402, | |
| "grad_norm": 4.487244776192921, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.3842, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.023888520238885203, | |
| "grad_norm": 3.963029697998983, | |
| "learning_rate": 9e-06, | |
| "loss": 1.4094, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.026542800265428004, | |
| "grad_norm": 3.9061159778838777, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3289, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.029197080291970802, | |
| "grad_norm": 4.06704445096823, | |
| "learning_rate": 9.999988945517944e-06, | |
| "loss": 1.3803, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03185136031851361, | |
| "grad_norm": 2.604059841393431, | |
| "learning_rate": 9.999955782120656e-06, | |
| "loss": 1.3673, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.034505640345056404, | |
| "grad_norm": 2.257369986938731, | |
| "learning_rate": 9.999900509954779e-06, | |
| "loss": 1.3601, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.0371599203715992, | |
| "grad_norm": 2.1847487718443896, | |
| "learning_rate": 9.999823129264712e-06, | |
| "loss": 1.2897, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.039814200398142006, | |
| "grad_norm": 2.07192636064569, | |
| "learning_rate": 9.99972364039262e-06, | |
| "loss": 1.3908, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.042468480424684804, | |
| "grad_norm": 1.9501283234498499, | |
| "learning_rate": 9.99960204377842e-06, | |
| "loss": 1.3159, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0451227604512276, | |
| "grad_norm": 2.493269875780714, | |
| "learning_rate": 9.999458339959787e-06, | |
| "loss": 1.294, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.047777040477770406, | |
| "grad_norm": 2.5949272964482057, | |
| "learning_rate": 9.999292529572152e-06, | |
| "loss": 1.3185, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.050431320504313204, | |
| "grad_norm": 2.2388099180255643, | |
| "learning_rate": 9.99910461334869e-06, | |
| "loss": 1.2132, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05308560053085601, | |
| "grad_norm": 2.2899507956205762, | |
| "learning_rate": 9.99889459212033e-06, | |
| "loss": 1.293, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.055739880557398806, | |
| "grad_norm": 1.9172996859984803, | |
| "learning_rate": 9.998662466815743e-06, | |
| "loss": 1.2282, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.058394160583941604, | |
| "grad_norm": 1.695255684950505, | |
| "learning_rate": 9.99840823846134e-06, | |
| "loss": 1.2453, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06104844061048441, | |
| "grad_norm": 1.9499380269432547, | |
| "learning_rate": 9.998131908181262e-06, | |
| "loss": 1.3126, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06370272063702721, | |
| "grad_norm": 2.235039095134041, | |
| "learning_rate": 9.997833477197386e-06, | |
| "loss": 1.3, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06635700066357, | |
| "grad_norm": 1.7768874556246268, | |
| "learning_rate": 9.997512946829314e-06, | |
| "loss": 1.3027, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06901128069011281, | |
| "grad_norm": 1.8710962469378072, | |
| "learning_rate": 9.997170318494362e-06, | |
| "loss": 1.2571, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07166556071665561, | |
| "grad_norm": 2.1786369089634734, | |
| "learning_rate": 9.996805593707566e-06, | |
| "loss": 1.2633, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0743198407431984, | |
| "grad_norm": 2.3145346512057805, | |
| "learning_rate": 9.996418774081658e-06, | |
| "loss": 1.2439, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07697412076974121, | |
| "grad_norm": 1.740845781272116, | |
| "learning_rate": 9.996009861327077e-06, | |
| "loss": 1.2437, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.07962840079628401, | |
| "grad_norm": 1.9183185283288997, | |
| "learning_rate": 9.99557885725195e-06, | |
| "loss": 1.333, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0822826808228268, | |
| "grad_norm": 2.1688553875791987, | |
| "learning_rate": 9.995125763762089e-06, | |
| "loss": 1.3145, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08493696084936961, | |
| "grad_norm": 1.9658120398634014, | |
| "learning_rate": 9.994650582860978e-06, | |
| "loss": 1.2682, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08759124087591241, | |
| "grad_norm": 2.136505316782775, | |
| "learning_rate": 9.994153316649769e-06, | |
| "loss": 1.24, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0902455209024552, | |
| "grad_norm": 1.670383957571605, | |
| "learning_rate": 9.99363396732727e-06, | |
| "loss": 1.2421, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09289980092899801, | |
| "grad_norm": 1.9007693724974954, | |
| "learning_rate": 9.993092537189936e-06, | |
| "loss": 1.1936, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09555408095554081, | |
| "grad_norm": 1.8427231589681057, | |
| "learning_rate": 9.992529028631859e-06, | |
| "loss": 1.2568, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0982083609820836, | |
| "grad_norm": 1.9014135968523682, | |
| "learning_rate": 9.991943444144758e-06, | |
| "loss": 1.231, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.10086264100862641, | |
| "grad_norm": 1.714724530777744, | |
| "learning_rate": 9.991335786317964e-06, | |
| "loss": 1.2559, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.10351692103516921, | |
| "grad_norm": 1.9540837660082362, | |
| "learning_rate": 9.990706057838417e-06, | |
| "loss": 1.2583, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.10617120106171202, | |
| "grad_norm": 1.7120831927587263, | |
| "learning_rate": 9.990054261490643e-06, | |
| "loss": 1.2095, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10882548108825481, | |
| "grad_norm": 1.7124302876215762, | |
| "learning_rate": 9.989380400156752e-06, | |
| "loss": 1.2361, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11147976111479761, | |
| "grad_norm": 1.7683889355402804, | |
| "learning_rate": 9.98868447681642e-06, | |
| "loss": 1.2134, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11413404114134042, | |
| "grad_norm": 1.8716677445605339, | |
| "learning_rate": 9.987966494546873e-06, | |
| "loss": 1.3081, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11678832116788321, | |
| "grad_norm": 1.810504125507985, | |
| "learning_rate": 9.987226456522884e-06, | |
| "loss": 1.2789, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11944260119442601, | |
| "grad_norm": 2.107999452852097, | |
| "learning_rate": 9.986464366016743e-06, | |
| "loss": 1.2965, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12209688122096882, | |
| "grad_norm": 1.9463843496195974, | |
| "learning_rate": 9.985680226398261e-06, | |
| "loss": 1.2455, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.12475116124751161, | |
| "grad_norm": 1.8557616158057193, | |
| "learning_rate": 9.984874041134738e-06, | |
| "loss": 1.2432, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.12740544127405443, | |
| "grad_norm": 1.7060682110649106, | |
| "learning_rate": 9.984045813790959e-06, | |
| "loss": 1.1864, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1300597213005972, | |
| "grad_norm": 1.9204675247056242, | |
| "learning_rate": 9.983195548029173e-06, | |
| "loss": 1.2525, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13271400132714, | |
| "grad_norm": 1.8711916549013854, | |
| "learning_rate": 9.98232324760908e-06, | |
| "loss": 1.2836, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1353682813536828, | |
| "grad_norm": 1.6842894937517436, | |
| "learning_rate": 9.981428916387812e-06, | |
| "loss": 1.183, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13802256138022562, | |
| "grad_norm": 1.6306230130296617, | |
| "learning_rate": 9.980512558319915e-06, | |
| "loss": 1.2369, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.14067684140676842, | |
| "grad_norm": 1.566857719000752, | |
| "learning_rate": 9.979574177457337e-06, | |
| "loss": 1.1844, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.14333112143331123, | |
| "grad_norm": 2.0361026967903966, | |
| "learning_rate": 9.978613777949401e-06, | |
| "loss": 1.2064, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.145985401459854, | |
| "grad_norm": 1.8265620367248863, | |
| "learning_rate": 9.977631364042796e-06, | |
| "loss": 1.2432, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1486396814863968, | |
| "grad_norm": 1.7838441388683621, | |
| "learning_rate": 9.976626940081553e-06, | |
| "loss": 1.2852, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1512939615129396, | |
| "grad_norm": 1.7544005767854343, | |
| "learning_rate": 9.975600510507025e-06, | |
| "loss": 1.1735, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.15394824153948242, | |
| "grad_norm": 1.80732471729061, | |
| "learning_rate": 9.974552079857873e-06, | |
| "loss": 1.2198, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.15660252156602522, | |
| "grad_norm": 1.7204547120415132, | |
| "learning_rate": 9.973481652770039e-06, | |
| "loss": 1.2409, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15925680159256803, | |
| "grad_norm": 1.7446267682486616, | |
| "learning_rate": 9.972389233976729e-06, | |
| "loss": 1.236, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1619110816191108, | |
| "grad_norm": 1.8367615413386507, | |
| "learning_rate": 9.971274828308396e-06, | |
| "loss": 1.2333, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.1645653616456536, | |
| "grad_norm": 2.024298503175875, | |
| "learning_rate": 9.970138440692706e-06, | |
| "loss": 1.1798, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1672196416721964, | |
| "grad_norm": 1.6639343310164172, | |
| "learning_rate": 9.968980076154533e-06, | |
| "loss": 1.2429, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.16987392169873922, | |
| "grad_norm": 1.7526879469365466, | |
| "learning_rate": 9.967799739815925e-06, | |
| "loss": 1.2448, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.17252820172528202, | |
| "grad_norm": 1.551089471898675, | |
| "learning_rate": 9.966597436896085e-06, | |
| "loss": 1.2221, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.17518248175182483, | |
| "grad_norm": 3.285115069392907, | |
| "learning_rate": 9.965373172711343e-06, | |
| "loss": 1.2576, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.17783676177836763, | |
| "grad_norm": 1.9429706042692902, | |
| "learning_rate": 9.964126952675148e-06, | |
| "loss": 1.2211, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1804910418049104, | |
| "grad_norm": 1.6014290221743892, | |
| "learning_rate": 9.962858782298023e-06, | |
| "loss": 1.2105, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1831453218314532, | |
| "grad_norm": 1.7597646377706344, | |
| "learning_rate": 9.961568667187556e-06, | |
| "loss": 1.2401, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.18579960185799602, | |
| "grad_norm": 1.6408652164338016, | |
| "learning_rate": 9.960256613048367e-06, | |
| "loss": 1.1577, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18845388188453882, | |
| "grad_norm": 2.4349687274686334, | |
| "learning_rate": 9.958922625682088e-06, | |
| "loss": 1.193, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.19110816191108163, | |
| "grad_norm": 1.9037201335080784, | |
| "learning_rate": 9.957566710987338e-06, | |
| "loss": 1.1489, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.19376244193762443, | |
| "grad_norm": 2.1368452991743014, | |
| "learning_rate": 9.956188874959686e-06, | |
| "loss": 1.3215, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1964167219641672, | |
| "grad_norm": 1.7342842074642177, | |
| "learning_rate": 9.954789123691643e-06, | |
| "loss": 1.2288, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.19907100199071, | |
| "grad_norm": 1.883985974459675, | |
| "learning_rate": 9.953367463372615e-06, | |
| "loss": 1.2349, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.20172528201725282, | |
| "grad_norm": 1.8151478617151462, | |
| "learning_rate": 9.951923900288888e-06, | |
| "loss": 1.1481, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.20437956204379562, | |
| "grad_norm": 1.8542475078063598, | |
| "learning_rate": 9.950458440823602e-06, | |
| "loss": 1.262, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.20703384207033843, | |
| "grad_norm": 1.77553753540162, | |
| "learning_rate": 9.948971091456715e-06, | |
| "loss": 1.1834, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.20968812209688123, | |
| "grad_norm": 1.8152497279053155, | |
| "learning_rate": 9.947461858764978e-06, | |
| "loss": 1.1749, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.21234240212342403, | |
| "grad_norm": 1.5929993680573362, | |
| "learning_rate": 9.945930749421903e-06, | |
| "loss": 1.2696, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2149966821499668, | |
| "grad_norm": 2.1883175245684092, | |
| "learning_rate": 9.944377770197741e-06, | |
| "loss": 1.2375, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.21765096217650962, | |
| "grad_norm": 1.7556050567556294, | |
| "learning_rate": 9.942802927959444e-06, | |
| "loss": 1.2017, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.22030524220305242, | |
| "grad_norm": 1.7392066404895135, | |
| "learning_rate": 9.941206229670634e-06, | |
| "loss": 1.1788, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.22295952222959523, | |
| "grad_norm": 2.230563622877349, | |
| "learning_rate": 9.939587682391587e-06, | |
| "loss": 1.2629, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.22561380225613803, | |
| "grad_norm": 1.7257531276366218, | |
| "learning_rate": 9.937947293279178e-06, | |
| "loss": 1.1574, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.22826808228268083, | |
| "grad_norm": 3.7946522494948134, | |
| "learning_rate": 9.93628506958687e-06, | |
| "loss": 1.2539, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2309223623092236, | |
| "grad_norm": 1.7746755901383093, | |
| "learning_rate": 9.934601018664672e-06, | |
| "loss": 1.1672, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.23357664233576642, | |
| "grad_norm": 1.6160184542663385, | |
| "learning_rate": 9.932895147959106e-06, | |
| "loss": 1.2047, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.23623092236230922, | |
| "grad_norm": 1.597818533914632, | |
| "learning_rate": 9.931167465013182e-06, | |
| "loss": 1.2087, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.23888520238885202, | |
| "grad_norm": 1.8200709191179871, | |
| "learning_rate": 9.929417977466356e-06, | |
| "loss": 1.2594, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.24153948241539483, | |
| "grad_norm": 1.5869876859286098, | |
| "learning_rate": 9.927646693054498e-06, | |
| "loss": 1.2923, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.24419376244193763, | |
| "grad_norm": 1.6678230174198274, | |
| "learning_rate": 9.925853619609858e-06, | |
| "loss": 1.1979, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2468480424684804, | |
| "grad_norm": 1.7206885835934083, | |
| "learning_rate": 9.924038765061042e-06, | |
| "loss": 1.2248, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.24950232249502322, | |
| "grad_norm": 1.8965216866987153, | |
| "learning_rate": 9.922202137432954e-06, | |
| "loss": 1.1793, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.252156602521566, | |
| "grad_norm": 1.7827181222199764, | |
| "learning_rate": 9.920343744846786e-06, | |
| "loss": 1.2539, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.25481088254810885, | |
| "grad_norm": 1.704509646049322, | |
| "learning_rate": 9.918463595519963e-06, | |
| "loss": 1.1845, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.25746516257465163, | |
| "grad_norm": 1.8008684596562938, | |
| "learning_rate": 9.916561697766114e-06, | |
| "loss": 1.1873, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2601194426011944, | |
| "grad_norm": 1.5268474470110187, | |
| "learning_rate": 9.91463805999504e-06, | |
| "loss": 1.1634, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.26277372262773724, | |
| "grad_norm": 1.69776670129652, | |
| "learning_rate": 9.912692690712667e-06, | |
| "loss": 1.2496, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.26542800265428, | |
| "grad_norm": 19.950768938401303, | |
| "learning_rate": 9.910725598521014e-06, | |
| "loss": 1.2266, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26808228268082285, | |
| "grad_norm": 1.888189833523382, | |
| "learning_rate": 9.908736792118157e-06, | |
| "loss": 1.1783, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2707365627073656, | |
| "grad_norm": 1.6553587447766995, | |
| "learning_rate": 9.906726280298185e-06, | |
| "loss": 1.1888, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2733908427339084, | |
| "grad_norm": 1.7645503651144456, | |
| "learning_rate": 9.904694071951167e-06, | |
| "loss": 1.2331, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.27604512276045123, | |
| "grad_norm": 1.7561319773931536, | |
| "learning_rate": 9.902640176063103e-06, | |
| "loss": 1.2429, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.278699402786994, | |
| "grad_norm": 1.826781329048666, | |
| "learning_rate": 9.900564601715898e-06, | |
| "loss": 1.2053, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.28135368281353684, | |
| "grad_norm": 2.0178364653670777, | |
| "learning_rate": 9.89846735808731e-06, | |
| "loss": 1.1855, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2840079628400796, | |
| "grad_norm": 2.1853732110604027, | |
| "learning_rate": 9.896348454450918e-06, | |
| "loss": 1.1514, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.28666224286662245, | |
| "grad_norm": 1.863102490412834, | |
| "learning_rate": 9.894207900176074e-06, | |
| "loss": 1.1582, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.28931652289316523, | |
| "grad_norm": 2.1558166021806504, | |
| "learning_rate": 9.892045704727864e-06, | |
| "loss": 1.2692, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.291970802919708, | |
| "grad_norm": 1.623855596114215, | |
| "learning_rate": 9.889861877667071e-06, | |
| "loss": 1.1406, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.29462508294625084, | |
| "grad_norm": 1.925359975573577, | |
| "learning_rate": 9.887656428650123e-06, | |
| "loss": 1.144, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2972793629727936, | |
| "grad_norm": 2.0780064875742634, | |
| "learning_rate": 9.885429367429062e-06, | |
| "loss": 1.2095, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.29993364299933645, | |
| "grad_norm": 1.757836459981376, | |
| "learning_rate": 9.883180703851488e-06, | |
| "loss": 1.2129, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3025879230258792, | |
| "grad_norm": 1.6497771335719753, | |
| "learning_rate": 9.880910447860527e-06, | |
| "loss": 1.1528, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.30524220305242206, | |
| "grad_norm": 1.9314161378924497, | |
| "learning_rate": 9.878618609494781e-06, | |
| "loss": 1.2038, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.30789648307896483, | |
| "grad_norm": 1.5945997988558909, | |
| "learning_rate": 9.876305198888284e-06, | |
| "loss": 1.1349, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3105507631055076, | |
| "grad_norm": 1.7095400428823162, | |
| "learning_rate": 9.873970226270458e-06, | |
| "loss": 1.1543, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.31320504313205044, | |
| "grad_norm": 1.6150384960254696, | |
| "learning_rate": 9.871613701966067e-06, | |
| "loss": 1.1527, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3158593231585932, | |
| "grad_norm": 3.288441610824325, | |
| "learning_rate": 9.869235636395177e-06, | |
| "loss": 1.2411, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.31851360318513605, | |
| "grad_norm": 1.6258023683537948, | |
| "learning_rate": 9.866836040073099e-06, | |
| "loss": 1.2002, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.32116788321167883, | |
| "grad_norm": 1.6467592688369062, | |
| "learning_rate": 9.86441492361035e-06, | |
| "loss": 1.2134, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3238221632382216, | |
| "grad_norm": 1.5988307616959179, | |
| "learning_rate": 9.861972297712606e-06, | |
| "loss": 1.2259, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.32647644326476444, | |
| "grad_norm": 1.9915164437167947, | |
| "learning_rate": 9.859508173180653e-06, | |
| "loss": 1.2369, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3291307232913072, | |
| "grad_norm": 1.751874113048822, | |
| "learning_rate": 9.857022560910338e-06, | |
| "loss": 1.1954, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.33178500331785005, | |
| "grad_norm": 1.589249974809787, | |
| "learning_rate": 9.854515471892527e-06, | |
| "loss": 1.1434, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3344392833443928, | |
| "grad_norm": 1.6571603039493696, | |
| "learning_rate": 9.851986917213044e-06, | |
| "loss": 1.1276, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.33709356337093566, | |
| "grad_norm": 1.821753338127428, | |
| "learning_rate": 9.849436908052636e-06, | |
| "loss": 1.1889, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.33974784339747843, | |
| "grad_norm": 1.55634782143693, | |
| "learning_rate": 9.846865455686915e-06, | |
| "loss": 1.1833, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3424021234240212, | |
| "grad_norm": 1.4585503339043484, | |
| "learning_rate": 9.844272571486313e-06, | |
| "loss": 1.1979, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.34505640345056404, | |
| "grad_norm": 1.6155812940652678, | |
| "learning_rate": 9.84165826691602e-06, | |
| "loss": 1.179, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3477106834771068, | |
| "grad_norm": 1.609059975302855, | |
| "learning_rate": 9.839022553535957e-06, | |
| "loss": 1.2091, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.35036496350364965, | |
| "grad_norm": 1.6996931072095949, | |
| "learning_rate": 9.836365443000697e-06, | |
| "loss": 1.1223, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.35301924353019243, | |
| "grad_norm": 1.616355220759201, | |
| "learning_rate": 9.833686947059436e-06, | |
| "loss": 1.0918, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.35567352355673526, | |
| "grad_norm": 1.6096571582268207, | |
| "learning_rate": 9.830987077555925e-06, | |
| "loss": 1.1654, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.35832780358327804, | |
| "grad_norm": 1.565339921018465, | |
| "learning_rate": 9.828265846428428e-06, | |
| "loss": 1.1634, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3609820836098208, | |
| "grad_norm": 1.546016830156871, | |
| "learning_rate": 9.825523265709667e-06, | |
| "loss": 1.1751, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 2.0425258490960836, | |
| "learning_rate": 9.822759347526766e-06, | |
| "loss": 1.1841, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3662906436629064, | |
| "grad_norm": 1.7147276492095496, | |
| "learning_rate": 9.819974104101198e-06, | |
| "loss": 1.2335, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.36894492368944926, | |
| "grad_norm": 1.5869898030324339, | |
| "learning_rate": 9.817167547748729e-06, | |
| "loss": 1.2584, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.37159920371599203, | |
| "grad_norm": 1.6478179096824475, | |
| "learning_rate": 9.814339690879376e-06, | |
| "loss": 1.1961, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.37425348374253486, | |
| "grad_norm": 1.5596319041645448, | |
| "learning_rate": 9.811490545997331e-06, | |
| "loss": 1.2046, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.37690776376907764, | |
| "grad_norm": 1.81548347845434, | |
| "learning_rate": 9.808620125700925e-06, | |
| "loss": 1.2137, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3795620437956204, | |
| "grad_norm": 1.5700607431043994, | |
| "learning_rate": 9.80572844268256e-06, | |
| "loss": 1.2, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.38221632382216325, | |
| "grad_norm": 1.6383722320139935, | |
| "learning_rate": 9.802815509728662e-06, | |
| "loss": 1.1747, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.384870603848706, | |
| "grad_norm": 1.8125605110455933, | |
| "learning_rate": 9.799881339719615e-06, | |
| "loss": 1.1867, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.38752488387524886, | |
| "grad_norm": 1.7582804382886328, | |
| "learning_rate": 9.796925945629711e-06, | |
| "loss": 1.3162, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.39017916390179164, | |
| "grad_norm": 2.0734414221603665, | |
| "learning_rate": 9.793949340527091e-06, | |
| "loss": 1.234, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3928334439283344, | |
| "grad_norm": 1.5379766795331946, | |
| "learning_rate": 9.790951537573686e-06, | |
| "loss": 1.1185, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.39548772395487725, | |
| "grad_norm": 1.6227118631483388, | |
| "learning_rate": 9.787932550025158e-06, | |
| "loss": 1.1523, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.39814200398142, | |
| "grad_norm": 1.5530246573576652, | |
| "learning_rate": 9.784892391230847e-06, | |
| "loss": 1.1405, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.40079628400796286, | |
| "grad_norm": 1.6380120481890832, | |
| "learning_rate": 9.781831074633703e-06, | |
| "loss": 1.2153, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.40345056403450563, | |
| "grad_norm": 1.9786402997029178, | |
| "learning_rate": 9.778748613770234e-06, | |
| "loss": 1.2213, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.40610484406104846, | |
| "grad_norm": 1.5864201917409944, | |
| "learning_rate": 9.775645022270448e-06, | |
| "loss": 1.1674, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.40875912408759124, | |
| "grad_norm": 1.6501859286504295, | |
| "learning_rate": 9.772520313857777e-06, | |
| "loss": 1.1805, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.411413404114134, | |
| "grad_norm": 1.5132303328319994, | |
| "learning_rate": 9.769374502349038e-06, | |
| "loss": 1.15, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.41406768414067685, | |
| "grad_norm": 1.6907615300646948, | |
| "learning_rate": 9.766207601654356e-06, | |
| "loss": 1.1848, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4167219641672196, | |
| "grad_norm": 1.7001401085077357, | |
| "learning_rate": 9.763019625777111e-06, | |
| "loss": 1.2335, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.41937624419376246, | |
| "grad_norm": 1.7600942954439958, | |
| "learning_rate": 9.759810588813872e-06, | |
| "loss": 1.1743, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.42203052422030524, | |
| "grad_norm": 1.6515697771161784, | |
| "learning_rate": 9.756580504954334e-06, | |
| "loss": 1.2276, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.42468480424684807, | |
| "grad_norm": 1.6967375558942543, | |
| "learning_rate": 9.753329388481261e-06, | |
| "loss": 1.2082, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.42733908427339085, | |
| "grad_norm": 1.7152427713922846, | |
| "learning_rate": 9.750057253770413e-06, | |
| "loss": 1.1458, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4299933642999336, | |
| "grad_norm": 1.625060781845651, | |
| "learning_rate": 9.746764115290496e-06, | |
| "loss": 1.2033, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.43264764432647645, | |
| "grad_norm": 1.7069397928194143, | |
| "learning_rate": 9.743449987603082e-06, | |
| "loss": 1.2342, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.43530192435301923, | |
| "grad_norm": 1.708629436384557, | |
| "learning_rate": 9.740114885362562e-06, | |
| "loss": 1.2442, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.43795620437956206, | |
| "grad_norm": 1.5857203714681123, | |
| "learning_rate": 9.736758823316062e-06, | |
| "loss": 1.2097, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.44061048440610484, | |
| "grad_norm": 1.657268677184339, | |
| "learning_rate": 9.733381816303395e-06, | |
| "loss": 1.1215, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4432647644326476, | |
| "grad_norm": 1.4640436666626744, | |
| "learning_rate": 9.729983879256988e-06, | |
| "loss": 1.1646, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.44591904445919045, | |
| "grad_norm": 1.6268091054804499, | |
| "learning_rate": 9.726565027201813e-06, | |
| "loss": 1.2264, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4485733244857332, | |
| "grad_norm": 1.5858930123997803, | |
| "learning_rate": 9.723125275255325e-06, | |
| "loss": 1.1661, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.45122760451227606, | |
| "grad_norm": 1.5759031230494174, | |
| "learning_rate": 9.719664638627395e-06, | |
| "loss": 1.1558, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.45388188453881884, | |
| "grad_norm": 1.7486351365651316, | |
| "learning_rate": 9.716183132620242e-06, | |
| "loss": 1.19, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.45653616456536167, | |
| "grad_norm": 1.7251231178841304, | |
| "learning_rate": 9.712680772628365e-06, | |
| "loss": 1.2261, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.45919044459190445, | |
| "grad_norm": 1.6118734678264717, | |
| "learning_rate": 9.70915757413847e-06, | |
| "loss": 1.2014, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.4618447246184472, | |
| "grad_norm": 1.5762577213086215, | |
| "learning_rate": 9.705613552729416e-06, | |
| "loss": 1.1487, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.46449900464499005, | |
| "grad_norm": 1.5672859542358526, | |
| "learning_rate": 9.702048724072128e-06, | |
| "loss": 1.1892, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.46715328467153283, | |
| "grad_norm": 1.616073022266597, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 1.1718, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.46980756469807566, | |
| "grad_norm": 1.605222482810264, | |
| "learning_rate": 9.694856708156526e-06, | |
| "loss": 1.1022, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.47246184472461844, | |
| "grad_norm": 1.483617625625729, | |
| "learning_rate": 9.691229552699817e-06, | |
| "loss": 1.1204, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4751161247511613, | |
| "grad_norm": 1.6018473502205803, | |
| "learning_rate": 9.68758165359794e-06, | |
| "loss": 1.1816, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.47777040477770405, | |
| "grad_norm": 1.5779836150848479, | |
| "learning_rate": 9.683913026981155e-06, | |
| "loss": 1.1871, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4804246848042468, | |
| "grad_norm": 1.6463102663610685, | |
| "learning_rate": 9.680223689071364e-06, | |
| "loss": 1.1139, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.48307896483078966, | |
| "grad_norm": 1.7091000919337074, | |
| "learning_rate": 9.676513656182059e-06, | |
| "loss": 1.1695, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.48573324485733244, | |
| "grad_norm": 1.633509337933534, | |
| "learning_rate": 9.672782944718234e-06, | |
| "loss": 1.1811, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.48838752488387527, | |
| "grad_norm": 1.5767561431519088, | |
| "learning_rate": 9.669031571176322e-06, | |
| "loss": 1.2062, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.49104180491041804, | |
| "grad_norm": 1.6306907003550404, | |
| "learning_rate": 9.665259552144122e-06, | |
| "loss": 1.1829, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4936960849369608, | |
| "grad_norm": 1.517988528061533, | |
| "learning_rate": 9.66146690430072e-06, | |
| "loss": 1.2014, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.49635036496350365, | |
| "grad_norm": 1.598871387440831, | |
| "learning_rate": 9.657653644416417e-06, | |
| "loss": 1.1496, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.49900464499004643, | |
| "grad_norm": 2.400377785726973, | |
| "learning_rate": 9.65381978935266e-06, | |
| "loss": 1.1905, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5016589250165893, | |
| "grad_norm": 1.5306038174802905, | |
| "learning_rate": 9.649965356061961e-06, | |
| "loss": 1.1225, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.504313205043132, | |
| "grad_norm": 1.7432637039460837, | |
| "learning_rate": 9.646090361587828e-06, | |
| "loss": 1.2338, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5069674850696748, | |
| "grad_norm": 1.549174772320108, | |
| "learning_rate": 9.642194823064679e-06, | |
| "loss": 1.1395, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5096217650962177, | |
| "grad_norm": 1.4556718082039433, | |
| "learning_rate": 9.63827875771778e-06, | |
| "loss": 1.1054, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5122760451227605, | |
| "grad_norm": 1.546232476076245, | |
| "learning_rate": 9.634342182863163e-06, | |
| "loss": 1.1821, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5149303251493033, | |
| "grad_norm": 1.6428065540768686, | |
| "learning_rate": 9.630385115907545e-06, | |
| "loss": 1.2078, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.517584605175846, | |
| "grad_norm": 1.5932949193165389, | |
| "learning_rate": 9.626407574348258e-06, | |
| "loss": 1.1646, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5202388852023888, | |
| "grad_norm": 1.5803201555935116, | |
| "learning_rate": 9.622409575773162e-06, | |
| "loss": 1.166, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5228931652289317, | |
| "grad_norm": 1.5292820314306055, | |
| "learning_rate": 9.618391137860583e-06, | |
| "loss": 1.2152, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5255474452554745, | |
| "grad_norm": 1.3727930028186761, | |
| "learning_rate": 9.614352278379217e-06, | |
| "loss": 1.1402, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5282017252820173, | |
| "grad_norm": 1.6819090165661312, | |
| "learning_rate": 9.610293015188067e-06, | |
| "loss": 1.1665, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.53085600530856, | |
| "grad_norm": 1.5006037012098021, | |
| "learning_rate": 9.606213366236354e-06, | |
| "loss": 1.1877, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5335102853351028, | |
| "grad_norm": 1.6016624668408799, | |
| "learning_rate": 9.60211334956344e-06, | |
| "loss": 1.1498, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.5361645653616457, | |
| "grad_norm": 1.7368140744461305, | |
| "learning_rate": 9.597992983298748e-06, | |
| "loss": 1.1922, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5388188453881885, | |
| "grad_norm": 1.6176251602621352, | |
| "learning_rate": 9.593852285661684e-06, | |
| "loss": 1.1459, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5414731254147312, | |
| "grad_norm": 1.3750495471617235, | |
| "learning_rate": 9.589691274961556e-06, | |
| "loss": 1.0835, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.544127405441274, | |
| "grad_norm": 1.6906485599869903, | |
| "learning_rate": 9.585509969597491e-06, | |
| "loss": 1.22, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5467816854678168, | |
| "grad_norm": 1.5439326128894457, | |
| "learning_rate": 9.581308388058354e-06, | |
| "loss": 1.1364, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5494359654943597, | |
| "grad_norm": 1.5251041120197495, | |
| "learning_rate": 9.577086548922671e-06, | |
| "loss": 1.2201, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5520902455209025, | |
| "grad_norm": 1.511712369802414, | |
| "learning_rate": 9.572844470858537e-06, | |
| "loss": 1.1091, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5547445255474452, | |
| "grad_norm": 1.8573483808679467, | |
| "learning_rate": 9.568582172623544e-06, | |
| "loss": 1.2284, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.557398805573988, | |
| "grad_norm": 1.4309251806187955, | |
| "learning_rate": 9.56429967306469e-06, | |
| "loss": 1.1646, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5600530856005309, | |
| "grad_norm": 1.6268260856080405, | |
| "learning_rate": 9.559996991118304e-06, | |
| "loss": 1.1812, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5627073656270737, | |
| "grad_norm": 1.6752285964398912, | |
| "learning_rate": 9.55567414580995e-06, | |
| "loss": 1.19, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5653616456536165, | |
| "grad_norm": 1.6202125494829664, | |
| "learning_rate": 9.551331156254358e-06, | |
| "loss": 1.2001, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5680159256801592, | |
| "grad_norm": 1.4441208249265054, | |
| "learning_rate": 9.546968041655326e-06, | |
| "loss": 1.2011, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.570670205706702, | |
| "grad_norm": 1.4681168393734876, | |
| "learning_rate": 9.542584821305643e-06, | |
| "loss": 1.118, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5733244857332449, | |
| "grad_norm": 1.67215223118757, | |
| "learning_rate": 9.538181514587004e-06, | |
| "loss": 1.1441, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5759787657597877, | |
| "grad_norm": 1.840004210878956, | |
| "learning_rate": 9.533758140969913e-06, | |
| "loss": 1.1689, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5786330457863305, | |
| "grad_norm": 2.0817799067244387, | |
| "learning_rate": 9.529314720013618e-06, | |
| "loss": 1.1879, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5812873258128732, | |
| "grad_norm": 1.6384013753881452, | |
| "learning_rate": 9.524851271366002e-06, | |
| "loss": 1.1157, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.583941605839416, | |
| "grad_norm": 1.6847540459176993, | |
| "learning_rate": 9.520367814763514e-06, | |
| "loss": 1.1583, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5865958858659589, | |
| "grad_norm": 1.553367758905212, | |
| "learning_rate": 9.515864370031066e-06, | |
| "loss": 1.0916, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5892501658925017, | |
| "grad_norm": 1.6595661898312408, | |
| "learning_rate": 9.511340957081957e-06, | |
| "loss": 1.1912, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5919044459190445, | |
| "grad_norm": 1.6816767854984012, | |
| "learning_rate": 9.506797595917787e-06, | |
| "loss": 1.1948, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5945587259455872, | |
| "grad_norm": 1.4766094174812612, | |
| "learning_rate": 9.502234306628354e-06, | |
| "loss": 1.1607, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.59721300597213, | |
| "grad_norm": 1.5815513019760774, | |
| "learning_rate": 9.49765110939158e-06, | |
| "loss": 1.1248, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5998672859986729, | |
| "grad_norm": 1.6485658910927394, | |
| "learning_rate": 9.493048024473413e-06, | |
| "loss": 1.2191, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6025215660252157, | |
| "grad_norm": 1.424065848656427, | |
| "learning_rate": 9.488425072227738e-06, | |
| "loss": 1.2521, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6051758460517584, | |
| "grad_norm": 1.4486333802405926, | |
| "learning_rate": 9.483782273096295e-06, | |
| "loss": 1.1734, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6078301260783012, | |
| "grad_norm": 1.6817918601770532, | |
| "learning_rate": 9.47911964760858e-06, | |
| "loss": 1.1695, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6104844061048441, | |
| "grad_norm": 1.6160290558732326, | |
| "learning_rate": 9.474437216381756e-06, | |
| "loss": 1.154, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6131386861313869, | |
| "grad_norm": 1.4261795572898603, | |
| "learning_rate": 9.469735000120564e-06, | |
| "loss": 1.1544, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6157929661579297, | |
| "grad_norm": 1.458151411666846, | |
| "learning_rate": 9.46501301961723e-06, | |
| "loss": 1.2065, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6184472461844724, | |
| "grad_norm": 1.5627499060274408, | |
| "learning_rate": 9.460271295751373e-06, | |
| "loss": 1.1579, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6211015262110152, | |
| "grad_norm": 1.86944032236805, | |
| "learning_rate": 9.455509849489915e-06, | |
| "loss": 1.1519, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6237558062375581, | |
| "grad_norm": 1.979766174904363, | |
| "learning_rate": 9.450728701886985e-06, | |
| "loss": 1.2358, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6264100862641009, | |
| "grad_norm": 1.5229843416844162, | |
| "learning_rate": 9.445927874083825e-06, | |
| "loss": 1.1207, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6290643662906437, | |
| "grad_norm": 1.5916340950774943, | |
| "learning_rate": 9.441107387308701e-06, | |
| "loss": 1.2486, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6317186463171864, | |
| "grad_norm": 1.4982052500691954, | |
| "learning_rate": 9.436267262876808e-06, | |
| "loss": 1.1445, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6343729263437292, | |
| "grad_norm": 1.868028818978397, | |
| "learning_rate": 9.431407522190176e-06, | |
| "loss": 1.2215, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6370272063702721, | |
| "grad_norm": 1.5000893386206633, | |
| "learning_rate": 9.426528186737566e-06, | |
| "loss": 1.1748, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6396814863968149, | |
| "grad_norm": 1.6105517075622542, | |
| "learning_rate": 9.421629278094394e-06, | |
| "loss": 1.1444, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6423357664233577, | |
| "grad_norm": 1.6245044582496362, | |
| "learning_rate": 9.416710817922615e-06, | |
| "loss": 1.2016, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6449900464499004, | |
| "grad_norm": 1.582791773770731, | |
| "learning_rate": 9.411772827970642e-06, | |
| "loss": 1.1595, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6476443264764432, | |
| "grad_norm": 1.5289298221123744, | |
| "learning_rate": 9.406815330073244e-06, | |
| "loss": 1.196, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6502986065029861, | |
| "grad_norm": 1.494805179412693, | |
| "learning_rate": 9.40183834615145e-06, | |
| "loss": 1.119, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6529528865295289, | |
| "grad_norm": 1.6857955705395817, | |
| "learning_rate": 9.396841898212452e-06, | |
| "loss": 1.1222, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6556071665560717, | |
| "grad_norm": 1.465569644664737, | |
| "learning_rate": 9.391826008349507e-06, | |
| "loss": 1.1196, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6582614465826144, | |
| "grad_norm": 1.6038700287536702, | |
| "learning_rate": 9.38679069874184e-06, | |
| "loss": 1.1596, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6609157266091573, | |
| "grad_norm": 1.754259412074635, | |
| "learning_rate": 9.381735991654547e-06, | |
| "loss": 1.185, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6635700066357001, | |
| "grad_norm": 1.5054976516017162, | |
| "learning_rate": 9.376661909438496e-06, | |
| "loss": 1.14, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6662242866622429, | |
| "grad_norm": 1.6591136646331954, | |
| "learning_rate": 9.371568474530228e-06, | |
| "loss": 1.1453, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6688785666887856, | |
| "grad_norm": 1.602614315373211, | |
| "learning_rate": 9.366455709451857e-06, | |
| "loss": 1.115, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6715328467153284, | |
| "grad_norm": 1.3802344389470933, | |
| "learning_rate": 9.36132363681097e-06, | |
| "loss": 1.0926, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6741871267418713, | |
| "grad_norm": 1.5028041314507699, | |
| "learning_rate": 9.356172279300528e-06, | |
| "loss": 1.1388, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6768414067684141, | |
| "grad_norm": 1.4603385973006835, | |
| "learning_rate": 9.35100165969877e-06, | |
| "loss": 1.1261, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6794956867949569, | |
| "grad_norm": 2.222737357031752, | |
| "learning_rate": 9.3458118008691e-06, | |
| "loss": 1.1181, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6821499668214996, | |
| "grad_norm": 1.5628150576174966, | |
| "learning_rate": 9.340602725760003e-06, | |
| "loss": 1.1269, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6848042468480424, | |
| "grad_norm": 1.7660936315398623, | |
| "learning_rate": 9.335374457404928e-06, | |
| "loss": 1.1567, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6874585268745853, | |
| "grad_norm": 1.5095573241471834, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 1.1407, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6901128069011281, | |
| "grad_norm": 1.4506359372228914, | |
| "learning_rate": 9.324860433514888e-06, | |
| "loss": 1.1668, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6927670869276709, | |
| "grad_norm": 1.536882345986633, | |
| "learning_rate": 9.319574724470756e-06, | |
| "loss": 1.1581, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6954213669542136, | |
| "grad_norm": 1.4356269422691534, | |
| "learning_rate": 9.314269915162115e-06, | |
| "loss": 1.1075, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6980756469807564, | |
| "grad_norm": 1.373904876593965, | |
| "learning_rate": 9.308946029045726e-06, | |
| "loss": 1.1121, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7007299270072993, | |
| "grad_norm": 1.5328812905843867, | |
| "learning_rate": 9.303603089662717e-06, | |
| "loss": 1.0921, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7033842070338421, | |
| "grad_norm": 1.5072781837506157, | |
| "learning_rate": 9.298241120638451e-06, | |
| "loss": 1.1198, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7060384870603849, | |
| "grad_norm": 1.5995295442728128, | |
| "learning_rate": 9.292860145682451e-06, | |
| "loss": 1.1472, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7086927670869276, | |
| "grad_norm": 1.586589487215959, | |
| "learning_rate": 9.287460188588272e-06, | |
| "loss": 1.2081, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7113470471134705, | |
| "grad_norm": 1.6738675413951511, | |
| "learning_rate": 9.282041273233402e-06, | |
| "loss": 1.1676, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7140013271400133, | |
| "grad_norm": 1.5986869946296454, | |
| "learning_rate": 9.276603423579164e-06, | |
| "loss": 1.213, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7166556071665561, | |
| "grad_norm": 1.5027119454217344, | |
| "learning_rate": 9.271146663670605e-06, | |
| "loss": 1.1622, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7193098871930989, | |
| "grad_norm": 1.4752249291840163, | |
| "learning_rate": 9.265671017636384e-06, | |
| "loss": 1.0725, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7219641672196416, | |
| "grad_norm": 1.6425492982199013, | |
| "learning_rate": 9.260176509688673e-06, | |
| "loss": 1.2088, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7246184472461845, | |
| "grad_norm": 1.671119694405482, | |
| "learning_rate": 9.254663164123052e-06, | |
| "loss": 1.1584, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 3.2839083971639016, | |
| "learning_rate": 9.249131005318388e-06, | |
| "loss": 1.0801, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7299270072992701, | |
| "grad_norm": 1.590670276122513, | |
| "learning_rate": 9.243580057736743e-06, | |
| "loss": 1.1157, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7325812873258128, | |
| "grad_norm": 1.4517652800533363, | |
| "learning_rate": 9.238010345923257e-06, | |
| "loss": 1.1446, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7352355673523556, | |
| "grad_norm": 1.9696673043614277, | |
| "learning_rate": 9.232421894506043e-06, | |
| "loss": 1.1857, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7378898473788985, | |
| "grad_norm": 1.4778960277561557, | |
| "learning_rate": 9.226814728196072e-06, | |
| "loss": 1.1397, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7405441274054413, | |
| "grad_norm": 1.6498804570471097, | |
| "learning_rate": 9.221188871787076e-06, | |
| "loss": 1.1625, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7431984074319841, | |
| "grad_norm": 1.5796993896804141, | |
| "learning_rate": 9.215544350155423e-06, | |
| "loss": 1.1459, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7458526874585268, | |
| "grad_norm": 1.5226644568838132, | |
| "learning_rate": 9.209881188260021e-06, | |
| "loss": 1.1894, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7485069674850697, | |
| "grad_norm": 1.6645552718061039, | |
| "learning_rate": 9.204199411142196e-06, | |
| "loss": 1.0811, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7511612475116125, | |
| "grad_norm": 1.6581847965929961, | |
| "learning_rate": 9.198499043925591e-06, | |
| "loss": 1.1706, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7538155275381553, | |
| "grad_norm": 1.5270964606037345, | |
| "learning_rate": 9.192780111816048e-06, | |
| "loss": 1.1009, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7564698075646981, | |
| "grad_norm": 1.6698962782227256, | |
| "learning_rate": 9.1870426401015e-06, | |
| "loss": 1.1708, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7591240875912408, | |
| "grad_norm": 1.7012646465038568, | |
| "learning_rate": 9.18128665415186e-06, | |
| "loss": 1.1728, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7617783676177837, | |
| "grad_norm": 1.4354980241800914, | |
| "learning_rate": 9.175512179418903e-06, | |
| "loss": 1.1138, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7644326476443265, | |
| "grad_norm": 1.5648924104277102, | |
| "learning_rate": 9.169719241436162e-06, | |
| "loss": 1.0936, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7670869276708693, | |
| "grad_norm": 1.535950564272176, | |
| "learning_rate": 9.163907865818806e-06, | |
| "loss": 1.0884, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.769741207697412, | |
| "grad_norm": 1.4657493870841045, | |
| "learning_rate": 9.158078078263536e-06, | |
| "loss": 1.0962, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7723954877239548, | |
| "grad_norm": 1.5960566218254721, | |
| "learning_rate": 9.152229904548464e-06, | |
| "loss": 1.1003, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7750497677504977, | |
| "grad_norm": 1.5026317273526155, | |
| "learning_rate": 9.146363370533004e-06, | |
| "loss": 1.1334, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7777040477770405, | |
| "grad_norm": 1.4667451034506551, | |
| "learning_rate": 9.14047850215775e-06, | |
| "loss": 1.188, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7803583278035833, | |
| "grad_norm": 2.5527846830656773, | |
| "learning_rate": 9.134575325444377e-06, | |
| "loss": 1.1489, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.783012607830126, | |
| "grad_norm": 1.5656317760690617, | |
| "learning_rate": 9.128653866495504e-06, | |
| "loss": 1.1049, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7856668878566688, | |
| "grad_norm": 1.4532042000319447, | |
| "learning_rate": 9.122714151494599e-06, | |
| "loss": 1.1156, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7883211678832117, | |
| "grad_norm": 1.4759483242959985, | |
| "learning_rate": 9.116756206705848e-06, | |
| "loss": 1.1396, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7909754479097545, | |
| "grad_norm": 1.4531099151254951, | |
| "learning_rate": 9.110780058474052e-06, | |
| "loss": 1.1011, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7936297279362973, | |
| "grad_norm": 1.509245001105786, | |
| "learning_rate": 9.104785733224498e-06, | |
| "loss": 1.1052, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.79628400796284, | |
| "grad_norm": 1.4742686115404562, | |
| "learning_rate": 9.09877325746285e-06, | |
| "loss": 1.1627, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7989382879893829, | |
| "grad_norm": 1.4451227706627736, | |
| "learning_rate": 9.092742657775031e-06, | |
| "loss": 1.1118, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.8015925680159257, | |
| "grad_norm": 1.575230566769605, | |
| "learning_rate": 9.086693960827106e-06, | |
| "loss": 1.1625, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8042468480424685, | |
| "grad_norm": 1.6679637319120473, | |
| "learning_rate": 9.080627193365155e-06, | |
| "loss": 1.1452, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8069011280690113, | |
| "grad_norm": 1.4072750238146392, | |
| "learning_rate": 9.07454238221517e-06, | |
| "loss": 1.1121, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.809555408095554, | |
| "grad_norm": 1.399645387242144, | |
| "learning_rate": 9.068439554282924e-06, | |
| "loss": 1.1101, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8122096881220969, | |
| "grad_norm": 1.9740369624876526, | |
| "learning_rate": 9.06231873655386e-06, | |
| "loss": 1.0986, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8148639681486397, | |
| "grad_norm": 1.4581046261229995, | |
| "learning_rate": 9.056179956092961e-06, | |
| "loss": 1.1228, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8175182481751825, | |
| "grad_norm": 2.628430909687979, | |
| "learning_rate": 9.050023240044649e-06, | |
| "loss": 1.0783, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8201725282017253, | |
| "grad_norm": 1.6691124773863195, | |
| "learning_rate": 9.043848615632643e-06, | |
| "loss": 1.167, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.822826808228268, | |
| "grad_norm": 1.7459906965590473, | |
| "learning_rate": 9.03765611015985e-06, | |
| "loss": 1.2287, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8254810882548109, | |
| "grad_norm": 1.5373249007323673, | |
| "learning_rate": 9.031445751008252e-06, | |
| "loss": 1.1446, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8281353682813537, | |
| "grad_norm": 1.526522854497616, | |
| "learning_rate": 9.025217565638766e-06, | |
| "loss": 1.1609, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8307896483078965, | |
| "grad_norm": 1.3715974716678416, | |
| "learning_rate": 9.018971581591141e-06, | |
| "loss": 1.1761, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8334439283344393, | |
| "grad_norm": 1.733161587991312, | |
| "learning_rate": 9.012707826483823e-06, | |
| "loss": 1.1241, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.836098208360982, | |
| "grad_norm": 1.5851407690090333, | |
| "learning_rate": 9.006426328013838e-06, | |
| "loss": 1.1898, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8387524883875249, | |
| "grad_norm": 1.492565448115301, | |
| "learning_rate": 9.000127113956673e-06, | |
| "loss": 1.1281, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8414067684140677, | |
| "grad_norm": 1.4675427619453145, | |
| "learning_rate": 8.993810212166147e-06, | |
| "loss": 1.1078, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8440610484406105, | |
| "grad_norm": 1.7806802137808329, | |
| "learning_rate": 8.987475650574289e-06, | |
| "loss": 1.1113, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8467153284671532, | |
| "grad_norm": 1.7957085592461643, | |
| "learning_rate": 8.98112345719122e-06, | |
| "loss": 1.0371, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8493696084936961, | |
| "grad_norm": 1.6891739001445774, | |
| "learning_rate": 8.974753660105023e-06, | |
| "loss": 1.1939, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8520238885202389, | |
| "grad_norm": 1.361414937851007, | |
| "learning_rate": 8.968366287481621e-06, | |
| "loss": 1.0606, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8546781685467817, | |
| "grad_norm": 1.5477011631255944, | |
| "learning_rate": 8.961961367564652e-06, | |
| "loss": 1.1343, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8573324485733245, | |
| "grad_norm": 1.398038196798421, | |
| "learning_rate": 8.955538928675343e-06, | |
| "loss": 1.0537, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8599867285998672, | |
| "grad_norm": 1.4829616588106211, | |
| "learning_rate": 8.94909899921239e-06, | |
| "loss": 1.1244, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8626410086264101, | |
| "grad_norm": 1.458234865181319, | |
| "learning_rate": 8.94264160765183e-06, | |
| "loss": 1.0945, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8652952886529529, | |
| "grad_norm": 1.48674147774638, | |
| "learning_rate": 8.936166782546907e-06, | |
| "loss": 1.0698, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8679495686794957, | |
| "grad_norm": 1.3468414140104497, | |
| "learning_rate": 8.929674552527956e-06, | |
| "loss": 1.0428, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8706038487060385, | |
| "grad_norm": 1.4927690225590464, | |
| "learning_rate": 8.923164946302274e-06, | |
| "loss": 1.1367, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8732581287325812, | |
| "grad_norm": 1.3780023302624582, | |
| "learning_rate": 8.91663799265399e-06, | |
| "loss": 1.1048, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8759124087591241, | |
| "grad_norm": 1.8364949062401694, | |
| "learning_rate": 8.910093720443945e-06, | |
| "loss": 1.1962, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8785666887856669, | |
| "grad_norm": 1.6803377587803117, | |
| "learning_rate": 8.903532158609548e-06, | |
| "loss": 1.1919, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8812209688122097, | |
| "grad_norm": 1.5621810302199315, | |
| "learning_rate": 8.89695333616467e-06, | |
| "loss": 1.1177, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8838752488387525, | |
| "grad_norm": 1.4462262022449852, | |
| "learning_rate": 8.890357282199504e-06, | |
| "loss": 1.1321, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8865295288652952, | |
| "grad_norm": 1.4286236136174415, | |
| "learning_rate": 8.883744025880429e-06, | |
| "loss": 1.1717, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8891838088918381, | |
| "grad_norm": 1.4484748876813012, | |
| "learning_rate": 8.877113596449895e-06, | |
| "loss": 1.1004, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8918380889183809, | |
| "grad_norm": 1.4164984401949983, | |
| "learning_rate": 8.87046602322629e-06, | |
| "loss": 1.079, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8944923689449237, | |
| "grad_norm": 1.3708607011124272, | |
| "learning_rate": 8.863801335603802e-06, | |
| "loss": 1.133, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8971466489714665, | |
| "grad_norm": 1.3626382714893748, | |
| "learning_rate": 8.857119563052301e-06, | |
| "loss": 1.0734, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8998009289980093, | |
| "grad_norm": 1.5082034601534042, | |
| "learning_rate": 8.850420735117202e-06, | |
| "loss": 1.1691, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9024552090245521, | |
| "grad_norm": 1.3234730893075355, | |
| "learning_rate": 8.843704881419333e-06, | |
| "loss": 1.046, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9051094890510949, | |
| "grad_norm": 1.4896833219647911, | |
| "learning_rate": 8.836972031654807e-06, | |
| "loss": 1.1586, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9077637690776377, | |
| "grad_norm": 1.3697029850159739, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 1.0817, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9104180491041804, | |
| "grad_norm": 1.747564979115208, | |
| "learning_rate": 8.823455463085873e-06, | |
| "loss": 1.0905, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9130723291307233, | |
| "grad_norm": 1.5649272934153584, | |
| "learning_rate": 8.816671804048933e-06, | |
| "loss": 1.0434, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9157266091572661, | |
| "grad_norm": 1.4823250348157, | |
| "learning_rate": 8.809871268480004e-06, | |
| "loss": 1.0895, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9183808891838089, | |
| "grad_norm": 1.4264959835661182, | |
| "learning_rate": 8.803053886449644e-06, | |
| "loss": 1.1502, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9210351692103517, | |
| "grad_norm": 1.5424239648407791, | |
| "learning_rate": 8.796219688102906e-06, | |
| "loss": 1.0734, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9236894492368944, | |
| "grad_norm": 1.594778792432936, | |
| "learning_rate": 8.789368703659199e-06, | |
| "loss": 1.06, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9263437292634373, | |
| "grad_norm": 1.425756455063989, | |
| "learning_rate": 8.782500963412156e-06, | |
| "loss": 1.1091, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9289980092899801, | |
| "grad_norm": 1.4480941030784251, | |
| "learning_rate": 8.775616497729502e-06, | |
| "loss": 1.1146, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9316522893165229, | |
| "grad_norm": 1.9595578470904635, | |
| "learning_rate": 8.768715337052918e-06, | |
| "loss": 1.1353, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.9343065693430657, | |
| "grad_norm": 1.6462295827570508, | |
| "learning_rate": 8.761797511897907e-06, | |
| "loss": 1.1376, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9369608493696084, | |
| "grad_norm": 1.393588576405631, | |
| "learning_rate": 8.754863052853658e-06, | |
| "loss": 1.1317, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9396151293961513, | |
| "grad_norm": 2.230474529090937, | |
| "learning_rate": 8.747911990582912e-06, | |
| "loss": 1.1086, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9422694094226941, | |
| "grad_norm": 1.809443765521074, | |
| "learning_rate": 8.740944355821827e-06, | |
| "loss": 1.1018, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.9449236894492369, | |
| "grad_norm": 1.6826959358419462, | |
| "learning_rate": 8.733960179379842e-06, | |
| "loss": 1.1766, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9475779694757797, | |
| "grad_norm": 1.429793323082417, | |
| "learning_rate": 8.726959492139535e-06, | |
| "loss": 1.062, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9502322495023225, | |
| "grad_norm": 1.3304241051942485, | |
| "learning_rate": 8.719942325056496e-06, | |
| "loss": 1.0864, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9528865295288653, | |
| "grad_norm": 1.838527760485716, | |
| "learning_rate": 8.712908709159183e-06, | |
| "loss": 1.08, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9555408095554081, | |
| "grad_norm": 1.8095644142003555, | |
| "learning_rate": 8.70585867554879e-06, | |
| "loss": 1.0622, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9581950895819509, | |
| "grad_norm": 1.3961944428914481, | |
| "learning_rate": 8.698792255399104e-06, | |
| "loss": 1.1279, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9608493696084937, | |
| "grad_norm": 1.4265196608054989, | |
| "learning_rate": 8.691709479956373e-06, | |
| "loss": 1.0786, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9635036496350365, | |
| "grad_norm": 1.5175948559199692, | |
| "learning_rate": 8.68461038053916e-06, | |
| "loss": 1.1046, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9661579296615793, | |
| "grad_norm": 1.5709878342434411, | |
| "learning_rate": 8.67749498853821e-06, | |
| "loss": 1.0947, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9688122096881221, | |
| "grad_norm": 1.5372734019009258, | |
| "learning_rate": 8.670363335416319e-06, | |
| "loss": 1.0346, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9714664897146649, | |
| "grad_norm": 1.3550031766754063, | |
| "learning_rate": 8.663215452708173e-06, | |
| "loss": 1.0868, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9741207697412076, | |
| "grad_norm": 1.5040356499297907, | |
| "learning_rate": 8.656051372020232e-06, | |
| "loss": 1.1083, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9767750497677505, | |
| "grad_norm": 1.5264462091802162, | |
| "learning_rate": 8.648871125030576e-06, | |
| "loss": 1.1647, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9794293297942933, | |
| "grad_norm": 1.8183949324824284, | |
| "learning_rate": 8.64167474348877e-06, | |
| "loss": 1.0809, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9820836098208361, | |
| "grad_norm": 2.6148655405710874, | |
| "learning_rate": 8.634462259215719e-06, | |
| "loss": 1.1195, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9847378898473789, | |
| "grad_norm": 1.5140959417993884, | |
| "learning_rate": 8.627233704103538e-06, | |
| "loss": 1.0768, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9873921698739216, | |
| "grad_norm": 1.3953146864224168, | |
| "learning_rate": 8.619989110115398e-06, | |
| "loss": 1.0998, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9900464499004645, | |
| "grad_norm": 1.5342377987936564, | |
| "learning_rate": 8.612728509285395e-06, | |
| "loss": 1.1627, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9927007299270073, | |
| "grad_norm": 1.6359257997310512, | |
| "learning_rate": 8.6054519337184e-06, | |
| "loss": 1.0947, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9953550099535501, | |
| "grad_norm": 1.468781234700457, | |
| "learning_rate": 8.59815941558992e-06, | |
| "loss": 1.0958, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9980092899800929, | |
| "grad_norm": 1.8585796860334978, | |
| "learning_rate": 8.590850987145964e-06, | |
| "loss": 1.1439, | |
| "step": 376 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1504, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 376, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 357968545579008.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |