| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.872057318321392, | |
| "eval_steps": 500, | |
| "global_step": 1600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.030706243602865915, | |
| "grad_norm": 4.6859283447265625, | |
| "learning_rate": 1.6460905349794242e-07, | |
| "loss": 0.6431, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.06141248720573183, | |
| "grad_norm": 4.602360725402832, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 0.6447, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09211873080859775, | |
| "grad_norm": 4.142496585845947, | |
| "learning_rate": 5.761316872427984e-07, | |
| "loss": 0.6395, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.12282497441146366, | |
| "grad_norm": 3.5161383152008057, | |
| "learning_rate": 7.818930041152265e-07, | |
| "loss": 0.6226, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.1535312180143296, | |
| "grad_norm": 1.9937578439712524, | |
| "learning_rate": 9.876543209876544e-07, | |
| "loss": 0.5901, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1842374616171955, | |
| "grad_norm": 1.0944325923919678, | |
| "learning_rate": 1.1934156378600823e-06, | |
| "loss": 0.575, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.21494370522006143, | |
| "grad_norm": 0.8474246859550476, | |
| "learning_rate": 1.3991769547325104e-06, | |
| "loss": 0.5552, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.24564994882292732, | |
| "grad_norm": 0.869394838809967, | |
| "learning_rate": 1.6049382716049383e-06, | |
| "loss": 0.5416, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.27635619242579323, | |
| "grad_norm": 0.6358171105384827, | |
| "learning_rate": 1.8106995884773665e-06, | |
| "loss": 0.534, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.3070624360286592, | |
| "grad_norm": 0.5201377868652344, | |
| "learning_rate": 2.0164609053497946e-06, | |
| "loss": 0.5236, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.33776867963152507, | |
| "grad_norm": 0.44874125719070435, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.5176, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.368474923234391, | |
| "grad_norm": 0.33835747838020325, | |
| "learning_rate": 2.4279835390946504e-06, | |
| "loss": 0.5123, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3991811668372569, | |
| "grad_norm": 0.32708311080932617, | |
| "learning_rate": 2.6337448559670788e-06, | |
| "loss": 0.5092, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.42988741044012285, | |
| "grad_norm": 0.2938404083251953, | |
| "learning_rate": 2.8395061728395062e-06, | |
| "loss": 0.5046, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.46059365404298874, | |
| "grad_norm": 0.2861787974834442, | |
| "learning_rate": 3.0452674897119346e-06, | |
| "loss": 0.4985, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.49129989764585463, | |
| "grad_norm": 0.26230937242507935, | |
| "learning_rate": 3.2510288065843625e-06, | |
| "loss": 0.4994, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5220061412487206, | |
| "grad_norm": 0.2605642080307007, | |
| "learning_rate": 3.4567901234567904e-06, | |
| "loss": 0.499, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5527123848515865, | |
| "grad_norm": 0.2520701289176941, | |
| "learning_rate": 3.6625514403292183e-06, | |
| "loss": 0.4969, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5834186284544524, | |
| "grad_norm": 0.2760030925273895, | |
| "learning_rate": 3.868312757201647e-06, | |
| "loss": 0.4972, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6141248720573184, | |
| "grad_norm": 0.24590979516506195, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 0.492, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6448311156601843, | |
| "grad_norm": 0.24135281145572662, | |
| "learning_rate": 4.2798353909465025e-06, | |
| "loss": 0.4861, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.6755373592630501, | |
| "grad_norm": 0.2440766990184784, | |
| "learning_rate": 4.485596707818931e-06, | |
| "loss": 0.4924, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.706243602865916, | |
| "grad_norm": 0.25154179334640503, | |
| "learning_rate": 4.691358024691358e-06, | |
| "loss": 0.4877, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.736949846468782, | |
| "grad_norm": 0.2469755858182907, | |
| "learning_rate": 4.897119341563787e-06, | |
| "loss": 0.4821, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7676560900716479, | |
| "grad_norm": 0.282875657081604, | |
| "learning_rate": 5.102880658436214e-06, | |
| "loss": 0.4849, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.7983623336745138, | |
| "grad_norm": 0.24434781074523926, | |
| "learning_rate": 5.3086419753086425e-06, | |
| "loss": 0.4842, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8290685772773797, | |
| "grad_norm": 0.2680870294570923, | |
| "learning_rate": 5.514403292181071e-06, | |
| "loss": 0.4801, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.8597748208802457, | |
| "grad_norm": 0.25792691111564636, | |
| "learning_rate": 5.720164609053498e-06, | |
| "loss": 0.4811, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.8904810644831116, | |
| "grad_norm": 0.25268977880477905, | |
| "learning_rate": 5.925925925925926e-06, | |
| "loss": 0.4773, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9211873080859775, | |
| "grad_norm": 0.2891627848148346, | |
| "learning_rate": 6.131687242798354e-06, | |
| "loss": 0.48, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9518935516888434, | |
| "grad_norm": 0.27274736762046814, | |
| "learning_rate": 6.3374485596707825e-06, | |
| "loss": 0.4745, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.9825997952917093, | |
| "grad_norm": 0.4910643398761749, | |
| "learning_rate": 6.543209876543211e-06, | |
| "loss": 0.4814, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.0184237461617196, | |
| "grad_norm": 0.35264864563941956, | |
| "learning_rate": 6.748971193415639e-06, | |
| "loss": 0.568, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.0491299897645854, | |
| "grad_norm": 0.2881726026535034, | |
| "learning_rate": 6.954732510288067e-06, | |
| "loss": 0.4704, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.0798362333674514, | |
| "grad_norm": 0.27516046166419983, | |
| "learning_rate": 7.160493827160494e-06, | |
| "loss": 0.469, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.1105424769703174, | |
| "grad_norm": 0.28540414571762085, | |
| "learning_rate": 7.3662551440329225e-06, | |
| "loss": 0.4721, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.1412487205731832, | |
| "grad_norm": 0.28977257013320923, | |
| "learning_rate": 7.57201646090535e-06, | |
| "loss": 0.4654, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.1719549641760492, | |
| "grad_norm": 0.2729812264442444, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.4667, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.202661207778915, | |
| "grad_norm": 0.3296419084072113, | |
| "learning_rate": 7.983539094650207e-06, | |
| "loss": 0.4626, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.233367451381781, | |
| "grad_norm": 0.2620701491832733, | |
| "learning_rate": 8.189300411522634e-06, | |
| "loss": 0.4668, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.264073694984647, | |
| "grad_norm": 0.29654592275619507, | |
| "learning_rate": 8.395061728395062e-06, | |
| "loss": 0.4618, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.2947799385875127, | |
| "grad_norm": 0.29271960258483887, | |
| "learning_rate": 8.60082304526749e-06, | |
| "loss": 0.4669, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.3254861821903787, | |
| "grad_norm": 0.3072218596935272, | |
| "learning_rate": 8.806584362139918e-06, | |
| "loss": 0.462, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.3561924257932447, | |
| "grad_norm": 0.28791937232017517, | |
| "learning_rate": 9.012345679012346e-06, | |
| "loss": 0.4644, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.3868986693961105, | |
| "grad_norm": 0.49683865904808044, | |
| "learning_rate": 9.218106995884775e-06, | |
| "loss": 0.4644, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.4176049129989765, | |
| "grad_norm": 0.3112960159778595, | |
| "learning_rate": 9.423868312757202e-06, | |
| "loss": 0.4658, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.4483111566018425, | |
| "grad_norm": 0.2987211048603058, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.457, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.4790174002047083, | |
| "grad_norm": 0.32146817445755005, | |
| "learning_rate": 9.835390946502057e-06, | |
| "loss": 0.4617, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.509723643807574, | |
| "grad_norm": 0.5294284820556641, | |
| "learning_rate": 9.999994841278135e-06, | |
| "loss": 0.4639, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.54042988741044, | |
| "grad_norm": 0.28634026646614075, | |
| "learning_rate": 9.99981428713058e-06, | |
| "loss": 0.4641, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.571136131013306, | |
| "grad_norm": 0.30058643221855164, | |
| "learning_rate": 9.999375807534642e-06, | |
| "loss": 0.4614, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.6018423746161718, | |
| "grad_norm": 0.2915442883968353, | |
| "learning_rate": 9.998679425110168e-06, | |
| "loss": 0.4627, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6325486182190379, | |
| "grad_norm": 0.3363260328769684, | |
| "learning_rate": 9.997725175781445e-06, | |
| "loss": 0.4609, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.6632548618219039, | |
| "grad_norm": 0.31282299757003784, | |
| "learning_rate": 9.996513108775338e-06, | |
| "loss": 0.4591, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.6939611054247696, | |
| "grad_norm": 0.3025549650192261, | |
| "learning_rate": 9.995043286618752e-06, | |
| "loss": 0.4593, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.7246673490276356, | |
| "grad_norm": 0.2985345721244812, | |
| "learning_rate": 9.993315785135417e-06, | |
| "loss": 0.4576, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.7553735926305016, | |
| "grad_norm": 0.3218015134334564, | |
| "learning_rate": 9.991330693441956e-06, | |
| "loss": 0.4553, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.7860798362333674, | |
| "grad_norm": 0.32680895924568176, | |
| "learning_rate": 9.989088113943309e-06, | |
| "loss": 0.4583, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.8167860798362334, | |
| "grad_norm": 0.29413729906082153, | |
| "learning_rate": 9.986588162327436e-06, | |
| "loss": 0.4598, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.8474923234390994, | |
| "grad_norm": 0.3157609701156616, | |
| "learning_rate": 9.983830967559355e-06, | |
| "loss": 0.4566, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.8781985670419652, | |
| "grad_norm": 0.29272714257240295, | |
| "learning_rate": 9.98081667187449e-06, | |
| "loss": 0.4521, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.908904810644831, | |
| "grad_norm": 0.31150153279304504, | |
| "learning_rate": 9.977545430771332e-06, | |
| "loss": 0.4582, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.9396110542476972, | |
| "grad_norm": 0.31409797072410583, | |
| "learning_rate": 9.974017413003407e-06, | |
| "loss": 0.4543, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.970317297850563, | |
| "grad_norm": 0.3109057545661926, | |
| "learning_rate": 9.970232800570594e-06, | |
| "loss": 0.4543, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.006141248720573, | |
| "grad_norm": 0.656970202922821, | |
| "learning_rate": 9.966191788709716e-06, | |
| "loss": 0.5494, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.036847492323439, | |
| "grad_norm": 0.3206768333911896, | |
| "learning_rate": 9.961894585884472e-06, | |
| "loss": 0.4347, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.067553735926305, | |
| "grad_norm": 0.35229599475860596, | |
| "learning_rate": 9.957341413774693e-06, | |
| "loss": 0.4389, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.0982599795291708, | |
| "grad_norm": 0.3107539117336273, | |
| "learning_rate": 9.952532507264892e-06, | |
| "loss": 0.4381, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.128966223132037, | |
| "grad_norm": 0.3630954325199127, | |
| "learning_rate": 9.947468114432156e-06, | |
| "loss": 0.4396, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.1596724667349028, | |
| "grad_norm": 0.3643966019153595, | |
| "learning_rate": 9.942148496533348e-06, | |
| "loss": 0.4403, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.1903787103377685, | |
| "grad_norm": 0.2925087809562683, | |
| "learning_rate": 9.936573927991631e-06, | |
| "loss": 0.4381, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.2210849539406348, | |
| "grad_norm": 0.30517083406448364, | |
| "learning_rate": 9.930744696382298e-06, | |
| "loss": 0.4372, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.2517911975435005, | |
| "grad_norm": 0.30064260959625244, | |
| "learning_rate": 9.924661102417959e-06, | |
| "loss": 0.4413, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.2824974411463663, | |
| "grad_norm": 0.3043610155582428, | |
| "learning_rate": 9.918323459933006e-06, | |
| "loss": 0.4373, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.313203684749232, | |
| "grad_norm": 0.35172680020332336, | |
| "learning_rate": 9.911732095867443e-06, | |
| "loss": 0.4369, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.3439099283520983, | |
| "grad_norm": 0.3194226026535034, | |
| "learning_rate": 9.904887350250002e-06, | |
| "loss": 0.4377, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.374616171954964, | |
| "grad_norm": 0.33400508761405945, | |
| "learning_rate": 9.897789576180617e-06, | |
| "loss": 0.4389, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.40532241555783, | |
| "grad_norm": 0.3146711587905884, | |
| "learning_rate": 9.8904391398122e-06, | |
| "loss": 0.4392, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.436028659160696, | |
| "grad_norm": 0.3004317581653595, | |
| "learning_rate": 9.882836420331753e-06, | |
| "loss": 0.4382, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.466734902763562, | |
| "grad_norm": 0.28334906697273254, | |
| "learning_rate": 9.87498180994081e-06, | |
| "loss": 0.4369, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.4974411463664277, | |
| "grad_norm": 0.31731870770454407, | |
| "learning_rate": 9.8668757138352e-06, | |
| "loss": 0.4371, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.528147389969294, | |
| "grad_norm": 0.31999263167381287, | |
| "learning_rate": 9.858518550184154e-06, | |
| "loss": 0.4361, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.5588536335721597, | |
| "grad_norm": 0.288147896528244, | |
| "learning_rate": 9.849910750108718e-06, | |
| "loss": 0.438, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.5895598771750254, | |
| "grad_norm": 0.28145623207092285, | |
| "learning_rate": 9.841052757659525e-06, | |
| "loss": 0.4375, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.6202661207778917, | |
| "grad_norm": 0.30709531903266907, | |
| "learning_rate": 9.831945029793884e-06, | |
| "loss": 0.4403, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.6509723643807575, | |
| "grad_norm": 0.285591185092926, | |
| "learning_rate": 9.822588036352201e-06, | |
| "loss": 0.4375, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.6816786079836232, | |
| "grad_norm": 0.2885895371437073, | |
| "learning_rate": 9.812982260033753e-06, | |
| "loss": 0.4402, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.7123848515864895, | |
| "grad_norm": 0.28683528304100037, | |
| "learning_rate": 9.803128196371778e-06, | |
| "loss": 0.4364, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.7430910951893552, | |
| "grad_norm": 0.3029756247997284, | |
| "learning_rate": 9.793026353707915e-06, | |
| "loss": 0.4377, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.773797338792221, | |
| "grad_norm": 0.3118147850036621, | |
| "learning_rate": 9.782677253165979e-06, | |
| "loss": 0.4344, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.8045035823950872, | |
| "grad_norm": 0.30943065881729126, | |
| "learning_rate": 9.77208142862508e-06, | |
| "loss": 0.4377, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.835209825997953, | |
| "grad_norm": 0.3110142946243286, | |
| "learning_rate": 9.761239426692077e-06, | |
| "loss": 0.4361, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.865916069600819, | |
| "grad_norm": 0.29598164558410645, | |
| "learning_rate": 9.750151806673389e-06, | |
| "loss": 0.4357, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.896622313203685, | |
| "grad_norm": 0.3019062280654907, | |
| "learning_rate": 9.738819140546135e-06, | |
| "loss": 0.433, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.927328556806551, | |
| "grad_norm": 0.28945985436439514, | |
| "learning_rate": 9.727242012928622e-06, | |
| "loss": 0.4397, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.9580348004094166, | |
| "grad_norm": 0.31174397468566895, | |
| "learning_rate": 9.715421021050205e-06, | |
| "loss": 0.4331, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.9887410440122824, | |
| "grad_norm": 0.306142121553421, | |
| "learning_rate": 9.703356774720454e-06, | |
| "loss": 0.4356, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.024564994882293, | |
| "grad_norm": 0.3875485062599182, | |
| "learning_rate": 9.69104989629772e-06, | |
| "loss": 0.5058, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.0552712384851586, | |
| "grad_norm": 0.3581351339817047, | |
| "learning_rate": 9.678501020657008e-06, | |
| "loss": 0.4168, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.0859774820880244, | |
| "grad_norm": 0.32925835251808167, | |
| "learning_rate": 9.665710795157236e-06, | |
| "loss": 0.4162, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.1166837256908906, | |
| "grad_norm": 0.33935922384262085, | |
| "learning_rate": 9.652679879607843e-06, | |
| "loss": 0.4188, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.1473899692937564, | |
| "grad_norm": 0.3029758930206299, | |
| "learning_rate": 9.639408946234745e-06, | |
| "loss": 0.4175, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.178096212896622, | |
| "grad_norm": 0.32193148136138916, | |
| "learning_rate": 9.625898679645656e-06, | |
| "loss": 0.4163, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.2088024564994884, | |
| "grad_norm": 0.34081465005874634, | |
| "learning_rate": 9.612149776794776e-06, | |
| "loss": 0.4153, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.239508700102354, | |
| "grad_norm": 0.31485581398010254, | |
| "learning_rate": 9.59816294694684e-06, | |
| "loss": 0.4133, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.27021494370522, | |
| "grad_norm": 0.32688620686531067, | |
| "learning_rate": 9.583938911640513e-06, | |
| "loss": 0.4211, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.300921187308086, | |
| "grad_norm": 0.3346046209335327, | |
| "learning_rate": 9.569478404651192e-06, | |
| "loss": 0.4172, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.331627430910952, | |
| "grad_norm": 0.29652711749076843, | |
| "learning_rate": 9.55478217195313e-06, | |
| "loss": 0.4146, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.3623336745138177, | |
| "grad_norm": 0.33152851462364197, | |
| "learning_rate": 9.53985097168097e-06, | |
| "loss": 0.4185, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.393039918116684, | |
| "grad_norm": 0.32041046023368835, | |
| "learning_rate": 9.524685574090627e-06, | |
| "loss": 0.4178, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.4237461617195497, | |
| "grad_norm": 0.3175600469112396, | |
| "learning_rate": 9.50928676151955e-06, | |
| "loss": 0.418, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.4544524053224155, | |
| "grad_norm": 0.4362170696258545, | |
| "learning_rate": 9.493655328346378e-06, | |
| "loss": 0.4181, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.4851586489252817, | |
| "grad_norm": 0.28091391921043396, | |
| "learning_rate": 9.477792080949938e-06, | |
| "loss": 0.4193, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.5158648925281475, | |
| "grad_norm": 0.30598944425582886, | |
| "learning_rate": 9.461697837667668e-06, | |
| "loss": 0.418, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.5465711361310133, | |
| "grad_norm": 0.2786954641342163, | |
| "learning_rate": 9.445373428753386e-06, | |
| "loss": 0.4176, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.5772773797338795, | |
| "grad_norm": 0.30698952078819275, | |
| "learning_rate": 9.42881969633447e-06, | |
| "loss": 0.4171, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.6079836233367453, | |
| "grad_norm": 0.3353785276412964, | |
| "learning_rate": 9.412037494368412e-06, | |
| "loss": 0.4212, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.638689866939611, | |
| "grad_norm": 0.48117902874946594, | |
| "learning_rate": 9.395027688598756e-06, | |
| "loss": 0.4201, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.669396110542477, | |
| "grad_norm": 0.2947028577327728, | |
| "learning_rate": 9.377791156510456e-06, | |
| "loss": 0.4211, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.7001023541453426, | |
| "grad_norm": 0.3083222210407257, | |
| "learning_rate": 9.360328787284587e-06, | |
| "loss": 0.4184, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.730808597748209, | |
| "grad_norm": 0.28462743759155273, | |
| "learning_rate": 9.342641481752492e-06, | |
| "loss": 0.4194, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.7615148413510746, | |
| "grad_norm": 0.29272404313087463, | |
| "learning_rate": 9.324730152349305e-06, | |
| "loss": 0.4139, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.7922210849539404, | |
| "grad_norm": 0.305203914642334, | |
| "learning_rate": 9.306595723066878e-06, | |
| "loss": 0.4205, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.8229273285568066, | |
| "grad_norm": 0.3017217516899109, | |
| "learning_rate": 9.28823912940612e-06, | |
| "loss": 0.4195, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.8536335721596724, | |
| "grad_norm": 0.30164411664009094, | |
| "learning_rate": 9.26966131832873e-06, | |
| "loss": 0.4201, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.884339815762538, | |
| "grad_norm": 0.2872079312801361, | |
| "learning_rate": 9.250863248208357e-06, | |
| "loss": 0.4209, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.9150460593654044, | |
| "grad_norm": 0.29657259583473206, | |
| "learning_rate": 9.231845888781153e-06, | |
| "loss": 0.4179, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.94575230296827, | |
| "grad_norm": 0.2927268147468567, | |
| "learning_rate": 9.212610221095748e-06, | |
| "loss": 0.421, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.976458546571136, | |
| "grad_norm": 0.37469282746315, | |
| "learning_rate": 9.193157237462642e-06, | |
| "loss": 0.4188, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 4.012282497441146, | |
| "grad_norm": 0.41009268164634705, | |
| "learning_rate": 9.173487941403011e-06, | |
| "loss": 0.4919, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.042988741044012, | |
| "grad_norm": 0.3871892988681793, | |
| "learning_rate": 9.153603347596946e-06, | |
| "loss": 0.3978, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.073694984646878, | |
| "grad_norm": 0.34029635787010193, | |
| "learning_rate": 9.133504481831103e-06, | |
| "loss": 0.4014, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.104401228249744, | |
| "grad_norm": 0.3581830859184265, | |
| "learning_rate": 9.113192380945783e-06, | |
| "loss": 0.3992, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.13510747185261, | |
| "grad_norm": 0.33405575156211853, | |
| "learning_rate": 9.092668092781454e-06, | |
| "loss": 0.4003, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.165813715455476, | |
| "grad_norm": 0.35374191403388977, | |
| "learning_rate": 9.071932676124686e-06, | |
| "loss": 0.3983, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.1965199590583415, | |
| "grad_norm": 0.32420825958251953, | |
| "learning_rate": 9.050987200653538e-06, | |
| "loss": 0.3992, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.227226202661208, | |
| "grad_norm": 0.307517409324646, | |
| "learning_rate": 9.029832746882372e-06, | |
| "loss": 0.4005, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.257932446264074, | |
| "grad_norm": 0.3153163492679596, | |
| "learning_rate": 9.008470406106118e-06, | |
| "loss": 0.3957, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.288638689866939, | |
| "grad_norm": 0.32640504837036133, | |
| "learning_rate": 8.986901280343973e-06, | |
| "loss": 0.4002, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.3193449334698055, | |
| "grad_norm": 0.29307159781455994, | |
| "learning_rate": 8.96512648228255e-06, | |
| "loss": 0.4008, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.350051177072672, | |
| "grad_norm": 0.3047451078891754, | |
| "learning_rate": 8.943147135218482e-06, | |
| "loss": 0.4006, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.380757420675537, | |
| "grad_norm": 0.3649887442588806, | |
| "learning_rate": 8.920964373000474e-06, | |
| "loss": 0.4013, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.411463664278403, | |
| "grad_norm": 0.630779504776001, | |
| "learning_rate": 8.898579339970806e-06, | |
| "loss": 0.4024, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.4421699078812695, | |
| "grad_norm": 0.2852974236011505, | |
| "learning_rate": 8.875993190906309e-06, | |
| "loss": 0.4028, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.472876151484135, | |
| "grad_norm": 0.35749351978302, | |
| "learning_rate": 8.85320709095878e-06, | |
| "loss": 0.4001, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.503582395087001, | |
| "grad_norm": 0.32927122712135315, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.4007, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.534288638689867, | |
| "grad_norm": 0.2845518887042999, | |
| "learning_rate": 8.80703975053554e-06, | |
| "loss": 0.3998, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.564994882292733, | |
| "grad_norm": 0.29234543442726135, | |
| "learning_rate": 8.783660891694683e-06, | |
| "loss": 0.4031, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.595701125895599, | |
| "grad_norm": 0.2943549156188965, | |
| "learning_rate": 8.760086845117648e-06, | |
| "loss": 0.3995, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.626407369498464, | |
| "grad_norm": 0.2838557958602905, | |
| "learning_rate": 8.736318826918909e-06, | |
| "loss": 0.3998, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.65711361310133, | |
| "grad_norm": 0.29005348682403564, | |
| "learning_rate": 8.71235806321936e-06, | |
| "loss": 0.4007, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.687819856704197, | |
| "grad_norm": 0.300619512796402, | |
| "learning_rate": 8.688205790083053e-06, | |
| "loss": 0.3994, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.718526100307063, | |
| "grad_norm": 0.3142443895339966, | |
| "learning_rate": 8.663863253453444e-06, | |
| "loss": 0.4026, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.749232343909928, | |
| "grad_norm": 0.3032344877719879, | |
| "learning_rate": 8.639331709089107e-06, | |
| "loss": 0.403, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.779938587512794, | |
| "grad_norm": 0.4341191351413727, | |
| "learning_rate": 8.614612422498965e-06, | |
| "loss": 0.4014, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.81064483111566, | |
| "grad_norm": 0.29899492859840393, | |
| "learning_rate": 8.589706668876995e-06, | |
| "loss": 0.4029, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.841351074718526, | |
| "grad_norm": 0.30844879150390625, | |
| "learning_rate": 8.564615733036457e-06, | |
| "loss": 0.4009, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.872057318321392, | |
| "grad_norm": 0.3283848166465759, | |
| "learning_rate": 8.539340909343597e-06, | |
| "loss": 0.4042, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.9027635619242576, | |
| "grad_norm": 0.287864089012146, | |
| "learning_rate": 8.513883501650892e-06, | |
| "loss": 0.403, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.933469805527124, | |
| "grad_norm": 0.3053203523159027, | |
| "learning_rate": 8.488244823229781e-06, | |
| "loss": 0.4041, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.96417604912999, | |
| "grad_norm": 0.30933913588523865, | |
| "learning_rate": 8.462426196702912e-06, | |
| "loss": 0.3996, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.994882292732855, | |
| "grad_norm": 0.29554951190948486, | |
| "learning_rate": 8.436428953975921e-06, | |
| "loss": 0.3998, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 5.030706243602866, | |
| "grad_norm": 0.38984817266464233, | |
| "learning_rate": 8.41025443616872e-06, | |
| "loss": 0.4632, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 5.061412487205732, | |
| "grad_norm": 0.33434659242630005, | |
| "learning_rate": 8.38390399354631e-06, | |
| "loss": 0.3828, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.092118730808598, | |
| "grad_norm": 0.3267410099506378, | |
| "learning_rate": 8.357378985449124e-06, | |
| "loss": 0.382, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 5.122824974411464, | |
| "grad_norm": 0.33452486991882324, | |
| "learning_rate": 8.330680780222907e-06, | |
| "loss": 0.3821, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.153531218014329, | |
| "grad_norm": 0.3105202913284302, | |
| "learning_rate": 8.303810755148127e-06, | |
| "loss": 0.3843, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 5.184237461617196, | |
| "grad_norm": 0.3055059313774109, | |
| "learning_rate": 8.276770296368922e-06, | |
| "loss": 0.3873, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.214943705220062, | |
| "grad_norm": 0.2939026653766632, | |
| "learning_rate": 8.249560798821592e-06, | |
| "loss": 0.3822, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 5.245649948822927, | |
| "grad_norm": 0.32671213150024414, | |
| "learning_rate": 8.222183666162647e-06, | |
| "loss": 0.3825, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.276356192425793, | |
| "grad_norm": 0.3374998867511749, | |
| "learning_rate": 8.194640310696383e-06, | |
| "loss": 0.3865, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 5.30706243602866, | |
| "grad_norm": 0.3487096130847931, | |
| "learning_rate": 8.16693215330204e-06, | |
| "loss": 0.3833, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.337768679631525, | |
| "grad_norm": 0.3201228976249695, | |
| "learning_rate": 8.139060623360494e-06, | |
| "loss": 0.3837, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 5.368474923234391, | |
| "grad_norm": 0.30659347772598267, | |
| "learning_rate": 8.111027158680516e-06, | |
| "loss": 0.3842, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.399181166837257, | |
| "grad_norm": 0.2944610118865967, | |
| "learning_rate": 8.082833205424614e-06, | |
| "loss": 0.385, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 5.429887410440123, | |
| "grad_norm": 0.3064741790294647, | |
| "learning_rate": 8.054480218034415e-06, | |
| "loss": 0.3836, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.460593654042989, | |
| "grad_norm": 0.3105819523334503, | |
| "learning_rate": 8.02596965915564e-06, | |
| "loss": 0.3864, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 5.491299897645854, | |
| "grad_norm": 0.3280338644981384, | |
| "learning_rate": 7.997302999562657e-06, | |
| "loss": 0.3836, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.5220061412487205, | |
| "grad_norm": 0.3340366780757904, | |
| "learning_rate": 7.968481718082601e-06, | |
| "loss": 0.382, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 5.552712384851587, | |
| "grad_norm": 0.30426710844039917, | |
| "learning_rate": 7.93950730151908e-06, | |
| "loss": 0.3825, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.583418628454452, | |
| "grad_norm": 0.3064902126789093, | |
| "learning_rate": 7.910381244575491e-06, | |
| "loss": 0.3838, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 5.614124872057318, | |
| "grad_norm": 0.2979416251182556, | |
| "learning_rate": 7.881105049777902e-06, | |
| "loss": 0.3876, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 5.6448311156601845, | |
| "grad_norm": 0.29067227244377136, | |
| "learning_rate": 7.851680227397541e-06, | |
| "loss": 0.3898, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 5.67553735926305, | |
| "grad_norm": 0.3097393214702606, | |
| "learning_rate": 7.82210829537289e-06, | |
| "loss": 0.3821, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 5.706243602865916, | |
| "grad_norm": 0.3296332061290741, | |
| "learning_rate": 7.792390779231374e-06, | |
| "loss": 0.3853, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.736949846468782, | |
| "grad_norm": 0.3411824405193329, | |
| "learning_rate": 7.762529212010675e-06, | |
| "loss": 0.3857, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 5.767656090071648, | |
| "grad_norm": 0.30363622307777405, | |
| "learning_rate": 7.732525134179625e-06, | |
| "loss": 0.3869, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 5.798362333674514, | |
| "grad_norm": 0.38818359375, | |
| "learning_rate": 7.702380093558766e-06, | |
| "loss": 0.3884, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 5.82906857727738, | |
| "grad_norm": 0.302729070186615, | |
| "learning_rate": 7.672095645240479e-06, | |
| "loss": 0.3849, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 5.859774820880245, | |
| "grad_norm": 0.29930469393730164, | |
| "learning_rate": 7.641673351508774e-06, | |
| "loss": 0.3835, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.890481064483112, | |
| "grad_norm": 0.28438737988471985, | |
| "learning_rate": 7.6111147817586925e-06, | |
| "loss": 0.385, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 5.921187308085978, | |
| "grad_norm": 0.2847469747066498, | |
| "learning_rate": 7.580421512415349e-06, | |
| "loss": 0.385, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 5.951893551688843, | |
| "grad_norm": 0.2834565043449402, | |
| "learning_rate": 7.549595126852605e-06, | |
| "loss": 0.3846, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 5.982599795291709, | |
| "grad_norm": 0.2991902232170105, | |
| "learning_rate": 7.518637215311388e-06, | |
| "loss": 0.385, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 6.018423746161719, | |
| "grad_norm": 0.3554234802722931, | |
| "learning_rate": 7.487549374817662e-06, | |
| "loss": 0.4497, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 6.049129989764586, | |
| "grad_norm": 0.379526287317276, | |
| "learning_rate": 7.456333209100032e-06, | |
| "loss": 0.367, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.079836233367452, | |
| "grad_norm": 0.32687127590179443, | |
| "learning_rate": 7.424990328507017e-06, | |
| "loss": 0.3642, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 6.110542476970317, | |
| "grad_norm": 0.3256913423538208, | |
| "learning_rate": 7.393522349923981e-06, | |
| "loss": 0.3662, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.141248720573183, | |
| "grad_norm": 0.3289490044116974, | |
| "learning_rate": 7.361930896689713e-06, | |
| "loss": 0.3668, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 6.171954964176049, | |
| "grad_norm": 0.3317910134792328, | |
| "learning_rate": 7.330217598512696e-06, | |
| "loss": 0.3698, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.202661207778915, | |
| "grad_norm": 0.3371947705745697, | |
| "learning_rate": 7.2983840913870215e-06, | |
| "loss": 0.3669, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 6.233367451381781, | |
| "grad_norm": 0.32799994945526123, | |
| "learning_rate": 7.266432017508008e-06, | |
| "loss": 0.3699, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.2640736949846465, | |
| "grad_norm": 0.3218367397785187, | |
| "learning_rate": 7.234363025187474e-06, | |
| "loss": 0.3661, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 6.294779938587513, | |
| "grad_norm": 0.3230588436126709, | |
| "learning_rate": 7.202178768768711e-06, | |
| "loss": 0.3663, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.325486182190379, | |
| "grad_norm": 0.3200216591358185, | |
| "learning_rate": 7.169880908541136e-06, | |
| "loss": 0.3654, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 6.356192425793244, | |
| "grad_norm": 0.3350008726119995, | |
| "learning_rate": 7.137471110654656e-06, | |
| "loss": 0.3676, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.3868986693961105, | |
| "grad_norm": 0.32792818546295166, | |
| "learning_rate": 7.104951047033697e-06, | |
| "loss": 0.3666, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 6.417604912998977, | |
| "grad_norm": 0.31437090039253235, | |
| "learning_rate": 7.0723223952909694e-06, | |
| "loss": 0.3668, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.448311156601842, | |
| "grad_norm": 0.30968931317329407, | |
| "learning_rate": 7.039586838640918e-06, | |
| "loss": 0.367, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 6.479017400204708, | |
| "grad_norm": 0.2952023148536682, | |
| "learning_rate": 7.006746065812895e-06, | |
| "loss": 0.371, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.5097236438075745, | |
| "grad_norm": 0.3054288625717163, | |
| "learning_rate": 6.973801770964031e-06, | |
| "loss": 0.3666, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 6.54042988741044, | |
| "grad_norm": 0.3081178069114685, | |
| "learning_rate": 6.940755653591859e-06, | |
| "loss": 0.3725, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 6.571136131013306, | |
| "grad_norm": 0.30962446331977844, | |
| "learning_rate": 6.907609418446623e-06, | |
| "loss": 0.3678, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 6.601842374616172, | |
| "grad_norm": 0.37370339035987854, | |
| "learning_rate": 6.8743647754433485e-06, | |
| "loss": 0.3718, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 6.632548618219038, | |
| "grad_norm": 0.34246236085891724, | |
| "learning_rate": 6.841023439573623e-06, | |
| "loss": 0.3707, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 6.663254861821904, | |
| "grad_norm": 0.3560137152671814, | |
| "learning_rate": 6.807587130817134e-06, | |
| "loss": 0.3702, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 6.69396110542477, | |
| "grad_norm": 0.31180328130722046, | |
| "learning_rate": 6.774057574052932e-06, | |
| "loss": 0.3694, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 6.724667349027635, | |
| "grad_norm": 0.3135690987110138, | |
| "learning_rate": 6.740436498970453e-06, | |
| "loss": 0.3685, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 6.755373592630502, | |
| "grad_norm": 0.30377304553985596, | |
| "learning_rate": 6.706725639980294e-06, | |
| "loss": 0.3695, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 6.786079836233368, | |
| "grad_norm": 0.30792489647865295, | |
| "learning_rate": 6.6729267361247295e-06, | |
| "loss": 0.3716, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 6.816786079836233, | |
| "grad_norm": 0.3061661422252655, | |
| "learning_rate": 6.639041530988009e-06, | |
| "loss": 0.3694, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 6.847492323439099, | |
| "grad_norm": 0.3261125683784485, | |
| "learning_rate": 6.605071772606404e-06, | |
| "loss": 0.3696, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 6.878198567041965, | |
| "grad_norm": 0.295218288898468, | |
| "learning_rate": 6.571019213378034e-06, | |
| "loss": 0.3683, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 6.908904810644831, | |
| "grad_norm": 0.30800992250442505, | |
| "learning_rate": 6.536885609972467e-06, | |
| "loss": 0.3711, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 6.939611054247697, | |
| "grad_norm": 0.2889186143875122, | |
| "learning_rate": 6.502672723240103e-06, | |
| "loss": 0.3733, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 6.970317297850563, | |
| "grad_norm": 0.30022063851356506, | |
| "learning_rate": 6.4683823181213224e-06, | |
| "loss": 0.3683, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 7.006141248720573, | |
| "grad_norm": 0.7506215572357178, | |
| "learning_rate": 6.434016163555452e-06, | |
| "loss": 0.4364, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 7.036847492323439, | |
| "grad_norm": 0.3902629315853119, | |
| "learning_rate": 6.399576032389505e-06, | |
| "loss": 0.3502, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.067553735926305, | |
| "grad_norm": 0.3617767095565796, | |
| "learning_rate": 6.365063701286728e-06, | |
| "loss": 0.3541, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 7.098259979529171, | |
| "grad_norm": 0.346937358379364, | |
| "learning_rate": 6.330480950634942e-06, | |
| "loss": 0.3529, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 7.1289662231320365, | |
| "grad_norm": 0.4673236310482025, | |
| "learning_rate": 6.2958295644547026e-06, | |
| "loss": 0.3524, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 7.159672466734903, | |
| "grad_norm": 0.32648974657058716, | |
| "learning_rate": 6.261111330307272e-06, | |
| "loss": 0.3508, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 7.190378710337769, | |
| "grad_norm": 0.31994733214378357, | |
| "learning_rate": 6.22632803920239e-06, | |
| "loss": 0.3491, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 7.221084953940634, | |
| "grad_norm": 0.33214271068573, | |
| "learning_rate": 6.191481485505898e-06, | |
| "loss": 0.3491, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 7.2517911975435005, | |
| "grad_norm": 0.33862602710723877, | |
| "learning_rate": 6.1565734668471614e-06, | |
| "loss": 0.3518, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 7.282497441146367, | |
| "grad_norm": 0.3418470025062561, | |
| "learning_rate": 6.121605784026339e-06, | |
| "loss": 0.3552, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 7.313203684749232, | |
| "grad_norm": 0.30670255422592163, | |
| "learning_rate": 6.086580240921486e-06, | |
| "loss": 0.3499, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 7.343909928352098, | |
| "grad_norm": 0.2968249022960663, | |
| "learning_rate": 6.051498644395496e-06, | |
| "loss": 0.353, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 7.3746161719549645, | |
| "grad_norm": 0.32068145275115967, | |
| "learning_rate": 6.01636280420289e-06, | |
| "loss": 0.3531, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 7.40532241555783, | |
| "grad_norm": 0.30007636547088623, | |
| "learning_rate": 5.981174532896459e-06, | |
| "loss": 0.3505, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 7.436028659160696, | |
| "grad_norm": 0.32065504789352417, | |
| "learning_rate": 5.9459356457337556e-06, | |
| "loss": 0.3488, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 7.466734902763562, | |
| "grad_norm": 0.32740339636802673, | |
| "learning_rate": 5.910647960583458e-06, | |
| "loss": 0.3533, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 7.497441146366428, | |
| "grad_norm": 0.3315354883670807, | |
| "learning_rate": 5.875313297831579e-06, | |
| "loss": 0.3538, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 7.528147389969294, | |
| "grad_norm": 0.3044270873069763, | |
| "learning_rate": 5.839933480287572e-06, | |
| "loss": 0.3525, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 7.558853633572159, | |
| "grad_norm": 0.3230973184108734, | |
| "learning_rate": 5.804510333090287e-06, | |
| "loss": 0.3525, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 7.5895598771750254, | |
| "grad_norm": 0.305621862411499, | |
| "learning_rate": 5.769045683613822e-06, | |
| "loss": 0.3536, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 7.620266120777892, | |
| "grad_norm": 0.30117788910865784, | |
| "learning_rate": 5.733541361373253e-06, | |
| "loss": 0.3531, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 7.650972364380758, | |
| "grad_norm": 0.3083040118217468, | |
| "learning_rate": 5.697999197930259e-06, | |
| "loss": 0.3547, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 7.681678607983623, | |
| "grad_norm": 0.3200486898422241, | |
| "learning_rate": 5.662421026798624e-06, | |
| "loss": 0.3541, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 7.7123848515864895, | |
| "grad_norm": 0.3322235941886902, | |
| "learning_rate": 5.626808683349672e-06, | |
| "loss": 0.3539, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 7.743091095189355, | |
| "grad_norm": 0.3168823719024658, | |
| "learning_rate": 5.591164004717567e-06, | |
| "loss": 0.3571, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 7.773797338792221, | |
| "grad_norm": 0.3052806854248047, | |
| "learning_rate": 5.55548882970455e-06, | |
| "loss": 0.3523, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.804503582395087, | |
| "grad_norm": 0.3077663481235504, | |
| "learning_rate": 5.519784998686081e-06, | |
| "loss": 0.3553, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 7.835209825997953, | |
| "grad_norm": 0.33488982915878296, | |
| "learning_rate": 5.484054353515896e-06, | |
| "loss": 0.3552, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 7.865916069600819, | |
| "grad_norm": 0.31428563594818115, | |
| "learning_rate": 5.448298737430992e-06, | |
| "loss": 0.3539, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 7.896622313203685, | |
| "grad_norm": 0.6753739714622498, | |
| "learning_rate": 5.412519994956543e-06, | |
| "loss": 0.3559, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 7.92732855680655, | |
| "grad_norm": 0.31677567958831787, | |
| "learning_rate": 5.376719971810741e-06, | |
| "loss": 0.3546, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 7.958034800409417, | |
| "grad_norm": 0.3080218732357025, | |
| "learning_rate": 5.340900514809587e-06, | |
| "loss": 0.3544, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 7.988741044012283, | |
| "grad_norm": 0.31371742486953735, | |
| "learning_rate": 5.305063471771614e-06, | |
| "loss": 0.3548, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 8.024564994882292, | |
| "grad_norm": 0.40025079250335693, | |
| "learning_rate": 5.26921069142257e-06, | |
| "loss": 0.4121, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 8.055271238485158, | |
| "grad_norm": 0.35833466053009033, | |
| "learning_rate": 5.233344023300037e-06, | |
| "loss": 0.3357, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 8.085977482088024, | |
| "grad_norm": 0.3553675711154938, | |
| "learning_rate": 5.197465317658036e-06, | |
| "loss": 0.3353, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 8.11668372569089, | |
| "grad_norm": 0.3430934548377991, | |
| "learning_rate": 5.161576425371554e-06, | |
| "loss": 0.3347, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 8.147389969293757, | |
| "grad_norm": 0.33370262384414673, | |
| "learning_rate": 5.125679197841088e-06, | |
| "loss": 0.3362, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 8.178096212896623, | |
| "grad_norm": 0.32513174414634705, | |
| "learning_rate": 5.089775486897121e-06, | |
| "loss": 0.3368, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 8.208802456499487, | |
| "grad_norm": 0.35142919421195984, | |
| "learning_rate": 5.053867144704594e-06, | |
| "loss": 0.3359, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 8.239508700102354, | |
| "grad_norm": 0.32932382822036743, | |
| "learning_rate": 5.017956023667363e-06, | |
| "loss": 0.3359, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 8.27021494370522, | |
| "grad_norm": 0.32409489154815674, | |
| "learning_rate": 4.982043976332638e-06, | |
| "loss": 0.3365, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 8.300921187308086, | |
| "grad_norm": 0.32708844542503357, | |
| "learning_rate": 4.946132855295407e-06, | |
| "loss": 0.3389, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 8.331627430910952, | |
| "grad_norm": 0.30396559834480286, | |
| "learning_rate": 4.910224513102881e-06, | |
| "loss": 0.3361, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 8.362333674513819, | |
| "grad_norm": 0.3169805109500885, | |
| "learning_rate": 4.8743208021589135e-06, | |
| "loss": 0.338, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 8.393039918116683, | |
| "grad_norm": 0.32971227169036865, | |
| "learning_rate": 4.838423574628447e-06, | |
| "loss": 0.3371, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 8.42374616171955, | |
| "grad_norm": 0.43314287066459656, | |
| "learning_rate": 4.802534682341966e-06, | |
| "loss": 0.3376, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 8.454452405322415, | |
| "grad_norm": 0.3369028866291046, | |
| "learning_rate": 4.7666559766999635e-06, | |
| "loss": 0.3376, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 8.485158648925282, | |
| "grad_norm": 0.3581888973712921, | |
| "learning_rate": 4.730789308577432e-06, | |
| "loss": 0.3391, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 8.515864892528148, | |
| "grad_norm": 0.4204093813896179, | |
| "learning_rate": 4.694936528228387e-06, | |
| "loss": 0.3378, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 8.546571136131014, | |
| "grad_norm": 0.3086753487586975, | |
| "learning_rate": 4.659099485190414e-06, | |
| "loss": 0.336, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 8.577277379733879, | |
| "grad_norm": 0.3337133526802063, | |
| "learning_rate": 4.6232800281892604e-06, | |
| "loss": 0.3379, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 8.607983623336745, | |
| "grad_norm": 0.313374787569046, | |
| "learning_rate": 4.587480005043458e-06, | |
| "loss": 0.3385, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 8.638689866939611, | |
| "grad_norm": 0.3218012750148773, | |
| "learning_rate": 4.551701262569009e-06, | |
| "loss": 0.336, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 8.669396110542477, | |
| "grad_norm": 0.3326752781867981, | |
| "learning_rate": 4.515945646484105e-06, | |
| "loss": 0.3407, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 8.700102354145343, | |
| "grad_norm": 0.31090524792671204, | |
| "learning_rate": 4.480215001313919e-06, | |
| "loss": 0.3383, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 8.730808597748208, | |
| "grad_norm": 0.3121759593486786, | |
| "learning_rate": 4.444511170295451e-06, | |
| "loss": 0.3394, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 8.761514841351074, | |
| "grad_norm": 0.3316711485385895, | |
| "learning_rate": 4.408835995282434e-06, | |
| "loss": 0.3406, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 8.79222108495394, | |
| "grad_norm": 0.3368071913719177, | |
| "learning_rate": 4.373191316650328e-06, | |
| "loss": 0.3392, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 8.822927328556807, | |
| "grad_norm": 0.3207019567489624, | |
| "learning_rate": 4.3375789732013775e-06, | |
| "loss": 0.3395, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 8.853633572159673, | |
| "grad_norm": 0.3298065662384033, | |
| "learning_rate": 4.302000802069744e-06, | |
| "loss": 0.3413, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 8.884339815762539, | |
| "grad_norm": 0.30852875113487244, | |
| "learning_rate": 4.2664586386267474e-06, | |
| "loss": 0.3417, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 8.915046059365404, | |
| "grad_norm": 0.31210556626319885, | |
| "learning_rate": 4.230954316386179e-06, | |
| "loss": 0.336, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 8.94575230296827, | |
| "grad_norm": 0.3171374797821045, | |
| "learning_rate": 4.195489666909714e-06, | |
| "loss": 0.3428, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 8.976458546571136, | |
| "grad_norm": 0.32448017597198486, | |
| "learning_rate": 4.160066519712428e-06, | |
| "loss": 0.3413, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 9.012282497441147, | |
| "grad_norm": 0.39366790652275085, | |
| "learning_rate": 4.1246867021684206e-06, | |
| "loss": 0.401, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 9.042988741044013, | |
| "grad_norm": 0.4089623987674713, | |
| "learning_rate": 4.089352039416543e-06, | |
| "loss": 0.3221, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 9.073694984646878, | |
| "grad_norm": 0.5040475130081177, | |
| "learning_rate": 4.054064354266244e-06, | |
| "loss": 0.3216, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 9.104401228249744, | |
| "grad_norm": 0.3531099557876587, | |
| "learning_rate": 4.018825467103542e-06, | |
| "loss": 0.3208, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 9.13510747185261, | |
| "grad_norm": 0.34139832854270935, | |
| "learning_rate": 3.983637195797111e-06, | |
| "loss": 0.3202, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 9.165813715455476, | |
| "grad_norm": 0.3356687128543854, | |
| "learning_rate": 3.948501355604507e-06, | |
| "loss": 0.3236, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 9.196519959058342, | |
| "grad_norm": 0.34154486656188965, | |
| "learning_rate": 3.9134197590785164e-06, | |
| "loss": 0.3201, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 9.227226202661209, | |
| "grad_norm": 0.32101234793663025, | |
| "learning_rate": 3.878394215973663e-06, | |
| "loss": 0.3229, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 9.257932446264073, | |
| "grad_norm": 0.31901758909225464, | |
| "learning_rate": 3.843426533152841e-06, | |
| "loss": 0.3235, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.28863868986694, | |
| "grad_norm": 0.4290211796760559, | |
| "learning_rate": 3.808518514494105e-06, | |
| "loss": 0.3209, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 9.319344933469806, | |
| "grad_norm": 0.34095343947410583, | |
| "learning_rate": 3.773671960797613e-06, | |
| "loss": 0.323, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 9.350051177072672, | |
| "grad_norm": 0.4352162182331085, | |
| "learning_rate": 3.7388886696927317e-06, | |
| "loss": 0.321, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 9.380757420675538, | |
| "grad_norm": 0.3285370171070099, | |
| "learning_rate": 3.704170435545299e-06, | |
| "loss": 0.3227, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 9.411463664278402, | |
| "grad_norm": 0.31492966413497925, | |
| "learning_rate": 3.6695190493650608e-06, | |
| "loss": 0.3238, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 9.442169907881269, | |
| "grad_norm": 0.3235698938369751, | |
| "learning_rate": 3.634936298713274e-06, | |
| "loss": 0.3249, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 9.472876151484135, | |
| "grad_norm": 0.3167796730995178, | |
| "learning_rate": 3.6004239676104957e-06, | |
| "loss": 0.3252, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 9.503582395087001, | |
| "grad_norm": 0.3134520649909973, | |
| "learning_rate": 3.5659838364445505e-06, | |
| "loss": 0.3233, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 9.534288638689867, | |
| "grad_norm": 0.3369145691394806, | |
| "learning_rate": 3.5316176818786797e-06, | |
| "loss": 0.3253, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 9.564994882292734, | |
| "grad_norm": 0.313290536403656, | |
| "learning_rate": 3.497327276759899e-06, | |
| "loss": 0.3275, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 9.595701125895598, | |
| "grad_norm": 0.3463856279850006, | |
| "learning_rate": 3.463114390027533e-06, | |
| "loss": 0.3247, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 9.626407369498464, | |
| "grad_norm": 0.3312392234802246, | |
| "learning_rate": 3.4289807866219683e-06, | |
| "loss": 0.3271, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 9.65711361310133, | |
| "grad_norm": 0.32030391693115234, | |
| "learning_rate": 3.394928227393598e-06, | |
| "loss": 0.3253, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 9.687819856704197, | |
| "grad_norm": 0.3277048170566559, | |
| "learning_rate": 3.3609584690119924e-06, | |
| "loss": 0.325, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 9.718526100307063, | |
| "grad_norm": 0.3145771920681, | |
| "learning_rate": 3.3270732638752713e-06, | |
| "loss": 0.3238, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 9.749232343909929, | |
| "grad_norm": 0.33325299620628357, | |
| "learning_rate": 3.293274360019707e-06, | |
| "loss": 0.3262, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 9.779938587512794, | |
| "grad_norm": 0.3249775469303131, | |
| "learning_rate": 3.259563501029548e-06, | |
| "loss": 0.3263, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 9.81064483111566, | |
| "grad_norm": 0.3150160610675812, | |
| "learning_rate": 3.2259424259470705e-06, | |
| "loss": 0.3248, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 9.841351074718526, | |
| "grad_norm": 0.3632504343986511, | |
| "learning_rate": 3.1924128691828678e-06, | |
| "loss": 0.3234, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 9.872057318321392, | |
| "grad_norm": 0.32622626423835754, | |
| "learning_rate": 3.158976560426379e-06, | |
| "loss": 0.324, | |
| "step": 1600 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 2430, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.48962200614737e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |