{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 10725,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002331002331002331,
      "grad_norm": 2.2309207419995793,
      "learning_rate": 4.655493482309125e-07,
      "loss": 0.868,
      "num_tokens": 1310720.0,
      "step": 5
    },
    {
      "epoch": 0.004662004662004662,
      "grad_norm": 2.1118896401046383,
      "learning_rate": 9.31098696461825e-07,
      "loss": 0.8785,
      "num_tokens": 2621440.0,
      "step": 10
    },
    {
      "epoch": 0.006993006993006993,
      "grad_norm": 1.5940427530885184,
      "learning_rate": 1.3966480446927373e-06,
      "loss": 0.8483,
      "num_tokens": 3932160.0,
      "step": 15
    },
    {
      "epoch": 0.009324009324009324,
      "grad_norm": 1.2314768273947896,
      "learning_rate": 1.86219739292365e-06,
      "loss": 0.8081,
      "num_tokens": 5242880.0,
      "step": 20
    },
    {
      "epoch": 0.011655011655011656,
      "grad_norm": 1.1773776679892092,
      "learning_rate": 2.3277467411545626e-06,
      "loss": 0.81,
      "num_tokens": 6553600.0,
      "step": 25
    },
    {
      "epoch": 0.013986013986013986,
      "grad_norm": 0.956055714801497,
      "learning_rate": 2.7932960893854746e-06,
      "loss": 0.8035,
      "num_tokens": 7864320.0,
      "step": 30
    },
    {
      "epoch": 0.016317016317016316,
      "grad_norm": 0.7062278575164957,
      "learning_rate": 3.2588454376163876e-06,
      "loss": 0.7718,
      "num_tokens": 9154672.0,
      "step": 35
    },
    {
      "epoch": 0.018648018648018648,
      "grad_norm": 0.772524803320663,
      "learning_rate": 3.7243947858473e-06,
      "loss": 0.7217,
      "num_tokens": 10465392.0,
      "step": 40
    },
    {
      "epoch": 0.02097902097902098,
      "grad_norm": 0.5893728153785672,
      "learning_rate": 4.189944134078212e-06,
      "loss": 0.7274,
      "num_tokens": 11776112.0,
      "step": 45
    },
    {
      "epoch": 0.023310023310023312,
      "grad_norm": 0.5065797141230348,
      "learning_rate": 4.655493482309125e-06,
      "loss": 0.7234,
      "num_tokens": 13086832.0,
      "step": 50
    },
    {
      "epoch": 0.02564102564102564,
      "grad_norm": 0.48651817735023045,
      "learning_rate": 5.121042830540038e-06,
      "loss": 0.7267,
      "num_tokens": 14397552.0,
      "step": 55
    },
    {
      "epoch": 0.027972027972027972,
      "grad_norm": 0.4552730080037496,
      "learning_rate": 5.586592178770949e-06,
      "loss": 0.6927,
      "num_tokens": 15708272.0,
      "step": 60
    },
    {
      "epoch": 0.030303030303030304,
      "grad_norm": 0.46005771739470563,
      "learning_rate": 6.052141527001862e-06,
      "loss": 0.6771,
      "num_tokens": 17018992.0,
      "step": 65
    },
    {
      "epoch": 0.03263403263403263,
      "grad_norm": 0.4299236849439654,
      "learning_rate": 6.517690875232775e-06,
      "loss": 0.6663,
      "num_tokens": 18329712.0,
      "step": 70
    },
    {
      "epoch": 0.03496503496503497,
      "grad_norm": 0.4516492360855498,
      "learning_rate": 6.983240223463687e-06,
      "loss": 0.6653,
      "num_tokens": 19640432.0,
      "step": 75
    },
    {
      "epoch": 0.037296037296037296,
      "grad_norm": 0.4149060424751554,
      "learning_rate": 7.4487895716946e-06,
      "loss": 0.6742,
      "num_tokens": 20951152.0,
      "step": 80
    },
    {
      "epoch": 0.039627039627039624,
      "grad_norm": 0.4609610250520845,
      "learning_rate": 7.914338919925513e-06,
      "loss": 0.6415,
      "num_tokens": 22261872.0,
      "step": 85
    },
    {
      "epoch": 0.04195804195804196,
      "grad_norm": 0.43308223843066856,
      "learning_rate": 8.379888268156424e-06,
      "loss": 0.6748,
      "num_tokens": 23572592.0,
      "step": 90
    },
    {
      "epoch": 0.04428904428904429,
      "grad_norm": 0.4477929543500944,
      "learning_rate": 8.845437616387337e-06,
      "loss": 0.6767,
      "num_tokens": 24883312.0,
      "step": 95
    },
    {
      "epoch": 0.046620046620046623,
      "grad_norm": 0.43548039599877864,
      "learning_rate": 9.31098696461825e-06,
      "loss": 0.6415,
      "num_tokens": 26194032.0,
      "step": 100
    },
    {
      "epoch": 0.04895104895104895,
      "grad_norm": 0.49046398552976145,
      "learning_rate": 9.776536312849161e-06,
      "loss": 0.6372,
      "num_tokens": 27504752.0,
      "step": 105
    },
    {
      "epoch": 0.05128205128205128,
      "grad_norm": 0.4895199721021838,
      "learning_rate": 1.0242085661080076e-05,
      "loss": 0.6416,
      "num_tokens": 28815472.0,
      "step": 110
    },
    {
      "epoch": 0.053613053613053616,
      "grad_norm": 0.46201645981993494,
      "learning_rate": 1.0707635009310987e-05,
      "loss": 0.6493,
      "num_tokens": 30126192.0,
      "step": 115
    },
    {
      "epoch": 0.055944055944055944,
      "grad_norm": 0.46502125769455865,
      "learning_rate": 1.1173184357541899e-05,
      "loss": 0.6521,
      "num_tokens": 31436912.0,
      "step": 120
    },
    {
      "epoch": 0.05827505827505827,
      "grad_norm": 0.4531752952796131,
      "learning_rate": 1.1638733705772813e-05,
      "loss": 0.6332,
      "num_tokens": 32747632.0,
      "step": 125
    },
    {
      "epoch": 0.06060606060606061,
      "grad_norm": 0.4905266086400909,
      "learning_rate": 1.2104283054003724e-05,
      "loss": 0.6285,
      "num_tokens": 34058352.0,
      "step": 130
    },
    {
      "epoch": 0.06293706293706294,
      "grad_norm": 0.47150776268903466,
      "learning_rate": 1.2569832402234637e-05,
      "loss": 0.6219,
      "num_tokens": 35355669.0,
      "step": 135
    },
    {
      "epoch": 0.06526806526806526,
      "grad_norm": 0.46418902222985564,
      "learning_rate": 1.303538175046555e-05,
      "loss": 0.6285,
      "num_tokens": 36666389.0,
      "step": 140
    },
    {
      "epoch": 0.0675990675990676,
      "grad_norm": 0.5360827942508652,
      "learning_rate": 1.3500931098696462e-05,
      "loss": 0.6311,
      "num_tokens": 37977109.0,
      "step": 145
    },
    {
      "epoch": 0.06993006993006994,
      "grad_norm": 0.4574856423555115,
      "learning_rate": 1.3966480446927374e-05,
      "loss": 0.6265,
      "num_tokens": 39287829.0,
      "step": 150
    },
    {
      "epoch": 0.07226107226107226,
      "grad_norm": 0.509429199061911,
      "learning_rate": 1.4432029795158286e-05,
      "loss": 0.6212,
      "num_tokens": 40598549.0,
      "step": 155
    },
    {
      "epoch": 0.07459207459207459,
      "grad_norm": 0.47709542958562123,
      "learning_rate": 1.48975791433892e-05,
      "loss": 0.6225,
      "num_tokens": 41909269.0,
      "step": 160
    },
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 0.49691106283476394,
      "learning_rate": 1.5363128491620113e-05,
      "loss": 0.6349,
      "num_tokens": 43219989.0,
      "step": 165
    },
    {
      "epoch": 0.07925407925407925,
      "grad_norm": 0.5573190120950272,
      "learning_rate": 1.5828677839851026e-05,
      "loss": 0.637,
      "num_tokens": 44530709.0,
      "step": 170
    },
    {
      "epoch": 0.08158508158508158,
      "grad_norm": 0.49003264776858235,
      "learning_rate": 1.6294227188081936e-05,
      "loss": 0.6199,
      "num_tokens": 45841429.0,
      "step": 175
    },
    {
      "epoch": 0.08391608391608392,
      "grad_norm": 0.48723276020731904,
      "learning_rate": 1.675977653631285e-05,
      "loss": 0.6223,
      "num_tokens": 47152149.0,
      "step": 180
    },
    {
      "epoch": 0.08624708624708624,
      "grad_norm": 0.5590552496737182,
      "learning_rate": 1.7225325884543765e-05,
      "loss": 0.6203,
      "num_tokens": 48462869.0,
      "step": 185
    },
    {
      "epoch": 0.08857808857808858,
      "grad_norm": 0.5019716267640132,
      "learning_rate": 1.7690875232774675e-05,
      "loss": 0.6099,
      "num_tokens": 49773589.0,
      "step": 190
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 0.48128033207729953,
      "learning_rate": 1.8156424581005588e-05,
      "loss": 0.591,
      "num_tokens": 51058347.0,
      "step": 195
    },
    {
      "epoch": 0.09324009324009325,
      "grad_norm": 0.48039162413529596,
      "learning_rate": 1.86219739292365e-05,
      "loss": 0.6084,
      "num_tokens": 52353719.0,
      "step": 200
    },
    {
      "epoch": 0.09557109557109557,
      "grad_norm": 0.48115713986962244,
      "learning_rate": 1.9087523277467413e-05,
      "loss": 0.6106,
      "num_tokens": 53658551.0,
      "step": 205
    },
    {
      "epoch": 0.0979020979020979,
      "grad_norm": 0.4870180321160556,
      "learning_rate": 1.9553072625698323e-05,
      "loss": 0.6173,
      "num_tokens": 54952917.0,
      "step": 210
    },
    {
      "epoch": 0.10023310023310024,
      "grad_norm": 0.5094823620103848,
      "learning_rate": 2.001862197392924e-05,
      "loss": 0.6098,
      "num_tokens": 56263637.0,
      "step": 215
    },
    {
      "epoch": 0.10256410256410256,
      "grad_norm": 0.5013089386317249,
      "learning_rate": 2.0484171322160152e-05,
      "loss": 0.6096,
      "num_tokens": 57574357.0,
      "step": 220
    },
    {
      "epoch": 0.1048951048951049,
      "grad_norm": 0.5232640161264629,
      "learning_rate": 2.0949720670391062e-05,
      "loss": 0.6201,
      "num_tokens": 58885077.0,
      "step": 225
    },
    {
      "epoch": 0.10722610722610723,
      "grad_norm": 0.550970201278118,
      "learning_rate": 2.1415270018621975e-05,
      "loss": 0.593,
      "num_tokens": 60191231.0,
      "step": 230
    },
    {
      "epoch": 0.10955710955710955,
      "grad_norm": 0.6029709013121757,
      "learning_rate": 2.1880819366852888e-05,
      "loss": 0.6074,
      "num_tokens": 61491882.0,
      "step": 235
    },
    {
      "epoch": 0.11188811188811189,
      "grad_norm": 0.5486381526391435,
      "learning_rate": 2.2346368715083797e-05,
      "loss": 0.6098,
      "num_tokens": 62792124.0,
      "step": 240
    },
    {
      "epoch": 0.11421911421911422,
      "grad_norm": 0.6116198358074656,
      "learning_rate": 2.2811918063314713e-05,
      "loss": 0.5992,
      "num_tokens": 64102844.0,
      "step": 245
    },
    {
      "epoch": 0.11655011655011654,
      "grad_norm": 0.6390765452401985,
      "learning_rate": 2.3277467411545626e-05,
      "loss": 0.5884,
      "num_tokens": 65405812.0,
      "step": 250
    },
    {
      "epoch": 0.11888111888111888,
      "grad_norm": 0.5592180145233874,
      "learning_rate": 2.3743016759776536e-05,
      "loss": 0.6115,
      "num_tokens": 66716532.0,
      "step": 255
    },
    {
      "epoch": 0.12121212121212122,
      "grad_norm": 0.5867882274444552,
      "learning_rate": 2.420856610800745e-05,
      "loss": 0.6322,
      "num_tokens": 68027252.0,
      "step": 260
    },
    {
      "epoch": 0.12354312354312354,
      "grad_norm": 0.4978541728996833,
      "learning_rate": 2.4674115456238362e-05,
      "loss": 0.6125,
      "num_tokens": 69337972.0,
      "step": 265
    },
    {
      "epoch": 0.1258741258741259,
      "grad_norm": 0.5176243116469658,
      "learning_rate": 2.5139664804469275e-05,
      "loss": 0.5889,
      "num_tokens": 70648692.0,
      "step": 270
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 0.5205723612646859,
      "learning_rate": 2.5605214152700184e-05,
      "loss": 0.5806,
      "num_tokens": 71949722.0,
      "step": 275
    },
    {
      "epoch": 0.13053613053613053,
      "grad_norm": 0.5105297626501796,
      "learning_rate": 2.60707635009311e-05,
      "loss": 0.5903,
      "num_tokens": 73260442.0,
      "step": 280
    },
    {
      "epoch": 0.13286713286713286,
      "grad_norm": 0.5983979159514128,
      "learning_rate": 2.6536312849162014e-05,
      "loss": 0.572,
      "num_tokens": 74571162.0,
      "step": 285
    },
    {
      "epoch": 0.1351981351981352,
      "grad_norm": 0.5344572430585598,
      "learning_rate": 2.7001862197392923e-05,
      "loss": 0.611,
      "num_tokens": 75881882.0,
      "step": 290
    },
    {
      "epoch": 0.13752913752913754,
      "grad_norm": 0.4971065663534153,
      "learning_rate": 2.746741154562384e-05,
      "loss": 0.584,
      "num_tokens": 77192602.0,
      "step": 295
    },
    {
      "epoch": 0.13986013986013987,
      "grad_norm": 0.47253775531322106,
      "learning_rate": 2.793296089385475e-05,
      "loss": 0.6098,
      "num_tokens": 78503322.0,
      "step": 300
    },
    {
      "epoch": 0.14219114219114218,
      "grad_norm": 0.5871484700261882,
      "learning_rate": 2.8398510242085662e-05,
      "loss": 0.5873,
      "num_tokens": 79814042.0,
      "step": 305
    },
    {
      "epoch": 0.1445221445221445,
      "grad_norm": 0.5397162657687059,
      "learning_rate": 2.886405959031657e-05,
      "loss": 0.5601,
      "num_tokens": 81124762.0,
      "step": 310
    },
    {
      "epoch": 0.14685314685314685,
      "grad_norm": 0.5003299556137542,
      "learning_rate": 2.9329608938547488e-05,
      "loss": 0.5765,
      "num_tokens": 82416836.0,
      "step": 315
    },
    {
      "epoch": 0.14918414918414918,
      "grad_norm": 0.5745464582996782,
      "learning_rate": 2.97951582867784e-05,
      "loss": 0.6041,
      "num_tokens": 83727556.0,
      "step": 320
    },
    {
      "epoch": 0.15151515151515152,
      "grad_norm": 0.6004282699112464,
      "learning_rate": 3.026070763500931e-05,
      "loss": 0.5826,
      "num_tokens": 85038276.0,
      "step": 325
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.6113197906754673,
      "learning_rate": 3.0726256983240227e-05,
      "loss": 0.599,
      "num_tokens": 86348996.0,
      "step": 330
    },
    {
      "epoch": 0.1561771561771562,
      "grad_norm": 0.5284281986815572,
      "learning_rate": 3.1191806331471136e-05,
      "loss": 0.594,
      "num_tokens": 87657694.0,
      "step": 335
    },
    {
      "epoch": 0.1585081585081585,
      "grad_norm": 0.6592854288333747,
      "learning_rate": 3.165735567970205e-05,
      "loss": 0.5971,
      "num_tokens": 88959058.0,
      "step": 340
    },
    {
      "epoch": 0.16083916083916083,
      "grad_norm": 0.6611677002486963,
      "learning_rate": 3.212290502793296e-05,
      "loss": 0.5869,
      "num_tokens": 90269778.0,
      "step": 345
    },
    {
      "epoch": 0.16317016317016317,
      "grad_norm": 0.5569884136454792,
      "learning_rate": 3.258845437616387e-05,
      "loss": 0.5721,
      "num_tokens": 91580498.0,
      "step": 350
    },
    {
      "epoch": 0.1655011655011655,
      "grad_norm": 0.5645452564927658,
      "learning_rate": 3.305400372439479e-05,
      "loss": 0.6015,
      "num_tokens": 92891218.0,
      "step": 355
    },
    {
      "epoch": 0.16783216783216784,
      "grad_norm": 0.6374940933275999,
      "learning_rate": 3.35195530726257e-05,
      "loss": 0.5844,
      "num_tokens": 94201938.0,
      "step": 360
    },
    {
      "epoch": 0.17016317016317017,
      "grad_norm": 0.6011646060068315,
      "learning_rate": 3.3985102420856614e-05,
      "loss": 0.5875,
      "num_tokens": 95507207.0,
      "step": 365
    },
    {
      "epoch": 0.17249417249417248,
      "grad_norm": 0.5884433838979773,
      "learning_rate": 3.445065176908753e-05,
      "loss": 0.5751,
      "num_tokens": 96817927.0,
      "step": 370
    },
    {
      "epoch": 0.17482517482517482,
      "grad_norm": 0.6051679815251396,
      "learning_rate": 3.491620111731844e-05,
      "loss": 0.5858,
      "num_tokens": 98128647.0,
      "step": 375
    },
    {
      "epoch": 0.17715617715617715,
      "grad_norm": 0.6049709145754255,
      "learning_rate": 3.538175046554935e-05,
      "loss": 0.5924,
      "num_tokens": 99432200.0,
      "step": 380
    },
    {
      "epoch": 0.1794871794871795,
      "grad_norm": 0.4705325840654263,
      "learning_rate": 3.584729981378026e-05,
      "loss": 0.5848,
      "num_tokens": 100738580.0,
      "step": 385
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.6888947351263296,
      "learning_rate": 3.6312849162011175e-05,
      "loss": 0.5765,
      "num_tokens": 102049300.0,
      "step": 390
    },
    {
      "epoch": 0.18414918414918416,
      "grad_norm": 0.5752502148380462,
      "learning_rate": 3.6778398510242085e-05,
      "loss": 0.5789,
      "num_tokens": 103360020.0,
      "step": 395
    },
    {
      "epoch": 0.1864801864801865,
      "grad_norm": 0.6432121990501041,
      "learning_rate": 3.7243947858473e-05,
      "loss": 0.5997,
      "num_tokens": 104670740.0,
      "step": 400
    },
    {
      "epoch": 0.1888111888111888,
      "grad_norm": 0.5905703187842473,
      "learning_rate": 3.770949720670392e-05,
      "loss": 0.5887,
      "num_tokens": 105981460.0,
      "step": 405
    },
    {
      "epoch": 0.19114219114219114,
      "grad_norm": 0.6634303714634665,
      "learning_rate": 3.817504655493483e-05,
      "loss": 0.5745,
      "num_tokens": 107287613.0,
      "step": 410
    },
    {
      "epoch": 0.19347319347319347,
      "grad_norm": 0.6069673456722715,
      "learning_rate": 3.8640595903165736e-05,
      "loss": 0.6034,
      "num_tokens": 108598333.0,
      "step": 415
    },
    {
      "epoch": 0.1958041958041958,
      "grad_norm": 0.6162680847616665,
      "learning_rate": 3.9106145251396646e-05,
      "loss": 0.5891,
      "num_tokens": 109909053.0,
      "step": 420
    },
    {
      "epoch": 0.19813519813519814,
      "grad_norm": 0.5857092916452535,
      "learning_rate": 3.957169459962756e-05,
      "loss": 0.5789,
      "num_tokens": 111219773.0,
      "step": 425
    },
    {
      "epoch": 0.20046620046620048,
      "grad_norm": 0.6647569324831807,
      "learning_rate": 4.003724394785848e-05,
      "loss": 0.606,
      "num_tokens": 112530493.0,
      "step": 430
    },
    {
      "epoch": 0.20279720279720279,
      "grad_norm": 0.5524426568425298,
      "learning_rate": 4.050279329608939e-05,
      "loss": 0.5825,
      "num_tokens": 113841213.0,
      "step": 435
    },
    {
      "epoch": 0.20512820512820512,
      "grad_norm": 0.5810994370146649,
      "learning_rate": 4.0968342644320304e-05,
      "loss": 0.6003,
      "num_tokens": 115151933.0,
      "step": 440
    },
    {
      "epoch": 0.20745920745920746,
      "grad_norm": 0.5641669807873702,
      "learning_rate": 4.143389199255121e-05,
      "loss": 0.5916,
      "num_tokens": 116454721.0,
      "step": 445
    },
    {
      "epoch": 0.2097902097902098,
      "grad_norm": 0.5150156412806575,
      "learning_rate": 4.1899441340782123e-05,
      "loss": 0.5836,
      "num_tokens": 117765441.0,
      "step": 450
    },
    {
      "epoch": 0.21212121212121213,
      "grad_norm": 0.47565520542426254,
      "learning_rate": 4.236499068901304e-05,
      "loss": 0.5875,
      "num_tokens": 119076161.0,
      "step": 455
    },
    {
      "epoch": 0.21445221445221446,
      "grad_norm": 0.5296345463311969,
      "learning_rate": 4.283054003724395e-05,
      "loss": 0.5875,
      "num_tokens": 120376968.0,
      "step": 460
    },
    {
      "epoch": 0.21678321678321677,
      "grad_norm": 0.5322255322866658,
      "learning_rate": 4.3296089385474866e-05,
      "loss": 0.5675,
      "num_tokens": 121685949.0,
      "step": 465
    },
    {
      "epoch": 0.2191142191142191,
      "grad_norm": 0.5124276326443321,
      "learning_rate": 4.3761638733705775e-05,
      "loss": 0.5834,
      "num_tokens": 122996669.0,
      "step": 470
    },
    {
      "epoch": 0.22144522144522144,
      "grad_norm": 0.5493568540280628,
      "learning_rate": 4.4227188081936685e-05,
      "loss": 0.5699,
      "num_tokens": 124296690.0,
      "step": 475
    },
    {
      "epoch": 0.22377622377622378,
      "grad_norm": 0.5080116188657589,
      "learning_rate": 4.4692737430167594e-05,
      "loss": 0.5859,
      "num_tokens": 125607410.0,
      "step": 480
    },
    {
      "epoch": 0.2261072261072261,
      "grad_norm": 0.4796592051544516,
      "learning_rate": 4.515828677839851e-05,
      "loss": 0.5707,
      "num_tokens": 126904766.0,
      "step": 485
    },
    {
      "epoch": 0.22843822843822845,
      "grad_norm": 0.6183745714886669,
      "learning_rate": 4.562383612662943e-05,
      "loss": 0.5921,
      "num_tokens": 128215486.0,
      "step": 490
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 0.5361760011420288,
      "learning_rate": 4.6089385474860336e-05,
      "loss": 0.5669,
      "num_tokens": 129521193.0,
      "step": 495
    },
    {
      "epoch": 0.2331002331002331,
      "grad_norm": 0.635330676879503,
      "learning_rate": 4.655493482309125e-05,
      "loss": 0.5884,
      "num_tokens": 130831913.0,
      "step": 500
    },
    {
      "epoch": 0.23543123543123542,
      "grad_norm": 0.7485046943209333,
      "learning_rate": 4.702048417132216e-05,
      "loss": 0.5685,
      "num_tokens": 132142633.0,
      "step": 505
    },
    {
      "epoch": 0.23776223776223776,
      "grad_norm": 0.48973907523525306,
      "learning_rate": 4.748603351955307e-05,
      "loss": 0.5748,
      "num_tokens": 133447076.0,
      "step": 510
    },
    {
      "epoch": 0.2400932400932401,
      "grad_norm": 0.501560835222231,
      "learning_rate": 4.795158286778399e-05,
      "loss": 0.591,
      "num_tokens": 134742243.0,
      "step": 515
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 0.5768660260790422,
      "learning_rate": 4.84171322160149e-05,
      "loss": 0.5918,
      "num_tokens": 136036466.0,
      "step": 520
    },
    {
      "epoch": 0.24475524475524477,
      "grad_norm": 0.5577461974387155,
      "learning_rate": 4.8882681564245814e-05,
      "loss": 0.5881,
      "num_tokens": 137347186.0,
      "step": 525
    },
    {
      "epoch": 0.24708624708624707,
      "grad_norm": 0.511973917006169,
      "learning_rate": 4.9348230912476724e-05,
      "loss": 0.5767,
      "num_tokens": 138657906.0,
      "step": 530
    },
    {
      "epoch": 0.2494172494172494,
      "grad_norm": 0.5058707953577455,
      "learning_rate": 4.981378026070764e-05,
      "loss": 0.5768,
      "num_tokens": 139963192.0,
      "step": 535
    },
    {
      "epoch": 0.2517482517482518,
      "grad_norm": 0.5486438929823766,
      "learning_rate": 4.999999037242581e-05,
      "loss": 0.5832,
      "num_tokens": 141273912.0,
      "step": 540
    },
    {
      "epoch": 0.2540792540792541,
      "grad_norm": 0.5099572474285374,
      "learning_rate": 4.999993153728008e-05,
      "loss": 0.5689,
      "num_tokens": 142570091.0,
      "step": 545
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 0.5750075114680016,
      "learning_rate": 4.9999819215780634e-05,
      "loss": 0.5811,
      "num_tokens": 143880811.0,
      "step": 550
    },
    {
      "epoch": 0.25874125874125875,
      "grad_norm": 0.6134226971370147,
      "learning_rate": 4.9999653408194474e-05,
      "loss": 0.5843,
      "num_tokens": 145191531.0,
      "step": 555
    },
    {
      "epoch": 0.26107226107226106,
      "grad_norm": 0.6314862123358536,
      "learning_rate": 4.999943411491576e-05,
      "loss": 0.5793,
      "num_tokens": 146502251.0,
      "step": 560
    },
    {
      "epoch": 0.2634032634032634,
      "grad_norm": 0.6889606086375162,
      "learning_rate": 4.9999161336465794e-05,
      "loss": 0.5702,
      "num_tokens": 147812971.0,
      "step": 565
    },
    {
      "epoch": 0.26573426573426573,
      "grad_norm": 0.6217160853993534,
      "learning_rate": 4.999883507349302e-05,
      "loss": 0.5774,
      "num_tokens": 149113471.0,
      "step": 570
    },
    {
      "epoch": 0.2680652680652681,
      "grad_norm": 0.8603569158115519,
      "learning_rate": 4.9998455326773e-05,
      "loss": 0.5723,
      "num_tokens": 150424191.0,
      "step": 575
    },
    {
      "epoch": 0.2703962703962704,
      "grad_norm": 0.5345785403032188,
      "learning_rate": 4.9998022097208494e-05,
      "loss": 0.5841,
      "num_tokens": 151734911.0,
      "step": 580
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 0.4897793835702214,
      "learning_rate": 4.9997535385829355e-05,
      "loss": 0.5847,
      "num_tokens": 153045631.0,
      "step": 585
    },
    {
      "epoch": 0.27505827505827507,
      "grad_norm": 0.6403908386621698,
      "learning_rate": 4.9996995193792575e-05,
      "loss": 0.5852,
      "num_tokens": 154356351.0,
      "step": 590
    },
    {
      "epoch": 0.2773892773892774,
      "grad_norm": 0.5209712741969316,
      "learning_rate": 4.9996401522382285e-05,
      "loss": 0.5581,
      "num_tokens": 155667071.0,
      "step": 595
    },
    {
      "epoch": 0.27972027972027974,
      "grad_norm": 0.4869182742809078,
      "learning_rate": 4.9995754373009756e-05,
      "loss": 0.5818,
      "num_tokens": 156977308.0,
      "step": 600
    },
    {
      "epoch": 0.28205128205128205,
      "grad_norm": 0.5217363999507327,
      "learning_rate": 4.999505374721338e-05,
      "loss": 0.568,
      "num_tokens": 158288028.0,
      "step": 605
    },
    {
      "epoch": 0.28438228438228436,
      "grad_norm": 0.5636140387216821,
      "learning_rate": 4.999429964665866e-05,
      "loss": 0.5685,
      "num_tokens": 159598748.0,
      "step": 610
    },
    {
      "epoch": 0.2867132867132867,
      "grad_norm": 0.4953249441318155,
      "learning_rate": 4.999349207313823e-05,
      "loss": 0.5569,
      "num_tokens": 160909468.0,
      "step": 615
    },
    {
      "epoch": 0.289044289044289,
      "grad_norm": 0.5870745218369171,
      "learning_rate": 4.999263102857185e-05,
      "loss": 0.5684,
      "num_tokens": 162220188.0,
      "step": 620
    },
    {
      "epoch": 0.2913752913752914,
      "grad_norm": 0.5796420642176587,
      "learning_rate": 4.9991716515006354e-05,
      "loss": 0.5908,
      "num_tokens": 163517758.0,
      "step": 625
    },
    {
      "epoch": 0.2937062937062937,
      "grad_norm": 0.5238869041431976,
      "learning_rate": 4.9990748534615714e-05,
      "loss": 0.5591,
      "num_tokens": 164828478.0,
      "step": 630
    },
    {
      "epoch": 0.29603729603729606,
      "grad_norm": 0.5863277078576777,
      "learning_rate": 4.998972708970101e-05,
      "loss": 0.5777,
      "num_tokens": 166123691.0,
      "step": 635
    },
    {
      "epoch": 0.29836829836829837,
      "grad_norm": 0.5171070705654046,
      "learning_rate": 4.998865218269036e-05,
      "loss": 0.5659,
      "num_tokens": 167423794.0,
      "step": 640
    },
    {
      "epoch": 0.3006993006993007,
      "grad_norm": 0.6049960425262351,
      "learning_rate": 4.998752381613905e-05,
      "loss": 0.5683,
      "num_tokens": 168734514.0,
      "step": 645
    },
    {
      "epoch": 0.30303030303030304,
      "grad_norm": 0.4913193380088962,
      "learning_rate": 4.998634199272939e-05,
      "loss": 0.5561,
      "num_tokens": 170045234.0,
      "step": 650
    },
    {
      "epoch": 0.30536130536130535,
      "grad_norm": 0.47269645394182036,
      "learning_rate": 4.9985106715270786e-05,
      "loss": 0.5509,
      "num_tokens": 171355954.0,
      "step": 655
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 0.5606565456686575,
      "learning_rate": 4.99838179866997e-05,
      "loss": 0.5639,
      "num_tokens": 172657586.0,
      "step": 660
    },
    {
      "epoch": 0.31002331002331,
      "grad_norm": 0.5304938940189576,
      "learning_rate": 4.99824758100797e-05,
      "loss": 0.5512,
      "num_tokens": 173968306.0,
      "step": 665
    },
    {
      "epoch": 0.3123543123543124,
      "grad_norm": 0.4909731279417892,
      "learning_rate": 4.998108018860136e-05,
      "loss": 0.5729,
      "num_tokens": 175279026.0,
      "step": 670
    },
    {
      "epoch": 0.3146853146853147,
      "grad_norm": 0.5316113973406738,
      "learning_rate": 4.997963112558232e-05,
      "loss": 0.5679,
      "num_tokens": 176589746.0,
      "step": 675
    },
    {
      "epoch": 0.317016317016317,
      "grad_norm": 0.5548933976438383,
      "learning_rate": 4.9978128624467266e-05,
      "loss": 0.5559,
      "num_tokens": 177900466.0,
      "step": 680
    },
    {
      "epoch": 0.31934731934731936,
      "grad_norm": 0.6354292278890509,
      "learning_rate": 4.997657268882791e-05,
      "loss": 0.569,
      "num_tokens": 179211186.0,
      "step": 685
    },
    {
      "epoch": 0.32167832167832167,
      "grad_norm": 0.5118276377254981,
      "learning_rate": 4.9974963322362986e-05,
      "loss": 0.575,
      "num_tokens": 180509493.0,
      "step": 690
    },
    {
      "epoch": 0.32400932400932403,
      "grad_norm": 0.5775683157667488,
      "learning_rate": 4.997330052889826e-05,
      "loss": 0.5627,
      "num_tokens": 181820213.0,
      "step": 695
    },
    {
      "epoch": 0.32634032634032634,
      "grad_norm": 0.5342512966303329,
      "learning_rate": 4.9971584312386467e-05,
      "loss": 0.5616,
      "num_tokens": 183130933.0,
      "step": 700
    },
    {
      "epoch": 0.32867132867132864,
      "grad_norm": 0.4679024175601337,
      "learning_rate": 4.996981467690738e-05,
      "loss": 0.5549,
      "num_tokens": 184441653.0,
      "step": 705
    },
    {
      "epoch": 0.331002331002331,
      "grad_norm": 0.46007737133219345,
      "learning_rate": 4.9967991626667726e-05,
      "loss": 0.5709,
      "num_tokens": 185752373.0,
      "step": 710
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.5781752361746175,
      "learning_rate": 4.996611516600122e-05,
      "loss": 0.5705,
      "num_tokens": 187063093.0,
      "step": 715
    },
    {
      "epoch": 0.3356643356643357,
      "grad_norm": 0.505350299094054,
      "learning_rate": 4.996418529936855e-05,
      "loss": 0.5488,
      "num_tokens": 188373813.0,
      "step": 720
    },
    {
      "epoch": 0.337995337995338,
      "grad_norm": 0.5228405284991805,
      "learning_rate": 4.9962202031357356e-05,
      "loss": 0.5719,
      "num_tokens": 189684342.0,
      "step": 725
    },
    {
      "epoch": 0.34032634032634035,
      "grad_norm": 0.5260336277261016,
      "learning_rate": 4.996016536668221e-05,
      "loss": 0.5723,
      "num_tokens": 190995062.0,
      "step": 730
    },
    {
      "epoch": 0.34265734265734266,
      "grad_norm": 0.5197547644486562,
      "learning_rate": 4.9958075310184634e-05,
      "loss": 0.5769,
      "num_tokens": 192305782.0,
      "step": 735
    },
    {
      "epoch": 0.34498834498834496,
      "grad_norm": 0.4760010257555004,
      "learning_rate": 4.995593186683308e-05,
      "loss": 0.5504,
      "num_tokens": 193616502.0,
      "step": 740
    },
    {
      "epoch": 0.3473193473193473,
      "grad_norm": 0.5832438782265436,
      "learning_rate": 4.995373504172286e-05,
      "loss": 0.5709,
      "num_tokens": 194927222.0,
      "step": 745
    },
    {
      "epoch": 0.34965034965034963,
      "grad_norm": 0.4426080176608608,
      "learning_rate": 4.9951484840076246e-05,
      "loss": 0.56,
      "num_tokens": 196237942.0,
      "step": 750
    },
    {
      "epoch": 0.351981351981352,
      "grad_norm": 0.5864066431584307,
      "learning_rate": 4.9949181267242365e-05,
      "loss": 0.5494,
      "num_tokens": 197548662.0,
      "step": 755
    },
    {
      "epoch": 0.3543123543123543,
      "grad_norm": 0.6479586979194639,
      "learning_rate": 4.994682432869722e-05,
      "loss": 0.548,
      "num_tokens": 198859382.0,
      "step": 760
    },
    {
      "epoch": 0.35664335664335667,
      "grad_norm": 0.5890555644210004,
      "learning_rate": 4.994441403004366e-05,
      "loss": 0.5513,
      "num_tokens": 200170102.0,
      "step": 765
    },
    {
      "epoch": 0.358974358974359,
      "grad_norm": 0.5160808856165031,
      "learning_rate": 4.9941950377011424e-05,
      "loss": 0.5554,
      "num_tokens": 201480822.0,
      "step": 770
    },
    {
      "epoch": 0.3613053613053613,
      "grad_norm": 0.46297814176374613,
      "learning_rate": 4.993943337545703e-05,
      "loss": 0.5607,
      "num_tokens": 202791542.0,
      "step": 775
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.5611193032764832,
      "learning_rate": 4.993686303136385e-05,
      "loss": 0.5539,
      "num_tokens": 204102262.0,
      "step": 780
    },
    {
      "epoch": 0.36596736596736595,
      "grad_norm": 0.5066631995519579,
      "learning_rate": 4.9934239350842064e-05,
      "loss": 0.5613,
      "num_tokens": 205412982.0,
      "step": 785
    },
    {
      "epoch": 0.3682983682983683,
      "grad_norm": 0.5659611475992192,
      "learning_rate": 4.99315623401286e-05,
      "loss": 0.5613,
      "num_tokens": 206723702.0,
      "step": 790
    },
    {
      "epoch": 0.3706293706293706,
      "grad_norm": 0.48541130012045497,
      "learning_rate": 4.992883200558721e-05,
      "loss": 0.5534,
      "num_tokens": 208034422.0,
      "step": 795
    },
    {
      "epoch": 0.372960372960373,
      "grad_norm": 0.5007321223818056,
      "learning_rate": 4.992604835370838e-05,
      "loss": 0.5676,
      "num_tokens": 209345142.0,
      "step": 800
    },
    {
      "epoch": 0.3752913752913753,
      "grad_norm": 0.49768356226116434,
      "learning_rate": 4.992321139110935e-05,
      "loss": 0.5628,
      "num_tokens": 210655862.0,
      "step": 805
    },
    {
      "epoch": 0.3776223776223776,
      "grad_norm": 0.49652952690828717,
      "learning_rate": 4.992032112453409e-05,
      "loss": 0.5602,
      "num_tokens": 211966582.0,
      "step": 810
    },
    {
      "epoch": 0.37995337995337997,
      "grad_norm": 0.4649469721716684,
      "learning_rate": 4.9917377560853265e-05,
      "loss": 0.5545,
      "num_tokens": 213277302.0,
      "step": 815
    },
    {
      "epoch": 0.3822843822843823,
      "grad_norm": 0.5360683146657782,
      "learning_rate": 4.991438070706428e-05,
      "loss": 0.5519,
      "num_tokens": 214586037.0,
      "step": 820
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 0.49705599435755293,
      "learning_rate": 4.991133057029116e-05,
      "loss": 0.5509,
      "num_tokens": 215896757.0,
      "step": 825
    },
    {
      "epoch": 0.38694638694638694,
      "grad_norm": 0.5283918826785868,
      "learning_rate": 4.9908227157784645e-05,
      "loss": 0.5391,
      "num_tokens": 217207477.0,
      "step": 830
    },
    {
      "epoch": 0.38927738927738925,
      "grad_norm": 0.5007115483504386,
      "learning_rate": 4.9905070476922086e-05,
      "loss": 0.5639,
      "num_tokens": 218509916.0,
      "step": 835
    },
    {
      "epoch": 0.3916083916083916,
      "grad_norm": 0.5498454232202491,
      "learning_rate": 4.9901860535207486e-05,
      "loss": 0.5705,
      "num_tokens": 219820636.0,
      "step": 840
    },
    {
      "epoch": 0.3939393939393939,
      "grad_norm": 0.48240740398938314,
      "learning_rate": 4.9898597340271446e-05,
      "loss": 0.5368,
      "num_tokens": 221131356.0,
      "step": 845
    },
    {
      "epoch": 0.3962703962703963,
      "grad_norm": 0.5056929824942472,
      "learning_rate": 4.989528089987117e-05,
      "loss": 0.5575,
      "num_tokens": 222442076.0,
      "step": 850
    },
    {
      "epoch": 0.3986013986013986,
      "grad_norm": 0.489254628262671,
      "learning_rate": 4.989191122189042e-05,
      "loss": 0.5493,
      "num_tokens": 223752796.0,
      "step": 855
    },
    {
      "epoch": 0.40093240093240096,
      "grad_norm": 0.48008396663558006,
      "learning_rate": 4.988848831433952e-05,
      "loss": 0.5428,
      "num_tokens": 225063516.0,
      "step": 860
    },
    {
      "epoch": 0.40326340326340326,
      "grad_norm": 0.49333444429559375,
      "learning_rate": 4.9885012185355346e-05,
      "loss": 0.5481,
      "num_tokens": 226374236.0,
      "step": 865
    },
    {
      "epoch": 0.40559440559440557,
      "grad_norm": 0.45124369244739004,
      "learning_rate": 4.9881482843201266e-05,
      "loss": 0.555,
      "num_tokens": 227684956.0,
      "step": 870
    },
    {
      "epoch": 0.40792540792540793,
      "grad_norm": 0.5496711964879529,
      "learning_rate": 4.987790029626716e-05,
      "loss": 0.5616,
      "num_tokens": 228995676.0,
      "step": 875
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 0.47265434607763146,
      "learning_rate": 4.9874264553069376e-05,
      "loss": 0.5386,
      "num_tokens": 230306396.0,
      "step": 880
    },
    {
      "epoch": 0.4125874125874126,
      "grad_norm": 0.5135497697717332,
      "learning_rate": 4.987057562225074e-05,
      "loss": 0.5603,
      "num_tokens": 231617116.0,
      "step": 885
    },
    {
      "epoch": 0.4149184149184149,
      "grad_norm": 0.4682122711297366,
      "learning_rate": 4.986683351258048e-05,
      "loss": 0.5445,
      "num_tokens": 232927836.0,
      "step": 890
    },
    {
      "epoch": 0.4172494172494173,
      "grad_norm": 0.4112633329315492,
      "learning_rate": 4.986303823295427e-05,
      "loss": 0.5426,
      "num_tokens": 234238556.0,
      "step": 895
    },
    {
      "epoch": 0.4195804195804196,
      "grad_norm": 0.402214665476133,
      "learning_rate": 4.985918979239416e-05,
      "loss": 0.5485,
      "num_tokens": 235549276.0,
      "step": 900
    },
    {
      "epoch": 0.4219114219114219,
      "grad_norm": 0.5455511517804665,
      "learning_rate": 4.985528820004859e-05,
      "loss": 0.557,
      "num_tokens": 236859996.0,
      "step": 905
    },
    {
      "epoch": 0.42424242424242425,
      "grad_norm": 0.47199199317580776,
      "learning_rate": 4.9851333465192336e-05,
      "loss": 0.5371,
      "num_tokens": 238170716.0,
      "step": 910
    },
    {
      "epoch": 0.42657342657342656,
      "grad_norm": 0.4776585972671657,
      "learning_rate": 4.984732559722651e-05,
      "loss": 0.555,
      "num_tokens": 239481436.0,
      "step": 915
    },
    {
      "epoch": 0.4289044289044289,
      "grad_norm": 0.5249113633053311,
      "learning_rate": 4.984326460567852e-05,
      "loss": 0.5629,
      "num_tokens": 240792156.0,
      "step": 920
    },
    {
      "epoch": 0.43123543123543123,
      "grad_norm": 0.5202213780622079,
      "learning_rate": 4.9839150500202085e-05,
      "loss": 0.5443,
      "num_tokens": 242102876.0,
      "step": 925
    },
    {
      "epoch": 0.43356643356643354,
      "grad_norm": 0.5456374996972472,
      "learning_rate": 4.983498329057715e-05,
      "loss": 0.5597,
      "num_tokens": 243413596.0,
      "step": 930
    },
    {
      "epoch": 0.4358974358974359,
      "grad_norm": 0.4380920966683162,
      "learning_rate": 4.983076298670994e-05,
      "loss": 0.5325,
      "num_tokens": 244719166.0,
      "step": 935
    },
    {
      "epoch": 0.4382284382284382,
      "grad_norm": 0.5542937783335792,
      "learning_rate": 4.982648959863285e-05,
      "loss": 0.5562,
      "num_tokens": 246029886.0,
      "step": 940
    },
    {
      "epoch": 0.4405594405594406,
      "grad_norm": 0.503124227495007,
      "learning_rate": 4.982216313650448e-05,
      "loss": 0.554,
      "num_tokens": 247327205.0,
      "step": 945
    },
    {
      "epoch": 0.4428904428904429,
      "grad_norm": 0.5660923842068369,
      "learning_rate": 4.981778361060962e-05,
      "loss": 0.5592,
      "num_tokens": 248637925.0,
      "step": 950
    },
    {
      "epoch": 0.44522144522144524,
      "grad_norm": 0.46034726240926843,
      "learning_rate": 4.981335103135919e-05,
      "loss": 0.5484,
      "num_tokens": 249948645.0,
      "step": 955
    },
    {
      "epoch": 0.44755244755244755,
      "grad_norm": 0.46499481242052637,
      "learning_rate": 4.980886540929021e-05,
      "loss": 0.5432,
      "num_tokens": 251259365.0,
      "step": 960
    },
    {
      "epoch": 0.44988344988344986,
      "grad_norm": 0.5139881063742346,
      "learning_rate": 4.98043267550658e-05,
      "loss": 0.5609,
      "num_tokens": 252570085.0,
      "step": 965
    },
    {
      "epoch": 0.4522144522144522,
      "grad_norm": 0.44368184026909635,
      "learning_rate": 4.979973507947516e-05,
      "loss": 0.5372,
      "num_tokens": 253880805.0,
      "step": 970
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.45865519207556255,
      "learning_rate": 4.979509039343352e-05,
      "loss": 0.559,
      "num_tokens": 255191525.0,
      "step": 975
    },
    {
      "epoch": 0.4568764568764569,
      "grad_norm": 0.5353248215200734,
      "learning_rate": 4.9790392707982137e-05,
      "loss": 0.5715,
      "num_tokens": 256502245.0,
      "step": 980
    },
    {
      "epoch": 0.4592074592074592,
      "grad_norm": 0.4173868436003061,
      "learning_rate": 4.978564203428823e-05,
      "loss": 0.5447,
      "num_tokens": 257812965.0,
      "step": 985
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.5116076701150912,
      "learning_rate": 4.9780838383645007e-05,
      "loss": 0.5551,
      "num_tokens": 259123685.0,
      "step": 990
    },
    {
      "epoch": 0.46386946386946387,
      "grad_norm": 0.46300754955199347,
      "learning_rate": 4.977598176747161e-05,
      "loss": 0.539,
      "num_tokens": 260425724.0,
      "step": 995
    },
    {
      "epoch": 0.4662004662004662,
      "grad_norm": 0.5076641753208481,
      "learning_rate": 4.977107219731307e-05,
      "loss": 0.5526,
      "num_tokens": 261736444.0,
      "step": 1000
    },
    {
      "epoch": 0.46853146853146854,
      "grad_norm": 0.47654605410882744,
      "learning_rate": 4.9766109684840316e-05,
      "loss": 0.5507,
      "num_tokens": 263047164.0,
      "step": 1005
    },
    {
      "epoch": 0.47086247086247085,
      "grad_norm": 0.47216898809671914,
      "learning_rate": 4.9761094241850137e-05,
      "loss": 0.5564,
      "num_tokens": 264341546.0,
      "step": 1010
    },
    {
      "epoch": 0.4731934731934732,
      "grad_norm": 0.522140635226013,
      "learning_rate": 4.9756025880265124e-05,
      "loss": 0.5583,
      "num_tokens": 265652266.0,
      "step": 1015
    },
    {
      "epoch": 0.4755244755244755,
      "grad_norm": 0.48172818261748473,
      "learning_rate": 4.975090461213368e-05,
      "loss": 0.5534,
      "num_tokens": 266962986.0,
      "step": 1020
    },
    {
      "epoch": 0.47785547785547783,
      "grad_norm": 0.43774729877649815,
      "learning_rate": 4.9745730449629967e-05,
      "loss": 0.5398,
      "num_tokens": 268273706.0,
      "step": 1025
    },
    {
      "epoch": 0.4801864801864802,
      "grad_norm": 0.45666010168372156,
      "learning_rate": 4.9740503405053904e-05,
      "loss": 0.558,
      "num_tokens": 269584426.0,
      "step": 1030
    },
    {
      "epoch": 0.4825174825174825,
      "grad_norm": 0.45349675110750093,
      "learning_rate": 4.9735223490831104e-05,
      "loss": 0.5558,
      "num_tokens": 270895146.0,
      "step": 1035
    },
    {
      "epoch": 0.48484848484848486,
      "grad_norm": 0.4811125107950563,
      "learning_rate": 4.9729890719512875e-05,
      "loss": 0.5332,
      "num_tokens": 272205866.0,
      "step": 1040
    },
    {
      "epoch": 0.48717948717948717,
      "grad_norm": 0.48607706780099336,
      "learning_rate": 4.972450510377615e-05,
      "loss": 0.5511,
      "num_tokens": 273514547.0,
      "step": 1045
    },
    {
      "epoch": 0.48951048951048953,
      "grad_norm": 0.4988992952928454,
      "learning_rate": 4.971906665642351e-05,
      "loss": 0.5509,
      "num_tokens": 274825267.0,
      "step": 1050
    },
    {
      "epoch": 0.49184149184149184,
      "grad_norm": 0.45178980549967424,
      "learning_rate": 4.971357539038311e-05,
      "loss": 0.5352,
      "num_tokens": 276135501.0,
      "step": 1055
    },
    {
      "epoch": 0.49417249417249415,
      "grad_norm": 0.4628244844080021,
      "learning_rate": 4.970803131870867e-05,
      "loss": 0.5576,
      "num_tokens": 277446221.0,
      "step": 1060
    },
    {
      "epoch": 0.4965034965034965,
      "grad_norm": 0.4968199670572577,
      "learning_rate": 4.9702434454579435e-05,
      "loss": 0.5302,
      "num_tokens": 278751167.0,
      "step": 1065
    },
    {
      "epoch": 0.4988344988344988,
      "grad_norm": 0.5350366333592432,
      "learning_rate": 4.969678481130017e-05,
      "loss": 0.5447,
      "num_tokens": 280061887.0,
      "step": 1070
    },
    {
      "epoch": 0.5011655011655012,
      "grad_norm": 0.4428901687128577,
      "learning_rate": 4.9691082402301056e-05,
      "loss": 0.5515,
      "num_tokens": 281372607.0,
      "step": 1075
    },
    {
      "epoch": 0.5034965034965035,
      "grad_norm": 0.4375390965832773,
      "learning_rate": 4.9685327241137755e-05,
      "loss": 0.5429,
      "num_tokens": 282683327.0,
      "step": 1080
    },
    {
      "epoch": 0.5058275058275058,
      "grad_norm": 0.4522043765274002,
      "learning_rate": 4.967951934149132e-05,
      "loss": 0.5603,
      "num_tokens": 283980719.0,
      "step": 1085
    },
    {
      "epoch": 0.5081585081585082,
      "grad_norm": 0.45069894232095004,
      "learning_rate": 4.967365871716814e-05,
      "loss": 0.5528,
      "num_tokens": 285291439.0,
      "step": 1090
    },
    {
      "epoch": 0.5104895104895105,
      "grad_norm": 0.4750493200396816,
      "learning_rate": 4.9667745382099986e-05,
      "loss": 0.533,
      "num_tokens": 286602159.0,
      "step": 1095
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.4549131138548312,
      "learning_rate": 4.96617793503439e-05,
      "loss": 0.5531,
      "num_tokens": 287912879.0,
      "step": 1100
    },
    {
      "epoch": 0.5151515151515151,
      "grad_norm": 0.5019297246106439,
      "learning_rate": 4.9655760636082214e-05,
      "loss": 0.5749,
      "num_tokens": 289215063.0,
      "step": 1105
    },
    {
      "epoch": 0.5174825174825175,
      "grad_norm": 0.4113411292647171,
      "learning_rate": 4.964968925362248e-05,
      "loss": 0.5372,
      "num_tokens": 290525783.0,
      "step": 1110
    },
    {
      "epoch": 0.5198135198135199,
      "grad_norm": 0.41397626069442495,
      "learning_rate": 4.964356521739746e-05,
      "loss": 0.5385,
      "num_tokens": 291823567.0,
      "step": 1115
    },
    {
      "epoch": 0.5221445221445221,
      "grad_norm": 0.44284424686828006,
      "learning_rate": 4.9637388541965074e-05,
      "loss": 0.5346,
      "num_tokens": 293122806.0,
      "step": 1120
    },
    {
      "epoch": 0.5244755244755245,
      "grad_norm": 0.47707957535562784,
      "learning_rate": 4.9631159242008394e-05,
      "loss": 0.5411,
      "num_tokens": 294423907.0,
      "step": 1125
    },
    {
      "epoch": 0.5268065268065268,
      "grad_norm": 0.5044770815753655,
      "learning_rate": 4.9624877332335576e-05,
      "loss": 0.5675,
      "num_tokens": 295734627.0,
      "step": 1130
    },
    {
      "epoch": 0.5291375291375291,
      "grad_norm": 0.5774114892004122,
      "learning_rate": 4.9618542827879826e-05,
      "loss": 0.5546,
      "num_tokens": 297045347.0,
      "step": 1135
    },
    {
      "epoch": 0.5314685314685315,
      "grad_norm": 0.4376278267424837,
      "learning_rate": 4.9612155743699416e-05,
      "loss": 0.5377,
      "num_tokens": 298356067.0,
      "step": 1140
    },
    {
      "epoch": 0.5337995337995338,
      "grad_norm": 0.5642480010358203,
      "learning_rate": 4.960571609497756e-05,
      "loss": 0.5576,
      "num_tokens": 299666787.0,
      "step": 1145
    },
    {
      "epoch": 0.5361305361305362,
      "grad_norm": 0.46779318091035216,
      "learning_rate": 4.9599223897022474e-05,
      "loss": 0.5292,
      "num_tokens": 300977507.0,
      "step": 1150
    },
    {
      "epoch": 0.5384615384615384,
      "grad_norm": 0.5065887547269632,
      "learning_rate": 4.959267916526726e-05,
      "loss": 0.5493,
      "num_tokens": 302288227.0,
      "step": 1155
    },
    {
      "epoch": 0.5407925407925408,
      "grad_norm": 0.4516510903602445,
      "learning_rate": 4.958608191526992e-05,
      "loss": 0.5392,
      "num_tokens": 303598947.0,
      "step": 1160
    },
    {
      "epoch": 0.5431235431235432,
      "grad_norm": 0.4860574849415324,
      "learning_rate": 4.957943216271328e-05,
      "loss": 0.5479,
      "num_tokens": 304909667.0,
      "step": 1165
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.4464946298053418,
      "learning_rate": 4.9572729923405e-05,
      "loss": 0.5459,
      "num_tokens": 306213321.0,
      "step": 1170
    },
    {
      "epoch": 0.5477855477855478,
      "grad_norm": 0.4873945912917641,
      "learning_rate": 4.956597521327751e-05,
      "loss": 0.5616,
      "num_tokens": 307524041.0,
      "step": 1175
    },
    {
      "epoch": 0.5501165501165501,
      "grad_norm": 0.4388018067262825,
      "learning_rate": 4.955916804838794e-05,
      "loss": 0.5423,
      "num_tokens": 308834761.0,
      "step": 1180
    },
    {
      "epoch": 0.5524475524475524,
      "grad_norm": 0.5105304916507707,
      "learning_rate": 4.955230844491815e-05,
      "loss": 0.5437,
      "num_tokens": 310145481.0,
      "step": 1185
    },
    {
      "epoch": 0.5547785547785548,
      "grad_norm": 0.4456654310545227,
      "learning_rate": 4.954539641917464e-05,
      "loss": 0.522,
      "num_tokens": 311456201.0,
      "step": 1190
    },
    {
      "epoch": 0.5571095571095571,
      "grad_norm": 0.4454183246164721,
      "learning_rate": 4.953843198758853e-05,
      "loss": 0.5404,
      "num_tokens": 312766921.0,
      "step": 1195
    },
    {
      "epoch": 0.5594405594405595,
      "grad_norm": 0.4348791044569224,
      "learning_rate": 4.953141516671551e-05,
      "loss": 0.543,
      "num_tokens": 314077641.0,
      "step": 1200
    },
    {
      "epoch": 0.5617715617715617,
      "grad_norm": 0.4380820952282363,
      "learning_rate": 4.952434597323582e-05,
      "loss": 0.5396,
      "num_tokens": 315388361.0,
      "step": 1205
    },
    {
      "epoch": 0.5641025641025641,
      "grad_norm": 0.5026501223667641,
      "learning_rate": 4.9517224423954207e-05,
      "loss": 0.5347,
      "num_tokens": 316699081.0,
      "step": 1210
    },
    {
      "epoch": 0.5664335664335665,
      "grad_norm": 0.44043504602766326,
      "learning_rate": 4.951005053579985e-05,
      "loss": 0.5339,
      "num_tokens": 317998833.0,
      "step": 1215
    },
    {
      "epoch": 0.5687645687645687,
      "grad_norm": 0.4743068657928212,
      "learning_rate": 4.950282432582635e-05,
      "loss": 0.5339,
      "num_tokens": 319309553.0,
      "step": 1220
    },
    {
      "epoch": 0.5710955710955711,
      "grad_norm": 0.47052451377323873,
      "learning_rate": 4.9495545811211724e-05,
      "loss": 0.5294,
      "num_tokens": 320613226.0,
      "step": 1225
    },
    {
      "epoch": 0.5734265734265734,
      "grad_norm": 0.4514576116741238,
      "learning_rate": 4.948821500925829e-05,
      "loss": 0.5322,
      "num_tokens": 321923946.0,
      "step": 1230
    },
    {
      "epoch": 0.5757575757575758,
      "grad_norm": 0.499267629747507,
      "learning_rate": 4.948083193739267e-05,
      "loss": 0.5288,
      "num_tokens": 323234666.0,
      "step": 1235
    },
    {
      "epoch": 0.578088578088578,
      "grad_norm": 0.41784291038069327,
      "learning_rate": 4.947339661316574e-05,
      "loss": 0.5412,
      "num_tokens": 324545386.0,
      "step": 1240
    },
    {
      "epoch": 0.5804195804195804,
      "grad_norm": 0.4039329790155304,
      "learning_rate": 4.946590905425262e-05,
      "loss": 0.5417,
      "num_tokens": 325856106.0,
      "step": 1245
    },
    {
      "epoch": 0.5827505827505828,
      "grad_norm": 0.47163444684620975,
      "learning_rate": 4.9458369278452536e-05,
      "loss": 0.5312,
      "num_tokens": 327166826.0,
      "step": 1250
    },
    {
      "epoch": 0.585081585081585,
      "grad_norm": 0.49628607779772915,
      "learning_rate": 4.94507773036889e-05,
      "loss": 0.5646,
      "num_tokens": 328477546.0,
      "step": 1255
    },
    {
      "epoch": 0.5874125874125874,
      "grad_norm": 0.47690969125101035,
      "learning_rate": 4.9443133148009193e-05,
      "loss": 0.5458,
      "num_tokens": 329788266.0,
      "step": 1260
    },
    {
      "epoch": 0.5897435897435898,
      "grad_norm": 0.5529225791199783,
      "learning_rate": 4.943543682958494e-05,
      "loss": 0.5515,
      "num_tokens": 331098986.0,
      "step": 1265
    },
    {
      "epoch": 0.5920745920745921,
      "grad_norm": 0.4570156847193979,
      "learning_rate": 4.942768836671165e-05,
      "loss": 0.5624,
      "num_tokens": 332409706.0,
      "step": 1270
    },
    {
      "epoch": 0.5944055944055944,
      "grad_norm": 0.4476694941858805,
      "learning_rate": 4.941988777780881e-05,
      "loss": 0.5278,
      "num_tokens": 333720426.0,
      "step": 1275
    },
    {
      "epoch": 0.5967365967365967,
      "grad_norm": 0.5092541231473139,
      "learning_rate": 4.941203508141982e-05,
      "loss": 0.541,
      "num_tokens": 335031146.0,
      "step": 1280
    },
    {
      "epoch": 0.5990675990675991,
      "grad_norm": 0.49506494033393816,
      "learning_rate": 4.940413029621193e-05,
      "loss": 0.5176,
      "num_tokens": 336338731.0,
      "step": 1285
    },
    {
      "epoch": 0.6013986013986014,
      "grad_norm": 0.4380679777553619,
      "learning_rate": 4.939617344097622e-05,
      "loss": 0.5303,
      "num_tokens": 337649451.0,
      "step": 1290
    },
    {
      "epoch": 0.6037296037296037,
      "grad_norm": 0.48586508532838385,
      "learning_rate": 4.938816453462758e-05,
      "loss": 0.536,
      "num_tokens": 338960171.0,
      "step": 1295
    },
    {
      "epoch": 0.6060606060606061,
      "grad_norm": 0.530127091401037,
      "learning_rate": 4.9380103596204584e-05,
      "loss": 0.5227,
      "num_tokens": 340270891.0,
      "step": 1300
    },
    {
      "epoch": 0.6083916083916084,
      "grad_norm": 0.4723471427584443,
      "learning_rate": 4.9371990644869534e-05,
      "loss": 0.5364,
      "num_tokens": 341578590.0,
      "step": 1305
    },
    {
      "epoch": 0.6107226107226107,
      "grad_norm": 0.4745273455014607,
      "learning_rate": 4.936382569990837e-05,
      "loss": 0.5294,
      "num_tokens": 342889310.0,
      "step": 1310
    },
    {
      "epoch": 0.6130536130536131,
      "grad_norm": 0.4155339618600452,
      "learning_rate": 4.935560878073061e-05,
      "loss": 0.5167,
      "num_tokens": 344200030.0,
      "step": 1315
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 0.4782114553800321,
      "learning_rate": 4.934733990686934e-05,
      "loss": 0.5185,
      "num_tokens": 345504904.0,
      "step": 1320
    },
    {
      "epoch": 0.6177156177156177,
      "grad_norm": 0.44089991433891135,
      "learning_rate": 4.9339019097981155e-05,
      "loss": 0.5533,
      "num_tokens": 346815624.0,
      "step": 1325
    },
    {
      "epoch": 0.62004662004662,
      "grad_norm": 0.43436432584179596,
      "learning_rate": 4.933064637384611e-05,
      "loss": 0.5159,
      "num_tokens": 348126344.0,
      "step": 1330
    },
    {
      "epoch": 0.6223776223776224,
      "grad_norm": 0.43330361810730905,
      "learning_rate": 4.932222175436764e-05,
      "loss": 0.5162,
      "num_tokens": 349437064.0,
      "step": 1335
    },
    {
      "epoch": 0.6247086247086248,
      "grad_norm": 0.49741869953347956,
      "learning_rate": 4.9313745259572594e-05,
      "loss": 0.539,
      "num_tokens": 350734169.0,
      "step": 1340
    },
    {
      "epoch": 0.627039627039627,
      "grad_norm": 0.4766273272639524,
      "learning_rate": 4.93052169096111e-05,
      "loss": 0.5331,
      "num_tokens": 352031299.0,
      "step": 1345
    },
    {
      "epoch": 0.6293706293706294,
      "grad_norm": 0.4354355175881609,
      "learning_rate": 4.9296636724756576e-05,
      "loss": 0.5616,
      "num_tokens": 353342019.0,
      "step": 1350
    },
    {
      "epoch": 0.6317016317016317,
      "grad_norm": 0.4509143900260654,
      "learning_rate": 4.928800472540564e-05,
      "loss": 0.5162,
      "num_tokens": 354652739.0,
      "step": 1355
    },
    {
      "epoch": 0.634032634032634,
      "grad_norm": 0.48892330783121585,
      "learning_rate": 4.9279320932078114e-05,
      "loss": 0.5432,
      "num_tokens": 355956520.0,
      "step": 1360
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 0.49979045808989403,
      "learning_rate": 4.927058536541691e-05,
      "loss": 0.5421,
      "num_tokens": 357259308.0,
      "step": 1365
    },
    {
      "epoch": 0.6386946386946387,
      "grad_norm": 0.4773067120764309,
      "learning_rate": 4.926179804618805e-05,
      "loss": 0.5232,
      "num_tokens": 358570028.0,
      "step": 1370
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 0.4807159216924898,
      "learning_rate": 4.925295899528052e-05,
      "loss": 0.5378,
      "num_tokens": 359880748.0,
      "step": 1375
    },
    {
      "epoch": 0.6433566433566433,
      "grad_norm": 0.5792009053156909,
      "learning_rate": 4.924406823370637e-05,
      "loss": 0.5505,
      "num_tokens": 361191468.0,
      "step": 1380
    },
    {
      "epoch": 0.6456876456876457,
      "grad_norm": 0.43269843498677835,
      "learning_rate": 4.923512578260049e-05,
      "loss": 0.5271,
      "num_tokens": 362502188.0,
      "step": 1385
    },
    {
      "epoch": 0.6480186480186481,
      "grad_norm": 0.487074695118609,
      "learning_rate": 4.922613166322071e-05,
      "loss": 0.524,
      "num_tokens": 363812908.0,
      "step": 1390
    },
    {
      "epoch": 0.6503496503496503,
      "grad_norm": 0.4550175361090717,
      "learning_rate": 4.9217085896947636e-05,
      "loss": 0.5314,
      "num_tokens": 365123628.0,
      "step": 1395
    },
    {
      "epoch": 0.6526806526806527,
      "grad_norm": 0.47974397984295003,
      "learning_rate": 4.920798850528468e-05,
      "loss": 0.5467,
      "num_tokens": 366434348.0,
      "step": 1400
    },
    {
      "epoch": 0.655011655011655,
      "grad_norm": 0.48000475609552506,
      "learning_rate": 4.919883950985796e-05,
      "loss": 0.5284,
      "num_tokens": 367745068.0,
      "step": 1405
    },
    {
      "epoch": 0.6573426573426573,
      "grad_norm": 0.6241647775105474,
      "learning_rate": 4.918963893241628e-05,
      "loss": 0.5464,
      "num_tokens": 369055788.0,
      "step": 1410
    },
    {
      "epoch": 0.6596736596736597,
      "grad_norm": 0.48926526915624463,
      "learning_rate": 4.918038679483105e-05,
      "loss": 0.5331,
      "num_tokens": 370366508.0,
      "step": 1415
    },
    {
      "epoch": 0.662004662004662,
      "grad_norm": 0.4456502453067459,
      "learning_rate": 4.917108311909624e-05,
      "loss": 0.5525,
      "num_tokens": 371677228.0,
      "step": 1420
    },
    {
      "epoch": 0.6643356643356644,
      "grad_norm": 0.4346779313930937,
      "learning_rate": 4.916172792732838e-05,
      "loss": 0.5191,
      "num_tokens": 372987948.0,
      "step": 1425
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.5028199232030454,
      "learning_rate": 4.91523212417664e-05,
      "loss": 0.5355,
      "num_tokens": 374298668.0,
      "step": 1430
    },
    {
      "epoch": 0.668997668997669,
      "grad_norm": 0.43127888280556725,
      "learning_rate": 4.914286308477168e-05,
      "loss": 0.5402,
      "num_tokens": 375597975.0,
      "step": 1435
    },
    {
      "epoch": 0.6713286713286714,
      "grad_norm": 0.42701713218021103,
      "learning_rate": 4.913335347882795e-05,
      "loss": 0.5436,
      "num_tokens": 376908695.0,
      "step": 1440
    },
    {
      "epoch": 0.6736596736596736,
      "grad_norm": 0.45471075660929255,
      "learning_rate": 4.912379244654125e-05,
      "loss": 0.5496,
      "num_tokens": 378219415.0,
      "step": 1445
    },
    {
      "epoch": 0.675990675990676,
      "grad_norm": 0.5036127947007575,
      "learning_rate": 4.911418001063985e-05,
      "loss": 0.5457,
      "num_tokens": 379526026.0,
      "step": 1450
    },
    {
      "epoch": 0.6783216783216783,
      "grad_norm": 0.6069326047714376,
      "learning_rate": 4.910451619397421e-05,
      "loss": 0.5532,
      "num_tokens": 380835617.0,
      "step": 1455
    },
    {
      "epoch": 0.6806526806526807,
      "grad_norm": 0.42021839267741845,
      "learning_rate": 4.9094801019516987e-05,
      "loss": 0.5302,
      "num_tokens": 382146337.0,
      "step": 1460
    },
    {
      "epoch": 0.682983682983683,
      "grad_norm": 0.42181592137530005,
      "learning_rate": 4.908503451036285e-05,
      "loss": 0.5395,
      "num_tokens": 383457057.0,
      "step": 1465
    },
    {
      "epoch": 0.6853146853146853,
      "grad_norm": 0.4819433448161508,
      "learning_rate": 4.9075216689728545e-05,
      "loss": 0.5232,
      "num_tokens": 384767777.0,
      "step": 1470
    },
    {
      "epoch": 0.6876456876456877,
      "grad_norm": 0.4360769334365848,
      "learning_rate": 4.9065347580952795e-05,
      "loss": 0.5419,
      "num_tokens": 386078497.0,
      "step": 1475
    },
    {
      "epoch": 0.6899766899766899,
      "grad_norm": 0.46623263345856697,
      "learning_rate": 4.9055427207496216e-05,
      "loss": 0.5327,
      "num_tokens": 387384301.0,
      "step": 1480
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 0.49755651823842795,
      "learning_rate": 4.9045455592941325e-05,
      "loss": 0.5313,
      "num_tokens": 388695021.0,
      "step": 1485
    },
    {
      "epoch": 0.6946386946386947,
      "grad_norm": 0.47799577567341445,
      "learning_rate": 4.903543276099241e-05,
      "loss": 0.5191,
      "num_tokens": 390005741.0,
      "step": 1490
    },
    {
      "epoch": 0.696969696969697,
      "grad_norm": 0.43014258037668707,
      "learning_rate": 4.902535873547555e-05,
      "loss": 0.5279,
      "num_tokens": 391300707.0,
      "step": 1495
    },
    {
      "epoch": 0.6993006993006993,
      "grad_norm": 0.43677160663867953,
      "learning_rate": 4.901523354033849e-05,
      "loss": 0.5239,
      "num_tokens": 392611427.0,
      "step": 1500
    },
    {
      "epoch": 0.7016317016317016,
      "grad_norm": 0.4314496324860873,
      "learning_rate": 4.9005057199650624e-05,
      "loss": 0.5507,
      "num_tokens": 393915317.0,
      "step": 1505
    },
    {
      "epoch": 0.703962703962704,
      "grad_norm": 0.5211544326381605,
      "learning_rate": 4.8994829737602945e-05,
      "loss": 0.5327,
      "num_tokens": 395213883.0,
      "step": 1510
    },
    {
      "epoch": 0.7062937062937062,
      "grad_norm": 0.4976803079849033,
      "learning_rate": 4.8984551178507936e-05,
      "loss": 0.5281,
      "num_tokens": 396524603.0,
      "step": 1515
    },
    {
      "epoch": 0.7086247086247086,
      "grad_norm": 0.48985310144346317,
      "learning_rate": 4.897422154679959e-05,
      "loss": 0.5285,
      "num_tokens": 397835323.0,
      "step": 1520
    },
    {
      "epoch": 0.710955710955711,
      "grad_norm": 0.5059108187782707,
      "learning_rate": 4.896384086703327e-05,
      "loss": 0.5221,
      "num_tokens": 399146043.0,
      "step": 1525
    },
    {
      "epoch": 0.7132867132867133,
      "grad_norm": 0.4641511467601387,
      "learning_rate": 4.8953409163885706e-05,
      "loss": 0.5263,
      "num_tokens": 400443842.0,
      "step": 1530
    },
    {
      "epoch": 0.7156177156177156,
      "grad_norm": 0.46382018249195767,
      "learning_rate": 4.894292646215492e-05,
      "loss": 0.5295,
      "num_tokens": 401754562.0,
      "step": 1535
    },
    {
      "epoch": 0.717948717948718,
      "grad_norm": 0.46293693643832434,
      "learning_rate": 4.8932392786760174e-05,
      "loss": 0.5311,
      "num_tokens": 403065282.0,
      "step": 1540
    },
    {
      "epoch": 0.7202797202797203,
      "grad_norm": 0.4416679999163852,
      "learning_rate": 4.8921808162741875e-05,
      "loss": 0.5316,
      "num_tokens": 404376002.0,
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7226107226107226, |
| "grad_norm": 0.5431939837625993, |
| "learning_rate": 4.891117261526159e-05, |
| "loss": 0.5232, |
| "num_tokens": 405686722.0, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7249417249417249, |
| "grad_norm": 0.41054749209427666, |
| "learning_rate": 4.890048616960189e-05, |
| "loss": 0.5272, |
| "num_tokens": 406997442.0, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.4379625629492077, |
| "learning_rate": 4.888974885116637e-05, |
| "loss": 0.5359, |
| "num_tokens": 408298322.0, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7296037296037297, |
| "grad_norm": 0.48689004241017486, |
| "learning_rate": 4.887896068547957e-05, |
| "loss": 0.5469, |
| "num_tokens": 409609042.0, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7319347319347319, |
| "grad_norm": 0.38565120780565504, |
| "learning_rate": 4.886812169818686e-05, |
| "loss": 0.5409, |
| "num_tokens": 410917246.0, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7342657342657343, |
| "grad_norm": 0.42695613923544207, |
| "learning_rate": 4.8857231915054465e-05, |
| "loss": 0.5445, |
| "num_tokens": 412227966.0, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7365967365967366, |
| "grad_norm": 0.381808559474973, |
| "learning_rate": 4.884629136196934e-05, |
| "loss": 0.5207, |
| "num_tokens": 413538686.0, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7389277389277389, |
| "grad_norm": 0.44656354419841626, |
| "learning_rate": 4.8835300064939126e-05, |
| "loss": 0.5172, |
| "num_tokens": 414849406.0, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7412587412587412, |
| "grad_norm": 0.4096295626157741, |
| "learning_rate": 4.88242580500921e-05, |
| "loss": 0.5255, |
| "num_tokens": 416160126.0, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7435897435897436, |
| "grad_norm": 0.4147213001212058, |
| "learning_rate": 4.8813165343677106e-05, |
| "loss": 0.5426, |
| "num_tokens": 417470846.0, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.745920745920746, |
| "grad_norm": 0.5042968381378073, |
| "learning_rate": 4.8802021972063496e-05, |
| "loss": 0.5351, |
| "num_tokens": 418781566.0, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7482517482517482, |
| "grad_norm": 0.4572941443823439, |
| "learning_rate": 4.879082796174104e-05, |
| "loss": 0.5267, |
| "num_tokens": 420090396.0, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7505827505827506, |
| "grad_norm": 0.5253757414894156, |
| "learning_rate": 4.87795833393199e-05, |
| "loss": 0.5256, |
| "num_tokens": 421401116.0, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.752913752913753, |
| "grad_norm": 0.4314784400622872, |
| "learning_rate": 4.876828813153055e-05, |
| "loss": 0.52, |
| "num_tokens": 422711836.0, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7552447552447552, |
| "grad_norm": 0.43464618943674793, |
| "learning_rate": 4.875694236522372e-05, |
| "loss": 0.5157, |
| "num_tokens": 424003342.0, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.43677420380768783, |
| "learning_rate": 4.8745546067370326e-05, |
| "loss": 0.5305, |
| "num_tokens": 425314062.0, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7599067599067599, |
| "grad_norm": 0.4958407404835606, |
| "learning_rate": 4.873409926506139e-05, |
| "loss": 0.5362, |
| "num_tokens": 426624782.0, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7622377622377622, |
| "grad_norm": 0.5242256465945466, |
| "learning_rate": 4.8722601985508024e-05, |
| "loss": 0.5369, |
| "num_tokens": 427935502.0, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7645687645687645, |
| "grad_norm": 0.5675275645432154, |
| "learning_rate": 4.871105425604129e-05, |
| "loss": 0.5422, |
| "num_tokens": 429246222.0, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7668997668997669, |
| "grad_norm": 0.5159660910808933, |
| "learning_rate": 4.869945610411222e-05, |
| "loss": 0.5379, |
| "num_tokens": 430556942.0, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.45981018020284387, |
| "learning_rate": 4.8687807557291684e-05, |
| "loss": 0.5233, |
| "num_tokens": 431867662.0, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7715617715617715, |
| "grad_norm": 0.5178606410785168, |
| "learning_rate": 4.867610864327035e-05, |
| "loss": 0.517, |
| "num_tokens": 433176429.0, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7738927738927739, |
| "grad_norm": 0.4153211549717526, |
| "learning_rate": 4.866435938985864e-05, |
| "loss": 0.521, |
| "num_tokens": 434474764.0, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7762237762237763, |
| "grad_norm": 0.43474480248852926, |
| "learning_rate": 4.8652559824986614e-05, |
| "loss": 0.5149, |
| "num_tokens": 435785484.0, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.7785547785547785, |
| "grad_norm": 0.40937995874484956, |
| "learning_rate": 4.8640709976703955e-05, |
| "loss": 0.5255, |
| "num_tokens": 437096204.0, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7808857808857809, |
| "grad_norm": 0.4465967325967671, |
| "learning_rate": 4.862880987317987e-05, |
| "loss": 0.5322, |
| "num_tokens": 438406924.0, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7832167832167832, |
| "grad_norm": 0.49435400982888844, |
| "learning_rate": 4.8616859542703015e-05, |
| "loss": 0.5139, |
| "num_tokens": 439717644.0, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7855477855477856, |
| "grad_norm": 0.45230753156297615, |
| "learning_rate": 4.860485901368146e-05, |
| "loss": 0.5204, |
| "num_tokens": 441012029.0, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7878787878787878, |
| "grad_norm": 0.46302825612412485, |
| "learning_rate": 4.859280831464262e-05, |
| "loss": 0.5307, |
| "num_tokens": 442322749.0, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7902097902097902, |
| "grad_norm": 0.4352069645722489, |
| "learning_rate": 4.858070747423315e-05, |
| "loss": 0.5293, |
| "num_tokens": 443633469.0, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.7925407925407926, |
| "grad_norm": 0.44776105300683866, |
| "learning_rate": 4.856855652121889e-05, |
| "loss": 0.5376, |
| "num_tokens": 444944189.0, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7948717948717948, |
| "grad_norm": 0.5047765930651309, |
| "learning_rate": 4.855635548448485e-05, |
| "loss": 0.5266, |
| "num_tokens": 446254909.0, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.7972027972027972, |
| "grad_norm": 0.5902940318374286, |
| "learning_rate": 4.8544104393035064e-05, |
| "loss": 0.5548, |
| "num_tokens": 447549445.0, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.7995337995337995, |
| "grad_norm": 0.47765982040228566, |
| "learning_rate": 4.8531803275992564e-05, |
| "loss": 0.5234, |
| "num_tokens": 448860165.0, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8018648018648019, |
| "grad_norm": 0.4683409566871783, |
| "learning_rate": 4.85194521625993e-05, |
| "loss": 0.5303, |
| "num_tokens": 450170885.0, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8041958041958042, |
| "grad_norm": 0.3875193157237528, |
| "learning_rate": 4.850705108221607e-05, |
| "loss": 0.5243, |
| "num_tokens": 451481605.0, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8065268065268065, |
| "grad_norm": 0.4090714505669396, |
| "learning_rate": 4.849460006432246e-05, |
| "loss": 0.5368, |
| "num_tokens": 452792325.0, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8088578088578089, |
| "grad_norm": 0.41342814545426965, |
| "learning_rate": 4.848209913851676e-05, |
| "loss": 0.5367, |
| "num_tokens": 454103045.0, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.8111888111888111, |
| "grad_norm": 0.43002723270421467, |
| "learning_rate": 4.8469548334515895e-05, |
| "loss": 0.5128, |
| "num_tokens": 455413765.0, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8135198135198135, |
| "grad_norm": 0.44536700540815066, |
| "learning_rate": 4.845694768215538e-05, |
| "loss": 0.5225, |
| "num_tokens": 456724485.0, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.8158508158508159, |
| "grad_norm": 0.41417616843988325, |
| "learning_rate": 4.844429721138921e-05, |
| "loss": 0.5179, |
| "num_tokens": 458035205.0, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.4335240853933784, |
| "learning_rate": 4.843159695228981e-05, |
| "loss": 0.5338, |
| "num_tokens": 459345925.0, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.8205128205128205, |
| "grad_norm": 0.4804531520417236, |
| "learning_rate": 4.841884693504796e-05, |
| "loss": 0.5301, |
| "num_tokens": 460656645.0, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8228438228438228, |
| "grad_norm": 0.426207694644574, |
| "learning_rate": 4.8406047189972745e-05, |
| "loss": 0.512, |
| "num_tokens": 461967365.0, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8251748251748252, |
| "grad_norm": 0.5170006552608741, |
| "learning_rate": 4.839319774749142e-05, |
| "loss": 0.5439, |
| "num_tokens": 463278085.0, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8275058275058275, |
| "grad_norm": 0.41296058440902, |
| "learning_rate": 4.8380298638149414e-05, |
| "loss": 0.529, |
| "num_tokens": 464588805.0, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8298368298368298, |
| "grad_norm": 0.4432327702578238, |
| "learning_rate": 4.8367349892610205e-05, |
| "loss": 0.5141, |
| "num_tokens": 465899525.0, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8321678321678322, |
| "grad_norm": 0.4186473579130964, |
| "learning_rate": 4.8354351541655295e-05, |
| "loss": 0.5056, |
| "num_tokens": 467210245.0, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8344988344988346, |
| "grad_norm": 0.4085487401716384, |
| "learning_rate": 4.834130361618407e-05, |
| "loss": 0.5201, |
| "num_tokens": 468520965.0, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8368298368298368, |
| "grad_norm": 0.41696496844476233, |
| "learning_rate": 4.832820614721377e-05, |
| "loss": 0.5182, |
| "num_tokens": 469831685.0, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8391608391608392, |
| "grad_norm": 0.49106045483752714, |
| "learning_rate": 4.8315059165879424e-05, |
| "loss": 0.5053, |
| "num_tokens": 471142405.0, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8414918414918415, |
| "grad_norm": 0.48437176577007535, |
| "learning_rate": 4.830186270343375e-05, |
| "loss": 0.5168, |
| "num_tokens": 472453125.0, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8438228438228438, |
| "grad_norm": 0.40932065591417777, |
| "learning_rate": 4.828861679124711e-05, |
| "loss": 0.5381, |
| "num_tokens": 473763845.0, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.5194673863667205, |
| "learning_rate": 4.827532146080738e-05, |
| "loss": 0.5299, |
| "num_tokens": 475074565.0, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8484848484848485, |
| "grad_norm": 0.41189276085541665, |
| "learning_rate": 4.826197674371995e-05, |
| "loss": 0.5107, |
| "num_tokens": 476385285.0, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8508158508158508, |
| "grad_norm": 0.41512648167613064, |
| "learning_rate": 4.8248582671707585e-05, |
| "loss": 0.5182, |
| "num_tokens": 477684249.0, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8531468531468531, |
| "grad_norm": 0.39429940494764987, |
| "learning_rate": 4.8235139276610395e-05, |
| "loss": 0.527, |
| "num_tokens": 478994969.0, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8554778554778555, |
| "grad_norm": 0.4269707121617, |
| "learning_rate": 4.8221646590385723e-05, |
| "loss": 0.5202, |
| "num_tokens": 480305689.0, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8578088578088578, |
| "grad_norm": 0.4747259759226198, |
| "learning_rate": 4.8208104645108086e-05, |
| "loss": 0.5163, |
| "num_tokens": 481616409.0, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8601398601398601, |
| "grad_norm": 0.4214636443982996, |
| "learning_rate": 4.819451347296912e-05, |
| "loss": 0.5202, |
| "num_tokens": 482927129.0, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8624708624708625, |
| "grad_norm": 0.4097076386562582, |
| "learning_rate": 4.818087310627746e-05, |
| "loss": 0.5198, |
| "num_tokens": 484237849.0, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8648018648018648, |
| "grad_norm": 0.4415899349026412, |
| "learning_rate": 4.816718357745869e-05, |
| "loss": 0.5116, |
| "num_tokens": 485544074.0, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8671328671328671, |
| "grad_norm": 0.49520322041534987, |
| "learning_rate": 4.815344491905527e-05, |
| "loss": 0.5268, |
| "num_tokens": 486854794.0, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8694638694638694, |
| "grad_norm": 0.4013730407728088, |
| "learning_rate": 4.813965716372644e-05, |
| "loss": 0.5357, |
| "num_tokens": 488165514.0, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8717948717948718, |
| "grad_norm": 0.3971657654693432, |
| "learning_rate": 4.812582034424815e-05, |
| "loss": 0.5036, |
| "num_tokens": 489476234.0, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8741258741258742, |
| "grad_norm": 0.4546086981413901, |
| "learning_rate": 4.811193449351301e-05, |
| "loss": 0.5185, |
| "num_tokens": 490786954.0, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8764568764568764, |
| "grad_norm": 0.4514734870231076, |
| "learning_rate": 4.809799964453014e-05, |
| "loss": 0.5285, |
| "num_tokens": 492097674.0, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8787878787878788, |
| "grad_norm": 0.486652363082405, |
| "learning_rate": 4.808401583042517e-05, |
| "loss": 0.5214, |
| "num_tokens": 493408394.0, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.8811188811188811, |
| "grad_norm": 0.38657514612617594, |
| "learning_rate": 4.806998308444014e-05, |
| "loss": 0.5285, |
| "num_tokens": 494719114.0, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8834498834498834, |
| "grad_norm": 0.43457520597462723, |
| "learning_rate": 4.805590143993337e-05, |
| "loss": 0.5283, |
| "num_tokens": 496018186.0, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8857808857808858, |
| "grad_norm": 0.45512971801887536, |
| "learning_rate": 4.804177093037947e-05, |
| "loss": 0.5167, |
| "num_tokens": 497312162.0, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8881118881118881, |
| "grad_norm": 0.5864459417857114, |
| "learning_rate": 4.802759158936914e-05, |
| "loss": 0.507, |
| "num_tokens": 498622882.0, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.8904428904428905, |
| "grad_norm": 0.4119135091650743, |
| "learning_rate": 4.801336345060925e-05, |
| "loss": 0.5075, |
| "num_tokens": 499933602.0, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8927738927738927, |
| "grad_norm": 0.5607982382740162, |
| "learning_rate": 4.79990865479226e-05, |
| "loss": 0.5317, |
| "num_tokens": 501244322.0, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.8951048951048951, |
| "grad_norm": 0.4424330844089448, |
| "learning_rate": 4.7984760915247945e-05, |
| "loss": 0.5024, |
| "num_tokens": 502555042.0, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 0.4047970520184837, |
| "learning_rate": 4.7970386586639867e-05, |
| "loss": 0.4966, |
| "num_tokens": 503865762.0, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.8997668997668997, |
| "grad_norm": 0.44771407799642704, |
| "learning_rate": 4.795596359626871e-05, |
| "loss": 0.5236, |
| "num_tokens": 505176482.0, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.9020979020979021, |
| "grad_norm": 0.5048948938413714, |
| "learning_rate": 4.794149197842051e-05, |
| "loss": 0.5179, |
| "num_tokens": 506479186.0, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.9044289044289044, |
| "grad_norm": 0.45102633005957127, |
| "learning_rate": 4.792697176749686e-05, |
| "loss": 0.5329, |
| "num_tokens": 507789906.0, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.9067599067599068, |
| "grad_norm": 0.47896436686520744, |
| "learning_rate": 4.791240299801492e-05, |
| "loss": 0.5144, |
| "num_tokens": 509100626.0, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.44024138392686507, |
| "learning_rate": 4.7897785704607244e-05, |
| "loss": 0.5319, |
| "num_tokens": 510411346.0, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9114219114219114, |
| "grad_norm": 0.46492535601527907, |
| "learning_rate": 4.7883119922021744e-05, |
| "loss": 0.5005, |
| "num_tokens": 511720801.0, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.9137529137529138, |
| "grad_norm": 0.47305357877273235, |
| "learning_rate": 4.7868405685121614e-05, |
| "loss": 0.5058, |
| "num_tokens": 513031521.0, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.916083916083916, |
| "grad_norm": 0.5349540175928681, |
| "learning_rate": 4.7853643028885216e-05, |
| "loss": 0.5259, |
| "num_tokens": 514342241.0, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.9184149184149184, |
| "grad_norm": 0.4744283556978438, |
| "learning_rate": 4.783883198840601e-05, |
| "loss": 0.5247, |
| "num_tokens": 515652961.0, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9207459207459208, |
| "grad_norm": 0.4233785441775157, |
| "learning_rate": 4.78239725988925e-05, |
| "loss": 0.5229, |
| "num_tokens": 516963681.0, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.48524455188818355, |
| "learning_rate": 4.78090648956681e-05, |
| "loss": 0.5176, |
| "num_tokens": 518274401.0, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9254079254079254, |
| "grad_norm": 0.4955971872107986, |
| "learning_rate": 4.779410891417107e-05, |
| "loss": 0.517, |
| "num_tokens": 519585121.0, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9277389277389277, |
| "grad_norm": 0.45326272641602827, |
| "learning_rate": 4.777910468995447e-05, |
| "loss": 0.525, |
| "num_tokens": 520895841.0, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9300699300699301, |
| "grad_norm": 0.5154625413913212, |
| "learning_rate": 4.7764052258686e-05, |
| "loss": 0.5155, |
| "num_tokens": 522206561.0, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.9324009324009324, |
| "grad_norm": 0.5173600774002117, |
| "learning_rate": 4.774895165614799e-05, |
| "loss": 0.5368, |
| "num_tokens": 523517281.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9347319347319347, |
| "grad_norm": 0.4394182635279032, |
| "learning_rate": 4.773380291823726e-05, |
| "loss": 0.5112, |
| "num_tokens": 524828001.0, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9370629370629371, |
| "grad_norm": 0.46211117414103975, |
| "learning_rate": 4.7718606080965064e-05, |
| "loss": 0.5176, |
| "num_tokens": 526138721.0, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9393939393939394, |
| "grad_norm": 0.44369729323375096, |
| "learning_rate": 4.770336118045701e-05, |
| "loss": 0.5202, |
| "num_tokens": 527449441.0, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9417249417249417, |
| "grad_norm": 0.46786194576051876, |
| "learning_rate": 4.768806825295292e-05, |
| "loss": 0.5435, |
| "num_tokens": 528760161.0, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9440559440559441, |
| "grad_norm": 0.45716691252844754, |
| "learning_rate": 4.7672727334806844e-05, |
| "loss": 0.5217, |
| "num_tokens": 530065986.0, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.9463869463869464, |
| "grad_norm": 0.4716782323029019, |
| "learning_rate": 4.765733846248685e-05, |
| "loss": 0.5093, |
| "num_tokens": 531376706.0, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9487179487179487, |
| "grad_norm": 0.4809813652755708, |
| "learning_rate": 4.764190167257508e-05, |
| "loss": 0.5222, |
| "num_tokens": 532687426.0, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.951048951048951, |
| "grad_norm": 0.42126121038875564, |
| "learning_rate": 4.7626417001767495e-05, |
| "loss": 0.5105, |
| "num_tokens": 533998146.0, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9533799533799534, |
| "grad_norm": 0.41522513599681043, |
| "learning_rate": 4.7610884486873947e-05, |
| "loss": 0.5056, |
| "num_tokens": 535308866.0, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9557109557109557, |
| "grad_norm": 0.4622274108637101, |
| "learning_rate": 4.759530416481798e-05, |
| "loss": 0.5275, |
| "num_tokens": 536619586.0, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.958041958041958, |
| "grad_norm": 0.411035345439206, |
| "learning_rate": 4.757967607263681e-05, |
| "loss": 0.5172, |
| "num_tokens": 537916961.0, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.9603729603729604, |
| "grad_norm": 0.41117700376194166, |
| "learning_rate": 4.756400024748121e-05, |
| "loss": 0.5129, |
| "num_tokens": 539227681.0, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9627039627039627, |
| "grad_norm": 0.4009672829244564, |
| "learning_rate": 4.75482767266154e-05, |
| "loss": 0.5233, |
| "num_tokens": 540538401.0, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.965034965034965, |
| "grad_norm": 0.38122118693492923, |
| "learning_rate": 4.7532505547417e-05, |
| "loss": 0.5142, |
| "num_tokens": 541849121.0, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9673659673659674, |
| "grad_norm": 0.45914187153924546, |
| "learning_rate": 4.7516686747376926e-05, |
| "loss": 0.5085, |
| "num_tokens": 543159841.0, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.43269563864820926, |
| "learning_rate": 4.7500820364099287e-05, |
| "loss": 0.5108, |
| "num_tokens": 544470561.0, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.972027972027972, |
| "grad_norm": 0.5180648923614879, |
| "learning_rate": 4.74849064353013e-05, |
| "loss": 0.5101, |
| "num_tokens": 545781281.0, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9743589743589743, |
| "grad_norm": 0.4167054274754852, |
| "learning_rate": 4.746894499881322e-05, |
| "loss": 0.5058, |
| "num_tokens": 547092001.0, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9766899766899767, |
| "grad_norm": 0.4186229461075277, |
| "learning_rate": 4.745293609257822e-05, |
| "loss": 0.5063, |
| "num_tokens": 548394350.0, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.9790209790209791, |
| "grad_norm": 0.45822668262911204, |
| "learning_rate": 4.7436879754652345e-05, |
| "loss": 0.5252, |
| "num_tokens": 549691697.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9813519813519813, |
| "grad_norm": 0.3764583775129133, |
| "learning_rate": 4.742077602320437e-05, |
| "loss": 0.5007, |
| "num_tokens": 551002417.0, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9836829836829837, |
| "grad_norm": 0.39341524513084175, |
| "learning_rate": 4.7404624936515746e-05, |
| "loss": 0.5171, |
| "num_tokens": 552313137.0, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.986013986013986, |
| "grad_norm": 0.4199783806277457, |
| "learning_rate": 4.738842653298048e-05, |
| "loss": 0.5069, |
| "num_tokens": 553623857.0, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.9883449883449883, |
| "grad_norm": 0.4951470880188904, |
| "learning_rate": 4.737218085110506e-05, |
| "loss": 0.5139, |
| "num_tokens": 554934577.0, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9906759906759907, |
| "grad_norm": 0.4660659693883058, |
| "learning_rate": 4.73558879295084e-05, |
| "loss": 0.5158, |
| "num_tokens": 556245297.0, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.993006993006993, |
| "grad_norm": 0.4274088119401731, |
| "learning_rate": 4.733954780692165e-05, |
| "loss": 0.5086, |
| "num_tokens": 557556017.0, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9953379953379954, |
| "grad_norm": 0.3888459894818999, |
| "learning_rate": 4.732316052218822e-05, |
| "loss": 0.5214, |
| "num_tokens": 558866737.0, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.9976689976689976, |
| "grad_norm": 0.46590499158532944, |
| "learning_rate": 4.730672611426361e-05, |
| "loss": 0.4982, |
| "num_tokens": 560177457.0, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.40795631542247435, |
| "learning_rate": 4.729024462221533e-05, |
| "loss": 0.5045, |
| "num_tokens": 561488177.0, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.0023310023310024, |
| "grad_norm": 0.4709448881516708, |
| "learning_rate": 4.727371608522284e-05, |
| "loss": 0.4741, |
| "num_tokens": 562798897.0, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.0046620046620047, |
| "grad_norm": 0.39954760886818896, |
| "learning_rate": 4.725714054257742e-05, |
| "loss": 0.4879, |
| "num_tokens": 564109617.0, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.006993006993007, |
| "grad_norm": 0.48984368311093457, |
| "learning_rate": 4.724051803368209e-05, |
| "loss": 0.4857, |
| "num_tokens": 565420337.0, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.0093240093240092, |
| "grad_norm": 0.46609742273249777, |
| "learning_rate": 4.7223848598051514e-05, |
| "loss": 0.4796, |
| "num_tokens": 566731057.0, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.0116550116550116, |
| "grad_norm": 0.36935474096318466, |
| "learning_rate": 4.720713227531193e-05, |
| "loss": 0.4696, |
| "num_tokens": 568041777.0, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.013986013986014, |
| "grad_norm": 0.3580596575113959, |
| "learning_rate": 4.719036910520102e-05, |
| "loss": 0.4624, |
| "num_tokens": 569352497.0, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.0163170163170163, |
| "grad_norm": 0.39328253702466387, |
| "learning_rate": 4.717355912756783e-05, |
| "loss": 0.4874, |
| "num_tokens": 570663217.0, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.0186480186480187, |
| "grad_norm": 0.43854519615893484, |
| "learning_rate": 4.715670238237267e-05, |
| "loss": 0.4921, |
| "num_tokens": 571946596.0, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.020979020979021, |
| "grad_norm": 0.4320886782336019, |
| "learning_rate": 4.713979890968704e-05, |
| "loss": 0.4726, |
| "num_tokens": 573254295.0, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.0233100233100234, |
| "grad_norm": 0.417052323523323, |
| "learning_rate": 4.712284874969351e-05, |
| "loss": 0.4761, |
| "num_tokens": 574555927.0, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.0256410256410255, |
| "grad_norm": 0.39493641603163543, |
| "learning_rate": 4.710585194268564e-05, |
| "loss": 0.4708, |
| "num_tokens": 575866647.0, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.027972027972028, |
| "grad_norm": 0.43644055780187835, |
| "learning_rate": 4.708880852906786e-05, |
| "loss": 0.4811, |
| "num_tokens": 577177367.0, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.0303030303030303, |
| "grad_norm": 0.44168949675712726, |
| "learning_rate": 4.707171854935542e-05, |
| "loss": 0.487, |
| "num_tokens": 578488087.0, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.0326340326340326, |
| "grad_norm": 0.4723407127116691, |
| "learning_rate": 4.705458204417426e-05, |
| "loss": 0.4752, |
| "num_tokens": 579798807.0, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.034965034965035, |
| "grad_norm": 0.5001904118319063, |
| "learning_rate": 4.703739905426089e-05, |
| "loss": 0.4641, |
| "num_tokens": 581109527.0, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0372960372960374, |
| "grad_norm": 0.41134533047549643, |
| "learning_rate": 4.7020169620462363e-05, |
| "loss": 0.4888, |
| "num_tokens": 582420247.0, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.0396270396270397, |
| "grad_norm": 0.4299285336564839, |
| "learning_rate": 4.7002893783736104e-05, |
| "loss": 0.4663, |
| "num_tokens": 583730967.0, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0419580419580419, |
| "grad_norm": 0.39597875355379963, |
| "learning_rate": 4.6985571585149876e-05, |
| "loss": 0.4913, |
| "num_tokens": 585031618.0, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.0442890442890442, |
| "grad_norm": 0.5200351424675955, |
| "learning_rate": 4.696820306588162e-05, |
| "loss": 0.4696, |
| "num_tokens": 586342338.0, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0466200466200466, |
| "grad_norm": 0.45179809404395227, |
| "learning_rate": 4.6950788267219425e-05, |
| "loss": 0.479, |
| "num_tokens": 587653058.0, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.048951048951049, |
| "grad_norm": 0.8707060578758659, |
| "learning_rate": 4.6933327230561366e-05, |
| "loss": 0.4666, |
| "num_tokens": 588963778.0, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0512820512820513, |
| "grad_norm": 0.4615963497950469, |
| "learning_rate": 4.691581999741544e-05, |
| "loss": 0.477, |
| "num_tokens": 590274498.0, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.0536130536130537, |
| "grad_norm": 0.44042568334028054, |
| "learning_rate": 4.689826660939947e-05, |
| "loss": 0.4835, |
| "num_tokens": 591579372.0, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.055944055944056, |
| "grad_norm": 0.4171686514533797, |
| "learning_rate": 4.6880667108241e-05, |
| "loss": 0.4755, |
| "num_tokens": 592882201.0, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.0582750582750582, |
| "grad_norm": 0.3747338047053368, |
| "learning_rate": 4.686302153577717e-05, |
| "loss": 0.4797, |
| "num_tokens": 594192921.0, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 0.45499821388190786, |
| "learning_rate": 4.6845329933954685e-05, |
| "loss": 0.4933, |
| "num_tokens": 595488293.0, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.062937062937063, |
| "grad_norm": 0.4457910657080489, |
| "learning_rate": 4.682759234482961e-05, |
| "loss": 0.4812, |
| "num_tokens": 596799013.0, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.0652680652680653, |
| "grad_norm": 0.48446830788970874, |
| "learning_rate": 4.680980881056736e-05, |
| "loss": 0.4807, |
| "num_tokens": 598097884.0, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.0675990675990676, |
| "grad_norm": 0.39552772687416615, |
| "learning_rate": 4.6791979373442594e-05, |
| "loss": 0.4788, |
| "num_tokens": 599408604.0, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.06993006993007, |
| "grad_norm": 0.4456624889263455, |
| "learning_rate": 4.6774104075839055e-05, |
| "loss": 0.4652, |
| "num_tokens": 600719324.0, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.0722610722610724, |
| "grad_norm": 0.4907163642978358, |
| "learning_rate": 4.6756182960249514e-05, |
| "loss": 0.4881, |
| "num_tokens": 602030044.0, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0745920745920745, |
| "grad_norm": 0.4552923449742935, |
| "learning_rate": 4.6738216069275656e-05, |
| "loss": 0.4767, |
| "num_tokens": 603340764.0, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 0.42327123031082714, |
| "learning_rate": 4.6720203445628006e-05, |
| "loss": 0.4698, |
| "num_tokens": 604651484.0, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.0792540792540792, |
| "grad_norm": 0.42295921506423456, |
| "learning_rate": 4.6702145132125774e-05, |
| "loss": 0.4814, |
| "num_tokens": 605950791.0, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.0815850815850816, |
| "grad_norm": 0.4362001736140205, |
| "learning_rate": 4.668404117169679e-05, |
| "loss": 0.4859, |
| "num_tokens": 607261511.0, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.083916083916084, |
| "grad_norm": 0.404472977066644, |
| "learning_rate": 4.6665891607377415e-05, |
| "loss": 0.4841, |
| "num_tokens": 608572231.0, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.0862470862470863, |
| "grad_norm": 0.4154110483833513, |
| "learning_rate": 4.664769648231239e-05, |
| "loss": 0.4737, |
| "num_tokens": 609882951.0, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0885780885780885, |
| "grad_norm": 0.4046042178116509, |
| "learning_rate": 4.662945583975478e-05, |
| "loss": 0.4874, |
| "num_tokens": 611193671.0, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.6093115629231195, |
| "learning_rate": 4.6611169723065854e-05, |
| "loss": 0.4522, |
| "num_tokens": 612504391.0, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.0932400932400932, |
| "grad_norm": 0.5011935199868325, |
| "learning_rate": 4.659283817571496e-05, |
| "loss": 0.4816, |
| "num_tokens": 613815111.0, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.0955710955710956, |
| "grad_norm": 0.4169395778521336, |
| "learning_rate": 4.657446124127948e-05, |
| "loss": 0.4807, |
| "num_tokens": 615125831.0, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.097902097902098, |
| "grad_norm": 0.41261114151322104, |
| "learning_rate": 4.655603896344465e-05, |
| "loss": 0.4881, |
| "num_tokens": 616436551.0, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.1002331002331003, |
| "grad_norm": 0.4380543995664826, |
| "learning_rate": 4.653757138600352e-05, |
| "loss": 0.4654, |
| "num_tokens": 617747271.0, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.1025641025641026, |
| "grad_norm": 0.4436608684626528, |
| "learning_rate": 4.651905855285682e-05, |
| "loss": 0.4568, |
| "num_tokens": 619057991.0, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.104895104895105, |
| "grad_norm": 0.41612907159233714, |
| "learning_rate": 4.650050050801285e-05, |
| "loss": 0.479, |
| "num_tokens": 620368711.0, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.1072261072261071, |
| "grad_norm": 0.4396241255197849, |
| "learning_rate": 4.64818972955874e-05, |
| "loss": 0.4708, |
| "num_tokens": 621679431.0, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.1095571095571095, |
| "grad_norm": 0.4231567577088472, |
| "learning_rate": 4.646324895980363e-05, |
| "loss": 0.476, |
| "num_tokens": 622990151.0, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.1118881118881119, |
| "grad_norm": 0.37510083417666035, |
| "learning_rate": 4.6444555544991965e-05, |
| "loss": 0.4699, |
| "num_tokens": 624300871.0, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.1142191142191142, |
| "grad_norm": 0.41354134914061674, |
| "learning_rate": 4.642581709558998e-05, |
| "loss": 0.4922, |
| "num_tokens": 625611591.0, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.1165501165501166, |
| "grad_norm": 0.4506054239813869, |
| "learning_rate": 4.640703365614233e-05, |
| "loss": 0.4777, |
| "num_tokens": 626922311.0, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.118881118881119, |
| "grad_norm": 0.49541627026643736, |
| "learning_rate": 4.6388205271300585e-05, |
| "loss": 0.4784, |
| "num_tokens": 628220877.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.121212121212121, |
| "grad_norm": 0.4457913773922344, |
| "learning_rate": 4.636933198582319e-05, |
| "loss": 0.4847, |
| "num_tokens": 629531597.0, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.1235431235431235, |
| "grad_norm": 0.4403563514498699, |
| "learning_rate": 4.63504138445753e-05, |
| "loss": 0.4913, |
| "num_tokens": 630842317.0, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.1258741258741258, |
| "grad_norm": 0.5039870137615442, |
| "learning_rate": 4.6331450892528725e-05, |
| "loss": 0.4767, |
| "num_tokens": 632153037.0, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.1282051282051282, |
| "grad_norm": 0.43430596728961307, |
| "learning_rate": 4.631244317476179e-05, |
| "loss": 0.4818, |
| "num_tokens": 633463757.0, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.1305361305361306, |
| "grad_norm": 0.4943774670951527, |
| "learning_rate": 4.6293390736459226e-05, |
| "loss": 0.4692, |
| "num_tokens": 634774477.0, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.132867132867133, |
| "grad_norm": 0.45784553779974424, |
| "learning_rate": 4.627429362291208e-05, |
| "loss": 0.4787, |
| "num_tokens": 636085197.0, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.1351981351981353, |
| "grad_norm": 0.47029067075996683, |
| "learning_rate": 4.62551518795176e-05, |
| "loss": 0.4722, |
| "num_tokens": 637395917.0, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.1375291375291376, |
| "grad_norm": 0.444979370543174, |
| "learning_rate": 4.623596555177913e-05, |
| "loss": 0.4613, |
| "num_tokens": 638706637.0, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.1398601398601398, |
| "grad_norm": 0.3767342879009302, |
| "learning_rate": 4.621673468530599e-05, |
| "loss": 0.4723, |
| "num_tokens": 640017357.0, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.1421911421911422, |
| "grad_norm": 0.4436410093654824, |
| "learning_rate": 4.6197459325813406e-05, |
| "loss": 0.4814, |
| "num_tokens": 641328077.0, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.1445221445221445, |
| "grad_norm": 0.45322324569299677, |
| "learning_rate": 4.617813951912231e-05, |
| "loss": 0.4648, |
| "num_tokens": 642637532.0, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.1468531468531469, |
| "grad_norm": 0.3889321859528221, |
| "learning_rate": 4.6158775311159357e-05, |
| "loss": 0.4776, |
| "num_tokens": 643948252.0, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1491841491841492, |
| "grad_norm": 0.38162917158886384, |
| "learning_rate": 4.613936674795672e-05, |
| "loss": 0.4886, |
| "num_tokens": 645258972.0, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.1515151515151516, |
| "grad_norm": 0.4103235852443348, |
| "learning_rate": 4.611991387565202e-05, |
| "loss": 0.4854, |
| "num_tokens": 646569692.0, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 0.4463003624553352, |
| "learning_rate": 4.6100416740488204e-05, |
| "loss": 0.4682, |
| "num_tokens": 647864905.0, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.156177156177156, |
| "grad_norm": 1.1574168466987094, |
| "learning_rate": 4.608087538881344e-05, |
| "loss": 0.4912, |
| "num_tokens": 649175625.0, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1585081585081585, |
| "grad_norm": 0.3575350970980845, |
| "learning_rate": 4.606128986708101e-05, |
| "loss": 0.4725, |
| "num_tokens": 650486345.0, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.1608391608391608, |
| "grad_norm": 0.4698297516737484, |
| "learning_rate": 4.604166022184921e-05, |
| "loss": 0.4818, |
| "num_tokens": 651772267.0, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1631701631701632, |
| "grad_norm": 0.4518636492405959, |
| "learning_rate": 4.602198649978119e-05, |
| "loss": 0.4823, |
| "num_tokens": 653082987.0, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.1655011655011656, |
| "grad_norm": 0.4108073804989677, |
| "learning_rate": 4.600226874764491e-05, |
| "loss": 0.4718, |
| "num_tokens": 654390572.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.167832167832168, |
| "grad_norm": 0.42730405936868865, |
| "learning_rate": 4.598250701231299e-05, |
| "loss": 0.4621, |
| "num_tokens": 655701292.0, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.1701631701631703, |
| "grad_norm": 0.4294330624184933, |
| "learning_rate": 4.596270134076259e-05, |
| "loss": 0.4773, |
| "num_tokens": 657012012.0, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1724941724941724, |
| "grad_norm": 0.4229790807053798, |
| "learning_rate": 4.594285178007534e-05, |
| "loss": 0.4889, |
| "num_tokens": 658322732.0, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.1748251748251748, |
| "grad_norm": 0.3704279315350088, |
| "learning_rate": 4.592295837743719e-05, |
| "loss": 0.4645, |
| "num_tokens": 659633452.0, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1771561771561772, |
| "grad_norm": 0.4283412404900056, |
| "learning_rate": 4.590302118013829e-05, |
| "loss": 0.4722, |
| "num_tokens": 660944172.0, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.1794871794871795, |
| "grad_norm": 0.44943350837379137, |
| "learning_rate": 4.588304023557293e-05, |
| "loss": 0.5052, |
| "num_tokens": 662254892.0, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.1818181818181819, |
| "grad_norm": 0.3908539181988753, |
| "learning_rate": 4.586301559123939e-05, |
| "loss": 0.4688, |
| "num_tokens": 663565612.0, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.1841491841491842, |
| "grad_norm": 0.4160330010793307, |
| "learning_rate": 4.5842947294739815e-05, |
| "loss": 0.4725, |
| "num_tokens": 664876332.0, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1864801864801864, |
| "grad_norm": 0.4360101739142026, |
| "learning_rate": 4.582283539378012e-05, |
| "loss": 0.4849, |
| "num_tokens": 666187052.0, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.1888111888111887, |
| "grad_norm": 0.4342183592761623, |
| "learning_rate": 4.580267993616991e-05, |
| "loss": 0.4825, |
| "num_tokens": 667497772.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.191142191142191, |
| "grad_norm": 0.39093089607584136, |
| "learning_rate": 4.578248096982227e-05, |
| "loss": 0.4577, |
| "num_tokens": 668808492.0, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.1934731934731935, |
| "grad_norm": 0.4265274502701168, |
| "learning_rate": 4.576223854275378e-05, |
| "loss": 0.4695, |
| "num_tokens": 670119212.0, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1958041958041958, |
| "grad_norm": 0.5106783765028553, |
| "learning_rate": 4.574195270308428e-05, |
| "loss": 0.4596, |
| "num_tokens": 671429932.0, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.1981351981351982, |
| "grad_norm": 0.45465970639582615, |
| "learning_rate": 4.572162349903685e-05, |
| "loss": 0.4808, |
| "num_tokens": 672740652.0, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.2004662004662006, |
| "grad_norm": 0.4654417167187371, |
| "learning_rate": 4.570125097893762e-05, |
| "loss": 0.481, |
| "num_tokens": 674051372.0, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.2027972027972027, |
| "grad_norm": 0.41022644781405776, |
| "learning_rate": 4.568083519121572e-05, |
| "loss": 0.4741, |
| "num_tokens": 675362092.0, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.205128205128205, |
| "grad_norm": 0.43369798835654055, |
| "learning_rate": 4.566037618440313e-05, |
| "loss": 0.4842, |
| "num_tokens": 676670859.0, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.2074592074592074, |
| "grad_norm": 0.5470112563703007, |
| "learning_rate": 4.563987400713454e-05, |
| "loss": 0.4745, |
| "num_tokens": 677981579.0, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.2097902097902098, |
| "grad_norm": 0.4175250823314187, |
| "learning_rate": 4.561932870814729e-05, |
| "loss": 0.4714, |
| "num_tokens": 679282079.0, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 0.38110478817150606, |
| "learning_rate": 4.5598740336281225e-05, |
| "loss": 0.4675, |
| "num_tokens": 680592799.0, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.2144522144522145, |
| "grad_norm": 0.47620665151012714, |
| "learning_rate": 4.557810894047859e-05, |
| "loss": 0.4964, |
| "num_tokens": 681887165.0, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.2167832167832167, |
| "grad_norm": 0.4049180388899848, |
| "learning_rate": 4.555743456978388e-05, |
| "loss": 0.4744, |
| "num_tokens": 683197885.0, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.219114219114219, |
| "grad_norm": 0.4199713705201553, |
| "learning_rate": 4.553671727334378e-05, |
| "loss": 0.4786, |
| "num_tokens": 684508605.0, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.2214452214452214, |
| "grad_norm": 0.39770427056801977, |
| "learning_rate": 4.5515957100407e-05, |
| "loss": 0.4696, |
| "num_tokens": 685819325.0, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.2237762237762237, |
| "grad_norm": 0.4512802814267864, |
| "learning_rate": 4.5495154100324166e-05, |
| "loss": 0.4872, |
| "num_tokens": 687130045.0, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.2261072261072261, |
| "grad_norm": 0.42479431900627285, |
| "learning_rate": 4.547430832254773e-05, |
| "loss": 0.4706, |
| "num_tokens": 688440765.0, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.2284382284382285, |
| "grad_norm": 0.43515400124583126, |
| "learning_rate": 4.545341981663182e-05, |
| "loss": 0.4647, |
| "num_tokens": 689751485.0, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 0.392935620732648, |
| "learning_rate": 4.543248863223215e-05, |
| "loss": 0.4685, |
| "num_tokens": 691062205.0, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.2331002331002332, |
| "grad_norm": 0.46909934828902206, |
| "learning_rate": 4.541151481910589e-05, |
| "loss": 0.4717, |
| "num_tokens": 692372925.0, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.2354312354312353, |
| "grad_norm": 0.3944491634545739, |
| "learning_rate": 4.5390498427111525e-05, |
| "loss": 0.4873, |
| "num_tokens": 693683645.0, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.2377622377622377, |
| "grad_norm": 0.3542154968237942, |
| "learning_rate": 4.536943950620877e-05, |
| "loss": 0.4947, |
| "num_tokens": 694994365.0, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.24009324009324, |
| "grad_norm": 0.43496568575132033, |
| "learning_rate": 4.5348338106458446e-05, |
| "loss": 0.465, |
| "num_tokens": 696305085.0, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.2424242424242424, |
| "grad_norm": 0.38699131626194777, |
| "learning_rate": 4.532719427802234e-05, |
| "loss": 0.4752, |
| "num_tokens": 697607524.0, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.2447552447552448, |
| "grad_norm": 0.42606969233099046, |
| "learning_rate": 4.5306008071163105e-05, |
| "loss": 0.4965, |
| "num_tokens": 698918244.0, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2470862470862472, |
| "grad_norm": 0.46040092500555474, |
| "learning_rate": 4.528477953624416e-05, |
| "loss": 0.4861, |
| "num_tokens": 700228964.0, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.2494172494172493, |
| "grad_norm": 0.40529545375633014, |
| "learning_rate": 4.526350872372949e-05, |
| "loss": 0.4576, |
| "num_tokens": 701539684.0, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2517482517482517, |
| "grad_norm": 0.4181283911639419, |
| "learning_rate": 4.524219568418364e-05, |
| "loss": 0.4711, |
| "num_tokens": 702850404.0, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.254079254079254, |
| "grad_norm": 0.39553338610588684, |
| "learning_rate": 4.522084046827148e-05, |
| "loss": 0.476, |
| "num_tokens": 704161124.0, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.2564102564102564, |
| "grad_norm": 0.37450359234529307, |
| "learning_rate": 4.51994431267582e-05, |
| "loss": 0.4639, |
| "num_tokens": 705462856.0, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.2587412587412588, |
| "grad_norm": 0.4094194955768158, |
| "learning_rate": 4.5178003710509087e-05, |
| "loss": 0.4825, |
| "num_tokens": 706773576.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2610722610722611, |
| "grad_norm": 0.4885076570757472, |
| "learning_rate": 4.515652227048946e-05, |
| "loss": 0.4737, |
| "num_tokens": 708084296.0, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.2634032634032635, |
| "grad_norm": 0.49289407887729214, |
| "learning_rate": 4.513499885776453e-05, |
| "loss": 0.4757, |
| "num_tokens": 709395016.0, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.2657342657342658, |
| "grad_norm": 0.3999863566296061, |
| "learning_rate": 4.511343352349931e-05, |
| "loss": 0.4839, |
| "num_tokens": 710690433.0, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.2680652680652682, |
| "grad_norm": 0.4305940675053385, |
| "learning_rate": 4.5091826318958434e-05, |
| "loss": 0.4744, |
| "num_tokens": 712001153.0, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.2703962703962703, |
| "grad_norm": 0.4514699940855802, |
| "learning_rate": 4.50701772955061e-05, |
| "loss": 0.4656, |
| "num_tokens": 713311873.0, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.3809637591571807, |
| "learning_rate": 4.5048486504605874e-05, |
| "loss": 0.4627, |
| "num_tokens": 714622593.0, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.275058275058275, |
| "grad_norm": 0.41819681647184476, |
| "learning_rate": 4.502675399782066e-05, |
| "loss": 0.4746, |
| "num_tokens": 715933313.0, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.2773892773892774, |
| "grad_norm": 0.4133255247787771, |
| "learning_rate": 4.5004979826812505e-05, |
| "loss": 0.4763, |
| "num_tokens": 717244033.0, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2797202797202798, |
| "grad_norm": 0.4440690290427124, |
| "learning_rate": 4.498316404334249e-05, |
| "loss": 0.4857, |
| "num_tokens": 718554753.0, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.282051282051282, |
| "grad_norm": 0.41320454746022983, |
| "learning_rate": 4.4961306699270634e-05, |
| "loss": 0.4812, |
| "num_tokens": 719865473.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2843822843822843, |
| "grad_norm": 0.37971285454946885, |
| "learning_rate": 4.4939407846555734e-05, |
| "loss": 0.4592, |
| "num_tokens": 721176193.0, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.2867132867132867, |
| "grad_norm": 0.38669049099636904, |
| "learning_rate": 4.49174675372553e-05, |
| "loss": 0.4808, |
| "num_tokens": 722486913.0, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.289044289044289, |
| "grad_norm": 0.38833499571078073, |
| "learning_rate": 4.489548582352533e-05, |
| "loss": 0.4648, |
| "num_tokens": 723781879.0, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.2913752913752914, |
| "grad_norm": 0.37993452738902034, |
| "learning_rate": 4.487346275762031e-05, |
| "loss": 0.468, |
| "num_tokens": 725092599.0, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2937062937062938, |
| "grad_norm": 0.3673232976728133, |
| "learning_rate": 4.4851398391892976e-05, |
| "loss": 0.4648, |
| "num_tokens": 726403319.0, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.2960372960372961, |
| "grad_norm": 0.4116221051012755, |
| "learning_rate": 4.482929277879428e-05, |
| "loss": 0.4745, |
| "num_tokens": 727707400.0, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.2983682983682985, |
| "grad_norm": 0.4092445660610788, |
| "learning_rate": 4.4807145970873206e-05, |
| "loss": 0.4822, |
| "num_tokens": 729018120.0, |
| "step": 2785 |
| }, |
| { |
| "epoch": 1.3006993006993006, |
| "grad_norm": 0.4185448307140966, |
| "learning_rate": 4.4784958020776665e-05, |
| "loss": 0.4616, |
| "num_tokens": 730328840.0, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.303030303030303, |
| "grad_norm": 0.38479876763978926, |
| "learning_rate": 4.476272898124938e-05, |
| "loss": 0.4721, |
| "num_tokens": 731639560.0, |
| "step": 2795 |
| }, |
| { |
| "epoch": 1.3053613053613053, |
| "grad_norm": 0.46767133110126385, |
| "learning_rate": 4.474045890513374e-05, |
| "loss": 0.4752, |
| "num_tokens": 732950280.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 0.411649743246021, |
| "learning_rate": 4.4718147845369696e-05, |
| "loss": 0.4573, |
| "num_tokens": 734261000.0, |
| "step": 2805 |
| }, |
| { |
| "epoch": 1.31002331002331, |
| "grad_norm": 0.4316901135425129, |
| "learning_rate": 4.469579585499463e-05, |
| "loss": 0.4783, |
| "num_tokens": 735564553.0, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.3123543123543124, |
| "grad_norm": 0.40955334557870626, |
| "learning_rate": 4.467340298714319e-05, |
| "loss": 0.4883, |
| "num_tokens": 736875273.0, |
| "step": 2815 |
| }, |
| { |
| "epoch": 1.3146853146853146, |
| "grad_norm": 0.4080344761506793, |
| "learning_rate": 4.4650969295047236e-05, |
| "loss": 0.4832, |
| "num_tokens": 738185993.0, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.317016317016317, |
| "grad_norm": 0.4946836082141776, |
| "learning_rate": 4.462849483203566e-05, |
| "loss": 0.4761, |
| "num_tokens": 739475770.0, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.3193473193473193, |
| "grad_norm": 0.4584478564333903, |
| "learning_rate": 4.460597965153426e-05, |
| "loss": 0.4649, |
| "num_tokens": 740786490.0, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.3216783216783217, |
| "grad_norm": 0.41894692846730963, |
| "learning_rate": 4.458342380706566e-05, |
| "loss": 0.4809, |
| "num_tokens": 742097210.0, |
| "step": 2835 |
| }, |
| { |
| "epoch": 1.324009324009324, |
| "grad_norm": 0.4516243516797518, |
| "learning_rate": 4.456082735224911e-05, |
| "loss": 0.4703, |
| "num_tokens": 743407930.0, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.3263403263403264, |
| "grad_norm": 0.4254492278222537, |
| "learning_rate": 4.4538190340800426e-05, |
| "loss": 0.4793, |
| "num_tokens": 744718650.0, |
| "step": 2845 |
| }, |
| { |
| "epoch": 1.3286713286713288, |
| "grad_norm": 0.4137937243385849, |
| "learning_rate": 4.451551282653182e-05, |
| "loss": 0.48, |
| "num_tokens": 746029370.0, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.3310023310023311, |
| "grad_norm": 0.519752518142126, |
| "learning_rate": 4.449279486335179e-05, |
| "loss": 0.4736, |
| "num_tokens": 747340090.0, |
| "step": 2855 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.4353489605024039, |
| "learning_rate": 4.4470036505265e-05, |
| "loss": 0.4744, |
| "num_tokens": 748650810.0, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.3356643356643356, |
| "grad_norm": 0.4419316706152749, |
| "learning_rate": 4.444723780637212e-05, |
| "loss": 0.493, |
| "num_tokens": 749939934.0, |
| "step": 2865 |
| }, |
| { |
| "epoch": 1.337995337995338, |
| "grad_norm": 0.4901281137641553, |
| "learning_rate": 4.442439882086973e-05, |
| "loss": 0.4901, |
| "num_tokens": 751250654.0, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.3403263403263403, |
| "grad_norm": 0.4412679310378607, |
| "learning_rate": 4.440151960305017e-05, |
| "loss": 0.4725, |
| "num_tokens": 752561374.0, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.3426573426573427, |
| "grad_norm": 0.46469847523541835, |
| "learning_rate": 4.437860020730144e-05, |
| "loss": 0.4784, |
| "num_tokens": 753872094.0, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.3449883449883449, |
| "grad_norm": 0.4631294470988359, |
| "learning_rate": 4.4355640688107024e-05, |
| "loss": 0.4645, |
| "num_tokens": 755182814.0, |
| "step": 2885 |
| }, |
| { |
| "epoch": 1.3473193473193472, |
| "grad_norm": 0.4557090602532957, |
| "learning_rate": 4.43326411000458e-05, |
| "loss": 0.4713, |
| "num_tokens": 756493534.0, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.3496503496503496, |
| "grad_norm": 0.4386481997162464, |
| "learning_rate": 4.4309601497791894e-05, |
| "loss": 0.4733, |
| "num_tokens": 757804254.0, |
| "step": 2895 |
| }, |
| { |
| "epoch": 1.351981351981352, |
| "grad_norm": 0.4269351049606663, |
| "learning_rate": 4.428652193611454e-05, |
| "loss": 0.4692, |
| "num_tokens": 759114974.0, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.3543123543123543, |
| "grad_norm": 2.3425883033287866, |
| "learning_rate": 4.4263402469878015e-05, |
| "loss": 0.4567, |
| "num_tokens": 760425694.0, |
| "step": 2905 |
| }, |
| { |
| "epoch": 1.3566433566433567, |
| "grad_norm": 0.36072419381621385, |
| "learning_rate": 4.424024315404137e-05, |
| "loss": 0.4748, |
| "num_tokens": 761736414.0, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.358974358974359, |
| "grad_norm": 0.4249230241783985, |
| "learning_rate": 4.421704404365847e-05, |
| "loss": 0.4683, |
| "num_tokens": 763047134.0, |
| "step": 2915 |
| }, |
| { |
| "epoch": 1.3613053613053614, |
| "grad_norm": 0.4299202972649719, |
| "learning_rate": 4.4193805193877714e-05, |
| "loss": 0.4663, |
| "num_tokens": 764357854.0, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 0.3879041745135311, |
| "learning_rate": 4.4170526659942015e-05, |
| "loss": 0.4721, |
| "num_tokens": 765660038.0, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.365967365967366, |
| "grad_norm": 0.5484971150134953, |
| "learning_rate": 4.414720849718859e-05, |
| "loss": 0.4839, |
| "num_tokens": 766970758.0, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.3682983682983683, |
| "grad_norm": 0.40718901418820086, |
| "learning_rate": 4.412385076104889e-05, |
| "loss": 0.4667, |
| "num_tokens": 768281478.0, |
| "step": 2935 |
| }, |
| { |
| "epoch": 1.3706293706293706, |
| "grad_norm": 0.3876539740773812, |
| "learning_rate": 4.410045350704841e-05, |
| "loss": 0.4612, |
| "num_tokens": 769592198.0, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.372960372960373, |
| "grad_norm": 0.41309989970311883, |
| "learning_rate": 4.4077016790806604e-05, |
| "loss": 0.4705, |
| "num_tokens": 770902918.0, |
| "step": 2945 |
| }, |
| { |
| "epoch": 1.3752913752913754, |
| "grad_norm": 0.3744163790044172, |
| "learning_rate": 4.405354066803673e-05, |
| "loss": 0.4707, |
| "num_tokens": 772213638.0, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.3776223776223775, |
| "grad_norm": 0.480030813067916, |
| "learning_rate": 4.403002519454573e-05, |
| "loss": 0.489, |
| "num_tokens": 773524358.0, |
| "step": 2955 |
| }, |
| { |
| "epoch": 1.3799533799533799, |
| "grad_norm": 0.44969634064801967, |
| "learning_rate": 4.400647042623407e-05, |
| "loss": 0.4688, |
| "num_tokens": 774835078.0, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.3822843822843822, |
| "grad_norm": 0.4122214657313857, |
| "learning_rate": 4.398287641909564e-05, |
| "loss": 0.4521, |
| "num_tokens": 776145798.0, |
| "step": 2965 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 0.4180712738656679, |
| "learning_rate": 4.395924322921762e-05, |
| "loss": 0.471, |
| "num_tokens": 777456518.0, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.386946386946387, |
| "grad_norm": 0.49070370600152946, |
| "learning_rate": 4.393557091278031e-05, |
| "loss": 0.4844, |
| "num_tokens": 778766755.0, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.3892773892773893, |
| "grad_norm": 0.40125497971986246, |
| "learning_rate": 4.391185952605703e-05, |
| "loss": 0.4859, |
| "num_tokens": 780077475.0, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.3916083916083917, |
| "grad_norm": 0.4410310353900502, |
| "learning_rate": 4.3888109125413984e-05, |
| "loss": 0.4713, |
| "num_tokens": 781383628.0, |
| "step": 2985 |
| }, |
| { |
| "epoch": 1.393939393939394, |
| "grad_norm": 0.3981200505167726, |
| "learning_rate": 4.3864319767310116e-05, |
| "loss": 0.474, |
| "num_tokens": 782694348.0, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.3962703962703964, |
| "grad_norm": 0.4882530547316365, |
| "learning_rate": 4.384049150829697e-05, |
| "loss": 0.4907, |
| "num_tokens": 784003029.0, |
| "step": 2995 |
| }, |
| { |
| "epoch": 1.3986013986013985, |
| "grad_norm": 0.4625354000998703, |
| "learning_rate": 4.381662440501857e-05, |
| "loss": 0.4783, |
| "num_tokens": 785313749.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.400932400932401, |
| "grad_norm": 0.42320091867347914, |
| "learning_rate": 4.379271851421129e-05, |
| "loss": 0.4745, |
| "num_tokens": 786608916.0, |
| "step": 3005 |
| }, |
| { |
| "epoch": 1.4032634032634033, |
| "grad_norm": 0.34353265181156867, |
| "learning_rate": 4.3768773892703696e-05, |
| "loss": 0.4682, |
| "num_tokens": 787919636.0, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.4055944055944056, |
| "grad_norm": 0.41229838212196757, |
| "learning_rate": 4.374479059741643e-05, |
| "loss": 0.4903, |
| "num_tokens": 789230356.0, |
| "step": 3015 |
| }, |
| { |
| "epoch": 1.407925407925408, |
| "grad_norm": 0.3828081072364993, |
| "learning_rate": 4.372076868536206e-05, |
| "loss": 0.4685, |
| "num_tokens": 790541076.0, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.4102564102564101, |
| "grad_norm": 0.3782799956569614, |
| "learning_rate": 4.369670821364497e-05, |
| "loss": 0.4875, |
| "num_tokens": 791851796.0, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.4125874125874125, |
| "grad_norm": 0.38271818179702677, |
| "learning_rate": 4.3672609239461185e-05, |
| "loss": 0.472, |
| "num_tokens": 793162516.0, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.4149184149184149, |
| "grad_norm": 0.4432182142698113, |
| "learning_rate": 4.364847182009827e-05, |
| "loss": 0.4536, |
| "num_tokens": 794473236.0, |
| "step": 3035 |
| }, |
| { |
| "epoch": 1.4172494172494172, |
| "grad_norm": 0.4163541269383781, |
| "learning_rate": 4.362429601293519e-05, |
| "loss": 0.4674, |
| "num_tokens": 795783956.0, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.4195804195804196, |
| "grad_norm": 0.3963132427363548, |
| "learning_rate": 4.360008187544213e-05, |
| "loss": 0.4691, |
| "num_tokens": 797094676.0, |
| "step": 3045 |
| }, |
| { |
| "epoch": 1.421911421911422, |
| "grad_norm": 0.376804595278115, |
| "learning_rate": 4.357582946518045e-05, |
| "loss": 0.4638, |
| "num_tokens": 798392983.0, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.4242424242424243, |
| "grad_norm": 0.3809783523823445, |
| "learning_rate": 4.355153883980243e-05, |
| "loss": 0.4779, |
| "num_tokens": 799693790.0, |
| "step": 3055 |
| }, |
| { |
| "epoch": 1.4265734265734267, |
| "grad_norm": 0.3621884672482386, |
| "learning_rate": 4.3527210057051246e-05, |
| "loss": 0.4808, |
| "num_tokens": 801004510.0, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.428904428904429, |
| "grad_norm": 0.4816211455166946, |
| "learning_rate": 4.3502843174760736e-05, |
| "loss": 0.4627, |
| "num_tokens": 802315230.0, |
| "step": 3065 |
| }, |
| { |
| "epoch": 1.4312354312354312, |
| "grad_norm": 0.3686667678553204, |
| "learning_rate": 4.3478438250855344e-05, |
| "loss": 0.4781, |
| "num_tokens": 803625950.0, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.4335664335664335, |
| "grad_norm": 0.39139268593103854, |
| "learning_rate": 4.345399534334993e-05, |
| "loss": 0.4614, |
| "num_tokens": 804936670.0, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.435897435897436, |
| "grad_norm": 0.3682813822971589, |
| "learning_rate": 4.3429514510349636e-05, |
| "loss": 0.4698, |
| "num_tokens": 806247390.0, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.4382284382284383, |
| "grad_norm": 0.39577744941721116, |
| "learning_rate": 4.340499581004979e-05, |
| "loss": 0.4696, |
| "num_tokens": 807558110.0, |
| "step": 3085 |
| }, |
| { |
| "epoch": 1.4405594405594406, |
| "grad_norm": 0.39221995923827324, |
| "learning_rate": 4.33804393007357e-05, |
| "loss": 0.4575, |
| "num_tokens": 808854280.0, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.4428904428904428, |
| "grad_norm": 0.3469876104551282, |
| "learning_rate": 4.335584504078258e-05, |
| "loss": 0.4663, |
| "num_tokens": 810160671.0, |
| "step": 3095 |
| }, |
| { |
| "epoch": 1.4452214452214451, |
| "grad_norm": 0.38039731278638117, |
| "learning_rate": 4.333121308865539e-05, |
| "loss": 0.4656, |
| "num_tokens": 811471391.0, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.4475524475524475, |
| "grad_norm": 0.3871641009392114, |
| "learning_rate": 4.330654350290866e-05, |
| "loss": 0.4741, |
| "num_tokens": 812782111.0, |
| "step": 3105 |
| }, |
| { |
| "epoch": 1.4498834498834499, |
| "grad_norm": 0.3723812044477465, |
| "learning_rate": 4.328183634218641e-05, |
| "loss": 0.4616, |
| "num_tokens": 814092831.0, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.4522144522144522, |
| "grad_norm": 0.4159411274519776, |
| "learning_rate": 4.325709166522196e-05, |
| "loss": 0.4705, |
| "num_tokens": 815403551.0, |
| "step": 3115 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.3786619070365938, |
| "learning_rate": 4.3232309530837826e-05, |
| "loss": 0.4702, |
| "num_tokens": 816714271.0, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.456876456876457, |
| "grad_norm": 0.3629693685192925, |
| "learning_rate": 4.320748999794558e-05, |
| "loss": 0.4623, |
| "num_tokens": 818024991.0, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.4592074592074593, |
| "grad_norm": 0.3807642152047702, |
| "learning_rate": 4.3182633125545664e-05, |
| "loss": 0.4826, |
| "num_tokens": 819335711.0, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 0.36345106221694695, |
| "learning_rate": 4.3157738972727316e-05, |
| "loss": 0.4749, |
| "num_tokens": 820646431.0, |
| "step": 3135 |
| }, |
| { |
| "epoch": 1.4638694638694638, |
| "grad_norm": 0.3982169584509918, |
| "learning_rate": 4.3132807598668366e-05, |
| "loss": 0.4592, |
| "num_tokens": 821957151.0, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.4662004662004662, |
| "grad_norm": 0.3578005945222367, |
| "learning_rate": 4.310783906263515e-05, |
| "loss": 0.472, |
| "num_tokens": 823267871.0, |
| "step": 3145 |
| }, |
| { |
| "epoch": 1.4685314685314685, |
| "grad_norm": 0.44584910654927556, |
| "learning_rate": 4.3082833423982346e-05, |
| "loss": 0.4682, |
| "num_tokens": 824556476.0, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.470862470862471, |
| "grad_norm": 0.439322244082669, |
| "learning_rate": 4.3057790742152785e-05, |
| "loss": 0.4572, |
| "num_tokens": 825867196.0, |
| "step": 3155 |
| }, |
| { |
| "epoch": 1.4731934731934733, |
| "grad_norm": 0.3890285611037925, |
| "learning_rate": 4.3032711076677436e-05, |
| "loss": 0.4684, |
| "num_tokens": 827177916.0, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.4755244755244754, |
| "grad_norm": 0.4262221846633365, |
| "learning_rate": 4.3007594487175114e-05, |
| "loss": 0.4748, |
| "num_tokens": 828488636.0, |
| "step": 3165 |
| }, |
| { |
| "epoch": 1.4778554778554778, |
| "grad_norm": 0.3770399690427187, |
| "learning_rate": 4.298244103335244e-05, |
| "loss": 0.4597, |
| "num_tokens": 829799356.0, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.4801864801864801, |
| "grad_norm": 0.3524048177654436, |
| "learning_rate": 4.2957250775003664e-05, |
| "loss": 0.4814, |
| "num_tokens": 831095535.0, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.4825174825174825, |
| "grad_norm": 0.4087741944757923, |
| "learning_rate": 4.293202377201053e-05, |
| "loss": 0.4892, |
| "num_tokens": 832394592.0, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.4848484848484849, |
| "grad_norm": 0.3875644282355541, |
| "learning_rate": 4.290676008434214e-05, |
| "loss": 0.4817, |
| "num_tokens": 833705312.0, |
| "step": 3185 |
| }, |
| { |
| "epoch": 1.4871794871794872, |
| "grad_norm": 0.41275819372925243, |
| "learning_rate": 4.2881459772054764e-05, |
| "loss": 0.4705, |
| "num_tokens": 835016032.0, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.4895104895104896, |
| "grad_norm": 0.3565273655614255, |
| "learning_rate": 4.2856122895291767e-05, |
| "loss": 0.4539, |
| "num_tokens": 836326752.0, |
| "step": 3195 |
| }, |
| { |
| "epoch": 1.491841491841492, |
| "grad_norm": 0.4013967833750652, |
| "learning_rate": 4.2830749514283444e-05, |
| "loss": 0.471, |
| "num_tokens": 837637472.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.494172494172494, |
| "grad_norm": 0.44988006189556307, |
| "learning_rate": 4.280533968934683e-05, |
| "loss": 0.4737, |
| "num_tokens": 838948192.0, |
| "step": 3205 |
| }, |
| { |
| "epoch": 1.4965034965034965, |
| "grad_norm": 0.4128029879161445, |
| "learning_rate": 4.277989348088564e-05, |
| "loss": 0.4618, |
| "num_tokens": 840258912.0, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.4988344988344988, |
| "grad_norm": 0.3860230229659164, |
| "learning_rate": 4.275441094939002e-05, |
| "loss": 0.4772, |
| "num_tokens": 841569632.0, |
| "step": 3215 |
| }, |
| { |
| "epoch": 1.5011655011655012, |
| "grad_norm": 0.38330430061235765, |
| "learning_rate": 4.2728892155436524e-05, |
| "loss": 0.4655, |
| "num_tokens": 842875786.0, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.5034965034965035, |
| "grad_norm": 0.3583158818403601, |
| "learning_rate": 4.270333715968787e-05, |
| "loss": 0.4637, |
| "num_tokens": 844186506.0, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.5058275058275057, |
| "grad_norm": 0.4292683787844713, |
| "learning_rate": 4.267774602289285e-05, |
| "loss": 0.4513, |
| "num_tokens": 845490179.0, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.508158508158508, |
| "grad_norm": 0.4561169755057234, |
| "learning_rate": 4.265211880588617e-05, |
| "loss": 0.4575, |
| "num_tokens": 846800899.0, |
| "step": 3235 |
| }, |
| { |
| "epoch": 1.5104895104895104, |
| "grad_norm": 0.38223276760013997, |
| "learning_rate": 4.2626455569588274e-05, |
| "loss": 0.4591, |
| "num_tokens": 848111619.0, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.5128205128205128, |
| "grad_norm": 0.3989737822244612, |
| "learning_rate": 4.260075637500528e-05, |
| "loss": 0.4791, |
| "num_tokens": 849422339.0, |
| "step": 3245 |
| }, |
| { |
| "epoch": 1.5151515151515151, |
| "grad_norm": 0.39448654246499554, |
| "learning_rate": 4.257502128322875e-05, |
| "loss": 0.4697, |
| "num_tokens": 850733059.0, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.5174825174825175, |
| "grad_norm": 0.38135346811109977, |
| "learning_rate": 4.25492503554356e-05, |
| "loss": 0.4858, |
| "num_tokens": 852033301.0, |
| "step": 3255 |
| }, |
| { |
| "epoch": 1.5198135198135199, |
| "grad_norm": 0.4063492887591379, |
| "learning_rate": 4.252344365288791e-05, |
| "loss": 0.4558, |
| "num_tokens": 853344021.0, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.5221445221445222, |
| "grad_norm": 0.41694831501332164, |
| "learning_rate": 4.2497601236932836e-05, |
| "loss": 0.4695, |
| "num_tokens": 854654741.0, |
| "step": 3265 |
| }, |
| { |
| "epoch": 1.5244755244755246, |
| "grad_norm": 0.36492369915112965, |
| "learning_rate": 4.2471723169002404e-05, |
| "loss": 0.4656, |
| "num_tokens": 855965461.0, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.526806526806527, |
| "grad_norm": 0.4768438850647179, |
| "learning_rate": 4.244580951061341e-05, |
| "loss": 0.4628, |
| "num_tokens": 857276181.0, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.529137529137529, |
| "grad_norm": 0.364684932951632, |
| "learning_rate": 4.2419860323367236e-05, |
| "loss": 0.4789, |
| "num_tokens": 858586901.0, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.5314685314685315, |
| "grad_norm": 0.3723413634564425, |
| "learning_rate": 4.239387566894973e-05, |
| "loss": 0.4852, |
| "num_tokens": 859897621.0, |
| "step": 3285 |
| }, |
| { |
| "epoch": 1.5337995337995338, |
| "grad_norm": 0.4225249222913032, |
| "learning_rate": 4.2367855609131074e-05, |
| "loss": 0.479, |
| "num_tokens": 861194968.0, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.5361305361305362, |
| "grad_norm": 0.37715348099690094, |
| "learning_rate": 4.234180020576556e-05, |
| "loss": 0.4849, |
| "num_tokens": 862505688.0, |
| "step": 3295 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.38467493999417174, |
| "learning_rate": 4.231570952079157e-05, |
| "loss": 0.4664, |
| "num_tokens": 863814518.0, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.5407925407925407, |
| "grad_norm": 0.37660805921200846, |
| "learning_rate": 4.22895836162313e-05, |
| "loss": 0.4735, |
| "num_tokens": 865125238.0, |
| "step": 3305 |
| }, |
| { |
| "epoch": 1.543123543123543, |
| "grad_norm": 0.3576120232900732, |
| "learning_rate": 4.226342255419069e-05, |
| "loss": 0.4836, |
| "num_tokens": 866435958.0, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.5454545454545454, |
| "grad_norm": 0.35132235584289356, |
| "learning_rate": 4.2237226396859256e-05, |
| "loss": 0.4482, |
| "num_tokens": 867746678.0, |
| "step": 3315 |
| }, |
| { |
| "epoch": 1.5477855477855478, |
| "grad_norm": 0.44192479046805605, |
| "learning_rate": 4.2210995206509945e-05, |
| "loss": 0.4741, |
| "num_tokens": 869057398.0, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.5501165501165501, |
| "grad_norm": 0.46240430648199005, |
| "learning_rate": 4.218472904549897e-05, |
| "loss": 0.4685, |
| "num_tokens": 870368118.0, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.5524475524475525, |
| "grad_norm": 0.3991386786525242, |
| "learning_rate": 4.215842797626569e-05, |
| "loss": 0.4821, |
| "num_tokens": 871678838.0, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.5547785547785549, |
| "grad_norm": 0.4310594533956871, |
| "learning_rate": 4.2132092061332444e-05, |
| "loss": 0.4716, |
| "num_tokens": 872989558.0, |
| "step": 3335 |
| }, |
| { |
| "epoch": 1.5571095571095572, |
| "grad_norm": 0.4719206923402879, |
| "learning_rate": 4.21057213633044e-05, |
| "loss": 0.4636, |
| "num_tokens": 874287357.0, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.5594405594405596, |
| "grad_norm": 0.4174800382083453, |
| "learning_rate": 4.207931594486941e-05, |
| "loss": 0.4702, |
| "num_tokens": 875598077.0, |
| "step": 3345 |
| }, |
| { |
| "epoch": 1.5617715617715617, |
| "grad_norm": 0.4353294397787359, |
| "learning_rate": 4.205287586879788e-05, |
| "loss": 0.4731, |
| "num_tokens": 876908797.0, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.564102564102564, |
| "grad_norm": 0.4245486575857189, |
| "learning_rate": 4.202640119794258e-05, |
| "loss": 0.4897, |
| "num_tokens": 878219517.0, |
| "step": 3355 |
| }, |
| { |
| "epoch": 1.5664335664335665, |
| "grad_norm": 0.36829836620338285, |
| "learning_rate": 4.1999891995238525e-05, |
| "loss": 0.4713, |
| "num_tokens": 879530237.0, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.5687645687645686, |
| "grad_norm": 0.4572528057427085, |
| "learning_rate": 4.1973348323702834e-05, |
| "loss": 0.4839, |
| "num_tokens": 880840957.0, |
| "step": 3365 |
| }, |
| { |
| "epoch": 1.571095571095571, |
| "grad_norm": 0.4298611015712589, |
| "learning_rate": 4.1946770246434554e-05, |
| "loss": 0.4712, |
| "num_tokens": 882151677.0, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.5734265734265733, |
| "grad_norm": 0.4045036094575523, |
| "learning_rate": 4.19201578266145e-05, |
| "loss": 0.4762, |
| "num_tokens": 883462397.0, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.5757575757575757, |
| "grad_norm": 0.3508435597625087, |
| "learning_rate": 4.1893511127505155e-05, |
| "loss": 0.4771, |
| "num_tokens": 884773117.0, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.578088578088578, |
| "grad_norm": 0.34886439925605456, |
| "learning_rate": 4.186683021245048e-05, |
| "loss": 0.4667, |
| "num_tokens": 886083837.0, |
| "step": 3385 |
| }, |
| { |
| "epoch": 1.5804195804195804, |
| "grad_norm": 0.3763065049584587, |
| "learning_rate": 4.1840115144875784e-05, |
| "loss": 0.4802, |
| "num_tokens": 887394557.0, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.5827505827505828, |
| "grad_norm": 0.49328331391815927, |
| "learning_rate": 4.1813365988287536e-05, |
| "loss": 0.4842, |
| "num_tokens": 888691868.0, |
| "step": 3395 |
| }, |
| { |
| "epoch": 1.5850815850815851, |
| "grad_norm": 0.3986181417508742, |
| "learning_rate": 4.178658280627326e-05, |
| "loss": 0.484, |
| "num_tokens": 890002588.0, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.5874125874125875, |
| "grad_norm": 0.49931340147631254, |
| "learning_rate": 4.175976566250136e-05, |
| "loss": 0.484, |
| "num_tokens": 891313308.0, |
| "step": 3405 |
| }, |
| { |
| "epoch": 1.5897435897435899, |
| "grad_norm": 0.45433476497736364, |
| "learning_rate": 4.173291462072098e-05, |
| "loss": 0.4618, |
| "num_tokens": 892617089.0, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.5920745920745922, |
| "grad_norm": 0.384524852468567, |
| "learning_rate": 4.170602974476184e-05, |
| "loss": 0.468, |
| "num_tokens": 893927809.0, |
| "step": 3415 |
| }, |
| { |
| "epoch": 1.5944055944055944, |
| "grad_norm": 0.3807586219393433, |
| "learning_rate": 4.167911109853407e-05, |
| "loss": 0.4771, |
| "num_tokens": 895238529.0, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.5967365967365967, |
| "grad_norm": 0.4447754359804572, |
| "learning_rate": 4.1652158746028116e-05, |
| "loss": 0.4716, |
| "num_tokens": 896541317.0, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.599067599067599, |
| "grad_norm": 0.42809843952530335, |
| "learning_rate": 4.162517275131454e-05, |
| "loss": 0.4604, |
| "num_tokens": 897852037.0, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.6013986013986012, |
| "grad_norm": 0.38368322059871096, |
| "learning_rate": 4.159815317854384e-05, |
| "loss": 0.4722, |
| "num_tokens": 899157306.0, |
| "step": 3435 |
| }, |
| { |
| "epoch": 1.6037296037296036, |
| "grad_norm": 0.41349052270597325, |
| "learning_rate": 4.157110009194639e-05, |
| "loss": 0.4854, |
| "num_tokens": 900455090.0, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.606060606060606, |
| "grad_norm": 0.37303520873037516, |
| "learning_rate": 4.15440135558322e-05, |
| "loss": 0.4504, |
| "num_tokens": 901765810.0, |
| "step": 3445 |
| }, |
| { |
| "epoch": 1.6083916083916083, |
| "grad_norm": 0.35322625283306586, |
| "learning_rate": 4.151689363459078e-05, |
| "loss": 0.4829, |
| "num_tokens": 903076530.0, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.6107226107226107, |
| "grad_norm": 0.41613261144008123, |
| "learning_rate": 4.1489740392691054e-05, |
| "loss": 0.4642, |
| "num_tokens": 904387250.0, |
| "step": 3455 |
| }, |
| { |
| "epoch": 1.613053613053613, |
| "grad_norm": 0.36788455921816615, |
| "learning_rate": 4.1462553894681115e-05, |
| "loss": 0.4504, |
| "num_tokens": 905697970.0, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.3865061275286875, |
| "learning_rate": 4.1435334205188106e-05, |
| "loss": 0.4742, |
| "num_tokens": 907008690.0, |
| "step": 3465 |
| }, |
| { |
| "epoch": 1.6177156177156178, |
| "grad_norm": 0.4445705789673663, |
| "learning_rate": 4.1408081388918114e-05, |
| "loss": 0.4611, |
| "num_tokens": 908319410.0, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.6200466200466201, |
| "grad_norm": 0.3887208640873298, |
| "learning_rate": 4.138079551065593e-05, |
| "loss": 0.4561, |
| "num_tokens": 909630130.0, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.6223776223776225, |
| "grad_norm": 0.377258046210296, |
| "learning_rate": 4.135347663526496e-05, |
| "loss": 0.4745, |
| "num_tokens": 910940850.0, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.6247086247086249, |
| "grad_norm": 0.3678540515945634, |
| "learning_rate": 4.132612482768704e-05, |
| "loss": 0.4724, |
| "num_tokens": 912251570.0, |
| "step": 3485 |
| }, |
| { |
| "epoch": 1.627039627039627, |
| "grad_norm": 0.36120123975381446, |
| "learning_rate": 4.129874015294234e-05, |
| "loss": 0.4844, |
| "num_tokens": 913562290.0, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.6293706293706294, |
| "grad_norm": 0.409642194208649, |
| "learning_rate": 4.127132267612907e-05, |
| "loss": 0.4665, |
| "num_tokens": 914873010.0, |
| "step": 3495 |
| }, |
| { |
| "epoch": 1.6317016317016317, |
| "grad_norm": 0.34029196923903116, |
| "learning_rate": 4.1243872462423485e-05, |
| "loss": 0.4753, |
| "num_tokens": 916183730.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6340326340326339, |
| "grad_norm": 0.407667262964334, |
| "learning_rate": 4.121638957707965e-05, |
| "loss": 0.4627, |
| "num_tokens": 917494450.0, |
| "step": 3505 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.35645199665718186, |
| "learning_rate": 4.118887408542927e-05, |
| "loss": 0.4705, |
| "num_tokens": 918791806.0, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.6386946386946386, |
| "grad_norm": 0.37596379435936367, |
| "learning_rate": 4.11613260528816e-05, |
| "loss": 0.4591, |
| "num_tokens": 920102526.0, |
| "step": 3515 |
| }, |
| { |
| "epoch": 1.641025641025641, |
| "grad_norm": 0.4320558743649901, |
| "learning_rate": 4.1133745544923236e-05, |
| "loss": 0.456, |
| "num_tokens": 921413246.0, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.6433566433566433, |
| "grad_norm": 0.4341288373141138, |
| "learning_rate": 4.1106132627117956e-05, |
| "loss": 0.4748, |
| "num_tokens": 922715595.0, |
| "step": 3525 |
| }, |
| { |
| "epoch": 1.6456876456876457, |
| "grad_norm": 0.41676180005636126, |
| "learning_rate": 4.107848736510659e-05, |
| "loss": 0.4575, |
| "num_tokens": 924026315.0, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.648018648018648, |
| "grad_norm": 0.40802471968176357, |
| "learning_rate": 4.105080982460687e-05, |
| "loss": 0.4628, |
| "num_tokens": 925337035.0, |
| "step": 3535 |
| }, |
| { |
| "epoch": 1.6503496503496504, |
| "grad_norm": 0.4507954091649136, |
| "learning_rate": 4.102310007141324e-05, |
| "loss": 0.4837, |
| "num_tokens": 926631017.0, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.6526806526806528, |
| "grad_norm": 0.4050589740560033, |
| "learning_rate": 4.0995358171396747e-05, |
| "loss": 0.4736, |
| "num_tokens": 927941737.0, |
| "step": 3545 |
| }, |
| { |
| "epoch": 1.6550116550116551, |
| "grad_norm": 0.42380553595370807, |
| "learning_rate": 4.0967584190504825e-05, |
| "loss": 0.4734, |
| "num_tokens": 929252457.0, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.6573426573426573, |
| "grad_norm": 0.3958904686940088, |
| "learning_rate": 4.0939778194761196e-05, |
| "loss": 0.488, |
| "num_tokens": 930563177.0, |
| "step": 3555 |
| }, |
| { |
| "epoch": 1.6596736596736597, |
| "grad_norm": 0.43394413515654184, |
| "learning_rate": 4.091194025026567e-05, |
| "loss": 0.4692, |
| "num_tokens": 931860496.0, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.662004662004662, |
| "grad_norm": 0.4589951627434292, |
| "learning_rate": 4.0884070423194007e-05, |
| "loss": 0.4805, |
| "num_tokens": 933171216.0, |
| "step": 3565 |
| }, |
| { |
| "epoch": 1.6643356643356644, |
| "grad_norm": 0.4150754775824352, |
| "learning_rate": 4.085616877979776e-05, |
| "loss": 0.4628, |
| "num_tokens": 934465598.0, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.40687005175901875, |
| "learning_rate": 4.0828235386404124e-05, |
| "loss": 0.4564, |
| "num_tokens": 935774296.0, |
| "step": 3575 |
| }, |
| { |
| "epoch": 1.6689976689976689, |
| "grad_norm": 0.3856327268150972, |
| "learning_rate": 4.0800270309415756e-05, |
| "loss": 0.4635, |
| "num_tokens": 937085016.0, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.6713286713286712, |
| "grad_norm": 0.43509202476957415, |
| "learning_rate": 4.077227361531063e-05, |
| "loss": 0.4708, |
| "num_tokens": 938395736.0, |
| "step": 3585 |
| }, |
| { |
| "epoch": 1.6736596736596736, |
| "grad_norm": 0.37887891871157525, |
| "learning_rate": 4.07442453706419e-05, |
| "loss": 0.4775, |
| "num_tokens": 939706456.0, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.675990675990676, |
| "grad_norm": 0.35092056020696705, |
| "learning_rate": 4.07161856420377e-05, |
| "loss": 0.4642, |
| "num_tokens": 941017176.0, |
| "step": 3595 |
| }, |
| { |
| "epoch": 1.6783216783216783, |
| "grad_norm": 0.3618238243762927, |
| "learning_rate": 4.068809449620101e-05, |
| "loss": 0.4728, |
| "num_tokens": 942319215.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.6806526806526807, |
| "grad_norm": 0.3850438304208137, |
| "learning_rate": 4.065997199990951e-05, |
| "loss": 0.4823, |
| "num_tokens": 943629935.0, |
| "step": 3605 |
| }, |
| { |
| "epoch": 1.682983682983683, |
| "grad_norm": 0.3981622077125061, |
| "learning_rate": 4.063181822001538e-05, |
| "loss": 0.4605, |
| "num_tokens": 944940655.0, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.6853146853146854, |
| "grad_norm": 0.4200393473429356, |
| "learning_rate": 4.060363322344518e-05, |
| "loss": 0.4816, |
| "num_tokens": 946251375.0, |
| "step": 3615 |
| }, |
| { |
| "epoch": 1.6876456876456878, |
| "grad_norm": 0.389339620745983, |
| "learning_rate": 4.05754170771997e-05, |
| "loss": 0.4714, |
| "num_tokens": 947562095.0, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.68997668997669, |
| "grad_norm": 0.4117742280678568, |
| "learning_rate": 4.054716984835372e-05, |
| "loss": 0.4695, |
| "num_tokens": 948872815.0, |
| "step": 3625 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 0.4245786229670795, |
| "learning_rate": 4.051889160405598e-05, |
| "loss": 0.4574, |
| "num_tokens": 950183535.0, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.6946386946386947, |
| "grad_norm": 0.4777093865988957, |
| "learning_rate": 4.0490582411528896e-05, |
| "loss": 0.4572, |
| "num_tokens": 951494255.0, |
| "step": 3635 |
| }, |
| { |
| "epoch": 1.696969696969697, |
| "grad_norm": 0.3924099164372027, |
| "learning_rate": 4.0462242338068476e-05, |
| "loss": 0.4651, |
| "num_tokens": 952804975.0, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.6993006993006992, |
| "grad_norm": 0.38987930954143907, |
| "learning_rate": 4.0433871451044136e-05, |
| "loss": 0.4873, |
| "num_tokens": 954115695.0, |
| "step": 3645 |
| }, |
| { |
| "epoch": 1.7016317016317015, |
| "grad_norm": 0.41461621362174245, |
| "learning_rate": 4.040546981789854e-05, |
| "loss": 0.4748, |
| "num_tokens": 955426415.0, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.7039627039627039, |
| "grad_norm": 0.3425572462390038, |
| "learning_rate": 4.0377037506147436e-05, |
| "loss": 0.4858, |
| "num_tokens": 956737135.0, |
| "step": 3655 |
| }, |
| { |
| "epoch": 1.7062937062937062, |
| "grad_norm": 0.35362235462359226, |
| "learning_rate": 4.0348574583379506e-05, |
| "loss": 0.4515, |
| "num_tokens": 958047855.0, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.7086247086247086, |
| "grad_norm": 0.4269439890212748, |
| "learning_rate": 4.032008111725619e-05, |
| "loss": 0.478, |
| "num_tokens": 959358575.0, |
| "step": 3665 |
| }, |
| { |
| "epoch": 1.710955710955711, |
| "grad_norm": 0.3975233814611868, |
| "learning_rate": 4.029155717551156e-05, |
| "loss": 0.4682, |
| "num_tokens": 960647821.0, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.7132867132867133, |
| "grad_norm": 0.42538111394289835, |
| "learning_rate": 4.026300282595211e-05, |
| "loss": 0.4821, |
| "num_tokens": 961958541.0, |
| "step": 3675 |
| }, |
| { |
| "epoch": 1.7156177156177157, |
| "grad_norm": 0.44368078414892886, |
| "learning_rate": 4.023441813645662e-05, |
| "loss": 0.4629, |
| "num_tokens": 963269261.0, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.717948717948718, |
| "grad_norm": 0.4096743909582884, |
| "learning_rate": 4.0205803174975996e-05, |
| "loss": 0.4678, |
| "num_tokens": 964579981.0, |
| "step": 3685 |
| }, |
| { |
| "epoch": 1.7202797202797204, |
| "grad_norm": 0.36149619103114544, |
| "learning_rate": 4.0177158009533136e-05, |
| "loss": 0.4661, |
| "num_tokens": 965885806.0, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.7226107226107226, |
| "grad_norm": 0.3883498094552971, |
| "learning_rate": 4.014848270822268e-05, |
| "loss": 0.4679, |
| "num_tokens": 967191092.0, |
| "step": 3695 |
| }, |
| { |
| "epoch": 1.724941724941725, |
| "grad_norm": 0.3874435629285386, |
| "learning_rate": 4.011977733921096e-05, |
| "loss": 0.4613, |
| "num_tokens": 968501812.0, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.7272727272727273, |
| "grad_norm": 0.40622823181165385, |
| "learning_rate": 4.009104197073575e-05, |
| "loss": 0.4813, |
| "num_tokens": 969796954.0, |
| "step": 3705 |
| }, |
| { |
| "epoch": 1.7296037296037297, |
| "grad_norm": 0.3576403196641578, |
| "learning_rate": 4.0062276671106154e-05, |
| "loss": 0.456, |
| "num_tokens": 971107674.0, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.7319347319347318, |
| "grad_norm": 0.42428688878098647, |
| "learning_rate": 4.0033481508702425e-05, |
| "loss": 0.4771, |
| "num_tokens": 972418394.0, |
| "step": 3715 |
| }, |
| { |
| "epoch": 1.7342657342657342, |
| "grad_norm": 0.398776005644926, |
| "learning_rate": 4.00046565519758e-05, |
| "loss": 0.4826, |
| "num_tokens": 973729114.0, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.7365967365967365, |
| "grad_norm": 0.3803669387697352, |
| "learning_rate": 3.997580186944835e-05, |
| "loss": 0.4817, |
| "num_tokens": 975039834.0, |
| "step": 3725 |
| }, |
| { |
| "epoch": 1.7389277389277389, |
| "grad_norm": 0.3668247169705026, |
| "learning_rate": 3.994691752971282e-05, |
| "loss": 0.4671, |
| "num_tokens": 976350554.0, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.7412587412587412, |
| "grad_norm": 0.3618934392527535, |
| "learning_rate": 3.991800360143241e-05, |
| "loss": 0.475, |
| "num_tokens": 977661274.0, |
| "step": 3735 |
| }, |
| { |
| "epoch": 1.7435897435897436, |
| "grad_norm": 0.3616956268703821, |
| "learning_rate": 3.988906015334073e-05, |
| "loss": 0.4595, |
| "num_tokens": 978971994.0, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.745920745920746, |
| "grad_norm": 0.3930597565056608, |
| "learning_rate": 3.986008725424148e-05, |
| "loss": 0.465, |
| "num_tokens": 980282714.0, |
| "step": 3745 |
| }, |
| { |
| "epoch": 1.7482517482517483, |
| "grad_norm": 0.39536789600867117, |
| "learning_rate": 3.983108497300844e-05, |
| "loss": 0.4701, |
| "num_tokens": 981585502.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.7505827505827507, |
| "grad_norm": 0.4099848875497283, |
| "learning_rate": 3.9802053378585205e-05, |
| "loss": 0.4751, |
| "num_tokens": 982896222.0, |
| "step": 3755 |
| }, |
| { |
| "epoch": 1.752913752913753, |
| "grad_norm": 0.3722702285436598, |
| "learning_rate": 3.977299253998504e-05, |
| "loss": 0.4738, |
| "num_tokens": 984206942.0, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.7552447552447552, |
| "grad_norm": 0.3970652311409931, |
| "learning_rate": 3.974390252629078e-05, |
| "loss": 0.4671, |
| "num_tokens": 985517662.0, |
| "step": 3765 |
| }, |
| { |
| "epoch": 1.7575757575757576, |
| "grad_norm": 0.38418584202250494, |
| "learning_rate": 3.971478340665455e-05, |
| "loss": 0.478, |
| "num_tokens": 986828382.0, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.75990675990676, |
| "grad_norm": 0.4897203405840543, |
| "learning_rate": 3.968563525029771e-05, |
| "loss": 0.4758, |
| "num_tokens": 988139102.0, |
| "step": 3775 |
| }, |
| { |
| "epoch": 1.762237762237762, |
| "grad_norm": 0.4708593559155846, |
| "learning_rate": 3.965645812651063e-05, |
| "loss": 0.4872, |
| "num_tokens": 989444809.0, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.7645687645687644, |
| "grad_norm": 0.4215417171128016, |
| "learning_rate": 3.9627252104652535e-05, |
| "loss": 0.4591, |
| "num_tokens": 990753790.0, |
| "step": 3785 |
| }, |
| { |
| "epoch": 1.7668997668997668, |
| "grad_norm": 0.40405280271269434, |
| "learning_rate": 3.959801725415136e-05, |
| "loss": 0.4648, |
| "num_tokens": 992054396.0, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 0.41393808447009156, |
| "learning_rate": 3.9568753644503566e-05, |
| "loss": 0.4587, |
| "num_tokens": 993352731.0, |
| "step": 3795 |
| }, |
| { |
| "epoch": 1.7715617715617715, |
| "grad_norm": 0.37732142695063003, |
| "learning_rate": 3.9539461345273956e-05, |
| "loss": 0.4737, |
| "num_tokens": 994656385.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.7738927738927739, |
| "grad_norm": 0.528280250373434, |
| "learning_rate": 3.951014042609559e-05, |
| "loss": 0.4702, |
| "num_tokens": 995967105.0, |
| "step": 3805 |
| }, |
| { |
| "epoch": 1.7762237762237763, |
| "grad_norm": 0.4213342625415021, |
| "learning_rate": 3.9480790956669486e-05, |
| "loss": 0.4791, |
| "num_tokens": 997267208.0, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.7785547785547786, |
| "grad_norm": 0.3698430883873798, |
| "learning_rate": 3.9451413006764604e-05, |
| "loss": 0.4653, |
| "num_tokens": 998577928.0, |
| "step": 3815 |
| }, |
| { |
| "epoch": 1.780885780885781, |
| "grad_norm": 0.414231394973144, |
| "learning_rate": 3.942200664621756e-05, |
| "loss": 0.4687, |
| "num_tokens": 999888162.0, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.7832167832167833, |
| "grad_norm": 0.4350040599172444, |
| "learning_rate": 3.939257194493253e-05, |
| "loss": 0.4513, |
| "num_tokens": 1001186063.0, |
| "step": 3825 |
| }, |
| { |
| "epoch": 1.7855477855477857, |
| "grad_norm": 0.4152803596705445, |
| "learning_rate": 3.936310897288104e-05, |
| "loss": 0.4562, |
| "num_tokens": 1002496783.0, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.7878787878787878, |
| "grad_norm": 0.41881419362638367, |
| "learning_rate": 3.933361780010185e-05, |
| "loss": 0.4646, |
| "num_tokens": 1003807503.0, |
| "step": 3835 |
| }, |
| { |
| "epoch": 1.7902097902097902, |
| "grad_norm": 0.38522642875173096, |
| "learning_rate": 3.930409849670073e-05, |
| "loss": 0.4596, |
| "num_tokens": 1005118223.0, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.7925407925407926, |
| "grad_norm": 0.34873890955647613, |
| "learning_rate": 3.927455113285035e-05, |
| "loss": 0.4559, |
| "num_tokens": 1006428943.0, |
| "step": 3845 |
| }, |
| { |
| "epoch": 1.7948717948717947, |
| "grad_norm": 0.39305628827987876, |
| "learning_rate": 3.924497577879005e-05, |
| "loss": 0.4647, |
| "num_tokens": 1007739663.0, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.797202797202797, |
| "grad_norm": 0.3854549232040684, |
| "learning_rate": 3.9215372504825735e-05, |
| "loss": 0.4737, |
| "num_tokens": 1009050383.0, |
| "step": 3855 |
| }, |
| { |
| "epoch": 1.7995337995337994, |
| "grad_norm": 0.40079016381501426, |
| "learning_rate": 3.9185741381329664e-05, |
| "loss": 0.4792, |
| "num_tokens": 1010361103.0, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.8018648018648018, |
| "grad_norm": 0.40266606077066175, |
| "learning_rate": 3.915608247874032e-05, |
| "loss": 0.487, |
| "num_tokens": 1011671823.0, |
| "step": 3865 |
| }, |
| { |
| "epoch": 1.8041958041958042, |
| "grad_norm": 0.3510993917665707, |
| "learning_rate": 3.912639586756221e-05, |
| "loss": 0.4514, |
| "num_tokens": 1012982543.0, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.8065268065268065, |
| "grad_norm": 0.3907134061685135, |
| "learning_rate": 3.9096681618365686e-05, |
| "loss": 0.447, |
| "num_tokens": 1014293263.0, |
| "step": 3875 |
| }, |
| { |
| "epoch": 1.808857808857809, |
| "grad_norm": 0.3708262682688497, |
| "learning_rate": 3.9066939801786836e-05, |
| "loss": 0.4765, |
| "num_tokens": 1015592603.0, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.8111888111888113, |
| "grad_norm": 0.44553625905509964, |
| "learning_rate": 3.903717048852728e-05, |
| "loss": 0.4709, |
| "num_tokens": 1016903323.0, |
| "step": 3885 |
| }, |
| { |
| "epoch": 1.8135198135198136, |
| "grad_norm": 0.5102907463594831, |
| "learning_rate": 3.900737374935396e-05, |
| "loss": 0.477, |
| "num_tokens": 1018214043.0, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.815850815850816, |
| "grad_norm": 0.4103843094724411, |
| "learning_rate": 3.897754965509908e-05, |
| "loss": 0.4557, |
| "num_tokens": 1019524763.0, |
| "step": 3895 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.3973998481652665, |
| "learning_rate": 3.8947698276659806e-05, |
| "loss": 0.4606, |
| "num_tokens": 1020835483.0, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.8205128205128205, |
| "grad_norm": 0.4071222548172448, |
| "learning_rate": 3.8917819684998215e-05, |
| "loss": 0.4734, |
| "num_tokens": 1022146203.0, |
| "step": 3905 |
| }, |
| { |
| "epoch": 1.8228438228438228, |
| "grad_norm": 0.38953013965021704, |
| "learning_rate": 3.888791395114103e-05, |
| "loss": 0.4481, |
| "num_tokens": 1023456923.0, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.8251748251748252, |
| "grad_norm": 0.34776150147238477, |
| "learning_rate": 3.885798114617954e-05, |
| "loss": 0.4653, |
| "num_tokens": 1024767643.0, |
| "step": 3915 |
| }, |
| { |
| "epoch": 1.8275058275058274, |
| "grad_norm": 0.43412541332007054, |
| "learning_rate": 3.8828021341269363e-05, |
| "loss": 0.4696, |
| "num_tokens": 1026078363.0, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.8298368298368297, |
| "grad_norm": 0.39408936405816397, |
| "learning_rate": 3.879803460763029e-05, |
| "loss": 0.471, |
| "num_tokens": 1027389083.0, |
| "step": 3925 |
| }, |
| { |
| "epoch": 1.832167832167832, |
| "grad_norm": 0.36162777389200856, |
| "learning_rate": 3.876802101654614e-05, |
| "loss": 0.4669, |
| "num_tokens": 1028699803.0, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.8344988344988344, |
| "grad_norm": 0.4060158965672199, |
| "learning_rate": 3.87379806393646e-05, |
| "loss": 0.4601, |
| "num_tokens": 1030010523.0, |
| "step": 3935 |
| }, |
| { |
| "epoch": 1.8368298368298368, |
| "grad_norm": 0.40896752020498217, |
| "learning_rate": 3.870791354749698e-05, |
| "loss": 0.4688, |
| "num_tokens": 1031319258.0, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.8391608391608392, |
| "grad_norm": 0.44359916481371037, |
| "learning_rate": 3.867781981241814e-05, |
| "loss": 0.4889, |
| "num_tokens": 1032629978.0, |
| "step": 3945 |
| }, |
| { |
| "epoch": 1.8414918414918415, |
| "grad_norm": 0.40506992750006354, |
| "learning_rate": 3.8647699505666265e-05, |
| "loss": 0.4477, |
| "num_tokens": 1033940698.0, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.843822843822844, |
| "grad_norm": 0.39121466623448464, |
| "learning_rate": 3.861755269884269e-05, |
| "loss": 0.462, |
| "num_tokens": 1035251418.0, |
| "step": 3955 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 0.3762364215121074, |
| "learning_rate": 3.8587379463611766e-05, |
| "loss": 0.4718, |
| "num_tokens": 1036562138.0, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.8484848484848486, |
| "grad_norm": 0.38276793618104127, |
| "learning_rate": 3.855717987170065e-05, |
| "loss": 0.4694, |
| "num_tokens": 1037868363.0, |
| "step": 3965 |
| }, |
| { |
| "epoch": 1.8508158508158508, |
| "grad_norm": 0.3993712479498845, |
| "learning_rate": 3.852695399489917e-05, |
| "loss": 0.4632, |
| "num_tokens": 1039179083.0, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.8531468531468531, |
| "grad_norm": 0.3653526687250586, |
| "learning_rate": 3.849670190505963e-05, |
| "loss": 0.458, |
| "num_tokens": 1040489803.0, |
| "step": 3975 |
| }, |
| { |
| "epoch": 1.8554778554778555, |
| "grad_norm": 0.35478532717038763, |
| "learning_rate": 3.846642367409663e-05, |
| "loss": 0.4773, |
| "num_tokens": 1041789042.0, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.8578088578088578, |
| "grad_norm": 0.4152595626409347, |
| "learning_rate": 3.843611937398695e-05, |
| "loss": 0.4734, |
| "num_tokens": 1043099762.0, |
| "step": 3985 |
| }, |
| { |
| "epoch": 1.86013986013986, |
| "grad_norm": 0.3697811329438798, |
| "learning_rate": 3.840578907676933e-05, |
| "loss": 0.4603, |
| "num_tokens": 1044410482.0, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.8624708624708624, |
| "grad_norm": 0.3673405078658897, |
| "learning_rate": 3.8375432854544265e-05, |
| "loss": 0.468, |
| "num_tokens": 1045721202.0, |
| "step": 3995 |
| }, |
| { |
| "epoch": 1.8648018648018647, |
| "grad_norm": 0.3642674759468013, |
| "learning_rate": 3.834505077947395e-05, |
| "loss": 0.4679, |
| "num_tokens": 1047031922.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.867132867132867, |
| "grad_norm": 0.39831494968982045, |
| "learning_rate": 3.831464292378199e-05, |
| "loss": 0.4603, |
| "num_tokens": 1048342642.0, |
| "step": 4005 |
| }, |
| { |
| "epoch": 1.8694638694638694, |
| "grad_norm": 0.47030492710960053, |
| "learning_rate": 3.828420935975328e-05, |
| "loss": 0.4718, |
| "num_tokens": 1049647085.0, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.8717948717948718, |
| "grad_norm": 0.34963450633338594, |
| "learning_rate": 3.825375015973383e-05, |
| "loss": 0.4582, |
| "num_tokens": 1050957805.0, |
| "step": 4015 |
| }, |
| { |
| "epoch": 1.8741258741258742, |
| "grad_norm": 0.38080688538607055, |
| "learning_rate": 3.822326539613061e-05, |
| "loss": 0.4686, |
| "num_tokens": 1052268525.0, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.8764568764568765, |
| "grad_norm": 0.3878968790543752, |
| "learning_rate": 3.819275514141134e-05, |
| "loss": 0.4718, |
| "num_tokens": 1053579245.0, |
| "step": 4025 |
| }, |
| { |
| "epoch": 1.878787878787879, |
| "grad_norm": 0.4444788783917722, |
| "learning_rate": 3.816221946810434e-05, |
| "loss": 0.449, |
| "num_tokens": 1054889965.0, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.8811188811188813, |
| "grad_norm": 0.3855436235980361, |
| "learning_rate": 3.813165844879835e-05, |
| "loss": 0.4663, |
| "num_tokens": 1056200685.0, |
| "step": 4035 |
| }, |
| { |
| "epoch": 1.8834498834498834, |
| "grad_norm": 0.41749033912855776, |
| "learning_rate": 3.8101072156142376e-05, |
| "loss": 0.4721, |
| "num_tokens": 1057507296.0, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.8857808857808858, |
| "grad_norm": 0.3325680793207379, |
| "learning_rate": 3.8070460662845495e-05, |
| "loss": 0.4685, |
| "num_tokens": 1058818016.0, |
| "step": 4045 |
| }, |
| { |
| "epoch": 1.8881118881118881, |
| "grad_norm": 0.36642497795228024, |
| "learning_rate": 3.80398240416767e-05, |
| "loss": 0.4513, |
| "num_tokens": 1060123820.0, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.8904428904428905, |
| "grad_norm": 0.3301418844159666, |
| "learning_rate": 3.800916236546468e-05, |
| "loss": 0.4734, |
| "num_tokens": 1061434540.0, |
| "step": 4055 |
| }, |
| { |
| "epoch": 1.8927738927738926, |
| "grad_norm": 0.34768445537128323, |
| "learning_rate": 3.797847570709775e-05, |
| "loss": 0.4723, |
| "num_tokens": 1062745260.0, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.895104895104895, |
| "grad_norm": 0.347789179412769, |
| "learning_rate": 3.794776413952354e-05, |
| "loss": 0.4626, |
| "num_tokens": 1064055980.0, |
| "step": 4065 |
| }, |
| { |
| "epoch": 1.8974358974358974, |
| "grad_norm": 0.3462234651976999, |
| "learning_rate": 3.7917027735748956e-05, |
| "loss": 0.4607, |
| "num_tokens": 1065366509.0, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.8997668997668997, |
| "grad_norm": 0.38708966741877066, |
| "learning_rate": 3.788626656883991e-05, |
| "loss": 0.4826, |
| "num_tokens": 1066677229.0, |
| "step": 4075 |
| }, |
| { |
| "epoch": 1.902097902097902, |
| "grad_norm": 0.38785225263682604, |
| "learning_rate": 3.785548071192117e-05, |
| "loss": 0.4663, |
| "num_tokens": 1067987949.0, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.9044289044289044, |
| "grad_norm": 0.43056969345933427, |
| "learning_rate": 3.782467023817623e-05, |
| "loss": 0.4647, |
| "num_tokens": 1069298669.0, |
| "step": 4085 |
| }, |
| { |
| "epoch": 1.9067599067599068, |
| "grad_norm": 0.3412833063088448, |
| "learning_rate": 3.7793835220847076e-05, |
| "loss": 0.4678, |
| "num_tokens": 1070609389.0, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.9090909090909092, |
| "grad_norm": 0.4184934857430221, |
| "learning_rate": 3.776297573323406e-05, |
| "loss": 0.474, |
| "num_tokens": 1071920109.0, |
| "step": 4095 |
| }, |
| { |
| "epoch": 1.9114219114219115, |
| "grad_norm": 0.44954773016733546, |
| "learning_rate": 3.7732091848695686e-05, |
| "loss": 0.4647, |
| "num_tokens": 1073230829.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.913752913752914, |
| "grad_norm": 0.4771060848896429, |
| "learning_rate": 3.770118364064846e-05, |
| "loss": 0.4743, |
| "num_tokens": 1074541549.0, |
| "step": 4105 |
| }, |
| { |
| "epoch": 1.916083916083916, |
| "grad_norm": 0.3945529571970546, |
| "learning_rate": 3.767025118256672e-05, |
| "loss": 0.4691, |
| "num_tokens": 1075852269.0, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.9184149184149184, |
| "grad_norm": 0.40375250793365847, |
| "learning_rate": 3.7639294547982416e-05, |
| "loss": 0.4699, |
| "num_tokens": 1077160473.0, |
| "step": 4115 |
| }, |
| { |
| "epoch": 1.9207459207459208, |
| "grad_norm": 0.3682666412768804, |
| "learning_rate": 3.760831381048503e-05, |
| "loss": 0.4396, |
| "num_tokens": 1078471193.0, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.40270380854681137, |
| "learning_rate": 3.757730904372127e-05, |
| "loss": 0.4655, |
| "num_tokens": 1079781913.0, |
| "step": 4125 |
| }, |
| { |
| "epoch": 1.9254079254079253, |
| "grad_norm": 0.4091540171139127, |
| "learning_rate": 3.754628032139502e-05, |
| "loss": 0.4676, |
| "num_tokens": 1081092633.0, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.9277389277389276, |
| "grad_norm": 0.37723565465509623, |
| "learning_rate": 3.75152277172671e-05, |
| "loss": 0.458, |
| "num_tokens": 1082403353.0, |
| "step": 4135 |
| }, |
| { |
| "epoch": 1.93006993006993, |
| "grad_norm": 0.35463116725754923, |
| "learning_rate": 3.7484151305155066e-05, |
| "loss": 0.4601, |
| "num_tokens": 1083697889.0, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.9324009324009324, |
| "grad_norm": 0.4440845165505597, |
| "learning_rate": 3.7453051158933124e-05, |
| "loss": 0.4635, |
| "num_tokens": 1084995264.0, |
| "step": 4145 |
| }, |
| { |
| "epoch": 1.9347319347319347, |
| "grad_norm": 0.3844563537675778, |
| "learning_rate": 3.742192735253186e-05, |
| "loss": 0.4486, |
| "num_tokens": 1086305984.0, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.937062937062937, |
| "grad_norm": 0.3653521354637281, |
| "learning_rate": 3.739077995993811e-05, |
| "loss": 0.4609, |
| "num_tokens": 1087616704.0, |
| "step": 4155 |
| }, |
| { |
| "epoch": 1.9393939393939394, |
| "grad_norm": 0.3338487174568724, |
| "learning_rate": 3.735960905519482e-05, |
| "loss": 0.4475, |
| "num_tokens": 1088926295.0, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.9417249417249418, |
| "grad_norm": 0.3624856781426835, |
| "learning_rate": 3.732841471240076e-05, |
| "loss": 0.4515, |
| "num_tokens": 1090237015.0, |
| "step": 4165 |
| }, |
| { |
| "epoch": 1.9440559440559442, |
| "grad_norm": 0.34674172363160854, |
| "learning_rate": 3.729719700571046e-05, |
| "loss": 0.4581, |
| "num_tokens": 1091547735.0, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.9463869463869465, |
| "grad_norm": 0.3634515054857927, |
| "learning_rate": 3.726595600933398e-05, |
| "loss": 0.4614, |
| "num_tokens": 1092858455.0, |
| "step": 4175 |
| }, |
| { |
| "epoch": 1.9487179487179487, |
| "grad_norm": 0.38834463875662345, |
| "learning_rate": 3.7234691797536746e-05, |
| "loss": 0.4655, |
| "num_tokens": 1094169175.0, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.951048951048951, |
| "grad_norm": 0.4040443266819196, |
| "learning_rate": 3.720340444463939e-05, |
| "loss": 0.4603, |
| "num_tokens": 1095479895.0, |
| "step": 4185 |
| }, |
| { |
| "epoch": 1.9533799533799534, |
| "grad_norm": 0.37150785338102815, |
| "learning_rate": 3.7172094025017504e-05, |
| "loss": 0.4644, |
| "num_tokens": 1096790615.0, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.9557109557109555, |
| "grad_norm": 0.38391418767198954, |
| "learning_rate": 3.714076061310157e-05, |
| "loss": 0.47, |
| "num_tokens": 1098101335.0, |
| "step": 4195 |
| }, |
| { |
| "epoch": 1.958041958041958, |
| "grad_norm": 0.342628715510528, |
| "learning_rate": 3.710940428337668e-05, |
| "loss": 0.4598, |
| "num_tokens": 1099412055.0, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.9603729603729603, |
| "grad_norm": 0.35615594682543533, |
| "learning_rate": 3.7078025110382455e-05, |
| "loss": 0.453, |
| "num_tokens": 1100722775.0, |
| "step": 4205 |
| }, |
| { |
| "epoch": 1.9627039627039626, |
| "grad_norm": 0.34603620652492556, |
| "learning_rate": 3.704662316871276e-05, |
| "loss": 0.4821, |
| "num_tokens": 1102033495.0, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.965034965034965, |
| "grad_norm": 0.3544349334013842, |
| "learning_rate": 3.7015198533015633e-05, |
| "loss": 0.4739, |
| "num_tokens": 1103344215.0, |
| "step": 4215 |
| }, |
| { |
| "epoch": 1.9673659673659674, |
| "grad_norm": 0.3717399478589991, |
| "learning_rate": 3.6983751277993045e-05, |
| "loss": 0.4683, |
| "num_tokens": 1104654935.0, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.9696969696969697, |
| "grad_norm": 0.3807350804066158, |
| "learning_rate": 3.6952281478400715e-05, |
| "loss": 0.4721, |
| "num_tokens": 1105965655.0, |
| "step": 4225 |
| }, |
| { |
| "epoch": 1.972027972027972, |
| "grad_norm": 0.36855589171035036, |
| "learning_rate": 3.692078920904799e-05, |
| "loss": 0.4701, |
| "num_tokens": 1107267769.0, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.9743589743589745, |
| "grad_norm": 0.31575827164633735, |
| "learning_rate": 3.688927454479763e-05, |
| "loss": 0.4512, |
| "num_tokens": 1108578489.0, |
| "step": 4235 |
| }, |
| { |
| "epoch": 1.9766899766899768, |
| "grad_norm": 0.4085486617161448, |
| "learning_rate": 3.6857737560565584e-05, |
| "loss": 0.4624, |
| "num_tokens": 1109889209.0, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.9790209790209792, |
| "grad_norm": 0.33727800612662295, |
| "learning_rate": 3.682617833132092e-05, |
| "loss": 0.4427, |
| "num_tokens": 1111190573.0, |
| "step": 4245 |
| }, |
| { |
| "epoch": 1.9813519813519813, |
| "grad_norm": 0.4098762469941274, |
| "learning_rate": 3.679459693208555e-05, |
| "loss": 0.4656, |
| "num_tokens": 1112501293.0, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.9836829836829837, |
| "grad_norm": 0.3985077104938252, |
| "learning_rate": 3.6762993437934094e-05, |
| "loss": 0.4585, |
| "num_tokens": 1113812013.0, |
| "step": 4255 |
| }, |
| { |
| "epoch": 1.986013986013986, |
| "grad_norm": 0.37492705859719844, |
| "learning_rate": 3.673136792399371e-05, |
| "loss": 0.4589, |
| "num_tokens": 1115112034.0, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.9883449883449882, |
| "grad_norm": 0.4337730376679739, |
| "learning_rate": 3.6699720465443885e-05, |
| "loss": 0.471, |
| "num_tokens": 1116422754.0, |
| "step": 4265 |
| }, |
| { |
| "epoch": 1.9906759906759905, |
| "grad_norm": 0.36853486357793097, |
| "learning_rate": 3.6668051137516275e-05, |
| "loss": 0.4793, |
| "num_tokens": 1117733474.0, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.993006993006993, |
| "grad_norm": 0.4408364026741728, |
| "learning_rate": 3.663636001549452e-05, |
| "loss": 0.4637, |
| "num_tokens": 1119044194.0, |
| "step": 4275 |
| }, |
| { |
| "epoch": 1.9953379953379953, |
| "grad_norm": 0.4221479195643412, |
| "learning_rate": 3.660464717471408e-05, |
| "loss": 0.4608, |
| "num_tokens": 1120354914.0, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.9976689976689976, |
| "grad_norm": 0.37516116056322824, |
| "learning_rate": 3.6572912690562045e-05, |
| "loss": 0.4605, |
| "num_tokens": 1121665634.0, |
| "step": 4285 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4487985158857049, |
| "learning_rate": 3.654115663847694e-05, |
| "loss": 0.4591, |
| "num_tokens": 1122976354.0, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.0023310023310024, |
| "grad_norm": 0.41022871529290744, |
| "learning_rate": 3.650937909394857e-05, |
| "loss": 0.4071, |
| "num_tokens": 1124287074.0, |
| "step": 4295 |
| }, |
| { |
| "epoch": 2.0046620046620047, |
| "grad_norm": 0.38634036595793075, |
| "learning_rate": 3.6477580132517833e-05, |
| "loss": 0.4082, |
| "num_tokens": 1125594773.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.006993006993007, |
| "grad_norm": 0.3650522398368059, |
| "learning_rate": 3.644575982977655e-05, |
| "loss": 0.4186, |
| "num_tokens": 1126905493.0, |
| "step": 4305 |
| }, |
| { |
| "epoch": 2.0093240093240095, |
| "grad_norm": 0.3719826700428957, |
| "learning_rate": 3.641391826136724e-05, |
| "loss": 0.4182, |
| "num_tokens": 1128216213.0, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.011655011655012, |
| "grad_norm": 0.36321886526329594, |
| "learning_rate": 3.6382055502983e-05, |
| "loss": 0.416, |
| "num_tokens": 1129526933.0, |
| "step": 4315 |
| }, |
| { |
| "epoch": 2.013986013986014, |
| "grad_norm": 0.38670247193489393, |
| "learning_rate": 3.63501716303673e-05, |
| "loss": 0.4081, |
| "num_tokens": 1130837653.0, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.016317016317016, |
| "grad_norm": 0.4267019144244097, |
| "learning_rate": 3.631826671931379e-05, |
| "loss": 0.4238, |
| "num_tokens": 1132148373.0, |
| "step": 4325 |
| }, |
| { |
| "epoch": 2.0186480186480185, |
| "grad_norm": 0.3862354868842951, |
| "learning_rate": 3.628634084566615e-05, |
| "loss": 0.4009, |
| "num_tokens": 1133459093.0, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.020979020979021, |
| "grad_norm": 0.3553448746301145, |
| "learning_rate": 3.625439408531787e-05, |
| "loss": 0.4141, |
| "num_tokens": 1134757659.0, |
| "step": 4335 |
| }, |
| { |
| "epoch": 2.023310023310023, |
| "grad_norm": 0.3988598508713757, |
| "learning_rate": 3.62224265142121e-05, |
| "loss": 0.4054, |
| "num_tokens": 1136052195.0, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.0256410256410255, |
| "grad_norm": 0.4052184398184166, |
| "learning_rate": 3.6190438208341484e-05, |
| "loss": 0.4113, |
| "num_tokens": 1137362915.0, |
| "step": 4345 |
| }, |
| { |
| "epoch": 2.027972027972028, |
| "grad_norm": 0.3848516871568244, |
| "learning_rate": 3.615842924374791e-05, |
| "loss": 0.4153, |
| "num_tokens": 1138673635.0, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.0303030303030303, |
| "grad_norm": 0.36445962395799664, |
| "learning_rate": 3.6126399696522413e-05, |
| "loss": 0.4067, |
| "num_tokens": 1139984355.0, |
| "step": 4355 |
| }, |
| { |
| "epoch": 2.0326340326340326, |
| "grad_norm": 0.4070066068730233, |
| "learning_rate": 3.609434964280495e-05, |
| "loss": 0.4114, |
| "num_tokens": 1141285456.0, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.034965034965035, |
| "grad_norm": 0.3864432857902367, |
| "learning_rate": 3.6062279158784205e-05, |
| "loss": 0.4047, |
| "num_tokens": 1142596176.0, |
| "step": 4365 |
| }, |
| { |
| "epoch": 2.0372960372960374, |
| "grad_norm": 0.3673852417907459, |
| "learning_rate": 3.603018832069744e-05, |
| "loss": 0.4178, |
| "num_tokens": 1143906896.0, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.0396270396270397, |
| "grad_norm": 0.37584930043751846, |
| "learning_rate": 3.599807720483034e-05, |
| "loss": 0.418, |
| "num_tokens": 1145217616.0, |
| "step": 4375 |
| }, |
| { |
| "epoch": 2.041958041958042, |
| "grad_norm": 0.36602579882002695, |
| "learning_rate": 3.5965945887516715e-05, |
| "loss": 0.4056, |
| "num_tokens": 1146528336.0, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.0442890442890445, |
| "grad_norm": 0.351202961985245, |
| "learning_rate": 3.593379444513848e-05, |
| "loss": 0.3902, |
| "num_tokens": 1147839056.0, |
| "step": 4385 |
| }, |
| { |
| "epoch": 2.046620046620047, |
| "grad_norm": 0.4105988426092805, |
| "learning_rate": 3.590162295412533e-05, |
| "loss": 0.3981, |
| "num_tokens": 1149142866.0, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.0489510489510487, |
| "grad_norm": 0.38250301137351694, |
| "learning_rate": 3.586943149095464e-05, |
| "loss": 0.4103, |
| "num_tokens": 1150453586.0, |
| "step": 4395 |
| }, |
| { |
| "epoch": 2.051282051282051, |
| "grad_norm": 0.3386603986144158, |
| "learning_rate": 3.5837220132151286e-05, |
| "loss": 0.4069, |
| "num_tokens": 1151764306.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.0536130536130535, |
| "grad_norm": 0.35936045689135193, |
| "learning_rate": 3.58049889542874e-05, |
| "loss": 0.4141, |
| "num_tokens": 1153075026.0, |
| "step": 4405 |
| }, |
| { |
| "epoch": 2.055944055944056, |
| "grad_norm": 0.35837033557087505, |
| "learning_rate": 3.577273803398225e-05, |
| "loss": 0.4302, |
| "num_tokens": 1154385746.0, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.058275058275058, |
| "grad_norm": 0.35713780316116617, |
| "learning_rate": 3.574046744790203e-05, |
| "loss": 0.4052, |
| "num_tokens": 1155696466.0, |
| "step": 4415 |
| }, |
| { |
| "epoch": 2.0606060606060606, |
| "grad_norm": 0.3834311499947858, |
| "learning_rate": 3.570817727275968e-05, |
| "loss": 0.4107, |
| "num_tokens": 1156996487.0, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.062937062937063, |
| "grad_norm": 0.37490633384840205, |
| "learning_rate": 3.567586758531471e-05, |
| "loss": 0.4154, |
| "num_tokens": 1158307207.0, |
| "step": 4425 |
| }, |
| { |
| "epoch": 2.0652680652680653, |
| "grad_norm": 0.3741096498424809, |
| "learning_rate": 3.5643538462373035e-05, |
| "loss": 0.403, |
| "num_tokens": 1159617927.0, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.0675990675990676, |
| "grad_norm": 0.37754550383970564, |
| "learning_rate": 3.561118998078673e-05, |
| "loss": 0.4057, |
| "num_tokens": 1160928647.0, |
| "step": 4435 |
| }, |
| { |
| "epoch": 2.06993006993007, |
| "grad_norm": 0.4251054782461779, |
| "learning_rate": 3.55788222174539e-05, |
| "loss": 0.4188, |
| "num_tokens": 1162239367.0, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.0722610722610724, |
| "grad_norm": 0.3266838827050116, |
| "learning_rate": 3.5546435249318535e-05, |
| "loss": 0.4088, |
| "num_tokens": 1163550087.0, |
| "step": 4445 |
| }, |
| { |
| "epoch": 2.0745920745920747, |
| "grad_norm": 0.37379003655649706, |
| "learning_rate": 3.551402915337021e-05, |
| "loss": 0.4075, |
| "num_tokens": 1164860807.0, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.076923076923077, |
| "grad_norm": 0.3572437557048024, |
| "learning_rate": 3.5481604006644e-05, |
| "loss": 0.4179, |
| "num_tokens": 1166171527.0, |
| "step": 4455 |
| }, |
| { |
| "epoch": 2.0792540792540795, |
| "grad_norm": 0.3859455958746963, |
| "learning_rate": 3.544915988622028e-05, |
| "loss": 0.4237, |
| "num_tokens": 1167482247.0, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.0815850815850814, |
| "grad_norm": 0.4603570748371342, |
| "learning_rate": 3.5416696869224504e-05, |
| "loss": 0.4286, |
| "num_tokens": 1168792967.0, |
| "step": 4465 |
| }, |
| { |
| "epoch": 2.0839160839160837, |
| "grad_norm": 0.41262993381270163, |
| "learning_rate": 3.538421503282707e-05, |
| "loss": 0.4136, |
| "num_tokens": 1170103687.0, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.086247086247086, |
| "grad_norm": 0.37590755119976543, |
| "learning_rate": 3.5351714454243096e-05, |
| "loss": 0.4251, |
| "num_tokens": 1171414407.0, |
| "step": 4475 |
| }, |
| { |
| "epoch": 2.0885780885780885, |
| "grad_norm": 0.3465536685672719, |
| "learning_rate": 3.531919521073225e-05, |
| "loss": 0.4157, |
| "num_tokens": 1172725127.0, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.090909090909091, |
| "grad_norm": 0.42464850126292525, |
| "learning_rate": 3.5286657379598586e-05, |
| "loss": 0.405, |
| "num_tokens": 1174035847.0, |
| "step": 4485 |
| }, |
| { |
| "epoch": 2.093240093240093, |
| "grad_norm": 0.3901689607224437, |
| "learning_rate": 3.5254101038190345e-05, |
| "loss": 0.4168, |
| "num_tokens": 1175346567.0, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.0955710955710956, |
| "grad_norm": 0.4360019158309819, |
| "learning_rate": 3.522152626389975e-05, |
| "loss": 0.4151, |
| "num_tokens": 1176657287.0, |
| "step": 4495 |
| }, |
| { |
| "epoch": 2.097902097902098, |
| "grad_norm": 0.3502067766511093, |
| "learning_rate": 3.5188933134162865e-05, |
| "loss": 0.4224, |
| "num_tokens": 1177968007.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.1002331002331003, |
| "grad_norm": 0.5275300365734853, |
| "learning_rate": 3.515632172645937e-05, |
| "loss": 0.4159, |
| "num_tokens": 1179278727.0, |
| "step": 4505 |
| }, |
| { |
| "epoch": 2.1025641025641026, |
| "grad_norm": 0.3789515598432233, |
| "learning_rate": 3.51236921183124e-05, |
| "loss": 0.4191, |
| "num_tokens": 1180589447.0, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.104895104895105, |
| "grad_norm": 0.365833048618729, |
| "learning_rate": 3.509104438728837e-05, |
| "loss": 0.4059, |
| "num_tokens": 1181900167.0, |
| "step": 4515 |
| }, |
| { |
| "epoch": 2.1072261072261074, |
| "grad_norm": 0.35537158917962436, |
| "learning_rate": 3.505837861099676e-05, |
| "loss": 0.4234, |
| "num_tokens": 1183210887.0, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.1095571095571097, |
| "grad_norm": 0.35631156133374037, |
| "learning_rate": 3.5025694867089945e-05, |
| "loss": 0.4111, |
| "num_tokens": 1184521607.0, |
| "step": 4525 |
| }, |
| { |
| "epoch": 2.111888111888112, |
| "grad_norm": 0.38809219895458275, |
| "learning_rate": 3.499299323326302e-05, |
| "loss": 0.421, |
| "num_tokens": 1185832327.0, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.114219114219114, |
| "grad_norm": 0.3497921359981421, |
| "learning_rate": 3.496027378725361e-05, |
| "loss": 0.407, |
| "num_tokens": 1187143047.0, |
| "step": 4535 |
| }, |
| { |
| "epoch": 2.1165501165501164, |
| "grad_norm": 0.40913669577154826, |
| "learning_rate": 3.492753660684167e-05, |
| "loss": 0.4033, |
| "num_tokens": 1188453767.0, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.1188811188811187, |
| "grad_norm": 0.39030245051407625, |
| "learning_rate": 3.489478176984934e-05, |
| "loss": 0.4217, |
| "num_tokens": 1189756471.0, |
| "step": 4545 |
| }, |
| { |
| "epoch": 2.121212121212121, |
| "grad_norm": 0.3443883610438411, |
| "learning_rate": 3.48620093541407e-05, |
| "loss": 0.4232, |
| "num_tokens": 1191067191.0, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.1235431235431235, |
| "grad_norm": 0.3841739344611504, |
| "learning_rate": 3.482921943762163e-05, |
| "loss": 0.4141, |
| "num_tokens": 1192377911.0, |
| "step": 4555 |
| }, |
| { |
| "epoch": 2.125874125874126, |
| "grad_norm": 0.3383108963469932, |
| "learning_rate": 3.479641209823964e-05, |
| "loss": 0.4092, |
| "num_tokens": 1193688631.0, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.128205128205128, |
| "grad_norm": 0.3760327145277456, |
| "learning_rate": 3.47635874139836e-05, |
| "loss": 0.4087, |
| "num_tokens": 1194999351.0, |
| "step": 4565 |
| }, |
| { |
| "epoch": 2.1305361305361306, |
| "grad_norm": 0.34503872631449223, |
| "learning_rate": 3.473074546288366e-05, |
| "loss": 0.4048, |
| "num_tokens": 1196310071.0, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.132867132867133, |
| "grad_norm": 0.3580599391213381, |
| "learning_rate": 3.4697886323010994e-05, |
| "loss": 0.4152, |
| "num_tokens": 1197608942.0, |
| "step": 4575 |
| }, |
| { |
| "epoch": 2.1351981351981353, |
| "grad_norm": 0.435723923799417, |
| "learning_rate": 3.466501007247764e-05, |
| "loss": 0.4286, |
| "num_tokens": 1198919662.0, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.1375291375291376, |
| "grad_norm": 0.40488968120953833, |
| "learning_rate": 3.4632116789436334e-05, |
| "loss": 0.4118, |
| "num_tokens": 1200230382.0, |
| "step": 4585 |
| }, |
| { |
| "epoch": 2.13986013986014, |
| "grad_norm": 0.36702045490517987, |
| "learning_rate": 3.459920655208027e-05, |
| "loss": 0.4118, |
| "num_tokens": 1201528689.0, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.1421911421911424, |
| "grad_norm": 0.36787552508826277, |
| "learning_rate": 3.456627943864295e-05, |
| "loss": 0.4184, |
| "num_tokens": 1202839409.0, |
| "step": 4595 |
| }, |
| { |
| "epoch": 2.1445221445221447, |
| "grad_norm": 0.3756630907032684, |
| "learning_rate": 3.453333552739801e-05, |
| "loss": 0.4053, |
| "num_tokens": 1204150129.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.1468531468531467, |
| "grad_norm": 0.3579470200059966, |
| "learning_rate": 3.4500374896658996e-05, |
| "loss": 0.4147, |
| "num_tokens": 1205460849.0, |
| "step": 4605 |
| }, |
| { |
| "epoch": 2.149184149184149, |
| "grad_norm": 0.3946329332145125, |
| "learning_rate": 3.446739762477922e-05, |
| "loss": 0.4207, |
| "num_tokens": 1206761500.0, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.1515151515151514, |
| "grad_norm": 0.3580747430804025, |
| "learning_rate": 3.4434403790151546e-05, |
| "loss": 0.3979, |
| "num_tokens": 1208065281.0, |
| "step": 4615 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 0.36202526630950443, |
| "learning_rate": 3.44013934712082e-05, |
| "loss": 0.4197, |
| "num_tokens": 1209376001.0, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.156177156177156, |
| "grad_norm": 0.3794229385793346, |
| "learning_rate": 3.4368366746420613e-05, |
| "loss": 0.4259, |
| "num_tokens": 1210686721.0, |
| "step": 4625 |
| }, |
| { |
| "epoch": 2.1585081585081585, |
| "grad_norm": 0.3356824344579626, |
| "learning_rate": 3.4335323694299205e-05, |
| "loss": 0.4168, |
| "num_tokens": 1211997441.0, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.160839160839161, |
| "grad_norm": 0.3749451829370277, |
| "learning_rate": 3.43022643933932e-05, |
| "loss": 0.4106, |
| "num_tokens": 1213308161.0, |
| "step": 4635 |
| }, |
| { |
| "epoch": 2.163170163170163, |
| "grad_norm": 0.4016684163697115, |
| "learning_rate": 3.426918892229046e-05, |
| "loss": 0.4098, |
| "num_tokens": 1214618881.0, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.1655011655011656, |
| "grad_norm": 0.4000860482831418, |
| "learning_rate": 3.423609735961729e-05, |
| "loss": 0.41, |
| "num_tokens": 1215929601.0, |
| "step": 4645 |
| }, |
| { |
| "epoch": 2.167832167832168, |
| "grad_norm": 0.4074946003687253, |
| "learning_rate": 3.420298978403824e-05, |
| "loss": 0.418, |
| "num_tokens": 1217240321.0, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.1701631701631703, |
| "grad_norm": 0.3696291402654651, |
| "learning_rate": 3.4169866274255926e-05, |
| "loss": 0.4149, |
| "num_tokens": 1218544211.0, |
| "step": 4655 |
| }, |
| { |
| "epoch": 2.1724941724941726, |
| "grad_norm": 0.38844912853019314, |
| "learning_rate": 3.413672690901084e-05, |
| "loss": 0.4059, |
| "num_tokens": 1219854931.0, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.174825174825175, |
| "grad_norm": 0.33716786832291057, |
| "learning_rate": 3.410357176708118e-05, |
| "loss": 0.4033, |
| "num_tokens": 1221165651.0, |
| "step": 4665 |
| }, |
| { |
| "epoch": 2.177156177156177, |
| "grad_norm": 0.36180594189314, |
| "learning_rate": 3.4070400927282616e-05, |
| "loss": 0.4134, |
| "num_tokens": 1222476371.0, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.1794871794871793, |
| "grad_norm": 0.34683197522568154, |
| "learning_rate": 3.403721446846818e-05, |
| "loss": 0.3892, |
| "num_tokens": 1223784575.0, |
| "step": 4675 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.3776362556179136, |
| "learning_rate": 3.400401246952798e-05, |
| "loss": 0.4259, |
| "num_tokens": 1225095104.0, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.184149184149184, |
| "grad_norm": 0.3518038107992216, |
| "learning_rate": 3.397079500938913e-05, |
| "loss": 0.4227, |
| "num_tokens": 1226405824.0, |
| "step": 4685 |
| }, |
| { |
| "epoch": 2.1864801864801864, |
| "grad_norm": 0.38008130799789136, |
| "learning_rate": 3.3937562167015444e-05, |
| "loss": 0.4192, |
| "num_tokens": 1227716544.0, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.1888111888111887, |
| "grad_norm": 0.410030351985677, |
| "learning_rate": 3.3904314021407306e-05, |
| "loss": 0.4187, |
| "num_tokens": 1229027264.0, |
| "step": 4695 |
| }, |
| { |
| "epoch": 2.191142191142191, |
| "grad_norm": 0.44730022868266656, |
| "learning_rate": 3.3871050651601526e-05, |
| "loss": 0.4035, |
| "num_tokens": 1230337984.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.1934731934731935, |
| "grad_norm": 0.4230202641947911, |
| "learning_rate": 3.383777213667104e-05, |
| "loss": 0.4354, |
| "num_tokens": 1231648704.0, |
| "step": 4705 |
| }, |
| { |
| "epoch": 2.195804195804196, |
| "grad_norm": 0.39299528264780836, |
| "learning_rate": 3.3804478555724836e-05, |
| "loss": 0.4189, |
| "num_tokens": 1232959424.0, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.198135198135198, |
| "grad_norm": 0.33696525877227246, |
| "learning_rate": 3.3771169987907694e-05, |
| "loss": 0.3992, |
| "num_tokens": 1234270144.0, |
| "step": 4715 |
| }, |
| { |
| "epoch": 2.2004662004662006, |
| "grad_norm": 0.3608578810758537, |
| "learning_rate": 3.373784651240003e-05, |
| "loss": 0.4138, |
| "num_tokens": 1235580864.0, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.202797202797203, |
| "grad_norm": 0.36875097820212976, |
| "learning_rate": 3.370450820841769e-05, |
| "loss": 0.4168, |
| "num_tokens": 1236891584.0, |
| "step": 4725 |
| }, |
| { |
| "epoch": 2.2051282051282053, |
| "grad_norm": 0.3857628385194161, |
| "learning_rate": 3.3671155155211775e-05, |
| "loss": 0.4126, |
| "num_tokens": 1238197409.0, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.2074592074592077, |
| "grad_norm": 0.36269590587156925, |
| "learning_rate": 3.363778743206844e-05, |
| "loss": 0.4124, |
| "num_tokens": 1239508129.0, |
| "step": 4735 |
| }, |
| { |
| "epoch": 2.20979020979021, |
| "grad_norm": 0.37684424909135084, |
| "learning_rate": 3.360440511830873e-05, |
| "loss": 0.4051, |
| "num_tokens": 1240818849.0, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.212121212121212, |
| "grad_norm": 0.33976488415925243, |
| "learning_rate": 3.3571008293288366e-05, |
| "loss": 0.4058, |
| "num_tokens": 1242129569.0, |
| "step": 4745 |
| }, |
| { |
| "epoch": 2.2144522144522143, |
| "grad_norm": 0.3390834642562686, |
| "learning_rate": 3.3537597036397555e-05, |
| "loss": 0.3954, |
| "num_tokens": 1243440289.0, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.2167832167832167, |
| "grad_norm": 0.3625798993977378, |
| "learning_rate": 3.35041714270608e-05, |
| "loss": 0.415, |
| "num_tokens": 1244751009.0, |
| "step": 4755 |
| }, |
| { |
| "epoch": 2.219114219114219, |
| "grad_norm": 0.3637371812561485, |
| "learning_rate": 3.3470731544736784e-05, |
| "loss": 0.4099, |
| "num_tokens": 1246061729.0, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.2214452214452214, |
| "grad_norm": 0.3469567003110868, |
| "learning_rate": 3.3437277468918046e-05, |
| "loss": 0.4205, |
| "num_tokens": 1247372449.0, |
| "step": 4765 |
| }, |
| { |
| "epoch": 2.2237762237762237, |
| "grad_norm": 0.3626483429391148, |
| "learning_rate": 3.3403809279130904e-05, |
| "loss": 0.4348, |
| "num_tokens": 1248679073.0, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.226107226107226, |
| "grad_norm": 0.34400225732618517, |
| "learning_rate": 3.337032705493522e-05, |
| "loss": 0.4088, |
| "num_tokens": 1249989793.0, |
| "step": 4775 |
| }, |
| { |
| "epoch": 2.2284382284382285, |
| "grad_norm": 0.42160458003014656, |
| "learning_rate": 3.333683087592421e-05, |
| "loss": 0.4182, |
| "num_tokens": 1251300513.0, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 0.3628491977962132, |
| "learning_rate": 3.3303320821724285e-05, |
| "loss": 0.4263, |
| "num_tokens": 1252597120.0, |
| "step": 4785 |
| }, |
| { |
| "epoch": 2.233100233100233, |
| "grad_norm": 0.3646447806760171, |
| "learning_rate": 3.326979697199482e-05, |
| "loss": 0.4206, |
| "num_tokens": 1253907840.0, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.2354312354312356, |
| "grad_norm": 0.42668719182222326, |
| "learning_rate": 3.323625940642797e-05, |
| "loss": 0.4124, |
| "num_tokens": 1255218560.0, |
| "step": 4795 |
| }, |
| { |
| "epoch": 2.237762237762238, |
| "grad_norm": 0.38155232884871143, |
| "learning_rate": 3.320270820474856e-05, |
| "loss": 0.4019, |
| "num_tokens": 1256529280.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.2400932400932403, |
| "grad_norm": 0.3680246772827745, |
| "learning_rate": 3.316914344671374e-05, |
| "loss": 0.424, |
| "num_tokens": 1257840000.0, |
| "step": 4805 |
| }, |
| { |
| "epoch": 2.242424242424242, |
| "grad_norm": 0.3538991106150229, |
| "learning_rate": 3.313556521211296e-05, |
| "loss": 0.4171, |
| "num_tokens": 1259150720.0, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.2447552447552446, |
| "grad_norm": 0.3799814749588959, |
| "learning_rate": 3.310197358076767e-05, |
| "loss": 0.4089, |
| "num_tokens": 1260448949.0, |
| "step": 4815 |
| }, |
| { |
| "epoch": 2.247086247086247, |
| "grad_norm": 0.3534336331691378, |
| "learning_rate": 3.3068368632531166e-05, |
| "loss": 0.419, |
| "num_tokens": 1261759669.0, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.2494172494172493, |
| "grad_norm": 0.3948143146379037, |
| "learning_rate": 3.303475044728842e-05, |
| "loss": 0.4341, |
| "num_tokens": 1263070389.0, |
| "step": 4825 |
| }, |
| { |
| "epoch": 2.2517482517482517, |
| "grad_norm": 0.38620152575690947, |
| "learning_rate": 3.3001119104955856e-05, |
| "loss": 0.3993, |
| "num_tokens": 1264381109.0, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.254079254079254, |
| "grad_norm": 0.409856401580112, |
| "learning_rate": 3.296747468548117e-05, |
| "loss": 0.4284, |
| "num_tokens": 1265691829.0, |
| "step": 4835 |
| }, |
| { |
| "epoch": 2.2564102564102564, |
| "grad_norm": 0.3894321494198609, |
| "learning_rate": 3.2933817268843175e-05, |
| "loss": 0.4044, |
| "num_tokens": 1267002549.0, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.2587412587412588, |
| "grad_norm": 0.44554017981068295, |
| "learning_rate": 3.2900146935051535e-05, |
| "loss": 0.4046, |
| "num_tokens": 1268313269.0, |
| "step": 4845 |
| }, |
| { |
| "epoch": 2.261072261072261, |
| "grad_norm": 0.38537941934187847, |
| "learning_rate": 3.2866463764146647e-05, |
| "loss": 0.4088, |
| "num_tokens": 1269611053.0, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.2634032634032635, |
| "grad_norm": 0.34106108845050315, |
| "learning_rate": 3.2832767836199435e-05, |
| "loss": 0.4066, |
| "num_tokens": 1270921773.0, |
| "step": 4855 |
| }, |
| { |
| "epoch": 2.265734265734266, |
| "grad_norm": 0.37822769502708925, |
| "learning_rate": 3.279905923131112e-05, |
| "loss": 0.4352, |
| "num_tokens": 1272232493.0, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.268065268065268, |
| "grad_norm": 0.3717980750928343, |
| "learning_rate": 3.276533802961308e-05, |
| "loss": 0.4149, |
| "num_tokens": 1273533373.0, |
| "step": 4865 |
| }, |
| { |
| "epoch": 2.2703962703962706, |
| "grad_norm": 0.40049719958281904, |
| "learning_rate": 3.273160431126664e-05, |
| "loss": 0.4149, |
| "num_tokens": 1274825773.0, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 0.3492455161378825, |
| "learning_rate": 3.269785815646286e-05, |
| "loss": 0.4078, |
| "num_tokens": 1276121190.0, |
| "step": 4875 |
| }, |
| { |
| "epoch": 2.2750582750582753, |
| "grad_norm": 0.3434108328323875, |
| "learning_rate": 3.266409964542236e-05, |
| "loss": 0.4315, |
| "num_tokens": 1277431910.0, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.277389277389277, |
| "grad_norm": 0.34434993144818066, |
| "learning_rate": 3.263032885839517e-05, |
| "loss": 0.3986, |
| "num_tokens": 1278729221.0, |
| "step": 4885 |
| }, |
| { |
| "epoch": 2.2797202797202796, |
| "grad_norm": 0.32968474076252635, |
| "learning_rate": 3.2596545875660474e-05, |
| "loss": 0.4029, |
| "num_tokens": 1280039941.0, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.282051282051282, |
| "grad_norm": 0.3533987796359302, |
| "learning_rate": 3.256275077752644e-05, |
| "loss": 0.4132, |
| "num_tokens": 1281350661.0, |
| "step": 4895 |
| }, |
| { |
| "epoch": 2.2843822843822843, |
| "grad_norm": 0.36035857510185826, |
| "learning_rate": 3.2528943644330066e-05, |
| "loss": 0.4062, |
| "num_tokens": 1282661381.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.2867132867132867, |
| "grad_norm": 0.4102624567196318, |
| "learning_rate": 3.2495124556436935e-05, |
| "loss": 0.405, |
| "num_tokens": 1283972101.0, |
| "step": 4905 |
| }, |
| { |
| "epoch": 2.289044289044289, |
| "grad_norm": 0.32975812878362454, |
| "learning_rate": 3.246129359424105e-05, |
| "loss": 0.4183, |
| "num_tokens": 1285282821.0, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.2913752913752914, |
| "grad_norm": 0.3671230776462342, |
| "learning_rate": 3.2427450838164665e-05, |
| "loss": 0.4202, |
| "num_tokens": 1286593541.0, |
| "step": 4915 |
| }, |
| { |
| "epoch": 2.2937062937062938, |
| "grad_norm": 0.35487029443793194, |
| "learning_rate": 3.239359636865803e-05, |
| "loss": 0.4135, |
| "num_tokens": 1287904261.0, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.296037296037296, |
| "grad_norm": 0.35127658402893597, |
| "learning_rate": 3.235973026619928e-05, |
| "loss": 0.4119, |
| "num_tokens": 1289214981.0, |
| "step": 4925 |
| }, |
| { |
| "epoch": 2.2983682983682985, |
| "grad_norm": 0.34383367438253826, |
| "learning_rate": 3.2325852611294175e-05, |
| "loss": 0.4191, |
| "num_tokens": 1290517020.0, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.300699300699301, |
| "grad_norm": 0.3781283212650206, |
| "learning_rate": 3.229196348447595e-05, |
| "loss": 0.4133, |
| "num_tokens": 1291814376.0, |
| "step": 4935 |
| }, |
| { |
| "epoch": 2.303030303030303, |
| "grad_norm": 0.3797225808633411, |
| "learning_rate": 3.225806296630512e-05, |
| "loss": 0.4314, |
| "num_tokens": 1293125096.0, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.3053613053613056, |
| "grad_norm": 0.3820018986940426, |
| "learning_rate": 3.2224151137369244e-05, |
| "loss": 0.4089, |
| "num_tokens": 1294422895.0, |
| "step": 4945 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.3535499522376053, |
| "learning_rate": 3.219022807828282e-05, |
| "loss": 0.4105, |
| "num_tokens": 1295733615.0, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.31002331002331, |
| "grad_norm": 0.35691493546859565, |
| "learning_rate": 3.215629386968701e-05, |
| "loss": 0.4103, |
| "num_tokens": 1297044335.0, |
| "step": 4955 |
| }, |
| { |
| "epoch": 2.312354312354312, |
| "grad_norm": 0.3387110035916893, |
| "learning_rate": 3.212234859224946e-05, |
| "loss": 0.4284, |
| "num_tokens": 1298355055.0, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.3146853146853146, |
| "grad_norm": 0.3710615014269036, |
| "learning_rate": 3.208839232666419e-05, |
| "loss": 0.4101, |
| "num_tokens": 1299665775.0, |
| "step": 4965 |
| }, |
| { |
| "epoch": 2.317016317016317, |
| "grad_norm": 0.3373474545094395, |
| "learning_rate": 3.205442515365128e-05, |
| "loss": 0.4088, |
| "num_tokens": 1300976495.0, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.3193473193473193, |
| "grad_norm": 0.34943245534248407, |
| "learning_rate": 3.202044715395677e-05, |
| "loss": 0.4291, |
| "num_tokens": 1302287215.0, |
| "step": 4975 |
| }, |
| { |
| "epoch": 2.3216783216783217, |
| "grad_norm": 0.3617392320216389, |
| "learning_rate": 3.198645840835243e-05, |
| "loss": 0.4289, |
| "num_tokens": 1303590183.0, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.324009324009324, |
| "grad_norm": 0.37806058308011203, |
| "learning_rate": 3.195245899763559e-05, |
| "loss": 0.4104, |
| "num_tokens": 1304900903.0, |
| "step": 4985 |
| }, |
| { |
| "epoch": 2.3263403263403264, |
| "grad_norm": 0.3317302716295439, |
| "learning_rate": 3.1918449002628895e-05, |
| "loss": 0.4084, |
| "num_tokens": 1306211623.0, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.3286713286713288, |
| "grad_norm": 0.3435386897987483, |
| "learning_rate": 3.1884428504180186e-05, |
| "loss": 0.4135, |
| "num_tokens": 1307522343.0, |
| "step": 4995 |
| }, |
| { |
| "epoch": 2.331002331002331, |
| "grad_norm": 0.360164719000458, |
| "learning_rate": 3.185039758316226e-05, |
| "loss": 0.4115, |
| "num_tokens": 1308833063.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.3945377389793074, |
| "learning_rate": 3.1816356320472695e-05, |
| "loss": 0.4188, |
| "num_tokens": 1310143783.0, |
| "step": 5005 |
| }, |
| { |
| "epoch": 2.335664335664336, |
| "grad_norm": 0.4051939320818589, |
| "learning_rate": 3.178230479703364e-05, |
| "loss": 0.4203, |
| "num_tokens": 1311454503.0, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.3379953379953378, |
| "grad_norm": 0.35708148981072574, |
| "learning_rate": 3.174824309379166e-05, |
| "loss": 0.418, |
| "num_tokens": 1312765223.0, |
| "step": 5015 |
| }, |
| { |
| "epoch": 2.3403263403263406, |
| "grad_norm": 0.33742717843001446, |
| "learning_rate": 3.1714171291717486e-05, |
| "loss": 0.4084, |
| "num_tokens": 1314069304.0, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.3426573426573425, |
| "grad_norm": 0.3589509813194004, |
| "learning_rate": 3.168008947180588e-05, |
| "loss": 0.4045, |
| "num_tokens": 1315380024.0, |
| "step": 5025 |
| }, |
| { |
| "epoch": 2.344988344988345, |
| "grad_norm": 0.3508912054860665, |
| "learning_rate": 3.1645997715075426e-05, |
| "loss": 0.4033, |
| "num_tokens": 1316690744.0, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.347319347319347, |
| "grad_norm": 0.3295215331291118, |
| "learning_rate": 3.161189610256829e-05, |
| "loss": 0.4066, |
| "num_tokens": 1318001464.0, |
| "step": 5035 |
| }, |
| { |
| "epoch": 2.3496503496503496, |
| "grad_norm": 0.35230973624032774, |
| "learning_rate": 3.157778471535011e-05, |
| "loss": 0.417, |
| "num_tokens": 1319312184.0, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.351981351981352, |
| "grad_norm": 0.3884539975737978, |
| "learning_rate": 3.154366363450974e-05, |
| "loss": 0.4236, |
| "num_tokens": 1320618337.0, |
| "step": 5045 |
| }, |
| { |
| "epoch": 2.3543123543123543, |
| "grad_norm": 0.3698867601064244, |
| "learning_rate": 3.150953294115907e-05, |
| "loss": 0.4054, |
| "num_tokens": 1321929057.0, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.3566433566433567, |
| "grad_norm": 0.3652929150276016, |
| "learning_rate": 3.147539271643287e-05, |
| "loss": 0.4267, |
| "num_tokens": 1323239777.0, |
| "step": 5055 |
| }, |
| { |
| "epoch": 2.358974358974359, |
| "grad_norm": 0.35528129394096414, |
| "learning_rate": 3.1441243041488525e-05, |
| "loss": 0.4336, |
| "num_tokens": 1324550497.0, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.3613053613053614, |
| "grad_norm": 0.37876386202795675, |
| "learning_rate": 3.140708399750594e-05, |
| "loss": 0.425, |
| "num_tokens": 1325861217.0, |
| "step": 5065 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.35526472594618097, |
| "learning_rate": 3.1372915665687225e-05, |
| "loss": 0.4073, |
| "num_tokens": 1327171937.0, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.365967365967366, |
| "grad_norm": 0.3550032233081871, |
| "learning_rate": 3.133873812725662e-05, |
| "loss": 0.4078, |
| "num_tokens": 1328482657.0, |
| "step": 5075 |
| }, |
| { |
| "epoch": 2.3682983682983685, |
| "grad_norm": 0.3431998998307288, |
| "learning_rate": 3.130455146346024e-05, |
| "loss": 0.4105, |
| "num_tokens": 1329791355.0, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.370629370629371, |
| "grad_norm": 0.3608126350221179, |
| "learning_rate": 3.1270355755565886e-05, |
| "loss": 0.4262, |
| "num_tokens": 1331102075.0, |
| "step": 5085 |
| }, |
| { |
| "epoch": 2.3729603729603728, |
| "grad_norm": 0.3494733230503953, |
| "learning_rate": 3.123615108486286e-05, |
| "loss": 0.4238, |
| "num_tokens": 1332404189.0, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.375291375291375, |
| "grad_norm": 0.338730898909754, |
| "learning_rate": 3.120193753266175e-05, |
| "loss": 0.4191, |
| "num_tokens": 1333714909.0, |
| "step": 5095 |
| }, |
| { |
| "epoch": 2.3776223776223775, |
| "grad_norm": 0.402224903788031, |
| "learning_rate": 3.116771518029431e-05, |
| "loss": 0.4161, |
| "num_tokens": 1335025629.0, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.37995337995338, |
| "grad_norm": 0.36595610812881385, |
| "learning_rate": 3.113348410911316e-05, |
| "loss": 0.4081, |
| "num_tokens": 1336336349.0, |
| "step": 5105 |
| }, |
| { |
| "epoch": 2.382284382284382, |
| "grad_norm": 0.3880930752201236, |
| "learning_rate": 3.109924440049166e-05, |
| "loss": 0.4176, |
| "num_tokens": 1337640003.0, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.3846153846153846, |
| "grad_norm": 0.35480427992895003, |
| "learning_rate": 3.1064996135823736e-05, |
| "loss": 0.4143, |
| "num_tokens": 1338950723.0, |
| "step": 5115 |
| }, |
| { |
| "epoch": 2.386946386946387, |
| "grad_norm": 0.3439245249078291, |
| "learning_rate": 3.10307393965236e-05, |
| "loss": 0.4128, |
| "num_tokens": 1340261443.0, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.3892773892773893, |
| "grad_norm": 0.38182867678172666, |
| "learning_rate": 3.0996474264025654e-05, |
| "loss": 0.4112, |
| "num_tokens": 1341564973.0, |
| "step": 5125 |
| }, |
| { |
| "epoch": 2.3916083916083917, |
| "grad_norm": 0.3598389668576128, |
| "learning_rate": 3.096220081978423e-05, |
| "loss": 0.4156, |
| "num_tokens": 1342875693.0, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.393939393939394, |
| "grad_norm": 0.3860442296593736, |
| "learning_rate": 3.092791914527341e-05, |
| "loss": 0.4091, |
| "num_tokens": 1344186413.0, |
| "step": 5135 |
| }, |
| { |
| "epoch": 2.3962703962703964, |
| "grad_norm": 0.36500293825420654, |
| "learning_rate": 3.0893629321986874e-05, |
| "loss": 0.4121, |
| "num_tokens": 1345497133.0, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.3986013986013988, |
| "grad_norm": 0.3776001399782403, |
| "learning_rate": 3.085933143143765e-05, |
| "loss": 0.4306, |
| "num_tokens": 1346794525.0, |
| "step": 5145 |
| }, |
| { |
| "epoch": 2.400932400932401, |
| "grad_norm": 0.3760233460610802, |
| "learning_rate": 3.082502555515793e-05, |
| "loss": 0.4302, |
| "num_tokens": 1348105245.0, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.403263403263403, |
| "grad_norm": 0.37109726618618827, |
| "learning_rate": 3.079071177469892e-05, |
| "loss": 0.4021, |
| "num_tokens": 1349414836.0, |
| "step": 5155 |
| }, |
| { |
| "epoch": 2.4055944055944054, |
| "grad_norm": 0.3562308097398838, |
| "learning_rate": 3.07563901716306e-05, |
| "loss": 0.4198, |
| "num_tokens": 1350725556.0, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.4079254079254078, |
| "grad_norm": 0.37126907481875954, |
| "learning_rate": 3.072206082754154e-05, |
| "loss": 0.427, |
| "num_tokens": 1352022873.0, |
| "step": 5165 |
| }, |
| { |
| "epoch": 2.41025641025641, |
| "grad_norm": 0.3766487217257149, |
| "learning_rate": 3.068772382403873e-05, |
| "loss": 0.4231, |
| "num_tokens": 1353333593.0, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.4125874125874125, |
| "grad_norm": 0.36252558911615673, |
| "learning_rate": 3.065337924274735e-05, |
| "loss": 0.4113, |
| "num_tokens": 1354644313.0, |
| "step": 5175 |
| }, |
| { |
| "epoch": 2.414918414918415, |
| "grad_norm": 0.36974990717403, |
| "learning_rate": 3.06190271653106e-05, |
| "loss": 0.4289, |
| "num_tokens": 1355955033.0, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.417249417249417, |
| "grad_norm": 0.3383571793373514, |
| "learning_rate": 3.058466767338951e-05, |
| "loss": 0.4214, |
| "num_tokens": 1357265753.0, |
| "step": 5185 |
| }, |
| { |
| "epoch": 2.4195804195804196, |
| "grad_norm": 0.38036895795490944, |
| "learning_rate": 3.0550300848662704e-05, |
| "loss": 0.4207, |
| "num_tokens": 1358576473.0, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.421911421911422, |
| "grad_norm": 0.3505203016572691, |
| "learning_rate": 3.051592677282628e-05, |
| "loss": 0.4165, |
| "num_tokens": 1359887193.0, |
| "step": 5195 |
| }, |
| { |
| "epoch": 2.4242424242424243, |
| "grad_norm": 0.3640785191337429, |
| "learning_rate": 3.0481545527593546e-05, |
| "loss": 0.4272, |
| "num_tokens": 1361197913.0, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.4265734265734267, |
| "grad_norm": 0.35398548228202553, |
| "learning_rate": 3.0447157194694864e-05, |
| "loss": 0.4182, |
| "num_tokens": 1362508633.0, |
| "step": 5205 |
| }, |
| { |
| "epoch": 2.428904428904429, |
| "grad_norm": 0.36683505543529804, |
| "learning_rate": 3.041276185587743e-05, |
| "loss": 0.421, |
| "num_tokens": 1363819353.0, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.4312354312354314, |
| "grad_norm": 0.37273261132442753, |
| "learning_rate": 3.0378359592905097e-05, |
| "loss": 0.4004, |
| "num_tokens": 1365130073.0, |
| "step": 5215 |
| }, |
| { |
| "epoch": 2.4335664335664333, |
| "grad_norm": 0.3711753185510304, |
| "learning_rate": 3.0343950487558208e-05, |
| "loss": 0.4157, |
| "num_tokens": 1366440793.0, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.435897435897436, |
| "grad_norm": 0.3886512119876518, |
| "learning_rate": 3.030953462163334e-05, |
| "loss": 0.4203, |
| "num_tokens": 1367751513.0, |
| "step": 5225 |
| }, |
| { |
| "epoch": 2.438228438228438, |
| "grad_norm": 0.35270305999051793, |
| "learning_rate": 3.0275112076943145e-05, |
| "loss": 0.4039, |
| "num_tokens": 1369051755.0, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.4405594405594404, |
| "grad_norm": 0.3912345566826907, |
| "learning_rate": 3.0240682935316156e-05, |
| "loss": 0.4152, |
| "num_tokens": 1370362475.0, |
| "step": 5235 |
| }, |
| { |
| "epoch": 2.4428904428904428, |
| "grad_norm": 0.39812504543925764, |
| "learning_rate": 3.0206247278596594e-05, |
| "loss": 0.4252, |
| "num_tokens": 1371667349.0, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.445221445221445, |
| "grad_norm": 0.36495714706101673, |
| "learning_rate": 3.0171805188644163e-05, |
| "loss": 0.4262, |
| "num_tokens": 1372978069.0, |
| "step": 5245 |
| }, |
| { |
| "epoch": 2.4475524475524475, |
| "grad_norm": 0.32681284740431915, |
| "learning_rate": 3.013735674733385e-05, |
| "loss": 0.4091, |
| "num_tokens": 1374288789.0, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.44988344988345, |
| "grad_norm": 0.41435527791431637, |
| "learning_rate": 3.0102902036555765e-05, |
| "loss": 0.4153, |
| "num_tokens": 1375599509.0, |
| "step": 5255 |
| }, |
| { |
| "epoch": 2.4522144522144522, |
| "grad_norm": 0.347283922287271, |
| "learning_rate": 3.0068441138214886e-05, |
| "loss": 0.4092, |
| "num_tokens": 1376910229.0, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.4545454545454546, |
| "grad_norm": 0.38829537811571885, |
| "learning_rate": 3.0033974134230937e-05, |
| "loss": 0.4177, |
| "num_tokens": 1378220949.0, |
| "step": 5265 |
| }, |
| { |
| "epoch": 2.456876456876457, |
| "grad_norm": 0.3275166232456935, |
| "learning_rate": 2.9999501106538126e-05, |
| "loss": 0.4082, |
| "num_tokens": 1379531669.0, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.4592074592074593, |
| "grad_norm": 0.42514354355776585, |
| "learning_rate": 2.9965022137084997e-05, |
| "loss": 0.4056, |
| "num_tokens": 1380837473.0, |
| "step": 5275 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 0.37968747313444934, |
| "learning_rate": 2.993053730783422e-05, |
| "loss": 0.3987, |
| "num_tokens": 1382148193.0, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.463869463869464, |
| "grad_norm": 0.3560839943508509, |
| "learning_rate": 2.9896046700762398e-05, |
| "loss": 0.4136, |
| "num_tokens": 1383458913.0, |
| "step": 5285 |
| }, |
| { |
| "epoch": 2.4662004662004664, |
| "grad_norm": 0.36938187951856855, |
| "learning_rate": 2.9861550397859838e-05, |
| "loss": 0.4154, |
| "num_tokens": 1384769633.0, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.4685314685314683, |
| "grad_norm": 0.40308698065693266, |
| "learning_rate": 2.982704848113043e-05, |
| "loss": 0.425, |
| "num_tokens": 1386065812.0, |
| "step": 5295 |
| }, |
| { |
| "epoch": 2.4708624708624707, |
| "grad_norm": 0.3293971532786334, |
| "learning_rate": 2.9792541032591387e-05, |
| "loss": 0.4114, |
| "num_tokens": 1387376532.0, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.473193473193473, |
| "grad_norm": 0.3273718604110626, |
| "learning_rate": 2.975802813427307e-05, |
| "loss": 0.3997, |
| "num_tokens": 1388687252.0, |
| "step": 5305 |
| }, |
| { |
| "epoch": 2.4755244755244754, |
| "grad_norm": 0.35222934802235306, |
| "learning_rate": 2.9723509868218792e-05, |
| "loss": 0.4143, |
| "num_tokens": 1389997972.0, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.4778554778554778, |
| "grad_norm": 0.3606682109903173, |
| "learning_rate": 2.9688986316484636e-05, |
| "loss": 0.4158, |
| "num_tokens": 1391308692.0, |
| "step": 5315 |
| }, |
| { |
| "epoch": 2.48018648018648, |
| "grad_norm": 0.3851368043166362, |
| "learning_rate": 2.9654457561139254e-05, |
| "loss": 0.4204, |
| "num_tokens": 1392619412.0, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.4825174825174825, |
| "grad_norm": 0.35215808790406317, |
| "learning_rate": 2.961992368426366e-05, |
| "loss": 0.4112, |
| "num_tokens": 1393930132.0, |
| "step": 5325 |
| }, |
| { |
| "epoch": 2.484848484848485, |
| "grad_norm": 0.33407840882303375, |
| "learning_rate": 2.958538476795104e-05, |
| "loss": 0.4065, |
| "num_tokens": 1395232316.0, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.4871794871794872, |
| "grad_norm": 0.365448648296931, |
| "learning_rate": 2.9550840894306565e-05, |
| "loss": 0.4144, |
| "num_tokens": 1396543036.0, |
| "step": 5335 |
| }, |
| { |
| "epoch": 2.4895104895104896, |
| "grad_norm": 0.3516522718004562, |
| "learning_rate": 2.9516292145447187e-05, |
| "loss": 0.4036, |
| "num_tokens": 1397853756.0, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.491841491841492, |
| "grad_norm": 0.3964385897616128, |
| "learning_rate": 2.9481738603501464e-05, |
| "loss": 0.4145, |
| "num_tokens": 1399164476.0, |
| "step": 5345 |
| }, |
| { |
| "epoch": 2.4941724941724943, |
| "grad_norm": 0.3330290617721678, |
| "learning_rate": 2.9447180350609305e-05, |
| "loss": 0.4126, |
| "num_tokens": 1400475196.0, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.4965034965034967, |
| "grad_norm": 0.3620399941365234, |
| "learning_rate": 2.941261746892187e-05, |
| "loss": 0.4198, |
| "num_tokens": 1401785916.0, |
| "step": 5355 |
| }, |
| { |
| "epoch": 2.4988344988344986, |
| "grad_norm": 0.4031249657041131, |
| "learning_rate": 2.937805004060129e-05, |
| "loss": 0.3909, |
| "num_tokens": 1403096636.0, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.5011655011655014, |
| "grad_norm": 0.35297787174955664, |
| "learning_rate": 2.9343478147820515e-05, |
| "loss": 0.4161, |
| "num_tokens": 1404398368.0, |
| "step": 5365 |
| }, |
| { |
| "epoch": 2.5034965034965033, |
| "grad_norm": 0.3580720337699733, |
| "learning_rate": 2.9308901872763107e-05, |
| "loss": 0.4119, |
| "num_tokens": 1405709088.0, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.5058275058275057, |
| "grad_norm": 0.33634655420177195, |
| "learning_rate": 2.927432129762303e-05, |
| "loss": 0.4072, |
| "num_tokens": 1407019808.0, |
| "step": 5375 |
| }, |
| { |
| "epoch": 2.508158508158508, |
| "grad_norm": 0.4092817541410367, |
| "learning_rate": 2.923973650460451e-05, |
| "loss": 0.4249, |
| "num_tokens": 1408330528.0, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.5104895104895104, |
| "grad_norm": 0.386629174107386, |
| "learning_rate": 2.9205147575921748e-05, |
| "loss": 0.4001, |
| "num_tokens": 1409641248.0, |
| "step": 5385 |
| }, |
| { |
| "epoch": 2.5128205128205128, |
| "grad_norm": 0.38645028662583764, |
| "learning_rate": 2.917055459379881e-05, |
| "loss": 0.4201, |
| "num_tokens": 1410951968.0, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.515151515151515, |
| "grad_norm": 0.3870366504220812, |
| "learning_rate": 2.9135957640469407e-05, |
| "loss": 0.4013, |
| "num_tokens": 1412262688.0, |
| "step": 5395 |
| }, |
| { |
| "epoch": 2.5174825174825175, |
| "grad_norm": 0.3590149507926292, |
| "learning_rate": 2.9101356798176648e-05, |
| "loss": 0.4281, |
| "num_tokens": 1413573408.0, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.51981351981352, |
| "grad_norm": 0.3914639948997253, |
| "learning_rate": 2.9066752149172927e-05, |
| "loss": 0.4105, |
| "num_tokens": 1414884128.0, |
| "step": 5405 |
| }, |
| { |
| "epoch": 2.5221445221445222, |
| "grad_norm": 0.3459336521291884, |
| "learning_rate": 2.903214377571967e-05, |
| "loss": 0.4018, |
| "num_tokens": 1416194848.0, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.5244755244755246, |
| "grad_norm": 0.351270481877377, |
| "learning_rate": 2.8997531760087143e-05, |
| "loss": 0.4256, |
| "num_tokens": 1417505568.0, |
| "step": 5415 |
| }, |
| { |
| "epoch": 2.526806526806527, |
| "grad_norm": 0.3494010823054712, |
| "learning_rate": 2.896291618455431e-05, |
| "loss": 0.412, |
| "num_tokens": 1418816288.0, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.529137529137529, |
| "grad_norm": 0.3663146601610986, |
| "learning_rate": 2.8928297131408557e-05, |
| "loss": 0.4104, |
| "num_tokens": 1420127008.0, |
| "step": 5425 |
| }, |
| { |
| "epoch": 2.5314685314685317, |
| "grad_norm": 0.39872390845448125, |
| "learning_rate": 2.889367468294556e-05, |
| "loss": 0.4068, |
| "num_tokens": 1421437728.0, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.5337995337995336, |
| "grad_norm": 0.37140846588281806, |
| "learning_rate": 2.885904892146905e-05, |
| "loss": 0.4189, |
| "num_tokens": 1422748448.0, |
| "step": 5435 |
| }, |
| { |
| "epoch": 2.5361305361305364, |
| "grad_norm": 0.3657604671694247, |
| "learning_rate": 2.8824419929290665e-05, |
| "loss": 0.4191, |
| "num_tokens": 1424059168.0, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.5384615384615383, |
| "grad_norm": 0.3568520716840305, |
| "learning_rate": 2.878978778872968e-05, |
| "loss": 0.4201, |
| "num_tokens": 1425369888.0, |
| "step": 5445 |
| }, |
| { |
| "epoch": 2.5407925407925407, |
| "grad_norm": 0.39448531507467444, |
| "learning_rate": 2.8755152582112877e-05, |
| "loss": 0.4161, |
| "num_tokens": 1426680608.0, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.543123543123543, |
| "grad_norm": 0.34606657031969595, |
| "learning_rate": 2.8720514391774333e-05, |
| "loss": 0.4134, |
| "num_tokens": 1427991328.0, |
| "step": 5455 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.4053792494656858, |
| "learning_rate": 2.8685873300055206e-05, |
| "loss": 0.402, |
| "num_tokens": 1429302048.0, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.5477855477855478, |
| "grad_norm": 0.3798273899991343, |
| "learning_rate": 2.8651229389303556e-05, |
| "loss": 0.4133, |
| "num_tokens": 1430612768.0, |
| "step": 5465 |
| }, |
| { |
| "epoch": 2.55011655011655, |
| "grad_norm": 0.35682635054873424, |
| "learning_rate": 2.8616582741874143e-05, |
| "loss": 0.4117, |
| "num_tokens": 1431923488.0, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.5524475524475525, |
| "grad_norm": 0.3645622618000647, |
| "learning_rate": 2.8581933440128228e-05, |
| "loss": 0.4239, |
| "num_tokens": 1433234208.0, |
| "step": 5475 |
| }, |
| { |
| "epoch": 2.554778554778555, |
| "grad_norm": 0.3703457691159881, |
| "learning_rate": 2.8547281566433393e-05, |
| "loss": 0.4128, |
| "num_tokens": 1434539040.0, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.5571095571095572, |
| "grad_norm": 0.41270632575142685, |
| "learning_rate": 2.851262720316332e-05, |
| "loss": 0.4095, |
| "num_tokens": 1435849760.0, |
| "step": 5485 |
| }, |
| { |
| "epoch": 2.5594405594405596, |
| "grad_norm": 0.33721292116678364, |
| "learning_rate": 2.8477970432697625e-05, |
| "loss": 0.3976, |
| "num_tokens": 1437160480.0, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.561771561771562, |
| "grad_norm": 0.3503161770381493, |
| "learning_rate": 2.8443311337421642e-05, |
| "loss": 0.4228, |
| "num_tokens": 1438471200.0, |
| "step": 5495 |
| }, |
| { |
| "epoch": 2.564102564102564, |
| "grad_norm": 0.39093092792942047, |
| "learning_rate": 2.840864999972621e-05, |
| "loss": 0.4102, |
| "num_tokens": 1439768547.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.5664335664335667, |
| "grad_norm": 0.3855562639577423, |
| "learning_rate": 2.8373986502007522e-05, |
| "loss": 0.3962, |
| "num_tokens": 1441079267.0, |
| "step": 5505 |
| }, |
| { |
| "epoch": 2.5687645687645686, |
| "grad_norm": 0.39374706330512194, |
| "learning_rate": 2.833932092666692e-05, |
| "loss": 0.4168, |
| "num_tokens": 1442389987.0, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.571095571095571, |
| "grad_norm": 0.35307123842027394, |
| "learning_rate": 2.830465335611064e-05, |
| "loss": 0.4109, |
| "num_tokens": 1443700707.0, |
| "step": 5515 |
| }, |
| { |
| "epoch": 2.5734265734265733, |
| "grad_norm": 0.3531292824198428, |
| "learning_rate": 2.826998387274969e-05, |
| "loss": 0.401, |
| "num_tokens": 1445011427.0, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.5757575757575757, |
| "grad_norm": 0.35841271371937383, |
| "learning_rate": 2.8235312558999634e-05, |
| "loss": 0.3987, |
| "num_tokens": 1446322147.0, |
| "step": 5525 |
| }, |
| { |
| "epoch": 2.578088578088578, |
| "grad_norm": 0.36420778646479857, |
| "learning_rate": 2.820063949728035e-05, |
| "loss": 0.4004, |
| "num_tokens": 1447632867.0, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.5804195804195804, |
| "grad_norm": 0.34605617089524987, |
| "learning_rate": 2.8165964770015923e-05, |
| "loss": 0.4046, |
| "num_tokens": 1448943587.0, |
| "step": 5535 |
| }, |
| { |
| "epoch": 2.582750582750583, |
| "grad_norm": 0.3638468608455636, |
| "learning_rate": 2.8131288459634358e-05, |
| "loss": 0.4183, |
| "num_tokens": 1450254307.0, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.585081585081585, |
| "grad_norm": 0.33673390656126306, |
| "learning_rate": 2.8096610648567428e-05, |
| "loss": 0.4052, |
| "num_tokens": 1451549273.0, |
| "step": 5545 |
| }, |
| { |
| "epoch": 2.5874125874125875, |
| "grad_norm": 0.3450093834999504, |
| "learning_rate": 2.806193141925048e-05, |
| "loss": 0.4092, |
| "num_tokens": 1452852946.0, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.58974358974359, |
| "grad_norm": 0.33375024409509796, |
| "learning_rate": 2.8027250854122245e-05, |
| "loss": 0.4071, |
| "num_tokens": 1454163666.0, |
| "step": 5555 |
| }, |
| { |
| "epoch": 2.5920745920745922, |
| "grad_norm": 0.32434266094123126, |
| "learning_rate": 2.7992569035624612e-05, |
| "loss": 0.4088, |
| "num_tokens": 1455464696.0, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.594405594405594, |
| "grad_norm": 0.3625165606072977, |
| "learning_rate": 2.795788604620246e-05, |
| "loss": 0.4027, |
| "num_tokens": 1456775416.0, |
| "step": 5565 |
| }, |
| { |
| "epoch": 2.596736596736597, |
| "grad_norm": 0.34850576006189254, |
| "learning_rate": 2.7923201968303427e-05, |
| "loss": 0.4225, |
| "num_tokens": 1458086136.0, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.599067599067599, |
| "grad_norm": 0.31593994005170517, |
| "learning_rate": 2.788851688437777e-05, |
| "loss": 0.4014, |
| "num_tokens": 1459396856.0, |
| "step": 5575 |
| }, |
| { |
| "epoch": 2.6013986013986012, |
| "grad_norm": 0.34688524294847034, |
| "learning_rate": 2.785383087687813e-05, |
| "loss": 0.4172, |
| "num_tokens": 1460707576.0, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.6037296037296036, |
| "grad_norm": 0.3557511247705549, |
| "learning_rate": 2.781914402825933e-05, |
| "loss": 0.4143, |
| "num_tokens": 1462018296.0, |
| "step": 5585 |
| }, |
| { |
| "epoch": 2.606060606060606, |
| "grad_norm": 0.3387419936179967, |
| "learning_rate": 2.77844564209782e-05, |
| "loss": 0.4029, |
| "num_tokens": 1463329016.0, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.6083916083916083, |
| "grad_norm": 0.343630335231053, |
| "learning_rate": 2.77497681374934e-05, |
| "loss": 0.404, |
| "num_tokens": 1464639736.0, |
| "step": 5595 |
| }, |
| { |
| "epoch": 2.6107226107226107, |
| "grad_norm": 0.3332182207020839, |
| "learning_rate": 2.7715079260265124e-05, |
| "loss": 0.4006, |
| "num_tokens": 1465950456.0, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.613053613053613, |
| "grad_norm": 0.3643825631091346, |
| "learning_rate": 2.7680389871755064e-05, |
| "loss": 0.4097, |
| "num_tokens": 1467261176.0, |
| "step": 5605 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 0.3364764273544578, |
| "learning_rate": 2.7645700054426087e-05, |
| "loss": 0.4033, |
| "num_tokens": 1468571896.0, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.617715617715618, |
| "grad_norm": 0.34072785097660746, |
| "learning_rate": 2.7611009890742058e-05, |
| "loss": 0.4212, |
| "num_tokens": 1469882616.0, |
| "step": 5615 |
| }, |
| { |
| "epoch": 2.62004662004662, |
| "grad_norm": 0.3574149052487222, |
| "learning_rate": 2.757631946316771e-05, |
| "loss": 0.4154, |
| "num_tokens": 1471193336.0, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.6223776223776225, |
| "grad_norm": 0.32899936425520643, |
| "learning_rate": 2.754162885416837e-05, |
| "loss": 0.4135, |
| "num_tokens": 1472504056.0, |
| "step": 5625 |
| }, |
| { |
| "epoch": 2.624708624708625, |
| "grad_norm": 0.3263664126860434, |
| "learning_rate": 2.7506938146209816e-05, |
| "loss": 0.4048, |
| "num_tokens": 1473814776.0, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.6270396270396272, |
| "grad_norm": 0.35274896684534274, |
| "learning_rate": 2.7472247421758046e-05, |
| "loss": 0.3946, |
| "num_tokens": 1475125496.0, |
| "step": 5635 |
| }, |
| { |
| "epoch": 2.629370629370629, |
| "grad_norm": 0.3706959177174659, |
| "learning_rate": 2.743755676327911e-05, |
| "loss": 0.4116, |
| "num_tokens": 1476436216.0, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.631701631701632, |
| "grad_norm": 0.3391136391587131, |
| "learning_rate": 2.7402866253238896e-05, |
| "loss": 0.4028, |
| "num_tokens": 1477746936.0, |
| "step": 5645 |
| }, |
| { |
| "epoch": 2.634032634032634, |
| "grad_norm": 0.3880994944779498, |
| "learning_rate": 2.7368175974102938e-05, |
| "loss": 0.3995, |
| "num_tokens": 1479057656.0, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.6363636363636362, |
| "grad_norm": 0.3565909411799108, |
| "learning_rate": 2.7333486008336217e-05, |
| "loss": 0.4089, |
| "num_tokens": 1480368376.0, |
| "step": 5655 |
| }, |
| { |
| "epoch": 2.6386946386946386, |
| "grad_norm": 0.3229561588592343, |
| "learning_rate": 2.7298796438402986e-05, |
| "loss": 0.4108, |
| "num_tokens": 1481679096.0, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.641025641025641, |
| "grad_norm": 0.3393467174551697, |
| "learning_rate": 2.726410734676653e-05, |
| "loss": 0.4153, |
| "num_tokens": 1482989816.0, |
| "step": 5665 |
| }, |
| { |
| "epoch": 2.6433566433566433, |
| "grad_norm": 0.40533026810505063, |
| "learning_rate": 2.7229418815889023e-05, |
| "loss": 0.427, |
| "num_tokens": 1484300536.0, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.6456876456876457, |
| "grad_norm": 0.3493290537833516, |
| "learning_rate": 2.7194730928231292e-05, |
| "loss": 0.4233, |
| "num_tokens": 1485603324.0, |
| "step": 5675 |
| }, |
| { |
| "epoch": 2.648018648018648, |
| "grad_norm": 0.3293219859362695, |
| "learning_rate": 2.716004376625264e-05, |
| "loss": 0.4137, |
| "num_tokens": 1486914044.0, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.6503496503496504, |
| "grad_norm": 0.32917350717509924, |
| "learning_rate": 2.7125357412410634e-05, |
| "loss": 0.4112, |
| "num_tokens": 1488224764.0, |
| "step": 5685 |
| }, |
| { |
| "epoch": 2.652680652680653, |
| "grad_norm": 0.36778219939737017, |
| "learning_rate": 2.7090671949160945e-05, |
| "loss": 0.4151, |
| "num_tokens": 1489530334.0, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.655011655011655, |
| "grad_norm": 0.3520157165475074, |
| "learning_rate": 2.70559874589571e-05, |
| "loss": 0.4252, |
| "num_tokens": 1490841054.0, |
| "step": 5695 |
| }, |
| { |
| "epoch": 2.6573426573426575, |
| "grad_norm": 0.364402639873204, |
| "learning_rate": 2.7021304024250315e-05, |
| "loss": 0.415, |
| "num_tokens": 1492151774.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.6596736596736594, |
| "grad_norm": 0.3802755602559821, |
| "learning_rate": 2.698662172748933e-05, |
| "loss": 0.4084, |
| "num_tokens": 1493457481.0, |
| "step": 5705 |
| }, |
| { |
| "epoch": 2.6620046620046622, |
| "grad_norm": 0.3296000790473323, |
| "learning_rate": 2.695194065112014e-05, |
| "loss": 0.41, |
| "num_tokens": 1494759113.0, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.664335664335664, |
| "grad_norm": 0.3427344361950261, |
| "learning_rate": 2.6917260877585854e-05, |
| "loss": 0.4155, |
| "num_tokens": 1496069833.0, |
| "step": 5715 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.33635269462764017, |
| "learning_rate": 2.6882582489326485e-05, |
| "loss": 0.4175, |
| "num_tokens": 1497380553.0, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.668997668997669, |
| "grad_norm": 0.3931298913652689, |
| "learning_rate": 2.6847905568778753e-05, |
| "loss": 0.421, |
| "num_tokens": 1498670997.0, |
| "step": 5725 |
| }, |
| { |
| "epoch": 2.6713286713286712, |
| "grad_norm": 0.35218576187719325, |
| "learning_rate": 2.6813230198375887e-05, |
| "loss": 0.4072, |
| "num_tokens": 1499981717.0, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.6736596736596736, |
| "grad_norm": 0.35875764676123595, |
| "learning_rate": 2.6778556460547437e-05, |
| "loss": 0.4185, |
| "num_tokens": 1501292437.0, |
| "step": 5735 |
| }, |
| { |
| "epoch": 2.675990675990676, |
| "grad_norm": 0.36986425256659194, |
| "learning_rate": 2.6743884437719064e-05, |
| "loss": 0.4052, |
| "num_tokens": 1502603157.0, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.6783216783216783, |
| "grad_norm": 0.32481671854646, |
| "learning_rate": 2.6709214212312362e-05, |
| "loss": 0.4175, |
| "num_tokens": 1503913877.0, |
| "step": 5745 |
| }, |
| { |
| "epoch": 2.6806526806526807, |
| "grad_norm": 0.33341909508902867, |
| "learning_rate": 2.6674545866744627e-05, |
| "loss": 0.4095, |
| "num_tokens": 1505212841.0, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.682983682983683, |
| "grad_norm": 0.3247256800202665, |
| "learning_rate": 2.663987948342873e-05, |
| "loss": 0.3978, |
| "num_tokens": 1506523561.0, |
| "step": 5755 |
| }, |
| { |
| "epoch": 2.6853146853146854, |
| "grad_norm": 0.3693723379792467, |
| "learning_rate": 2.6605215144772844e-05, |
| "loss": 0.4031, |
| "num_tokens": 1507834281.0, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.687645687645688, |
| "grad_norm": 0.3363247073452088, |
| "learning_rate": 2.6570552933180275e-05, |
| "loss": 0.4096, |
| "num_tokens": 1509145001.0, |
| "step": 5765 |
| }, |
| { |
| "epoch": 2.6899766899766897, |
| "grad_norm": 0.3686548048692657, |
| "learning_rate": 2.6535892931049304e-05, |
| "loss": 0.3989, |
| "num_tokens": 1510455721.0, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 0.3590538341136808, |
| "learning_rate": 2.650123522077294e-05, |
| "loss": 0.4126, |
| "num_tokens": 1511756221.0, |
| "step": 5775 |
| }, |
| { |
| "epoch": 2.6946386946386944, |
| "grad_norm": 0.3939246015397987, |
| "learning_rate": 2.6466579884738745e-05, |
| "loss": 0.4127, |
| "num_tokens": 1513066941.0, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.6969696969696972, |
| "grad_norm": 0.36656521638788236, |
| "learning_rate": 2.6431927005328634e-05, |
| "loss": 0.4118, |
| "num_tokens": 1514377661.0, |
| "step": 5785 |
| }, |
| { |
| "epoch": 2.699300699300699, |
| "grad_norm": 0.3786768997732052, |
| "learning_rate": 2.6397276664918695e-05, |
| "loss": 0.4056, |
| "num_tokens": 1515688381.0, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.7016317016317015, |
| "grad_norm": 0.34250789196379766, |
| "learning_rate": 2.6362628945878982e-05, |
| "loss": 0.4237, |
| "num_tokens": 1516983122.0, |
| "step": 5795 |
| }, |
| { |
| "epoch": 2.703962703962704, |
| "grad_norm": 0.3879698601925601, |
| "learning_rate": 2.6327983930573275e-05, |
| "loss": 0.4194, |
| "num_tokens": 1518293842.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.7062937062937062, |
| "grad_norm": 0.34322275404532027, |
| "learning_rate": 2.629334170135899e-05, |
| "loss": 0.421, |
| "num_tokens": 1519604562.0, |
| "step": 5805 |
| }, |
| { |
| "epoch": 2.7086247086247086, |
| "grad_norm": 0.3274763994589238, |
| "learning_rate": 2.6258702340586888e-05, |
| "loss": 0.3991, |
| "num_tokens": 1520915282.0, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.710955710955711, |
| "grad_norm": 0.3313466614418822, |
| "learning_rate": 2.6224065930600895e-05, |
| "loss": 0.4114, |
| "num_tokens": 1522226002.0, |
| "step": 5815 |
| }, |
| { |
| "epoch": 2.7132867132867133, |
| "grad_norm": 0.3577956678121881, |
| "learning_rate": 2.6189432553737965e-05, |
| "loss": 0.4313, |
| "num_tokens": 1523536722.0, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.7156177156177157, |
| "grad_norm": 0.34556719467314156, |
| "learning_rate": 2.6154802292327795e-05, |
| "loss": 0.4179, |
| "num_tokens": 1524847442.0, |
| "step": 5825 |
| }, |
| { |
| "epoch": 2.717948717948718, |
| "grad_norm": 0.3422225730154786, |
| "learning_rate": 2.6120175228692705e-05, |
| "loss": 0.4224, |
| "num_tokens": 1526145343.0, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.7202797202797204, |
| "grad_norm": 0.3310291884231872, |
| "learning_rate": 2.608555144514741e-05, |
| "loss": 0.4104, |
| "num_tokens": 1527456063.0, |
| "step": 5835 |
| }, |
| { |
| "epoch": 2.722610722610723, |
| "grad_norm": 0.35527150456435, |
| "learning_rate": 2.6050931023998825e-05, |
| "loss": 0.4265, |
| "num_tokens": 1528762674.0, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.7249417249417247, |
| "grad_norm": 0.3810983867864767, |
| "learning_rate": 2.601631404754587e-05, |
| "loss": 0.4156, |
| "num_tokens": 1530058844.0, |
| "step": 5845 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.3920073125528213, |
| "learning_rate": 2.5981700598079267e-05, |
| "loss": 0.4202, |
| "num_tokens": 1531358947.0, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.7296037296037294, |
| "grad_norm": 0.3259366792942078, |
| "learning_rate": 2.594709075788138e-05, |
| "loss": 0.414, |
| "num_tokens": 1532660121.0, |
| "step": 5855 |
| }, |
| { |
| "epoch": 2.731934731934732, |
| "grad_norm": 0.37607877851481675, |
| "learning_rate": 2.5912484609225973e-05, |
| "loss": 0.4125, |
| "num_tokens": 1533961019.0, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.734265734265734, |
| "grad_norm": 0.361401132584504, |
| "learning_rate": 2.5877882234378027e-05, |
| "loss": 0.4149, |
| "num_tokens": 1535271739.0, |
| "step": 5865 |
| }, |
| { |
| "epoch": 2.7365967365967365, |
| "grad_norm": 0.3814825573434443, |
| "learning_rate": 2.584328371559358e-05, |
| "loss": 0.4171, |
| "num_tokens": 1536582459.0, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.738927738927739, |
| "grad_norm": 0.33196554110199666, |
| "learning_rate": 2.5808689135119484e-05, |
| "loss": 0.4198, |
| "num_tokens": 1537893179.0, |
| "step": 5875 |
| }, |
| { |
| "epoch": 2.7412587412587412, |
| "grad_norm": 0.3514505419081785, |
| "learning_rate": 2.577409857519323e-05, |
| "loss": 0.4116, |
| "num_tokens": 1539203899.0, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.7435897435897436, |
| "grad_norm": 0.36051816664386027, |
| "learning_rate": 2.573951211804274e-05, |
| "loss": 0.3955, |
| "num_tokens": 1540501469.0, |
| "step": 5885 |
| }, |
| { |
| "epoch": 2.745920745920746, |
| "grad_norm": 0.34431578542179486, |
| "learning_rate": 2.570492984588622e-05, |
| "loss": 0.4048, |
| "num_tokens": 1541807849.0, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.7482517482517483, |
| "grad_norm": 0.3482952718749119, |
| "learning_rate": 2.56703518409319e-05, |
| "loss": 0.421, |
| "num_tokens": 1543118569.0, |
| "step": 5895 |
| }, |
| { |
| "epoch": 2.7505827505827507, |
| "grad_norm": 0.3435903033996109, |
| "learning_rate": 2.5635778185377846e-05, |
| "loss": 0.4105, |
| "num_tokens": 1544429289.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.752913752913753, |
| "grad_norm": 0.3649183613089683, |
| "learning_rate": 2.5601208961411838e-05, |
| "loss": 0.4363, |
| "num_tokens": 1545740009.0, |
| "step": 5905 |
| }, |
| { |
| "epoch": 2.755244755244755, |
| "grad_norm": 0.33542477596319575, |
| "learning_rate": 2.556664425121108e-05, |
| "loss": 0.417, |
| "num_tokens": 1547050729.0, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.757575757575758, |
| "grad_norm": 0.3612431221112293, |
| "learning_rate": 2.5532084136942048e-05, |
| "loss": 0.4106, |
| "num_tokens": 1548361449.0, |
| "step": 5915 |
| }, |
| { |
| "epoch": 2.7599067599067597, |
| "grad_norm": 0.34457273412289285, |
| "learning_rate": 2.5497528700760333e-05, |
| "loss": 0.4111, |
| "num_tokens": 1549672169.0, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.762237762237762, |
| "grad_norm": 0.3643149061193825, |
| "learning_rate": 2.5462978024810347e-05, |
| "loss": 0.4007, |
| "num_tokens": 1550982889.0, |
| "step": 5925 |
| }, |
| { |
| "epoch": 2.7645687645687644, |
| "grad_norm": 0.33113278908341853, |
| "learning_rate": 2.5428432191225226e-05, |
| "loss": 0.4115, |
| "num_tokens": 1552293609.0, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.766899766899767, |
| "grad_norm": 0.324549859393571, |
| "learning_rate": 2.5393891282126576e-05, |
| "loss": 0.4147, |
| "num_tokens": 1553603846.0, |
| "step": 5935 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.34583557264006637, |
| "learning_rate": 2.5359355379624317e-05, |
| "loss": 0.4159, |
| "num_tokens": 1554898212.0, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.7715617715617715, |
| "grad_norm": 0.3704574170132862, |
| "learning_rate": 2.532482456581644e-05, |
| "loss": 0.4187, |
| "num_tokens": 1556204437.0, |
| "step": 5945 |
| }, |
| { |
| "epoch": 2.773892773892774, |
| "grad_norm": 0.3427794333227205, |
| "learning_rate": 2.529029892278886e-05, |
| "loss": 0.4052, |
| "num_tokens": 1557515157.0, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.7762237762237763, |
| "grad_norm": 0.3533422903029431, |
| "learning_rate": 2.5255778532615194e-05, |
| "loss": 0.4092, |
| "num_tokens": 1558825877.0, |
| "step": 5955 |
| }, |
| { |
| "epoch": 2.7785547785547786, |
| "grad_norm": 0.3549142246233924, |
| "learning_rate": 2.5221263477356572e-05, |
| "loss": 0.4081, |
| "num_tokens": 1560136597.0, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.780885780885781, |
| "grad_norm": 0.38274212191913465, |
| "learning_rate": 2.5186753839061438e-05, |
| "loss": 0.4038, |
| "num_tokens": 1561447317.0, |
| "step": 5965 |
| }, |
| { |
| "epoch": 2.7832167832167833, |
| "grad_norm": 0.33786865946250155, |
| "learning_rate": 2.5152249699765367e-05, |
| "loss": 0.4018, |
| "num_tokens": 1562758037.0, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.7855477855477857, |
| "grad_norm": 0.37594129759116907, |
| "learning_rate": 2.5117751141490858e-05, |
| "loss": 0.4275, |
| "num_tokens": 1564068757.0, |
| "step": 5975 |
| }, |
| { |
| "epoch": 2.787878787878788, |
| "grad_norm": 0.3547780778843013, |
| "learning_rate": 2.5083258246247144e-05, |
| "loss": 0.4107, |
| "num_tokens": 1565366132.0, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.79020979020979, |
| "grad_norm": 0.336676677250395, |
| "learning_rate": 2.5048771096029976e-05, |
| "loss": 0.4228, |
| "num_tokens": 1566676852.0, |
| "step": 5985 |
| }, |
| { |
| "epoch": 2.792540792540793, |
| "grad_norm": 0.37105836604705056, |
| "learning_rate": 2.5014289772821486e-05, |
| "loss": 0.4141, |
| "num_tokens": 1567987572.0, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.7948717948717947, |
| "grad_norm": 0.3641465643333037, |
| "learning_rate": 2.4979814358589944e-05, |
| "loss": 0.4268, |
| "num_tokens": 1569298292.0, |
| "step": 5995 |
| }, |
| { |
| "epoch": 2.797202797202797, |
| "grad_norm": 0.39906084842919354, |
| "learning_rate": 2.494534493528952e-05, |
| "loss": 0.4249, |
| "num_tokens": 1570609012.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.7995337995337994, |
| "grad_norm": 0.33358892565041576, |
| "learning_rate": 2.491088158486024e-05, |
| "loss": 0.3972, |
| "num_tokens": 1571908251.0, |
| "step": 6005 |
| }, |
| { |
| "epoch": 2.801864801864802, |
| "grad_norm": 0.3597994521954223, |
| "learning_rate": 2.487642438922761e-05, |
| "loss": 0.3987, |
| "num_tokens": 1573218971.0, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.804195804195804, |
| "grad_norm": 0.32444840115208157, |
| "learning_rate": 2.484197343030253e-05, |
| "loss": 0.4103, |
| "num_tokens": 1574529691.0, |
| "step": 6015 |
| }, |
| { |
| "epoch": 2.8065268065268065, |
| "grad_norm": 0.3369493662408065, |
| "learning_rate": 2.48075287899811e-05, |
| "loss": 0.4077, |
| "num_tokens": 1575834151.0, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.808857808857809, |
| "grad_norm": 0.35772585365072584, |
| "learning_rate": 2.4773090550144366e-05, |
| "loss": 0.4176, |
| "num_tokens": 1577144871.0, |
| "step": 6025 |
| }, |
| { |
| "epoch": 2.8111888111888113, |
| "grad_norm": 0.36110473631062134, |
| "learning_rate": 2.473865879265817e-05, |
| "loss": 0.4253, |
| "num_tokens": 1578449314.0, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.8135198135198136, |
| "grad_norm": 0.33350627178900166, |
| "learning_rate": 2.470423359937295e-05, |
| "loss": 0.4142, |
| "num_tokens": 1579760034.0, |
| "step": 6035 |
| }, |
| { |
| "epoch": 2.815850815850816, |
| "grad_norm": 0.3597334739451086, |
| "learning_rate": 2.4669815052123534e-05, |
| "loss": 0.4125, |
| "num_tokens": 1581060841.0, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.8181818181818183, |
| "grad_norm": 0.39863743568305726, |
| "learning_rate": 2.463540323272896e-05, |
| "loss": 0.4161, |
| "num_tokens": 1582371561.0, |
| "step": 6045 |
| }, |
| { |
| "epoch": 2.8205128205128203, |
| "grad_norm": 0.3596208010960054, |
| "learning_rate": 2.4600998222992257e-05, |
| "loss": 0.4126, |
| "num_tokens": 1583682281.0, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.822843822843823, |
| "grad_norm": 0.3542098381141003, |
| "learning_rate": 2.456660010470028e-05, |
| "loss": 0.4164, |
| "num_tokens": 1584993001.0, |
| "step": 6055 |
| }, |
| { |
| "epoch": 2.825174825174825, |
| "grad_norm": 0.33689485990156454, |
| "learning_rate": 2.4532208959623488e-05, |
| "loss": 0.3965, |
| "num_tokens": 1586303721.0, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.8275058275058274, |
| "grad_norm": 0.36681615470748274, |
| "learning_rate": 2.4497824869515773e-05, |
| "loss": 0.4268, |
| "num_tokens": 1587614441.0, |
| "step": 6065 |
| }, |
| { |
| "epoch": 2.8298368298368297, |
| "grad_norm": 0.33853074905875574, |
| "learning_rate": 2.4463447916114273e-05, |
| "loss": 0.4105, |
| "num_tokens": 1588909813.0, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.832167832167832, |
| "grad_norm": 0.32550466329208894, |
| "learning_rate": 2.4429078181139127e-05, |
| "loss": 0.4083, |
| "num_tokens": 1590220533.0, |
| "step": 6075 |
| }, |
| { |
| "epoch": 2.8344988344988344, |
| "grad_norm": 0.3234109896716426, |
| "learning_rate": 2.439471574629333e-05, |
| "loss": 0.4162, |
| "num_tokens": 1591531253.0, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.836829836829837, |
| "grad_norm": 0.32857277817721126, |
| "learning_rate": 2.4360360693262524e-05, |
| "loss": 0.4077, |
| "num_tokens": 1592841973.0, |
| "step": 6085 |
| }, |
| { |
| "epoch": 2.839160839160839, |
| "grad_norm": 0.37570041632784523, |
| "learning_rate": 2.4326013103714813e-05, |
| "loss": 0.4081, |
| "num_tokens": 1594152693.0, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.8414918414918415, |
| "grad_norm": 0.36498566644592234, |
| "learning_rate": 2.4291673059300546e-05, |
| "loss": 0.4101, |
| "num_tokens": 1595463413.0, |
| "step": 6095 |
| }, |
| { |
| "epoch": 2.843822843822844, |
| "grad_norm": 0.33322954386185466, |
| "learning_rate": 2.4257340641652115e-05, |
| "loss": 0.4203, |
| "num_tokens": 1596774133.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.8461538461538463, |
| "grad_norm": 0.32749325978795546, |
| "learning_rate": 2.4223015932383842e-05, |
| "loss": 0.41, |
| "num_tokens": 1598084853.0, |
| "step": 6105 |
| }, |
| { |
| "epoch": 2.8484848484848486, |
| "grad_norm": 0.37491209566738504, |
| "learning_rate": 2.4188699013091665e-05, |
| "loss": 0.4288, |
| "num_tokens": 1599395573.0, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.8508158508158505, |
| "grad_norm": 0.34071317068951223, |
| "learning_rate": 2.4154389965353025e-05, |
| "loss": 0.4114, |
| "num_tokens": 1600702382.0, |
| "step": 6115 |
| }, |
| { |
| "epoch": 2.8531468531468533, |
| "grad_norm": 0.3564070027232447, |
| "learning_rate": 2.4120088870726675e-05, |
| "loss": 0.4269, |
| "num_tokens": 1602013102.0, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.8554778554778553, |
| "grad_norm": 0.37329758310001454, |
| "learning_rate": 2.408579581075242e-05, |
| "loss": 0.4311, |
| "num_tokens": 1603321783.0, |
| "step": 6125 |
| }, |
| { |
| "epoch": 2.857808857808858, |
| "grad_norm": 0.3300871433307017, |
| "learning_rate": 2.4051510866950987e-05, |
| "loss": 0.431, |
| "num_tokens": 1604632503.0, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.86013986013986, |
| "grad_norm": 0.33854849401601644, |
| "learning_rate": 2.4017234120823816e-05, |
| "loss": 0.4085, |
| "num_tokens": 1605943223.0, |
| "step": 6135 |
| }, |
| { |
| "epoch": 2.8624708624708624, |
| "grad_norm": 0.3397431033223637, |
| "learning_rate": 2.3982965653852845e-05, |
| "loss": 0.427, |
| "num_tokens": 1607238436.0, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.8648018648018647, |
| "grad_norm": 0.31163913161758244, |
| "learning_rate": 2.3948705547500346e-05, |
| "loss": 0.396, |
| "num_tokens": 1608549156.0, |
| "step": 6145 |
| }, |
| { |
| "epoch": 2.867132867132867, |
| "grad_norm": 0.36119797833011485, |
| "learning_rate": 2.391445388320869e-05, |
| "loss": 0.4219, |
| "num_tokens": 1609859876.0, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.8694638694638694, |
| "grad_norm": 0.35324575640930755, |
| "learning_rate": 2.388021074240021e-05, |
| "loss": 0.4045, |
| "num_tokens": 1611170596.0, |
| "step": 6155 |
| }, |
| { |
| "epoch": 2.871794871794872, |
| "grad_norm": 0.34168122235530196, |
| "learning_rate": 2.3845976206476962e-05, |
| "loss": 0.4119, |
| "num_tokens": 1612481316.0, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.874125874125874, |
| "grad_norm": 0.3267756311441551, |
| "learning_rate": 2.381175035682055e-05, |
| "loss": 0.4068, |
| "num_tokens": 1613780656.0, |
| "step": 6165 |
| }, |
| { |
| "epoch": 2.8764568764568765, |
| "grad_norm": 0.33042098367758244, |
| "learning_rate": 2.377753327479193e-05, |
| "loss": 0.4063, |
| "num_tokens": 1615091376.0, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.878787878787879, |
| "grad_norm": 0.3484728853535675, |
| "learning_rate": 2.374332504173121e-05, |
| "loss": 0.4062, |
| "num_tokens": 1616402096.0, |
| "step": 6175 |
| }, |
| { |
| "epoch": 2.8811188811188813, |
| "grad_norm": 0.3454455029684927, |
| "learning_rate": 2.3709125738957467e-05, |
| "loss": 0.4047, |
| "num_tokens": 1617712816.0, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.8834498834498836, |
| "grad_norm": 0.35256247777872834, |
| "learning_rate": 2.3674935447768547e-05, |
| "loss": 0.4092, |
| "num_tokens": 1619023536.0, |
| "step": 6185 |
| }, |
| { |
| "epoch": 2.8857808857808855, |
| "grad_norm": 0.34447264412116185, |
| "learning_rate": 2.3640754249440893e-05, |
| "loss": 0.4171, |
| "num_tokens": 1620320855.0, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.8881118881118883, |
| "grad_norm": 0.3300552790749895, |
| "learning_rate": 2.360658222522929e-05, |
| "loss": 0.404, |
| "num_tokens": 1621631575.0, |
| "step": 6195 |
| }, |
| { |
| "epoch": 2.8904428904428903, |
| "grad_norm": 0.3301478105077118, |
| "learning_rate": 2.357241945636674e-05, |
| "loss": 0.4041, |
| "num_tokens": 1622942295.0, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.8927738927738926, |
| "grad_norm": 0.3241286505711926, |
| "learning_rate": 2.3538266024064272e-05, |
| "loss": 0.4088, |
| "num_tokens": 1624236677.0, |
| "step": 6205 |
| }, |
| { |
| "epoch": 2.895104895104895, |
| "grad_norm": 0.3393889730654891, |
| "learning_rate": 2.350412200951066e-05, |
| "loss": 0.4045, |
| "num_tokens": 1625547397.0, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.8974358974358974, |
| "grad_norm": 0.3722971080352807, |
| "learning_rate": 2.346998749387233e-05, |
| "loss": 0.3972, |
| "num_tokens": 1626858117.0, |
| "step": 6215 |
| }, |
| { |
| "epoch": 2.8997668997668997, |
| "grad_norm": 0.3481952783229655, |
| "learning_rate": 2.3435862558293137e-05, |
| "loss": 0.4185, |
| "num_tokens": 1628168837.0, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.902097902097902, |
| "grad_norm": 0.3575311095008992, |
| "learning_rate": 2.3401747283894122e-05, |
| "loss": 0.4089, |
| "num_tokens": 1629479557.0, |
| "step": 6225 |
| }, |
| { |
| "epoch": 2.9044289044289044, |
| "grad_norm": 0.3213929542319523, |
| "learning_rate": 2.3367641751773388e-05, |
| "loss": 0.4044, |
| "num_tokens": 1630783572.0, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.906759906759907, |
| "grad_norm": 0.33775314492186553, |
| "learning_rate": 2.3333546043005877e-05, |
| "loss": 0.4117, |
| "num_tokens": 1632094292.0, |
| "step": 6235 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.35583582490206767, |
| "learning_rate": 2.3299460238643178e-05, |
| "loss": 0.4191, |
| "num_tokens": 1633405012.0, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.9114219114219115, |
| "grad_norm": 0.3403055493539477, |
| "learning_rate": 2.3265384419713325e-05, |
| "loss": 0.4074, |
| "num_tokens": 1634715732.0, |
| "step": 6245 |
| }, |
| { |
| "epoch": 2.913752913752914, |
| "grad_norm": 0.32361564388685027, |
| "learning_rate": 2.3231318667220624e-05, |
| "loss": 0.4047, |
| "num_tokens": 1636021018.0, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.916083916083916, |
| "grad_norm": 0.36911990299906383, |
| "learning_rate": 2.3197263062145457e-05, |
| "loss": 0.3952, |
| "num_tokens": 1637331738.0, |
| "step": 6255 |
| }, |
| { |
| "epoch": 2.9184149184149186, |
| "grad_norm": 0.3529888818933417, |
| "learning_rate": 2.3163217685444067e-05, |
| "loss": 0.4037, |
| "num_tokens": 1638640505.0, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.9207459207459205, |
| "grad_norm": 0.3228114643758619, |
| "learning_rate": 2.312918261804839e-05, |
| "loss": 0.4039, |
| "num_tokens": 1639951225.0, |
| "step": 6265 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 0.37676066453773155, |
| "learning_rate": 2.3095157940865876e-05, |
| "loss": 0.408, |
| "num_tokens": 1641261945.0, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.9254079254079253, |
| "grad_norm": 0.3498311618012844, |
| "learning_rate": 2.3061143734779235e-05, |
| "loss": 0.4052, |
| "num_tokens": 1642572665.0, |
| "step": 6275 |
| }, |
| { |
| "epoch": 2.9277389277389276, |
| "grad_norm": 0.3793591470148143, |
| "learning_rate": 2.3027140080646313e-05, |
| "loss": 0.4059, |
| "num_tokens": 1643883385.0, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.93006993006993, |
| "grad_norm": 0.30604459853495847, |
| "learning_rate": 2.299314705929987e-05, |
| "loss": 0.4008, |
| "num_tokens": 1645194105.0, |
| "step": 6285 |
| }, |
| { |
| "epoch": 2.9324009324009324, |
| "grad_norm": 0.3471040950462663, |
| "learning_rate": 2.295916475154739e-05, |
| "loss": 0.4152, |
| "num_tokens": 1646501490.0, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.9347319347319347, |
| "grad_norm": 0.3497693139520713, |
| "learning_rate": 2.292519323817087e-05, |
| "loss": 0.4247, |
| "num_tokens": 1647803839.0, |
| "step": 6295 |
| }, |
| { |
| "epoch": 2.937062937062937, |
| "grad_norm": 0.37949081912769744, |
| "learning_rate": 2.2891232599926666e-05, |
| "loss": 0.4094, |
| "num_tokens": 1649114559.0, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.9393939393939394, |
| "grad_norm": 0.3885455375827252, |
| "learning_rate": 2.2857282917545285e-05, |
| "loss": 0.4188, |
| "num_tokens": 1650425279.0, |
| "step": 6305 |
| }, |
| { |
| "epoch": 2.941724941724942, |
| "grad_norm": 0.3477949975861951, |
| "learning_rate": 2.2823344271731184e-05, |
| "loss": 0.4176, |
| "num_tokens": 1651731670.0, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.944055944055944, |
| "grad_norm": 0.4148827072433097, |
| "learning_rate": 2.2789416743162567e-05, |
| "loss": 0.4097, |
| "num_tokens": 1653042390.0, |
| "step": 6315 |
| }, |
| { |
| "epoch": 2.9463869463869465, |
| "grad_norm": 0.34732540637824916, |
| "learning_rate": 2.275550041249124e-05, |
| "loss": 0.4112, |
| "num_tokens": 1654353110.0, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.948717948717949, |
| "grad_norm": 0.32571968477635, |
| "learning_rate": 2.272159536034238e-05, |
| "loss": 0.4127, |
| "num_tokens": 1655663830.0, |
| "step": 6325 |
| }, |
| { |
| "epoch": 2.951048951048951, |
| "grad_norm": 0.34646771087112505, |
| "learning_rate": 2.2687701667314327e-05, |
| "loss": 0.4042, |
| "num_tokens": 1656974550.0, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.9533799533799536, |
| "grad_norm": 0.3649182836596108, |
| "learning_rate": 2.2653819413978454e-05, |
| "loss": 0.3955, |
| "num_tokens": 1658285270.0, |
| "step": 6335 |
| }, |
| { |
| "epoch": 2.9557109557109555, |
| "grad_norm": 0.36915878506354805, |
| "learning_rate": 2.261994868087893e-05, |
| "loss": 0.4217, |
| "num_tokens": 1659595990.0, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.958041958041958, |
| "grad_norm": 0.34333413093504295, |
| "learning_rate": 2.258608954853252e-05, |
| "loss": 0.4019, |
| "num_tokens": 1660899543.0, |
| "step": 6345 |
| }, |
| { |
| "epoch": 2.9603729603729603, |
| "grad_norm": 0.3548268496609685, |
| "learning_rate": 2.2552242097428432e-05, |
| "loss": 0.4111, |
| "num_tokens": 1662198615.0, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.9627039627039626, |
| "grad_norm": 0.33885473039293307, |
| "learning_rate": 2.2518406408028108e-05, |
| "loss": 0.4136, |
| "num_tokens": 1663509335.0, |
| "step": 6355 |
| }, |
| { |
| "epoch": 2.965034965034965, |
| "grad_norm": 0.31938033848179553, |
| "learning_rate": 2.2484582560765012e-05, |
| "loss": 0.4059, |
| "num_tokens": 1664820055.0, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.9673659673659674, |
| "grad_norm": 0.33392158191585564, |
| "learning_rate": 2.245077063604446e-05, |
| "loss": 0.4011, |
| "num_tokens": 1666130775.0, |
| "step": 6365 |
| }, |
| { |
| "epoch": 2.9696969696969697, |
| "grad_norm": 0.31402160061675133, |
| "learning_rate": 2.241697071424345e-05, |
| "loss": 0.4065, |
| "num_tokens": 1667441495.0, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.972027972027972, |
| "grad_norm": 0.3762934050333541, |
| "learning_rate": 2.2383182875710424e-05, |
| "loss": 0.4137, |
| "num_tokens": 1668752215.0, |
| "step": 6375 |
| }, |
| { |
| "epoch": 2.9743589743589745, |
| "grad_norm": 0.34874468121941143, |
| "learning_rate": 2.23494072007651e-05, |
| "loss": 0.4089, |
| "num_tokens": 1670062935.0, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.976689976689977, |
| "grad_norm": 0.31091397228227735, |
| "learning_rate": 2.231564376969829e-05, |
| "loss": 0.4169, |
| "num_tokens": 1671372884.0, |
| "step": 6385 |
| }, |
| { |
| "epoch": 2.979020979020979, |
| "grad_norm": 0.34721881259745, |
| "learning_rate": 2.2281892662771703e-05, |
| "loss": 0.4073, |
| "num_tokens": 1672668051.0, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.981351981351981, |
| "grad_norm": 0.32893411375726883, |
| "learning_rate": 2.224815396021772e-05, |
| "loss": 0.4115, |
| "num_tokens": 1673978771.0, |
| "step": 6395 |
| }, |
| { |
| "epoch": 2.983682983682984, |
| "grad_norm": 0.3148206325892906, |
| "learning_rate": 2.221442774223929e-05, |
| "loss": 0.3946, |
| "num_tokens": 1675289491.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.986013986013986, |
| "grad_norm": 0.33342285443614306, |
| "learning_rate": 2.2180714089009652e-05, |
| "loss": 0.3874, |
| "num_tokens": 1676600211.0, |
| "step": 6405 |
| }, |
| { |
| "epoch": 2.988344988344988, |
| "grad_norm": 0.3144256371115021, |
| "learning_rate": 2.214701308067216e-05, |
| "loss": 0.4006, |
| "num_tokens": 1677910931.0, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.9906759906759905, |
| "grad_norm": 0.33559767396859935, |
| "learning_rate": 2.211332479734013e-05, |
| "loss": 0.4079, |
| "num_tokens": 1679221651.0, |
| "step": 6415 |
| }, |
| { |
| "epoch": 2.993006993006993, |
| "grad_norm": 0.3406414559616791, |
| "learning_rate": 2.207964931909663e-05, |
| "loss": 0.3905, |
| "num_tokens": 1680532371.0, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.9953379953379953, |
| "grad_norm": 0.34257530422603977, |
| "learning_rate": 2.2045986725994287e-05, |
| "loss": 0.4173, |
| "num_tokens": 1681843091.0, |
| "step": 6425 |
| }, |
| { |
| "epoch": 2.9976689976689976, |
| "grad_norm": 0.36116021112956004, |
| "learning_rate": 2.2012337098055086e-05, |
| "loss": 0.4182, |
| "num_tokens": 1683153811.0, |
| "step": 6430 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.33421231681330704, |
| "learning_rate": 2.19787005152702e-05, |
| "loss": 0.3941, |
| "num_tokens": 1684464531.0, |
| "step": 6435 |
| }, |
| { |
| "epoch": 3.0023310023310024, |
| "grad_norm": 0.3393950285548961, |
| "learning_rate": 2.1945077057599804e-05, |
| "loss": 0.3565, |
| "num_tokens": 1685775251.0, |
| "step": 6440 |
| }, |
| { |
| "epoch": 3.0046620046620047, |
| "grad_norm": 0.3640517357882801, |
| "learning_rate": 2.191146680497284e-05, |
| "loss": 0.3643, |
| "num_tokens": 1687076615.0, |
| "step": 6445 |
| }, |
| { |
| "epoch": 3.006993006993007, |
| "grad_norm": 0.3644950262571567, |
| "learning_rate": 2.1877869837286896e-05, |
| "loss": 0.3655, |
| "num_tokens": 1688387335.0, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.0093240093240095, |
| "grad_norm": 0.36701056648807145, |
| "learning_rate": 2.1844286234407947e-05, |
| "loss": 0.3499, |
| "num_tokens": 1689698055.0, |
| "step": 6455 |
| }, |
| { |
| "epoch": 3.011655011655012, |
| "grad_norm": 0.3865682129879495, |
| "learning_rate": 2.181071607617022e-05, |
| "loss": 0.3712, |
| "num_tokens": 1691008775.0, |
| "step": 6460 |
| }, |
| { |
| "epoch": 3.013986013986014, |
| "grad_norm": 0.33558912130902724, |
| "learning_rate": 2.1777159442375967e-05, |
| "loss": 0.362, |
| "num_tokens": 1692319495.0, |
| "step": 6465 |
| }, |
| { |
| "epoch": 3.016317016317016, |
| "grad_norm": 0.3541099475736384, |
| "learning_rate": 2.1743616412795303e-05, |
| "loss": 0.3473, |
| "num_tokens": 1693630024.0, |
| "step": 6470 |
| }, |
| { |
| "epoch": 3.0186480186480185, |
| "grad_norm": 0.3374133971809518, |
| "learning_rate": 2.1710087067165998e-05, |
| "loss": 0.3659, |
| "num_tokens": 1694940744.0, |
| "step": 6475 |
| }, |
| { |
| "epoch": 3.020979020979021, |
| "grad_norm": 0.31181861303337255, |
| "learning_rate": 2.1676571485193282e-05, |
| "loss": 0.367, |
| "num_tokens": 1696244679.0, |
| "step": 6480 |
| }, |
| { |
| "epoch": 3.023310023310023, |
| "grad_norm": 0.34475890728017977, |
| "learning_rate": 2.1643069746549694e-05, |
| "loss": 0.3575, |
| "num_tokens": 1697555399.0, |
| "step": 6485 |
| }, |
| { |
| "epoch": 3.0256410256410255, |
| "grad_norm": 0.3467950380584418, |
| "learning_rate": 2.1609581930874835e-05, |
| "loss": 0.3531, |
| "num_tokens": 1698866119.0, |
| "step": 6490 |
| }, |
| { |
| "epoch": 3.027972027972028, |
| "grad_norm": 0.3620598508450458, |
| "learning_rate": 2.1576108117775205e-05, |
| "loss": 0.3685, |
| "num_tokens": 1700166619.0, |
| "step": 6495 |
| }, |
| { |
| "epoch": 3.0303030303030303, |
| "grad_norm": 0.32443886801232974, |
| "learning_rate": 2.154264838682407e-05, |
| "loss": 0.3438, |
| "num_tokens": 1701477339.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.0326340326340326, |
| "grad_norm": 0.3529808773793806, |
| "learning_rate": 2.1509202817561164e-05, |
| "loss": 0.3613, |
| "num_tokens": 1702788059.0, |
| "step": 6505 |
| }, |
| { |
| "epoch": 3.034965034965035, |
| "grad_norm": 0.34696686691787276, |
| "learning_rate": 2.1475771489492567e-05, |
| "loss": 0.3548, |
| "num_tokens": 1704098779.0, |
| "step": 6510 |
| }, |
| { |
| "epoch": 3.0372960372960374, |
| "grad_norm": 0.35892737626187393, |
| "learning_rate": 2.144235448209052e-05, |
| "loss": 0.3546, |
| "num_tokens": 1705409499.0, |
| "step": 6515 |
| }, |
| { |
| "epoch": 3.0396270396270397, |
| "grad_norm": 0.3442123945875795, |
| "learning_rate": 2.140895187479322e-05, |
| "loss": 0.3461, |
| "num_tokens": 1706707069.0, |
| "step": 6520 |
| }, |
| { |
| "epoch": 3.041958041958042, |
| "grad_norm": 0.3173856196639699, |
| "learning_rate": 2.137556374700463e-05, |
| "loss": 0.3513, |
| "num_tokens": 1708017789.0, |
| "step": 6525 |
| }, |
| { |
| "epoch": 3.0442890442890445, |
| "grad_norm": 0.34717882632048175, |
| "learning_rate": 2.1342190178094267e-05, |
| "loss": 0.3616, |
| "num_tokens": 1709324169.0, |
| "step": 6530 |
| }, |
| { |
| "epoch": 3.046620046620047, |
| "grad_norm": 0.36986993805338514, |
| "learning_rate": 2.1308831247397094e-05, |
| "loss": 0.3543, |
| "num_tokens": 1710634889.0, |
| "step": 6535 |
| }, |
| { |
| "epoch": 3.0489510489510487, |
| "grad_norm": 0.3537014861009382, |
| "learning_rate": 2.1275487034213227e-05, |
| "loss": 0.3434, |
| "num_tokens": 1711945609.0, |
| "step": 6540 |
| }, |
| { |
| "epoch": 3.051282051282051, |
| "grad_norm": 0.3753264100076296, |
| "learning_rate": 2.1242157617807807e-05, |
| "loss": 0.3509, |
| "num_tokens": 1713256329.0, |
| "step": 6545 |
| }, |
| { |
| "epoch": 3.0536130536130535, |
| "grad_norm": 0.3526114805968593, |
| "learning_rate": 2.1208843077410816e-05, |
| "loss": 0.3542, |
| "num_tokens": 1714567049.0, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.055944055944056, |
| "grad_norm": 0.32727730145454015, |
| "learning_rate": 2.117554349221687e-05, |
| "loss": 0.3536, |
| "num_tokens": 1715877769.0, |
| "step": 6555 |
| }, |
| { |
| "epoch": 3.058275058275058, |
| "grad_norm": 0.3437242965711344, |
| "learning_rate": 2.1142258941385012e-05, |
| "loss": 0.3525, |
| "num_tokens": 1717182601.0, |
| "step": 6560 |
| }, |
| { |
| "epoch": 3.0606060606060606, |
| "grad_norm": 0.33805241257757357, |
| "learning_rate": 2.1108989504038567e-05, |
| "loss": 0.3603, |
| "num_tokens": 1718493321.0, |
| "step": 6565 |
| }, |
| { |
| "epoch": 3.062937062937063, |
| "grad_norm": 0.3645037205678424, |
| "learning_rate": 2.1075735259264935e-05, |
| "loss": 0.3576, |
| "num_tokens": 1719804041.0, |
| "step": 6570 |
| }, |
| { |
| "epoch": 3.0652680652680653, |
| "grad_norm": 0.33019158555715583, |
| "learning_rate": 2.1042496286115383e-05, |
| "loss": 0.3455, |
| "num_tokens": 1721114761.0, |
| "step": 6575 |
| }, |
| { |
| "epoch": 3.0675990675990676, |
| "grad_norm": 0.35114211762624814, |
| "learning_rate": 2.100927266360487e-05, |
| "loss": 0.3624, |
| "num_tokens": 1722425481.0, |
| "step": 6580 |
| }, |
| { |
| "epoch": 3.06993006993007, |
| "grad_norm": 0.341780681859243, |
| "learning_rate": 2.0976064470711908e-05, |
| "loss": 0.3487, |
| "num_tokens": 1723733066.0, |
| "step": 6585 |
| }, |
| { |
| "epoch": 3.0722610722610724, |
| "grad_norm": 0.31101561100825387, |
| "learning_rate": 2.0942871786378283e-05, |
| "loss": 0.3424, |
| "num_tokens": 1725043786.0, |
| "step": 6590 |
| }, |
| { |
| "epoch": 3.0745920745920747, |
| "grad_norm": 0.3413708055501447, |
| "learning_rate": 2.090969468950892e-05, |
| "loss": 0.3449, |
| "num_tokens": 1726347459.0, |
| "step": 6595 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 0.3631157917623202, |
| "learning_rate": 2.087653325897172e-05, |
| "loss": 0.3533, |
| "num_tokens": 1727658179.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.0792540792540795, |
| "grad_norm": 0.35244419005544364, |
| "learning_rate": 2.0843387573597324e-05, |
| "loss": 0.3594, |
| "num_tokens": 1728968899.0, |
| "step": 6605 |
| }, |
| { |
| "epoch": 3.0815850815850814, |
| "grad_norm": 0.3705036974511524, |
| "learning_rate": 2.0810257712178914e-05, |
| "loss": 0.3519, |
| "num_tokens": 1730278354.0, |
| "step": 6610 |
| }, |
| { |
| "epoch": 3.0839160839160837, |
| "grad_norm": 0.37121857812051107, |
| "learning_rate": 2.077714375347213e-05, |
| "loss": 0.344, |
| "num_tokens": 1731589074.0, |
| "step": 6615 |
| }, |
| { |
| "epoch": 3.086247086247086, |
| "grad_norm": 0.5821507533226011, |
| "learning_rate": 2.074404577619472e-05, |
| "loss": 0.3588, |
| "num_tokens": 1732899794.0, |
| "step": 6620 |
| }, |
| { |
| "epoch": 3.0885780885780885, |
| "grad_norm": 0.34294237084456936, |
| "learning_rate": 2.071096385902651e-05, |
| "loss": 0.3466, |
| "num_tokens": 1734195166.0, |
| "step": 6625 |
| }, |
| { |
| "epoch": 3.090909090909091, |
| "grad_norm": 0.3654999256904382, |
| "learning_rate": 2.067789808060911e-05, |
| "loss": 0.361, |
| "num_tokens": 1735505886.0, |
| "step": 6630 |
| }, |
| { |
| "epoch": 3.093240093240093, |
| "grad_norm": 0.33471915902491023, |
| "learning_rate": 2.064484851954579e-05, |
| "loss": 0.3542, |
| "num_tokens": 1736816606.0, |
| "step": 6635 |
| }, |
| { |
| "epoch": 3.0955710955710956, |
| "grad_norm": 0.3378622718593044, |
| "learning_rate": 2.061181525440124e-05, |
| "loss": 0.3542, |
| "num_tokens": 1738127326.0, |
| "step": 6640 |
| }, |
| { |
| "epoch": 3.097902097902098, |
| "grad_norm": 0.3179265953787293, |
| "learning_rate": 2.057879836370144e-05, |
| "loss": 0.3444, |
| "num_tokens": 1739438046.0, |
| "step": 6645 |
| }, |
| { |
| "epoch": 3.1002331002331003, |
| "grad_norm": 0.3616349714374049, |
| "learning_rate": 2.0545797925933437e-05, |
| "loss": 0.3502, |
| "num_tokens": 1740748766.0, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.1025641025641026, |
| "grad_norm": 0.3304854559937638, |
| "learning_rate": 2.0512814019545153e-05, |
| "loss": 0.3549, |
| "num_tokens": 1742059486.0, |
| "step": 6655 |
| }, |
| { |
| "epoch": 3.104895104895105, |
| "grad_norm": 0.3524011066513125, |
| "learning_rate": 2.047984672294521e-05, |
| "loss": 0.3465, |
| "num_tokens": 1743360293.0, |
| "step": 6660 |
| }, |
| { |
| "epoch": 3.1072261072261074, |
| "grad_norm": 0.35458352089191, |
| "learning_rate": 2.044689611450279e-05, |
| "loss": 0.3549, |
| "num_tokens": 1744671013.0, |
| "step": 6665 |
| }, |
| { |
| "epoch": 3.1095571095571097, |
| "grad_norm": 0.3269683114844527, |
| "learning_rate": 2.0413962272547343e-05, |
| "loss": 0.3686, |
| "num_tokens": 1745981733.0, |
| "step": 6670 |
| }, |
| { |
| "epoch": 3.111888111888112, |
| "grad_norm": 0.33666988209851806, |
| "learning_rate": 2.0381045275368504e-05, |
| "loss": 0.3569, |
| "num_tokens": 1747292453.0, |
| "step": 6675 |
| }, |
| { |
| "epoch": 3.114219114219114, |
| "grad_norm": 0.36146342082712163, |
| "learning_rate": 2.034814520121584e-05, |
| "loss": 0.3628, |
| "num_tokens": 1748603173.0, |
| "step": 6680 |
| }, |
| { |
| "epoch": 3.1165501165501164, |
| "grad_norm": 0.3358286209657946, |
| "learning_rate": 2.0315262128298713e-05, |
| "loss": 0.3503, |
| "num_tokens": 1749913893.0, |
| "step": 6685 |
| }, |
| { |
| "epoch": 3.1188811188811187, |
| "grad_norm": 0.335084467637873, |
| "learning_rate": 2.0282396134786052e-05, |
| "loss": 0.3654, |
| "num_tokens": 1751212459.0, |
| "step": 6690 |
| }, |
| { |
| "epoch": 3.121212121212121, |
| "grad_norm": 0.3438646934966656, |
| "learning_rate": 2.024954729880618e-05, |
| "loss": 0.3603, |
| "num_tokens": 1752511336.0, |
| "step": 6695 |
| }, |
| { |
| "epoch": 3.1235431235431235, |
| "grad_norm": 0.3361069117028752, |
| "learning_rate": 2.0216715698446665e-05, |
| "loss": 0.359, |
| "num_tokens": 1753807515.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.125874125874126, |
| "grad_norm": 0.34476799845177647, |
| "learning_rate": 2.0183901411754074e-05, |
| "loss": 0.3559, |
| "num_tokens": 1755118235.0, |
| "step": 6705 |
| }, |
| { |
| "epoch": 3.128205128205128, |
| "grad_norm": 0.3307136432579508, |
| "learning_rate": 2.01511045167338e-05, |
| "loss": 0.3595, |
| "num_tokens": 1756428955.0, |
| "step": 6710 |
| }, |
| { |
| "epoch": 3.1305361305361306, |
| "grad_norm": 0.38351409943354237, |
| "learning_rate": 2.011832509134996e-05, |
| "loss": 0.3515, |
| "num_tokens": 1757739675.0, |
| "step": 6715 |
| }, |
| { |
| "epoch": 3.132867132867133, |
| "grad_norm": 0.3085970573121658, |
| "learning_rate": 2.0085563213525065e-05, |
| "loss": 0.3622, |
| "num_tokens": 1759050395.0, |
| "step": 6720 |
| }, |
| { |
| "epoch": 3.1351981351981353, |
| "grad_norm": 0.33740198770023655, |
| "learning_rate": 2.005281896113997e-05, |
| "loss": 0.3564, |
| "num_tokens": 1760361115.0, |
| "step": 6725 |
| }, |
| { |
| "epoch": 3.1375291375291376, |
| "grad_norm": 0.3727057443139077, |
| "learning_rate": 2.0020092412033587e-05, |
| "loss": 0.3651, |
| "num_tokens": 1761671835.0, |
| "step": 6730 |
| }, |
| { |
| "epoch": 3.13986013986014, |
| "grad_norm": 0.3237016735245595, |
| "learning_rate": 1.9987383644002776e-05, |
| "loss": 0.355, |
| "num_tokens": 1762982555.0, |
| "step": 6735 |
| }, |
| { |
| "epoch": 3.1421911421911424, |
| "grad_norm": 0.3357114616276399, |
| "learning_rate": 1.995469273480212e-05, |
| "loss": 0.3566, |
| "num_tokens": 1764293275.0, |
| "step": 6740 |
| }, |
| { |
| "epoch": 3.1445221445221447, |
| "grad_norm": 0.3438598090126279, |
| "learning_rate": 1.9922019762143744e-05, |
| "loss": 0.3583, |
| "num_tokens": 1765603995.0, |
| "step": 6745 |
| }, |
| { |
| "epoch": 3.1468531468531467, |
| "grad_norm": 0.3456073924056793, |
| "learning_rate": 1.9889364803697137e-05, |
| "loss": 0.3781, |
| "num_tokens": 1766914715.0, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.149184149184149, |
| "grad_norm": 0.33427973566883806, |
| "learning_rate": 1.9856727937088955e-05, |
| "loss": 0.3451, |
| "num_tokens": 1768213787.0, |
| "step": 6755 |
| }, |
| { |
| "epoch": 3.1515151515151514, |
| "grad_norm": 0.3420541495199111, |
| "learning_rate": 1.9824109239902865e-05, |
| "loss": 0.3705, |
| "num_tokens": 1769519941.0, |
| "step": 6760 |
| }, |
| { |
| "epoch": 3.1538461538461537, |
| "grad_norm": 0.3396999634927412, |
| "learning_rate": 1.9791508789679337e-05, |
| "loss": 0.3563, |
| "num_tokens": 1770830661.0, |
| "step": 6765 |
| }, |
| { |
| "epoch": 3.156177156177156, |
| "grad_norm": 0.3580402097640208, |
| "learning_rate": 1.9758926663915455e-05, |
| "loss": 0.3635, |
| "num_tokens": 1772141381.0, |
| "step": 6770 |
| }, |
| { |
| "epoch": 3.1585081585081585, |
| "grad_norm": 0.3349041898550379, |
| "learning_rate": 1.9726362940064752e-05, |
| "loss": 0.3514, |
| "num_tokens": 1773452101.0, |
| "step": 6775 |
| }, |
| { |
| "epoch": 3.160839160839161, |
| "grad_norm": 0.33160721758218376, |
| "learning_rate": 1.9693817695537e-05, |
| "loss": 0.3556, |
| "num_tokens": 1774762821.0, |
| "step": 6780 |
| }, |
| { |
| "epoch": 3.163170163170163, |
| "grad_norm": 0.34077147929405477, |
| "learning_rate": 1.9661291007698062e-05, |
| "loss": 0.3549, |
| "num_tokens": 1776073541.0, |
| "step": 6785 |
| }, |
| { |
| "epoch": 3.1655011655011656, |
| "grad_norm": 0.350707492092592, |
| "learning_rate": 1.9628782953869696e-05, |
| "loss": 0.3575, |
| "num_tokens": 1777384261.0, |
| "step": 6790 |
| }, |
| { |
| "epoch": 3.167832167832168, |
| "grad_norm": 0.32637137207920947, |
| "learning_rate": 1.959629361132932e-05, |
| "loss": 0.3487, |
| "num_tokens": 1778681608.0, |
| "step": 6795 |
| }, |
| { |
| "epoch": 3.1701631701631703, |
| "grad_norm": 0.34823978144338197, |
| "learning_rate": 1.956382305730993e-05, |
| "loss": 0.3579, |
| "num_tokens": 1779992328.0, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.1724941724941726, |
| "grad_norm": 0.3231155957000068, |
| "learning_rate": 1.953137136899982e-05, |
| "loss": 0.3485, |
| "num_tokens": 1781303048.0, |
| "step": 6805 |
| }, |
| { |
| "epoch": 3.174825174825175, |
| "grad_norm": 0.32946568056880193, |
| "learning_rate": 1.9498938623542418e-05, |
| "loss": 0.3536, |
| "num_tokens": 1782611783.0, |
| "step": 6810 |
| }, |
| { |
| "epoch": 3.177156177156177, |
| "grad_norm": 0.33586077368245165, |
| "learning_rate": 1.94665248980362e-05, |
| "loss": 0.3476, |
| "num_tokens": 1783922503.0, |
| "step": 6815 |
| }, |
| { |
| "epoch": 3.1794871794871793, |
| "grad_norm": 0.345953994294241, |
| "learning_rate": 1.943413026953434e-05, |
| "loss": 0.3567, |
| "num_tokens": 1785233223.0, |
| "step": 6820 |
| }, |
| { |
| "epoch": 3.1818181818181817, |
| "grad_norm": 0.3350392695147026, |
| "learning_rate": 1.9401754815044665e-05, |
| "loss": 0.368, |
| "num_tokens": 1786536776.0, |
| "step": 6825 |
| }, |
| { |
| "epoch": 3.184149184149184, |
| "grad_norm": 0.34554062524489243, |
| "learning_rate": 1.9369398611529405e-05, |
| "loss": 0.3589, |
| "num_tokens": 1787847496.0, |
| "step": 6830 |
| }, |
| { |
| "epoch": 3.1864801864801864, |
| "grad_norm": 0.3554652710783725, |
| "learning_rate": 1.9337061735905038e-05, |
| "loss": 0.3516, |
| "num_tokens": 1789158216.0, |
| "step": 6835 |
| }, |
| { |
| "epoch": 3.1888111888111887, |
| "grad_norm": 0.3893789449502026, |
| "learning_rate": 1.930474426504209e-05, |
| "loss": 0.3631, |
| "num_tokens": 1790468936.0, |
| "step": 6840 |
| }, |
| { |
| "epoch": 3.191142191142191, |
| "grad_norm": 0.3556543352743324, |
| "learning_rate": 1.9272446275764954e-05, |
| "loss": 0.3733, |
| "num_tokens": 1791779656.0, |
| "step": 6845 |
| }, |
| { |
| "epoch": 3.1934731934731935, |
| "grad_norm": 0.35483552905034643, |
| "learning_rate": 1.924016784485172e-05, |
| "loss": 0.3609, |
| "num_tokens": 1793090376.0, |
| "step": 6850 |
| }, |
| { |
| "epoch": 3.195804195804196, |
| "grad_norm": 0.33949854975550836, |
| "learning_rate": 1.9207909049033972e-05, |
| "loss": 0.3584, |
| "num_tokens": 1794401096.0, |
| "step": 6855 |
| }, |
| { |
| "epoch": 3.198135198135198, |
| "grad_norm": 0.33640749306479956, |
| "learning_rate": 1.9175669964996636e-05, |
| "loss": 0.3633, |
| "num_tokens": 1795711816.0, |
| "step": 6860 |
| }, |
| { |
| "epoch": 3.2004662004662006, |
| "grad_norm": 0.3409998399187101, |
| "learning_rate": 1.9143450669377762e-05, |
| "loss": 0.3634, |
| "num_tokens": 1797022536.0, |
| "step": 6865 |
| }, |
| { |
| "epoch": 3.202797202797203, |
| "grad_norm": 0.3275911678319333, |
| "learning_rate": 1.9111251238768373e-05, |
| "loss": 0.3487, |
| "num_tokens": 1798317072.0, |
| "step": 6870 |
| }, |
| { |
| "epoch": 3.2051282051282053, |
| "grad_norm": 0.3348585258935862, |
| "learning_rate": 1.9079071749712262e-05, |
| "loss": 0.354, |
| "num_tokens": 1799619860.0, |
| "step": 6875 |
| }, |
| { |
| "epoch": 3.2074592074592077, |
| "grad_norm": 0.34340870474918594, |
| "learning_rate": 1.9046912278705815e-05, |
| "loss": 0.363, |
| "num_tokens": 1800930580.0, |
| "step": 6880 |
| }, |
| { |
| "epoch": 3.20979020979021, |
| "grad_norm": 0.3407672323919426, |
| "learning_rate": 1.901477290219784e-05, |
| "loss": 0.3573, |
| "num_tokens": 1802235866.0, |
| "step": 6885 |
| }, |
| { |
| "epoch": 3.212121212121212, |
| "grad_norm": 0.3586302213819541, |
| "learning_rate": 1.898265369658938e-05, |
| "loss": 0.3595, |
| "num_tokens": 1803546586.0, |
| "step": 6890 |
| }, |
| { |
| "epoch": 3.2144522144522143, |
| "grad_norm": 0.3352168444831923, |
| "learning_rate": 1.8950554738233495e-05, |
| "loss": 0.3547, |
| "num_tokens": 1804840952.0, |
| "step": 6895 |
| }, |
| { |
| "epoch": 3.2167832167832167, |
| "grad_norm": 0.33771864707994703, |
| "learning_rate": 1.8918476103435174e-05, |
| "loss": 0.3581, |
| "num_tokens": 1806151672.0, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.219114219114219, |
| "grad_norm": 0.32791228261887584, |
| "learning_rate": 1.888641786845102e-05, |
| "loss": 0.3475, |
| "num_tokens": 1807462392.0, |
| "step": 6905 |
| }, |
| { |
| "epoch": 3.2214452214452214, |
| "grad_norm": 0.3475449829583515, |
| "learning_rate": 1.8854380109489206e-05, |
| "loss": 0.3597, |
| "num_tokens": 1808759709.0, |
| "step": 6910 |
| }, |
| { |
| "epoch": 3.2237762237762237, |
| "grad_norm": 0.349286003745334, |
| "learning_rate": 1.88223629027092e-05, |
| "loss": 0.3702, |
| "num_tokens": 1810057084.0, |
| "step": 6915 |
| }, |
| { |
| "epoch": 3.226107226107226, |
| "grad_norm": 0.34279824899570205, |
| "learning_rate": 1.8790366324221616e-05, |
| "loss": 0.3572, |
| "num_tokens": 1811367804.0, |
| "step": 6920 |
| }, |
| { |
| "epoch": 3.2284382284382285, |
| "grad_norm": 0.3270543901207085, |
| "learning_rate": 1.8758390450088025e-05, |
| "loss": 0.3581, |
| "num_tokens": 1812678524.0, |
| "step": 6925 |
| }, |
| { |
| "epoch": 3.230769230769231, |
| "grad_norm": 0.3719710378237588, |
| "learning_rate": 1.8726435356320804e-05, |
| "loss": 0.3503, |
| "num_tokens": 1813989244.0, |
| "step": 6930 |
| }, |
| { |
| "epoch": 3.233100233100233, |
| "grad_norm": 0.34389192152825504, |
| "learning_rate": 1.8694501118882902e-05, |
| "loss": 0.3677, |
| "num_tokens": 1815299964.0, |
| "step": 6935 |
| }, |
| { |
| "epoch": 3.2354312354312356, |
| "grad_norm": 0.3206403097527311, |
| "learning_rate": 1.8662587813687704e-05, |
| "loss": 0.3698, |
| "num_tokens": 1816610684.0, |
| "step": 6940 |
| }, |
| { |
| "epoch": 3.237762237762238, |
| "grad_norm": 0.3333938729526916, |
| "learning_rate": 1.8630695516598832e-05, |
| "loss": 0.3517, |
| "num_tokens": 1817921404.0, |
| "step": 6945 |
| }, |
| { |
| "epoch": 3.2400932400932403, |
| "grad_norm": 0.3453920815805985, |
| "learning_rate": 1.8598824303429985e-05, |
| "loss": 0.3608, |
| "num_tokens": 1819232124.0, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.242424242424242, |
| "grad_norm": 0.34058628970443383, |
| "learning_rate": 1.8566974249944707e-05, |
| "loss": 0.356, |
| "num_tokens": 1820534563.0, |
| "step": 6955 |
| }, |
| { |
| "epoch": 3.2447552447552446, |
| "grad_norm": 0.3288432699435565, |
| "learning_rate": 1.8535145431856266e-05, |
| "loss": 0.3554, |
| "num_tokens": 1821845283.0, |
| "step": 6960 |
| }, |
| { |
| "epoch": 3.247086247086247, |
| "grad_norm": 0.35168833123437004, |
| "learning_rate": 1.8503337924827446e-05, |
| "loss": 0.3537, |
| "num_tokens": 1823156003.0, |
| "step": 6965 |
| }, |
| { |
| "epoch": 3.2494172494172493, |
| "grad_norm": 0.3392709841202291, |
| "learning_rate": 1.8471551804470372e-05, |
| "loss": 0.3557, |
| "num_tokens": 1824466723.0, |
| "step": 6970 |
| }, |
| { |
| "epoch": 3.2517482517482517, |
| "grad_norm": 0.3426042646396951, |
| "learning_rate": 1.8439787146346314e-05, |
| "loss": 0.3532, |
| "num_tokens": 1825777443.0, |
| "step": 6975 |
| }, |
| { |
| "epoch": 3.254079254079254, |
| "grad_norm": 0.3454602775337345, |
| "learning_rate": 1.8408044025965555e-05, |
| "loss": 0.3484, |
| "num_tokens": 1827081333.0, |
| "step": 6980 |
| }, |
| { |
| "epoch": 3.2564102564102564, |
| "grad_norm": 0.36506433832795027, |
| "learning_rate": 1.8376322518787144e-05, |
| "loss": 0.3621, |
| "num_tokens": 1828392053.0, |
| "step": 6985 |
| }, |
| { |
| "epoch": 3.2587412587412588, |
| "grad_norm": 0.36524454212892954, |
| "learning_rate": 1.8344622700218774e-05, |
| "loss": 0.3632, |
| "num_tokens": 1829702773.0, |
| "step": 6990 |
| }, |
| { |
| "epoch": 3.261072261072261, |
| "grad_norm": 0.3635612315820556, |
| "learning_rate": 1.831294464561655e-05, |
| "loss": 0.3577, |
| "num_tokens": 1831013493.0, |
| "step": 6995 |
| }, |
| { |
| "epoch": 3.2634032634032635, |
| "grad_norm": 0.3439969893709455, |
| "learning_rate": 1.8281288430284898e-05, |
| "loss": 0.3587, |
| "num_tokens": 1832324213.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.265734265734266, |
| "grad_norm": 0.3664905114985399, |
| "learning_rate": 1.8249654129476267e-05, |
| "loss": 0.3643, |
| "num_tokens": 1833634933.0, |
| "step": 7005 |
| }, |
| { |
| "epoch": 3.268065268065268, |
| "grad_norm": 0.3415502009175583, |
| "learning_rate": 1.8218041818391046e-05, |
| "loss": 0.3627, |
| "num_tokens": 1834945653.0, |
| "step": 7010 |
| }, |
| { |
| "epoch": 3.2703962703962706, |
| "grad_norm": 0.3416327517246984, |
| "learning_rate": 1.8186451572177348e-05, |
| "loss": 0.3581, |
| "num_tokens": 1836243452.0, |
| "step": 7015 |
| }, |
| { |
| "epoch": 3.2727272727272725, |
| "grad_norm": 0.34814377337071994, |
| "learning_rate": 1.8154883465930816e-05, |
| "loss": 0.3629, |
| "num_tokens": 1837547262.0, |
| "step": 7020 |
| }, |
| { |
| "epoch": 3.2750582750582753, |
| "grad_norm": 0.34272113854001507, |
| "learning_rate": 1.812333757469447e-05, |
| "loss": 0.3489, |
| "num_tokens": 1838857982.0, |
| "step": 7025 |
| }, |
| { |
| "epoch": 3.277389277389277, |
| "grad_norm": 0.35857308092533485, |
| "learning_rate": 1.8091813973458538e-05, |
| "loss": 0.3756, |
| "num_tokens": 1840156853.0, |
| "step": 7030 |
| }, |
| { |
| "epoch": 3.2797202797202796, |
| "grad_norm": 0.32955350777081055, |
| "learning_rate": 1.806031273716025e-05, |
| "loss": 0.3706, |
| "num_tokens": 1841467573.0, |
| "step": 7035 |
| }, |
| { |
| "epoch": 3.282051282051282, |
| "grad_norm": 0.3456971274249648, |
| "learning_rate": 1.802883394068366e-05, |
| "loss": 0.3567, |
| "num_tokens": 1842778293.0, |
| "step": 7040 |
| }, |
| { |
| "epoch": 3.2843822843822843, |
| "grad_norm": 0.3288793199678882, |
| "learning_rate": 1.7997377658859464e-05, |
| "loss": 0.3604, |
| "num_tokens": 1844089013.0, |
| "step": 7045 |
| }, |
| { |
| "epoch": 3.2867132867132867, |
| "grad_norm": 0.3251910644151693, |
| "learning_rate": 1.796594396646491e-05, |
| "loss": 0.3573, |
| "num_tokens": 1845399733.0, |
| "step": 7050 |
| }, |
| { |
| "epoch": 3.289044289044289, |
| "grad_norm": 0.32298843530806964, |
| "learning_rate": 1.7934532938223457e-05, |
| "loss": 0.368, |
| "num_tokens": 1846710453.0, |
| "step": 7055 |
| }, |
| { |
| "epoch": 3.2913752913752914, |
| "grad_norm": 0.3390547972284236, |
| "learning_rate": 1.7903144648804725e-05, |
| "loss": 0.3488, |
| "num_tokens": 1848021173.0, |
| "step": 7060 |
| }, |
| { |
| "epoch": 3.2937062937062938, |
| "grad_norm": 0.31322899510921975, |
| "learning_rate": 1.7871779172824316e-05, |
| "loss": 0.3567, |
| "num_tokens": 1849331893.0, |
| "step": 7065 |
| }, |
| { |
| "epoch": 3.296037296037296, |
| "grad_norm": 0.3248889681783109, |
| "learning_rate": 1.7840436584843536e-05, |
| "loss": 0.3569, |
| "num_tokens": 1850642613.0, |
| "step": 7070 |
| }, |
| { |
| "epoch": 3.2983682983682985, |
| "grad_norm": 0.3205616691475068, |
| "learning_rate": 1.780911695936931e-05, |
| "loss": 0.3479, |
| "num_tokens": 1851953333.0, |
| "step": 7075 |
| }, |
| { |
| "epoch": 3.300699300699301, |
| "grad_norm": 0.3382664447246441, |
| "learning_rate": 1.7777820370853988e-05, |
| "loss": 0.3602, |
| "num_tokens": 1853264053.0, |
| "step": 7080 |
| }, |
| { |
| "epoch": 3.303030303030303, |
| "grad_norm": 0.3535879556837902, |
| "learning_rate": 1.7746546893695148e-05, |
| "loss": 0.354, |
| "num_tokens": 1854574773.0, |
| "step": 7085 |
| }, |
| { |
| "epoch": 3.3053613053613056, |
| "grad_norm": 0.3688428341220924, |
| "learning_rate": 1.7715296602235427e-05, |
| "loss": 0.3568, |
| "num_tokens": 1855885493.0, |
| "step": 7090 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 0.34122421753248505, |
| "learning_rate": 1.768406957076234e-05, |
| "loss": 0.3659, |
| "num_tokens": 1857196213.0, |
| "step": 7095 |
| }, |
| { |
| "epoch": 3.31002331002331, |
| "grad_norm": 0.3474475995666852, |
| "learning_rate": 1.7652865873508134e-05, |
| "loss": 0.3658, |
| "num_tokens": 1858506933.0, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.312354312354312, |
| "grad_norm": 0.3120215313160503, |
| "learning_rate": 1.7621685584649543e-05, |
| "loss": 0.3636, |
| "num_tokens": 1859817653.0, |
| "step": 7105 |
| }, |
| { |
| "epoch": 3.3146853146853146, |
| "grad_norm": 0.34100285389129825, |
| "learning_rate": 1.7590528778307693e-05, |
| "loss": 0.3575, |
| "num_tokens": 1861128373.0, |
| "step": 7110 |
| }, |
| { |
| "epoch": 3.317016317016317, |
| "grad_norm": 0.3401139043980884, |
| "learning_rate": 1.7559395528547874e-05, |
| "loss": 0.3716, |
| "num_tokens": 1862423586.0, |
| "step": 7115 |
| }, |
| { |
| "epoch": 3.3193473193473193, |
| "grad_norm": 0.35077446828202064, |
| "learning_rate": 1.752828590937938e-05, |
| "loss": 0.3715, |
| "num_tokens": 1863734306.0, |
| "step": 7120 |
| }, |
| { |
| "epoch": 3.3216783216783217, |
| "grad_norm": 0.3890151970682224, |
| "learning_rate": 1.7497199994755313e-05, |
| "loss": 0.3625, |
| "num_tokens": 1865045026.0, |
| "step": 7125 |
| }, |
| { |
| "epoch": 3.324009324009324, |
| "grad_norm": 0.3357664208904343, |
| "learning_rate": 1.7466137858572467e-05, |
| "loss": 0.3565, |
| "num_tokens": 1866349041.0, |
| "step": 7130 |
| }, |
| { |
| "epoch": 3.3263403263403264, |
| "grad_norm": 0.3489477894369833, |
| "learning_rate": 1.743509957467107e-05, |
| "loss": 0.3615, |
| "num_tokens": 1867659761.0, |
| "step": 7135 |
| }, |
| { |
| "epoch": 3.3286713286713288, |
| "grad_norm": 0.32828301621058453, |
| "learning_rate": 1.740408521683465e-05, |
| "loss": 0.3456, |
| "num_tokens": 1868970481.0, |
| "step": 7140 |
| }, |
| { |
| "epoch": 3.331002331002331, |
| "grad_norm": 0.3585651282189203, |
| "learning_rate": 1.7373094858789905e-05, |
| "loss": 0.366, |
| "num_tokens": 1870281201.0, |
| "step": 7145 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.3665074740674538, |
| "learning_rate": 1.7342128574206428e-05, |
| "loss": 0.3575, |
| "num_tokens": 1871582933.0, |
| "step": 7150 |
| }, |
| { |
| "epoch": 3.335664335664336, |
| "grad_norm": 0.3250266204452436, |
| "learning_rate": 1.7311186436696597e-05, |
| "loss": 0.3478, |
| "num_tokens": 1872886587.0, |
| "step": 7155 |
| }, |
| { |
| "epoch": 3.3379953379953378, |
| "grad_norm": 0.3224181885863933, |
| "learning_rate": 1.7280268519815413e-05, |
| "loss": 0.349, |
| "num_tokens": 1874197307.0, |
| "step": 7160 |
| }, |
| { |
| "epoch": 3.3403263403263406, |
| "grad_norm": 0.3388758481335769, |
| "learning_rate": 1.7249374897060282e-05, |
| "loss": 0.3583, |
| "num_tokens": 1875508027.0, |
| "step": 7165 |
| }, |
| { |
| "epoch": 3.3426573426573425, |
| "grad_norm": 0.35014080427325917, |
| "learning_rate": 1.7218505641870846e-05, |
| "loss": 0.3644, |
| "num_tokens": 1876818747.0, |
| "step": 7170 |
| }, |
| { |
| "epoch": 3.344988344988345, |
| "grad_norm": 0.34625439089102006, |
| "learning_rate": 1.7187660827628844e-05, |
| "loss": 0.3544, |
| "num_tokens": 1878129467.0, |
| "step": 7175 |
| }, |
| { |
| "epoch": 3.347319347319347, |
| "grad_norm": 0.3322054073312721, |
| "learning_rate": 1.7156840527657915e-05, |
| "loss": 0.3507, |
| "num_tokens": 1879421830.0, |
| "step": 7180 |
| }, |
| { |
| "epoch": 3.3496503496503496, |
| "grad_norm": 0.31612604477115036, |
| "learning_rate": 1.712604481522339e-05, |
| "loss": 0.3527, |
| "num_tokens": 1880732550.0, |
| "step": 7185 |
| }, |
| { |
| "epoch": 3.351981351981352, |
| "grad_norm": 0.32854072716891647, |
| "learning_rate": 1.70952737635322e-05, |
| "loss": 0.3654, |
| "num_tokens": 1882043270.0, |
| "step": 7190 |
| }, |
| { |
| "epoch": 3.3543123543123543, |
| "grad_norm": 0.3377153380244728, |
| "learning_rate": 1.706452744573262e-05, |
| "loss": 0.3639, |
| "num_tokens": 1883353990.0, |
| "step": 7195 |
| }, |
| { |
| "epoch": 3.3566433566433567, |
| "grad_norm": 0.3368928740025494, |
| "learning_rate": 1.7033805934914126e-05, |
| "loss": 0.3615, |
| "num_tokens": 1884664710.0, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.358974358974359, |
| "grad_norm": 0.3423816860220159, |
| "learning_rate": 1.7003109304107245e-05, |
| "loss": 0.3521, |
| "num_tokens": 1885975430.0, |
| "step": 7205 |
| }, |
| { |
| "epoch": 3.3613053613053614, |
| "grad_norm": 0.31997739521276675, |
| "learning_rate": 1.697243762628334e-05, |
| "loss": 0.3576, |
| "num_tokens": 1887286150.0, |
| "step": 7210 |
| }, |
| { |
| "epoch": 3.3636363636363638, |
| "grad_norm": 0.31310634183618563, |
| "learning_rate": 1.6941790974354464e-05, |
| "loss": 0.3578, |
| "num_tokens": 1888596870.0, |
| "step": 7215 |
| }, |
| { |
| "epoch": 3.365967365967366, |
| "grad_norm": 0.32276140427338623, |
| "learning_rate": 1.6911169421173194e-05, |
| "loss": 0.3628, |
| "num_tokens": 1889907590.0, |
| "step": 7220 |
| }, |
| { |
| "epoch": 3.3682983682983685, |
| "grad_norm": 0.32607047003096234, |
| "learning_rate": 1.688057303953241e-05, |
| "loss": 0.3642, |
| "num_tokens": 1891218310.0, |
| "step": 7225 |
| }, |
| { |
| "epoch": 3.370629370629371, |
| "grad_norm": 0.34484157121118036, |
| "learning_rate": 1.6850001902165176e-05, |
| "loss": 0.3467, |
| "num_tokens": 1892529030.0, |
| "step": 7230 |
| }, |
| { |
| "epoch": 3.3729603729603728, |
| "grad_norm": 0.3407500682459095, |
| "learning_rate": 1.6819456081744558e-05, |
| "loss": 0.355, |
| "num_tokens": 1893829272.0, |
| "step": 7235 |
| }, |
| { |
| "epoch": 3.375291375291375, |
| "grad_norm": 0.34265345627298444, |
| "learning_rate": 1.6788935650883407e-05, |
| "loss": 0.3559, |
| "num_tokens": 1895139992.0, |
| "step": 7240 |
| }, |
| { |
| "epoch": 3.3776223776223775, |
| "grad_norm": 0.3256176164919399, |
| "learning_rate": 1.6758440682134235e-05, |
| "loss": 0.3537, |
| "num_tokens": 1896450712.0, |
| "step": 7245 |
| }, |
| { |
| "epoch": 3.37995337995338, |
| "grad_norm": 0.34858433343769213, |
| "learning_rate": 1.6727971247989045e-05, |
| "loss": 0.3573, |
| "num_tokens": 1897749047.0, |
| "step": 7250 |
| }, |
| { |
| "epoch": 3.382284382284382, |
| "grad_norm": 0.35644873419658263, |
| "learning_rate": 1.669752742087911e-05, |
| "loss": 0.356, |
| "num_tokens": 1899059767.0, |
| "step": 7255 |
| }, |
| { |
| "epoch": 3.3846153846153846, |
| "grad_norm": 0.3339145275176127, |
| "learning_rate": 1.6667109273174823e-05, |
| "loss": 0.3562, |
| "num_tokens": 1900370487.0, |
| "step": 7260 |
| }, |
| { |
| "epoch": 3.386946386946387, |
| "grad_norm": 0.3479027207281923, |
| "learning_rate": 1.6636716877185575e-05, |
| "loss": 0.3515, |
| "num_tokens": 1901681207.0, |
| "step": 7265 |
| }, |
| { |
| "epoch": 3.3892773892773893, |
| "grad_norm": 0.3373951962701607, |
| "learning_rate": 1.660635030515952e-05, |
| "loss": 0.3524, |
| "num_tokens": 1902991927.0, |
| "step": 7270 |
| }, |
| { |
| "epoch": 3.3916083916083917, |
| "grad_norm": 0.3252740139876212, |
| "learning_rate": 1.6576009629283402e-05, |
| "loss": 0.3585, |
| "num_tokens": 1904302647.0, |
| "step": 7275 |
| }, |
| { |
| "epoch": 3.393939393939394, |
| "grad_norm": 0.32879290977391473, |
| "learning_rate": 1.654569492168243e-05, |
| "loss": 0.3588, |
| "num_tokens": 1905613367.0, |
| "step": 7280 |
| }, |
| { |
| "epoch": 3.3962703962703964, |
| "grad_norm": 0.34714056963970763, |
| "learning_rate": 1.6515406254420085e-05, |
| "loss": 0.3614, |
| "num_tokens": 1906924087.0, |
| "step": 7285 |
| }, |
| { |
| "epoch": 3.3986013986013988, |
| "grad_norm": 0.32182206400603064, |
| "learning_rate": 1.6485143699497917e-05, |
| "loss": 0.3732, |
| "num_tokens": 1908234807.0, |
| "step": 7290 |
| }, |
| { |
| "epoch": 3.400932400932401, |
| "grad_norm": 0.3521858097420374, |
| "learning_rate": 1.6454907328855436e-05, |
| "loss": 0.3601, |
| "num_tokens": 1909539681.0, |
| "step": 7295 |
| }, |
| { |
| "epoch": 3.403263403263403, |
| "grad_norm": 0.3222892499746984, |
| "learning_rate": 1.6424697214369894e-05, |
| "loss": 0.3548, |
| "num_tokens": 1910850401.0, |
| "step": 7300 |
| }, |
| { |
| "epoch": 3.4055944055944054, |
| "grad_norm": 0.3361283433082892, |
| "learning_rate": 1.6394513427856117e-05, |
| "loss": 0.3627, |
| "num_tokens": 1912161121.0, |
| "step": 7305 |
| }, |
| { |
| "epoch": 3.4079254079254078, |
| "grad_norm": 0.34204868238681374, |
| "learning_rate": 1.6364356041066355e-05, |
| "loss": 0.3577, |
| "num_tokens": 1913471841.0, |
| "step": 7310 |
| }, |
| { |
| "epoch": 3.41025641025641, |
| "grad_norm": 0.3302716919114987, |
| "learning_rate": 1.633422512569011e-05, |
| "loss": 0.367, |
| "num_tokens": 1914782561.0, |
| "step": 7315 |
| }, |
| { |
| "epoch": 3.4125874125874125, |
| "grad_norm": 0.3333885988826235, |
| "learning_rate": 1.630412075335393e-05, |
| "loss": 0.3626, |
| "num_tokens": 1916093281.0, |
| "step": 7320 |
| }, |
| { |
| "epoch": 3.414918414918415, |
| "grad_norm": 0.34793721986248155, |
| "learning_rate": 1.627404299562129e-05, |
| "loss": 0.3613, |
| "num_tokens": 1917404001.0, |
| "step": 7325 |
| }, |
| { |
| "epoch": 3.417249417249417, |
| "grad_norm": 0.3338489940605626, |
| "learning_rate": 1.6243991923992404e-05, |
| "loss": 0.3577, |
| "num_tokens": 1918698383.0, |
| "step": 7330 |
| }, |
| { |
| "epoch": 3.4195804195804196, |
| "grad_norm": 0.33919384025686283, |
| "learning_rate": 1.6213967609904014e-05, |
| "loss": 0.3688, |
| "num_tokens": 1920009103.0, |
| "step": 7335 |
| }, |
| { |
| "epoch": 3.421911421911422, |
| "grad_norm": 0.32694232562377856, |
| "learning_rate": 1.6183970124729268e-05, |
| "loss": 0.3559, |
| "num_tokens": 1921319823.0, |
| "step": 7340 |
| }, |
| { |
| "epoch": 3.4242424242424243, |
| "grad_norm": 0.33584818314646175, |
| "learning_rate": 1.615399953977757e-05, |
| "loss": 0.3589, |
| "num_tokens": 1922630543.0, |
| "step": 7345 |
| }, |
| { |
| "epoch": 3.4265734265734267, |
| "grad_norm": 0.31756474094191617, |
| "learning_rate": 1.612405592629433e-05, |
| "loss": 0.3509, |
| "num_tokens": 1923941263.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 3.428904428904429, |
| "grad_norm": 0.3256749853351091, |
| "learning_rate": 1.6094139355460855e-05, |
| "loss": 0.3589, |
| "num_tokens": 1925251983.0, |
| "step": 7355 |
| }, |
| { |
| "epoch": 3.4312354312354314, |
| "grad_norm": 0.3440498427386265, |
| "learning_rate": 1.6064249898394205e-05, |
| "loss": 0.366, |
| "num_tokens": 1926562703.0, |
| "step": 7360 |
| }, |
| { |
| "epoch": 3.4335664335664333, |
| "grad_norm": 0.31653310321422085, |
| "learning_rate": 1.6034387626146936e-05, |
| "loss": 0.3644, |
| "num_tokens": 1927873423.0, |
| "step": 7365 |
| }, |
| { |
| "epoch": 3.435897435897436, |
| "grad_norm": 0.316076483824156, |
| "learning_rate": 1.6004552609706992e-05, |
| "loss": 0.3512, |
| "num_tokens": 1929184143.0, |
| "step": 7370 |
| }, |
| { |
| "epoch": 3.438228438228438, |
| "grad_norm": 0.32507348992734375, |
| "learning_rate": 1.5974744919997543e-05, |
| "loss": 0.3498, |
| "num_tokens": 1930494863.0, |
| "step": 7375 |
| }, |
| { |
| "epoch": 3.4405594405594404, |
| "grad_norm": 0.3302545729067321, |
| "learning_rate": 1.5944964627876795e-05, |
| "loss": 0.3665, |
| "num_tokens": 1931792174.0, |
| "step": 7380 |
| }, |
| { |
| "epoch": 3.4428904428904428, |
| "grad_norm": 0.32458383971347465, |
| "learning_rate": 1.5915211804137803e-05, |
| "loss": 0.3633, |
| "num_tokens": 1933102894.0, |
| "step": 7385 |
| }, |
| { |
| "epoch": 3.445221445221445, |
| "grad_norm": 0.34226951871442984, |
| "learning_rate": 1.5885486519508347e-05, |
| "loss": 0.3595, |
| "num_tokens": 1934413614.0, |
| "step": 7390 |
| }, |
| { |
| "epoch": 3.4475524475524475, |
| "grad_norm": 0.34897664385155186, |
| "learning_rate": 1.5855788844650744e-05, |
| "loss": 0.3594, |
| "num_tokens": 1935724334.0, |
| "step": 7395 |
| }, |
| { |
| "epoch": 3.44988344988345, |
| "grad_norm": 0.3366078374615375, |
| "learning_rate": 1.5826118850161653e-05, |
| "loss": 0.3551, |
| "num_tokens": 1937035054.0, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.4522144522144522, |
| "grad_norm": 0.36277795416572367, |
| "learning_rate": 1.5796476606571957e-05, |
| "loss": 0.3704, |
| "num_tokens": 1938345774.0, |
| "step": 7405 |
| }, |
| { |
| "epoch": 3.4545454545454546, |
| "grad_norm": 0.34336029415281094, |
| "learning_rate": 1.576686218434656e-05, |
| "loss": 0.3537, |
| "num_tokens": 1939646875.0, |
| "step": 7410 |
| }, |
| { |
| "epoch": 3.456876456876457, |
| "grad_norm": 0.3393838080049611, |
| "learning_rate": 1.5737275653884225e-05, |
| "loss": 0.3724, |
| "num_tokens": 1940957595.0, |
| "step": 7415 |
| }, |
| { |
| "epoch": 3.4592074592074593, |
| "grad_norm": 0.34552607264965096, |
| "learning_rate": 1.5707717085517427e-05, |
| "loss": 0.3535, |
| "num_tokens": 1942268315.0, |
| "step": 7420 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 0.3161448905098042, |
| "learning_rate": 1.567818654951214e-05, |
| "loss": 0.3551, |
| "num_tokens": 1943579035.0, |
| "step": 7425 |
| }, |
| { |
| "epoch": 3.463869463869464, |
| "grad_norm": 0.33578110841595465, |
| "learning_rate": 1.5648684116067737e-05, |
| "loss": 0.3737, |
| "num_tokens": 1944889755.0, |
| "step": 7430 |
| }, |
| { |
| "epoch": 3.4662004662004664, |
| "grad_norm": 0.3547622429809544, |
| "learning_rate": 1.5619209855316766e-05, |
| "loss": 0.3628, |
| "num_tokens": 1946184922.0, |
| "step": 7435 |
| }, |
| { |
| "epoch": 3.4685314685314683, |
| "grad_norm": 0.3257055842704097, |
| "learning_rate": 1.5589763837324794e-05, |
| "loss": 0.3557, |
| "num_tokens": 1947495642.0, |
| "step": 7440 |
| }, |
| { |
| "epoch": 3.4708624708624707, |
| "grad_norm": 0.3259822417139684, |
| "learning_rate": 1.5560346132090275e-05, |
| "loss": 0.3544, |
| "num_tokens": 1948806362.0, |
| "step": 7445 |
| }, |
| { |
| "epoch": 3.473193473193473, |
| "grad_norm": 0.34231123217132675, |
| "learning_rate": 1.5530956809544354e-05, |
| "loss": 0.3609, |
| "num_tokens": 1950117082.0, |
| "step": 7450 |
| }, |
| { |
| "epoch": 3.4755244755244754, |
| "grad_norm": 0.32649483544948477, |
| "learning_rate": 1.5501595939550674e-05, |
| "loss": 0.352, |
| "num_tokens": 1951427802.0, |
| "step": 7455 |
| }, |
| { |
| "epoch": 3.4778554778554778, |
| "grad_norm": 0.3538064723620207, |
| "learning_rate": 1.547226359190528e-05, |
| "loss": 0.3601, |
| "num_tokens": 1952738522.0, |
| "step": 7460 |
| }, |
| { |
| "epoch": 3.48018648018648, |
| "grad_norm": 0.34134701799353756, |
| "learning_rate": 1.544295983633639e-05, |
| "loss": 0.3543, |
| "num_tokens": 1954049242.0, |
| "step": 7465 |
| }, |
| { |
| "epoch": 3.4825174825174825, |
| "grad_norm": 0.3050547799836289, |
| "learning_rate": 1.5413684742504275e-05, |
| "loss": 0.3426, |
| "num_tokens": 1955359962.0, |
| "step": 7470 |
| }, |
| { |
| "epoch": 3.484848484848485, |
| "grad_norm": 0.33289101107690966, |
| "learning_rate": 1.538443838000104e-05, |
| "loss": 0.3555, |
| "num_tokens": 1956670682.0, |
| "step": 7475 |
| }, |
| { |
| "epoch": 3.4871794871794872, |
| "grad_norm": 0.3248102594895154, |
| "learning_rate": 1.5355220818350517e-05, |
| "loss": 0.3664, |
| "num_tokens": 1957981402.0, |
| "step": 7480 |
| }, |
| { |
| "epoch": 3.4895104895104896, |
| "grad_norm": 0.32624207166938474, |
| "learning_rate": 1.5326032127008077e-05, |
| "loss": 0.3627, |
| "num_tokens": 1959292122.0, |
| "step": 7485 |
| }, |
| { |
| "epoch": 3.491841491841492, |
| "grad_norm": 0.32138847441298957, |
| "learning_rate": 1.5296872375360434e-05, |
| "loss": 0.3596, |
| "num_tokens": 1960602842.0, |
| "step": 7490 |
| }, |
| { |
| "epoch": 3.4941724941724943, |
| "grad_norm": 0.3329259719385868, |
| "learning_rate": 1.526774163272553e-05, |
| "loss": 0.3713, |
| "num_tokens": 1961913562.0, |
| "step": 7495 |
| }, |
| { |
| "epoch": 3.4965034965034967, |
| "grad_norm": 0.3270763048933, |
| "learning_rate": 1.5238639968352346e-05, |
| "loss": 0.3605, |
| "num_tokens": 1963224282.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.4988344988344986, |
| "grad_norm": 0.33222559214722885, |
| "learning_rate": 1.520956745142072e-05, |
| "loss": 0.3557, |
| "num_tokens": 1964535002.0, |
| "step": 7505 |
| }, |
| { |
| "epoch": 3.5011655011655014, |
| "grad_norm": 0.31280758663110264, |
| "learning_rate": 1.518052415104122e-05, |
| "loss": 0.3525, |
| "num_tokens": 1965845722.0, |
| "step": 7510 |
| }, |
| { |
| "epoch": 3.5034965034965033, |
| "grad_norm": 0.3474298912782969, |
| "learning_rate": 1.5151510136254971e-05, |
| "loss": 0.3762, |
| "num_tokens": 1967156442.0, |
| "step": 7515 |
| }, |
| { |
| "epoch": 3.5058275058275057, |
| "grad_norm": 0.35950009162167107, |
| "learning_rate": 1.5122525476033448e-05, |
| "loss": 0.3629, |
| "num_tokens": 1968467162.0, |
| "step": 7520 |
| }, |
| { |
| "epoch": 3.508158508158508, |
| "grad_norm": 0.32565859243171547, |
| "learning_rate": 1.5093570239278348e-05, |
| "loss": 0.3567, |
| "num_tokens": 1969777882.0, |
| "step": 7525 |
| }, |
| { |
| "epoch": 3.5104895104895104, |
| "grad_norm": 0.34433837280720203, |
| "learning_rate": 1.5064644494821472e-05, |
| "loss": 0.3578, |
| "num_tokens": 1971088602.0, |
| "step": 7530 |
| }, |
| { |
| "epoch": 3.5128205128205128, |
| "grad_norm": 0.32951210301147243, |
| "learning_rate": 1.503574831142446e-05, |
| "loss": 0.3564, |
| "num_tokens": 1972384019.0, |
| "step": 7535 |
| }, |
| { |
| "epoch": 3.515151515151515, |
| "grad_norm": 0.3256591839058464, |
| "learning_rate": 1.5006881757778687e-05, |
| "loss": 0.3592, |
| "num_tokens": 1973681803.0, |
| "step": 7540 |
| }, |
| { |
| "epoch": 3.5174825174825175, |
| "grad_norm": 0.3271419894397293, |
| "learning_rate": 1.4978044902505133e-05, |
| "loss": 0.3569, |
| "num_tokens": 1974992523.0, |
| "step": 7545 |
| }, |
| { |
| "epoch": 3.51981351981352, |
| "grad_norm": 0.34506187438893227, |
| "learning_rate": 1.4949237814154132e-05, |
| "loss": 0.3484, |
| "num_tokens": 1976296304.0, |
| "step": 7550 |
| }, |
| { |
| "epoch": 3.5221445221445222, |
| "grad_norm": 0.34392115950662083, |
| "learning_rate": 1.4920460561205263e-05, |
| "loss": 0.3605, |
| "num_tokens": 1977607024.0, |
| "step": 7555 |
| }, |
| { |
| "epoch": 3.5244755244755246, |
| "grad_norm": 0.31870592682573046, |
| "learning_rate": 1.4891713212067223e-05, |
| "loss": 0.3539, |
| "num_tokens": 1978917744.0, |
| "step": 7560 |
| }, |
| { |
| "epoch": 3.526806526806527, |
| "grad_norm": 0.3369778884605504, |
| "learning_rate": 1.4862995835077582e-05, |
| "loss": 0.3616, |
| "num_tokens": 1980228464.0, |
| "step": 7565 |
| }, |
| { |
| "epoch": 3.529137529137529, |
| "grad_norm": 0.3181421663920499, |
| "learning_rate": 1.4834308498502652e-05, |
| "loss": 0.3586, |
| "num_tokens": 1981539184.0, |
| "step": 7570 |
| }, |
| { |
| "epoch": 3.5314685314685317, |
| "grad_norm": 0.3574276899464395, |
| "learning_rate": 1.480565127053737e-05, |
| "loss": 0.3432, |
| "num_tokens": 1982849904.0, |
| "step": 7575 |
| }, |
| { |
| "epoch": 3.5337995337995336, |
| "grad_norm": 0.35575215961743184, |
| "learning_rate": 1.4777024219305092e-05, |
| "loss": 0.3638, |
| "num_tokens": 1984160624.0, |
| "step": 7580 |
| }, |
| { |
| "epoch": 3.5361305361305364, |
| "grad_norm": 0.3463286942931235, |
| "learning_rate": 1.4748427412857407e-05, |
| "loss": 0.3687, |
| "num_tokens": 1985471344.0, |
| "step": 7585 |
| }, |
| { |
| "epoch": 3.5384615384615383, |
| "grad_norm": 0.3249769467333938, |
| "learning_rate": 1.4719860919174039e-05, |
| "loss": 0.3618, |
| "num_tokens": 1986782064.0, |
| "step": 7590 |
| }, |
| { |
| "epoch": 3.5407925407925407, |
| "grad_norm": 0.3489092698701824, |
| "learning_rate": 1.469132480616265e-05, |
| "loss": 0.3592, |
| "num_tokens": 1988092784.0, |
| "step": 7595 |
| }, |
| { |
| "epoch": 3.543123543123543, |
| "grad_norm": 0.33206107999422213, |
| "learning_rate": 1.4662819141658662e-05, |
| "loss": 0.3435, |
| "num_tokens": 1989403504.0, |
| "step": 7600 |
| }, |
| { |
| "epoch": 3.5454545454545454, |
| "grad_norm": 0.32926989102798976, |
| "learning_rate": 1.4634343993425132e-05, |
| "loss": 0.3598, |
| "num_tokens": 1990714224.0, |
| "step": 7605 |
| }, |
| { |
| "epoch": 3.5477855477855478, |
| "grad_norm": 0.3312078503555784, |
| "learning_rate": 1.4605899429152581e-05, |
| "loss": 0.366, |
| "num_tokens": 1992024944.0, |
| "step": 7610 |
| }, |
| { |
| "epoch": 3.55011655011655, |
| "grad_norm": 0.32101930069810486, |
| "learning_rate": 1.45774855164588e-05, |
| "loss": 0.3786, |
| "num_tokens": 1993335664.0, |
| "step": 7615 |
| }, |
| { |
| "epoch": 3.5524475524475525, |
| "grad_norm": 0.3270442973643097, |
| "learning_rate": 1.4549102322888739e-05, |
| "loss": 0.3522, |
| "num_tokens": 1994646384.0, |
| "step": 7620 |
| }, |
| { |
| "epoch": 3.554778554778555, |
| "grad_norm": 0.3410847452328196, |
| "learning_rate": 1.452074991591432e-05, |
| "loss": 0.3661, |
| "num_tokens": 1995957104.0, |
| "step": 7625 |
| }, |
| { |
| "epoch": 3.5571095571095572, |
| "grad_norm": 0.3311841562429209, |
| "learning_rate": 1.4492428362934269e-05, |
| "loss": 0.3644, |
| "num_tokens": 1997267824.0, |
| "step": 7630 |
| }, |
| { |
| "epoch": 3.5594405594405596, |
| "grad_norm": 0.34891575680597303, |
| "learning_rate": 1.4464137731273974e-05, |
| "loss": 0.3659, |
| "num_tokens": 1998564615.0, |
| "step": 7635 |
| }, |
| { |
| "epoch": 3.561771561771562, |
| "grad_norm": 0.3270565111830101, |
| "learning_rate": 1.4435878088185317e-05, |
| "loss": 0.3588, |
| "num_tokens": 1999861934.0, |
| "step": 7640 |
| }, |
| { |
| "epoch": 3.564102564102564, |
| "grad_norm": 0.3566229262833042, |
| "learning_rate": 1.440764950084652e-05, |
| "loss": 0.3651, |
| "num_tokens": 2001172654.0, |
| "step": 7645 |
| }, |
| { |
| "epoch": 3.5664335664335667, |
| "grad_norm": 0.34737324488773064, |
| "learning_rate": 1.4379452036361963e-05, |
| "loss": 0.3685, |
| "num_tokens": 2002483374.0, |
| "step": 7650 |
| }, |
| { |
| "epoch": 3.5687645687645686, |
| "grad_norm": 0.3479603277552613, |
| "learning_rate": 1.4351285761762057e-05, |
| "loss": 0.3603, |
| "num_tokens": 2003794094.0, |
| "step": 7655 |
| }, |
| { |
| "epoch": 3.571095571095571, |
| "grad_norm": 0.320114547771287, |
| "learning_rate": 1.4323150744003075e-05, |
| "loss": 0.3594, |
| "num_tokens": 2005104814.0, |
| "step": 7660 |
| }, |
| { |
| "epoch": 3.5734265734265733, |
| "grad_norm": 0.32862256230307296, |
| "learning_rate": 1.4295047049966958e-05, |
| "loss": 0.3605, |
| "num_tokens": 2006415534.0, |
| "step": 7665 |
| }, |
| { |
| "epoch": 3.5757575757575757, |
| "grad_norm": 0.35139510455853196, |
| "learning_rate": 1.4266974746461217e-05, |
| "loss": 0.3586, |
| "num_tokens": 2007726254.0, |
| "step": 7670 |
| }, |
| { |
| "epoch": 3.578088578088578, |
| "grad_norm": 0.33176820642996774, |
| "learning_rate": 1.4238933900218731e-05, |
| "loss": 0.3515, |
| "num_tokens": 2009033953.0, |
| "step": 7675 |
| }, |
| { |
| "epoch": 3.5804195804195804, |
| "grad_norm": 0.36171642550182254, |
| "learning_rate": 1.4210924577897583e-05, |
| "loss": 0.3604, |
| "num_tokens": 2010344673.0, |
| "step": 7680 |
| }, |
| { |
| "epoch": 3.582750582750583, |
| "grad_norm": 0.3374934342841691, |
| "learning_rate": 1.4182946846080952e-05, |
| "loss": 0.3545, |
| "num_tokens": 2011655393.0, |
| "step": 7685 |
| }, |
| { |
| "epoch": 3.585081585081585, |
| "grad_norm": 0.32415522876729075, |
| "learning_rate": 1.4155000771276878e-05, |
| "loss": 0.3434, |
| "num_tokens": 2012966113.0, |
| "step": 7690 |
| }, |
| { |
| "epoch": 3.5874125874125875, |
| "grad_norm": 0.3552935558976595, |
| "learning_rate": 1.4127086419918178e-05, |
| "loss": 0.366, |
| "num_tokens": 2014276833.0, |
| "step": 7695 |
| }, |
| { |
| "epoch": 3.58974358974359, |
| "grad_norm": 0.3306470072028632, |
| "learning_rate": 1.4099203858362262e-05, |
| "loss": 0.3608, |
| "num_tokens": 2015587553.0, |
| "step": 7700 |
| }, |
| { |
| "epoch": 3.5920745920745922, |
| "grad_norm": 0.32899882379271284, |
| "learning_rate": 1.4071353152890936e-05, |
| "loss": 0.3564, |
| "num_tokens": 2016898273.0, |
| "step": 7705 |
| }, |
| { |
| "epoch": 3.594405594405594, |
| "grad_norm": 0.32893163008661985, |
| "learning_rate": 1.4043534369710307e-05, |
| "loss": 0.3618, |
| "num_tokens": 2018208993.0, |
| "step": 7710 |
| }, |
| { |
| "epoch": 3.596736596736597, |
| "grad_norm": 0.33890514726280374, |
| "learning_rate": 1.4015747574950597e-05, |
| "loss": 0.3585, |
| "num_tokens": 2019519713.0, |
| "step": 7715 |
| }, |
| { |
| "epoch": 3.599067599067599, |
| "grad_norm": 0.35025561594471827, |
| "learning_rate": 1.3987992834665963e-05, |
| "loss": 0.3764, |
| "num_tokens": 2020817105.0, |
| "step": 7720 |
| }, |
| { |
| "epoch": 3.6013986013986012, |
| "grad_norm": 0.322499498679104, |
| "learning_rate": 1.3960270214834381e-05, |
| "loss": 0.3557, |
| "num_tokens": 2022127825.0, |
| "step": 7725 |
| }, |
| { |
| "epoch": 3.6037296037296036, |
| "grad_norm": 0.32465177989473437, |
| "learning_rate": 1.3932579781357477e-05, |
| "loss": 0.3528, |
| "num_tokens": 2023438545.0, |
| "step": 7730 |
| }, |
| { |
| "epoch": 3.606060606060606, |
| "grad_norm": 0.32534204421549856, |
| "learning_rate": 1.390492160006035e-05, |
| "loss": 0.3514, |
| "num_tokens": 2024749265.0, |
| "step": 7735 |
| }, |
| { |
| "epoch": 3.6083916083916083, |
| "grad_norm": 0.3348791455491572, |
| "learning_rate": 1.3877295736691408e-05, |
| "loss": 0.3548, |
| "num_tokens": 2026059985.0, |
| "step": 7740 |
| }, |
| { |
| "epoch": 3.6107226107226107, |
| "grad_norm": 0.30376460026082297, |
| "learning_rate": 1.3849702256922309e-05, |
| "loss": 0.3517, |
| "num_tokens": 2027370705.0, |
| "step": 7745 |
| }, |
| { |
| "epoch": 3.613053613053613, |
| "grad_norm": 0.3202796706000662, |
| "learning_rate": 1.3822141226347646e-05, |
| "loss": 0.3661, |
| "num_tokens": 2028678090.0, |
| "step": 7750 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 0.32249662232530607, |
| "learning_rate": 1.3794612710484905e-05, |
| "loss": 0.351, |
| "num_tokens": 2029988810.0, |
| "step": 7755 |
| }, |
| { |
| "epoch": 3.617715617715618, |
| "grad_norm": 0.3475421569483326, |
| "learning_rate": 1.3767116774774307e-05, |
| "loss": 0.3744, |
| "num_tokens": 2031288913.0, |
| "step": 7760 |
| }, |
| { |
| "epoch": 3.62004662004662, |
| "grad_norm": 0.33679545809282563, |
| "learning_rate": 1.3739653484578586e-05, |
| "loss": 0.3555, |
| "num_tokens": 2032589943.0, |
| "step": 7765 |
| }, |
| { |
| "epoch": 3.6223776223776225, |
| "grad_norm": 0.340971223695831, |
| "learning_rate": 1.3712222905182881e-05, |
| "loss": 0.3499, |
| "num_tokens": 2033900663.0, |
| "step": 7770 |
| }, |
| { |
| "epoch": 3.624708624708625, |
| "grad_norm": 0.3039493470993974, |
| "learning_rate": 1.3684825101794575e-05, |
| "loss": 0.3514, |
| "num_tokens": 2035211383.0, |
| "step": 7775 |
| }, |
| { |
| "epoch": 3.6270396270396272, |
| "grad_norm": 0.3352791563799734, |
| "learning_rate": 1.3657460139543155e-05, |
| "loss": 0.3626, |
| "num_tokens": 2036522103.0, |
| "step": 7780 |
| }, |
| { |
| "epoch": 3.629370629370629, |
| "grad_norm": 0.34252274886593165, |
| "learning_rate": 1.3630128083479998e-05, |
| "loss": 0.3504, |
| "num_tokens": 2037832823.0, |
| "step": 7785 |
| }, |
| { |
| "epoch": 3.631701631701632, |
| "grad_norm": 0.3513374308995934, |
| "learning_rate": 1.3602828998578293e-05, |
| "loss": 0.3684, |
| "num_tokens": 2039143543.0, |
| "step": 7790 |
| }, |
| { |
| "epoch": 3.634032634032634, |
| "grad_norm": 0.33422112695710476, |
| "learning_rate": 1.3575562949732845e-05, |
| "loss": 0.3584, |
| "num_tokens": 2040454263.0, |
| "step": 7795 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.35082577383897384, |
| "learning_rate": 1.3548330001759898e-05, |
| "loss": 0.3797, |
| "num_tokens": 2041764983.0, |
| "step": 7800 |
| }, |
| { |
| "epoch": 3.6386946386946386, |
| "grad_norm": 0.3188497601819274, |
| "learning_rate": 1.352113021939705e-05, |
| "loss": 0.3511, |
| "num_tokens": 2043075703.0, |
| "step": 7805 |
| }, |
| { |
| "epoch": 3.641025641025641, |
| "grad_norm": 0.34579309685383136, |
| "learning_rate": 1.3493963667303036e-05, |
| "loss": 0.3563, |
| "num_tokens": 2044386423.0, |
| "step": 7810 |
| }, |
| { |
| "epoch": 3.6433566433566433, |
| "grad_norm": 0.3507371852839906, |
| "learning_rate": 1.3466830410057588e-05, |
| "loss": 0.3416, |
| "num_tokens": 2045697143.0, |
| "step": 7815 |
| }, |
| { |
| "epoch": 3.6456876456876457, |
| "grad_norm": 0.3437622175939715, |
| "learning_rate": 1.343973051216131e-05, |
| "loss": 0.3652, |
| "num_tokens": 2047007863.0, |
| "step": 7820 |
| }, |
| { |
| "epoch": 3.648018648018648, |
| "grad_norm": 0.31415387046199733, |
| "learning_rate": 1.3412664038035507e-05, |
| "loss": 0.3619, |
| "num_tokens": 2048318583.0, |
| "step": 7825 |
| }, |
| { |
| "epoch": 3.6503496503496504, |
| "grad_norm": 0.3398023578909356, |
| "learning_rate": 1.338563105202201e-05, |
| "loss": 0.35, |
| "num_tokens": 2049629303.0, |
| "step": 7830 |
| }, |
| { |
| "epoch": 3.652680652680653, |
| "grad_norm": 0.3850082776958453, |
| "learning_rate": 1.3358631618383041e-05, |
| "loss": 0.3495, |
| "num_tokens": 2050940023.0, |
| "step": 7835 |
| }, |
| { |
| "epoch": 3.655011655011655, |
| "grad_norm": 0.3506882837431775, |
| "learning_rate": 1.3331665801301085e-05, |
| "loss": 0.3587, |
| "num_tokens": 2052245292.0, |
| "step": 7840 |
| }, |
| { |
| "epoch": 3.6573426573426575, |
| "grad_norm": 0.33043151182996516, |
| "learning_rate": 1.3304733664878714e-05, |
| "loss": 0.3757, |
| "num_tokens": 2053556012.0, |
| "step": 7845 |
| }, |
| { |
| "epoch": 3.6596736596736594, |
| "grad_norm": 0.32244585116058333, |
| "learning_rate": 1.32778352731384e-05, |
| "loss": 0.3674, |
| "num_tokens": 2054864779.0, |
| "step": 7850 |
| }, |
| { |
| "epoch": 3.6620046620046622, |
| "grad_norm": 0.34649671127745907, |
| "learning_rate": 1.3250970690022435e-05, |
| "loss": 0.3607, |
| "num_tokens": 2056173760.0, |
| "step": 7855 |
| }, |
| { |
| "epoch": 3.664335664335664, |
| "grad_norm": 0.3592288768686839, |
| "learning_rate": 1.3224139979392739e-05, |
| "loss": 0.3483, |
| "num_tokens": 2057484480.0, |
| "step": 7860 |
| }, |
| { |
| "epoch": 3.6666666666666665, |
| "grad_norm": 0.33267649430105906, |
| "learning_rate": 1.3197343205030677e-05, |
| "loss": 0.3601, |
| "num_tokens": 2058795200.0, |
| "step": 7865 |
| }, |
| { |
| "epoch": 3.668997668997669, |
| "grad_norm": 0.3155221547689689, |
| "learning_rate": 1.317058043063698e-05, |
| "loss": 0.3592, |
| "num_tokens": 2060105920.0, |
| "step": 7870 |
| }, |
| { |
| "epoch": 3.6713286713286712, |
| "grad_norm": 0.32504676536520444, |
| "learning_rate": 1.3143851719831545e-05, |
| "loss": 0.347, |
| "num_tokens": 2061416640.0, |
| "step": 7875 |
| }, |
| { |
| "epoch": 3.6736596736596736, |
| "grad_norm": 0.3390312427438953, |
| "learning_rate": 1.3117157136153275e-05, |
| "loss": 0.3598, |
| "num_tokens": 2062727360.0, |
| "step": 7880 |
| }, |
| { |
| "epoch": 3.675990675990676, |
| "grad_norm": 0.3272385721818141, |
| "learning_rate": 1.3090496743059963e-05, |
| "loss": 0.3457, |
| "num_tokens": 2064038080.0, |
| "step": 7885 |
| }, |
| { |
| "epoch": 3.6783216783216783, |
| "grad_norm": 0.3406171598088719, |
| "learning_rate": 1.3063870603928135e-05, |
| "loss": 0.3619, |
| "num_tokens": 2065348800.0, |
| "step": 7890 |
| }, |
| { |
| "epoch": 3.6806526806526807, |
| "grad_norm": 0.3199792712358228, |
| "learning_rate": 1.3037278782052863e-05, |
| "loss": 0.3676, |
| "num_tokens": 2066659520.0, |
| "step": 7895 |
| }, |
| { |
| "epoch": 3.682983682983683, |
| "grad_norm": 0.32349347623327407, |
| "learning_rate": 1.3010721340647672e-05, |
| "loss": 0.351, |
| "num_tokens": 2067970240.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 3.6853146853146854, |
| "grad_norm": 0.3197976897891316, |
| "learning_rate": 1.2984198342844317e-05, |
| "loss": 0.3507, |
| "num_tokens": 2069280960.0, |
| "step": 7905 |
| }, |
| { |
| "epoch": 3.687645687645688, |
| "grad_norm": 0.31153094945488263, |
| "learning_rate": 1.2957709851692709e-05, |
| "loss": 0.3531, |
| "num_tokens": 2070591680.0, |
| "step": 7910 |
| }, |
| { |
| "epoch": 3.6899766899766897, |
| "grad_norm": 0.34104591153105757, |
| "learning_rate": 1.293125593016073e-05, |
| "loss": 0.352, |
| "num_tokens": 2071902400.0, |
| "step": 7915 |
| }, |
| { |
| "epoch": 3.6923076923076925, |
| "grad_norm": 0.32756629305528456, |
| "learning_rate": 1.2904836641134058e-05, |
| "loss": 0.3609, |
| "num_tokens": 2073193557.0, |
| "step": 7920 |
| }, |
| { |
| "epoch": 3.6946386946386944, |
| "grad_norm": 0.32387204375058537, |
| "learning_rate": 1.2878452047416065e-05, |
| "loss": 0.3558, |
| "num_tokens": 2074504277.0, |
| "step": 7925 |
| }, |
| { |
| "epoch": 3.6969696969696972, |
| "grad_norm": 0.34675201672498257, |
| "learning_rate": 1.2852102211727648e-05, |
| "loss": 0.3616, |
| "num_tokens": 2075814997.0, |
| "step": 7930 |
| }, |
| { |
| "epoch": 3.699300699300699, |
| "grad_norm": 0.3247586444847732, |
| "learning_rate": 1.2825787196707059e-05, |
| "loss": 0.349, |
| "num_tokens": 2077125717.0, |
| "step": 7935 |
| }, |
| { |
| "epoch": 3.7016317016317015, |
| "grad_norm": 0.3156890734127022, |
| "learning_rate": 1.2799507064909787e-05, |
| "loss": 0.3533, |
| "num_tokens": 2078436437.0, |
| "step": 7940 |
| }, |
| { |
| "epoch": 3.703962703962704, |
| "grad_norm": 0.3227247560032221, |
| "learning_rate": 1.2773261878808413e-05, |
| "loss": 0.3466, |
| "num_tokens": 2079747157.0, |
| "step": 7945 |
| }, |
| { |
| "epoch": 3.7062937062937062, |
| "grad_norm": 0.3216324993277899, |
| "learning_rate": 1.2747051700792412e-05, |
| "loss": 0.3554, |
| "num_tokens": 2081057877.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 3.7086247086247086, |
| "grad_norm": 0.31249996903728416, |
| "learning_rate": 1.2720876593168052e-05, |
| "loss": 0.3492, |
| "num_tokens": 2082368597.0, |
| "step": 7955 |
| }, |
| { |
| "epoch": 3.710955710955711, |
| "grad_norm": 0.3188316644091349, |
| "learning_rate": 1.2694736618158249e-05, |
| "loss": 0.3458, |
| "num_tokens": 2083679317.0, |
| "step": 7960 |
| }, |
| { |
| "epoch": 3.7132867132867133, |
| "grad_norm": 0.3335771049219983, |
| "learning_rate": 1.2668631837902389e-05, |
| "loss": 0.3424, |
| "num_tokens": 2084990037.0, |
| "step": 7965 |
| }, |
| { |
| "epoch": 3.7156177156177157, |
| "grad_norm": 0.3499985089110688, |
| "learning_rate": 1.2642562314456185e-05, |
| "loss": 0.3534, |
| "num_tokens": 2086300757.0, |
| "step": 7970 |
| }, |
| { |
| "epoch": 3.717948717948718, |
| "grad_norm": 0.3410878602651583, |
| "learning_rate": 1.2616528109791554e-05, |
| "loss": 0.3659, |
| "num_tokens": 2087611477.0, |
| "step": 7975 |
| }, |
| { |
| "epoch": 3.7202797202797204, |
| "grad_norm": 0.3527728670467037, |
| "learning_rate": 1.259052928579646e-05, |
| "loss": 0.3591, |
| "num_tokens": 2088908084.0, |
| "step": 7980 |
| }, |
| { |
| "epoch": 3.722610722610723, |
| "grad_norm": 0.3510762979938676, |
| "learning_rate": 1.2564565904274722e-05, |
| "loss": 0.368, |
| "num_tokens": 2090210123.0, |
| "step": 7985 |
| }, |
| { |
| "epoch": 3.7249417249417247, |
| "grad_norm": 0.37404478878722747, |
| "learning_rate": 1.2538638026945954e-05, |
| "loss": 0.3647, |
| "num_tokens": 2091520843.0, |
| "step": 7990 |
| }, |
| { |
| "epoch": 3.7272727272727275, |
| "grad_norm": 0.3253947492356622, |
| "learning_rate": 1.2512745715445345e-05, |
| "loss": 0.3691, |
| "num_tokens": 2092831563.0, |
| "step": 7995 |
| }, |
| { |
| "epoch": 3.7296037296037294, |
| "grad_norm": 0.3730204049764418, |
| "learning_rate": 1.2486889031323528e-05, |
| "loss": 0.3568, |
| "num_tokens": 2094140244.0, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.731934731934732, |
| "grad_norm": 0.322985878009181, |
| "learning_rate": 1.2461068036046474e-05, |
| "loss": 0.3558, |
| "num_tokens": 2095450964.0, |
| "step": 8005 |
| }, |
| { |
| "epoch": 3.734265734265734, |
| "grad_norm": 0.3451139274796168, |
| "learning_rate": 1.2435282790995294e-05, |
| "loss": 0.3568, |
| "num_tokens": 2096761684.0, |
| "step": 8010 |
| }, |
| { |
| "epoch": 3.7365967365967365, |
| "grad_norm": 0.32232149066408405, |
| "learning_rate": 1.240953335746611e-05, |
| "loss": 0.3592, |
| "num_tokens": 2098072404.0, |
| "step": 8015 |
| }, |
| { |
| "epoch": 3.738927738927739, |
| "grad_norm": 0.33356015443779835, |
| "learning_rate": 1.2383819796669929e-05, |
| "loss": 0.3485, |
| "num_tokens": 2099383124.0, |
| "step": 8020 |
| }, |
| { |
| "epoch": 3.7412587412587412, |
| "grad_norm": 0.31783927007138674, |
| "learning_rate": 1.235814216973248e-05, |
| "loss": 0.362, |
| "num_tokens": 2100693844.0, |
| "step": 8025 |
| }, |
| { |
| "epoch": 3.7435897435897436, |
| "grad_norm": 0.3258650151987321, |
| "learning_rate": 1.2332500537694061e-05, |
| "loss": 0.3643, |
| "num_tokens": 2102004564.0, |
| "step": 8030 |
| }, |
| { |
| "epoch": 3.745920745920746, |
| "grad_norm": 0.31862835430157704, |
| "learning_rate": 1.2306894961509392e-05, |
| "loss": 0.3559, |
| "num_tokens": 2103315284.0, |
| "step": 8035 |
| }, |
| { |
| "epoch": 3.7482517482517483, |
| "grad_norm": 0.31403800945327537, |
| "learning_rate": 1.2281325502047526e-05, |
| "loss": 0.352, |
| "num_tokens": 2104610250.0, |
| "step": 8040 |
| }, |
| { |
| "epoch": 3.7505827505827507, |
| "grad_norm": 0.3364153402051559, |
| "learning_rate": 1.2255792220091623e-05, |
| "loss": 0.3605, |
| "num_tokens": 2105920970.0, |
| "step": 8045 |
| }, |
| { |
| "epoch": 3.752913752913753, |
| "grad_norm": 0.33818645363457694, |
| "learning_rate": 1.2230295176338843e-05, |
| "loss": 0.3528, |
| "num_tokens": 2107231690.0, |
| "step": 8050 |
| }, |
| { |
| "epoch": 3.755244755244755, |
| "grad_norm": 0.33218579315347985, |
| "learning_rate": 1.2204834431400218e-05, |
| "loss": 0.3646, |
| "num_tokens": 2108542410.0, |
| "step": 8055 |
| }, |
| { |
| "epoch": 3.757575757575758, |
| "grad_norm": 0.3150157899865806, |
| "learning_rate": 1.2179410045800486e-05, |
| "loss": 0.3678, |
| "num_tokens": 2109845114.0, |
| "step": 8060 |
| }, |
| { |
| "epoch": 3.7599067599067597, |
| "grad_norm": 0.3239476998553056, |
| "learning_rate": 1.2154022079977941e-05, |
| "loss": 0.364, |
| "num_tokens": 2111155834.0, |
| "step": 8065 |
| }, |
| { |
| "epoch": 3.762237762237762, |
| "grad_norm": 0.30771382308356826, |
| "learning_rate": 1.2128670594284317e-05, |
| "loss": 0.3656, |
| "num_tokens": 2112466554.0, |
| "step": 8070 |
| }, |
| { |
| "epoch": 3.7645687645687644, |
| "grad_norm": 0.36628760033281316, |
| "learning_rate": 1.2103355648984627e-05, |
| "loss": 0.3539, |
| "num_tokens": 2113777274.0, |
| "step": 8075 |
| }, |
| { |
| "epoch": 3.766899766899767, |
| "grad_norm": 0.3435307692569418, |
| "learning_rate": 1.2078077304256999e-05, |
| "loss": 0.3698, |
| "num_tokens": 2115087994.0, |
| "step": 8080 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 0.31904278075717907, |
| "learning_rate": 1.2052835620192577e-05, |
| "loss": 0.3539, |
| "num_tokens": 2116398714.0, |
| "step": 8085 |
| }, |
| { |
| "epoch": 3.7715617715617715, |
| "grad_norm": 0.33066965750120453, |
| "learning_rate": 1.2027630656795365e-05, |
| "loss": 0.3608, |
| "num_tokens": 2117709434.0, |
| "step": 8090 |
| }, |
| { |
| "epoch": 3.773892773892774, |
| "grad_norm": 0.3254496733507665, |
| "learning_rate": 1.2002462473982034e-05, |
| "loss": 0.3646, |
| "num_tokens": 2119020154.0, |
| "step": 8095 |
| }, |
| { |
| "epoch": 3.7762237762237763, |
| "grad_norm": 0.318029815318303, |
| "learning_rate": 1.1977331131581872e-05, |
| "loss": 0.3643, |
| "num_tokens": 2120330874.0, |
| "step": 8100 |
| }, |
| { |
| "epoch": 3.7785547785547786, |
| "grad_norm": 0.33657956842752124, |
| "learning_rate": 1.1952236689336547e-05, |
| "loss": 0.3483, |
| "num_tokens": 2121641594.0, |
| "step": 8105 |
| }, |
| { |
| "epoch": 3.780885780885781, |
| "grad_norm": 0.339494433625539, |
| "learning_rate": 1.1927179206900036e-05, |
| "loss": 0.3624, |
| "num_tokens": 2122952314.0, |
| "step": 8110 |
| }, |
| { |
| "epoch": 3.7832167832167833, |
| "grad_norm": 0.32937653713889037, |
| "learning_rate": 1.1902158743838455e-05, |
| "loss": 0.3578, |
| "num_tokens": 2124263034.0, |
| "step": 8115 |
| }, |
| { |
| "epoch": 3.7855477855477857, |
| "grad_norm": 0.31184931579510083, |
| "learning_rate": 1.1877175359629895e-05, |
| "loss": 0.3515, |
| "num_tokens": 2125564666.0, |
| "step": 8120 |
| }, |
| { |
| "epoch": 3.787878787878788, |
| "grad_norm": 0.32358413426775373, |
| "learning_rate": 1.185222911366433e-05, |
| "loss": 0.3634, |
| "num_tokens": 2126875386.0, |
| "step": 8125 |
| }, |
| { |
| "epoch": 3.79020979020979, |
| "grad_norm": 0.34491635141056515, |
| "learning_rate": 1.1827320065243442e-05, |
| "loss": 0.3663, |
| "num_tokens": 2128186106.0, |
| "step": 8130 |
| }, |
| { |
| "epoch": 3.792540792540793, |
| "grad_norm": 0.3263014090650137, |
| "learning_rate": 1.1802448273580482e-05, |
| "loss": 0.3531, |
| "num_tokens": 2129496826.0, |
| "step": 8135 |
| }, |
| { |
| "epoch": 3.7948717948717947, |
| "grad_norm": 0.34055454076158553, |
| "learning_rate": 1.1777613797800132e-05, |
| "loss": 0.3526, |
| "num_tokens": 2130807546.0, |
| "step": 8140 |
| }, |
| { |
| "epoch": 3.797202797202797, |
| "grad_norm": 0.35098519401821454, |
| "learning_rate": 1.175281669693839e-05, |
| "loss": 0.3567, |
| "num_tokens": 2132118266.0, |
| "step": 8145 |
| }, |
| { |
| "epoch": 3.7995337995337994, |
| "grad_norm": 0.3266773596447305, |
| "learning_rate": 1.1728057029942377e-05, |
| "loss": 0.3531, |
| "num_tokens": 2133428986.0, |
| "step": 8150 |
| }, |
| { |
| "epoch": 3.801864801864802, |
| "grad_norm": 0.31360535728361383, |
| "learning_rate": 1.170333485567025e-05, |
| "loss": 0.3674, |
| "num_tokens": 2134739706.0, |
| "step": 8155 |
| }, |
| { |
| "epoch": 3.804195804195804, |
| "grad_norm": 0.31624940012549485, |
| "learning_rate": 1.1678650232891021e-05, |
| "loss": 0.3518, |
| "num_tokens": 2136050426.0, |
| "step": 8160 |
| }, |
| { |
| "epoch": 3.8065268065268065, |
| "grad_norm": 0.3141407909472399, |
| "learning_rate": 1.1654003220284459e-05, |
| "loss": 0.3619, |
| "num_tokens": 2137356817.0, |
| "step": 8165 |
| }, |
| { |
| "epoch": 3.808857808857809, |
| "grad_norm": 0.34368525998084476, |
| "learning_rate": 1.1629393876440894e-05, |
| "loss": 0.3526, |
| "num_tokens": 2138667537.0, |
| "step": 8170 |
| }, |
| { |
| "epoch": 3.8111888111888113, |
| "grad_norm": 0.33230532572557525, |
| "learning_rate": 1.1604822259861143e-05, |
| "loss": 0.3554, |
| "num_tokens": 2139978257.0, |
| "step": 8175 |
| }, |
| { |
| "epoch": 3.8135198135198136, |
| "grad_norm": 0.3285215453270888, |
| "learning_rate": 1.1580288428956326e-05, |
| "loss": 0.3545, |
| "num_tokens": 2141288977.0, |
| "step": 8180 |
| }, |
| { |
| "epoch": 3.815850815850816, |
| "grad_norm": 0.32253369632017526, |
| "learning_rate": 1.1555792442047727e-05, |
| "loss": 0.3545, |
| "num_tokens": 2142599697.0, |
| "step": 8185 |
| }, |
| { |
| "epoch": 3.8181818181818183, |
| "grad_norm": 0.31275107239667255, |
| "learning_rate": 1.1531334357366687e-05, |
| "loss": 0.3648, |
| "num_tokens": 2143910417.0, |
| "step": 8190 |
| }, |
| { |
| "epoch": 3.8205128205128203, |
| "grad_norm": 0.3371090455672606, |
| "learning_rate": 1.1506914233054449e-05, |
| "loss": 0.3548, |
| "num_tokens": 2145215363.0, |
| "step": 8195 |
| }, |
| { |
| "epoch": 3.822843822843823, |
| "grad_norm": 0.3330220005685937, |
| "learning_rate": 1.1482532127161987e-05, |
| "loss": 0.3682, |
| "num_tokens": 2146526083.0, |
| "step": 8200 |
| }, |
| { |
| "epoch": 3.825174825174825, |
| "grad_norm": 0.3163036289984921, |
| "learning_rate": 1.1458188097649931e-05, |
| "loss": 0.3652, |
| "num_tokens": 2147828267.0, |
| "step": 8205 |
| }, |
| { |
| "epoch": 3.8275058275058274, |
| "grad_norm": 0.31880150929387496, |
| "learning_rate": 1.143388220238839e-05, |
| "loss": 0.3612, |
| "num_tokens": 2149138987.0, |
| "step": 8210 |
| }, |
| { |
| "epoch": 3.8298368298368297, |
| "grad_norm": 0.319810402665015, |
| "learning_rate": 1.1409614499156807e-05, |
| "loss": 0.355, |
| "num_tokens": 2150449707.0, |
| "step": 8215 |
| }, |
| { |
| "epoch": 3.832167832167832, |
| "grad_norm": 0.3394499363463578, |
| "learning_rate": 1.138538504564384e-05, |
| "loss": 0.3543, |
| "num_tokens": 2151760427.0, |
| "step": 8220 |
| }, |
| { |
| "epoch": 3.8344988344988344, |
| "grad_norm": 0.3339998319196104, |
| "learning_rate": 1.1361193899447239e-05, |
| "loss": 0.3643, |
| "num_tokens": 2153071147.0, |
| "step": 8225 |
| }, |
| { |
| "epoch": 3.836829836829837, |
| "grad_norm": 0.3283698373871858, |
| "learning_rate": 1.1337041118073673e-05, |
| "loss": 0.365, |
| "num_tokens": 2154381867.0, |
| "step": 8230 |
| }, |
| { |
| "epoch": 3.839160839160839, |
| "grad_norm": 0.31803261230391683, |
| "learning_rate": 1.1312926758938598e-05, |
| "loss": 0.3542, |
| "num_tokens": 2155692587.0, |
| "step": 8235 |
| }, |
| { |
| "epoch": 3.8414918414918415, |
| "grad_norm": 0.32415297440890517, |
| "learning_rate": 1.1288850879366178e-05, |
| "loss": 0.3476, |
| "num_tokens": 2157003307.0, |
| "step": 8240 |
| }, |
| { |
| "epoch": 3.843822843822844, |
| "grad_norm": 0.32171582377815044, |
| "learning_rate": 1.1264813536589063e-05, |
| "loss": 0.3505, |
| "num_tokens": 2158302271.0, |
| "step": 8245 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.32594823868385486, |
| "learning_rate": 1.1240814787748294e-05, |
| "loss": 0.3542, |
| "num_tokens": 2159612991.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 3.8484848484848486, |
| "grad_norm": 0.3130567477371064, |
| "learning_rate": 1.1216854689893208e-05, |
| "loss": 0.3474, |
| "num_tokens": 2160923711.0, |
| "step": 8255 |
| }, |
| { |
| "epoch": 3.8508158508158505, |
| "grad_norm": 0.32884981317619555, |
| "learning_rate": 1.119293329998122e-05, |
| "loss": 0.3613, |
| "num_tokens": 2162234431.0, |
| "step": 8260 |
| }, |
| { |
| "epoch": 3.8531468531468533, |
| "grad_norm": 0.3182477239378319, |
| "learning_rate": 1.116905067487774e-05, |
| "loss": 0.3496, |
| "num_tokens": 2163545151.0, |
| "step": 8265 |
| }, |
| { |
| "epoch": 3.8554778554778553, |
| "grad_norm": 0.3238906779321128, |
| "learning_rate": 1.1145206871356035e-05, |
| "loss": 0.3755, |
| "num_tokens": 2164845172.0, |
| "step": 8270 |
| }, |
| { |
| "epoch": 3.857808857808858, |
| "grad_norm": 0.3230035270398852, |
| "learning_rate": 1.1121401946097089e-05, |
| "loss": 0.3579, |
| "num_tokens": 2166155892.0, |
| "step": 8275 |
| }, |
| { |
| "epoch": 3.86013986013986, |
| "grad_norm": 0.34202250350581626, |
| "learning_rate": 1.1097635955689447e-05, |
| "loss": 0.367, |
| "num_tokens": 2167466612.0, |
| "step": 8280 |
| }, |
| { |
| "epoch": 3.8624708624708624, |
| "grad_norm": 0.33282529022268353, |
| "learning_rate": 1.107390895662912e-05, |
| "loss": 0.3634, |
| "num_tokens": 2168777332.0, |
| "step": 8285 |
| }, |
| { |
| "epoch": 3.8648018648018647, |
| "grad_norm": 0.34181595087542227, |
| "learning_rate": 1.1050221005319422e-05, |
| "loss": 0.3674, |
| "num_tokens": 2170088052.0, |
| "step": 8290 |
| }, |
| { |
| "epoch": 3.867132867132867, |
| "grad_norm": 0.3036597141524631, |
| "learning_rate": 1.1026572158070831e-05, |
| "loss": 0.3555, |
| "num_tokens": 2171398772.0, |
| "step": 8295 |
| }, |
| { |
| "epoch": 3.8694638694638694, |
| "grad_norm": 0.32232320158504335, |
| "learning_rate": 1.1002962471100883e-05, |
| "loss": 0.3548, |
| "num_tokens": 2172709492.0, |
| "step": 8300 |
| }, |
| { |
| "epoch": 3.871794871794872, |
| "grad_norm": 0.3210640108188077, |
| "learning_rate": 1.0979392000534027e-05, |
| "loss": 0.3585, |
| "num_tokens": 2174020212.0, |
| "step": 8305 |
| }, |
| { |
| "epoch": 3.874125874125874, |
| "grad_norm": 0.3273031634760707, |
| "learning_rate": 1.0955860802401465e-05, |
| "loss": 0.3868, |
| "num_tokens": 2175330932.0, |
| "step": 8310 |
| }, |
| { |
| "epoch": 3.8764568764568765, |
| "grad_norm": 0.31087673967738866, |
| "learning_rate": 1.0932368932641074e-05, |
| "loss": 0.3553, |
| "num_tokens": 2176633046.0, |
| "step": 8315 |
| }, |
| { |
| "epoch": 3.878787878787879, |
| "grad_norm": 0.3289983288827823, |
| "learning_rate": 1.0908916447097199e-05, |
| "loss": 0.3661, |
| "num_tokens": 2177943766.0, |
| "step": 8320 |
| }, |
| { |
| "epoch": 3.8811188811188813, |
| "grad_norm": 0.35024193147589183, |
| "learning_rate": 1.0885503401520598e-05, |
| "loss": 0.3489, |
| "num_tokens": 2179254486.0, |
| "step": 8325 |
| }, |
| { |
| "epoch": 3.8834498834498836, |
| "grad_norm": 0.3317648553400327, |
| "learning_rate": 1.0862129851568261e-05, |
| "loss": 0.3525, |
| "num_tokens": 2180565206.0, |
| "step": 8330 |
| }, |
| { |
| "epoch": 3.8857808857808855, |
| "grad_norm": 0.34059702892010785, |
| "learning_rate": 1.0838795852803285e-05, |
| "loss": 0.3658, |
| "num_tokens": 2181875926.0, |
| "step": 8335 |
| }, |
| { |
| "epoch": 3.8881118881118883, |
| "grad_norm": 0.33839087803574835, |
| "learning_rate": 1.0815501460694752e-05, |
| "loss": 0.357, |
| "num_tokens": 2183186646.0, |
| "step": 8340 |
| }, |
| { |
| "epoch": 3.8904428904428903, |
| "grad_norm": 0.33882338228224024, |
| "learning_rate": 1.0792246730617587e-05, |
| "loss": 0.352, |
| "num_tokens": 2184497366.0, |
| "step": 8345 |
| }, |
| { |
| "epoch": 3.8927738927738926, |
| "grad_norm": 0.33693236579652286, |
| "learning_rate": 1.0769031717852435e-05, |
| "loss": 0.3518, |
| "num_tokens": 2185808086.0, |
| "step": 8350 |
| }, |
| { |
| "epoch": 3.895104895104895, |
| "grad_norm": 0.330046544608827, |
| "learning_rate": 1.0745856477585534e-05, |
| "loss": 0.3645, |
| "num_tokens": 2187118806.0, |
| "step": 8355 |
| }, |
| { |
| "epoch": 3.8974358974358974, |
| "grad_norm": 0.3214183099709155, |
| "learning_rate": 1.0722721064908554e-05, |
| "loss": 0.3602, |
| "num_tokens": 2188419457.0, |
| "step": 8360 |
| }, |
| { |
| "epoch": 3.8997668997668997, |
| "grad_norm": 0.3166655849628983, |
| "learning_rate": 1.0699625534818512e-05, |
| "loss": 0.3618, |
| "num_tokens": 2189730177.0, |
| "step": 8365 |
| }, |
| { |
| "epoch": 3.902097902097902, |
| "grad_norm": 0.31754371228858536, |
| "learning_rate": 1.0676569942217596e-05, |
| "loss": 0.3628, |
| "num_tokens": 2191040897.0, |
| "step": 8370 |
| }, |
| { |
| "epoch": 3.9044289044289044, |
| "grad_norm": 0.3060117009044129, |
| "learning_rate": 1.0653554341913072e-05, |
| "loss": 0.3535, |
| "num_tokens": 2192351617.0, |
| "step": 8375 |
| }, |
| { |
| "epoch": 3.906759906759907, |
| "grad_norm": 0.3435379540747769, |
| "learning_rate": 1.0630578788617131e-05, |
| "loss": 0.3642, |
| "num_tokens": 2193648973.0, |
| "step": 8380 |
| }, |
| { |
| "epoch": 3.909090909090909, |
| "grad_norm": 0.3389237512793272, |
| "learning_rate": 1.060764333694676e-05, |
| "loss": 0.3509, |
| "num_tokens": 2194959693.0, |
| "step": 8385 |
| }, |
| { |
| "epoch": 3.9114219114219115, |
| "grad_norm": 0.32842554162135046, |
| "learning_rate": 1.0584748041423623e-05, |
| "loss": 0.3556, |
| "num_tokens": 2196265846.0, |
| "step": 8390 |
| }, |
| { |
| "epoch": 3.913752913752914, |
| "grad_norm": 0.33837517172721177, |
| "learning_rate": 1.0561892956473932e-05, |
| "loss": 0.3573, |
| "num_tokens": 2197568195.0, |
| "step": 8395 |
| }, |
| { |
| "epoch": 3.916083916083916, |
| "grad_norm": 0.3218786208698996, |
| "learning_rate": 1.0539078136428294e-05, |
| "loss": 0.3634, |
| "num_tokens": 2198878915.0, |
| "step": 8400 |
| }, |
| { |
| "epoch": 3.9184149184149186, |
| "grad_norm": 0.351508799242857, |
| "learning_rate": 1.0516303635521606e-05, |
| "loss": 0.3753, |
| "num_tokens": 2200185526.0, |
| "step": 8405 |
| }, |
| { |
| "epoch": 3.9207459207459205, |
| "grad_norm": 0.34131167249228345, |
| "learning_rate": 1.0493569507892938e-05, |
| "loss": 0.3613, |
| "num_tokens": 2201496246.0, |
| "step": 8410 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 0.3318435319138198, |
| "learning_rate": 1.0470875807585354e-05, |
| "loss": 0.3572, |
| "num_tokens": 2202799214.0, |
| "step": 8415 |
| }, |
| { |
| "epoch": 3.9254079254079253, |
| "grad_norm": 0.33378030300757455, |
| "learning_rate": 1.0448222588545837e-05, |
| "loss": 0.3565, |
| "num_tokens": 2204109934.0, |
| "step": 8420 |
| }, |
| { |
| "epoch": 3.9277389277389276, |
| "grad_norm": 0.33527036359922735, |
| "learning_rate": 1.0425609904625137e-05, |
| "loss": 0.3599, |
| "num_tokens": 2205420654.0, |
| "step": 8425 |
| }, |
| { |
| "epoch": 3.93006993006993, |
| "grad_norm": 0.31167055961688644, |
| "learning_rate": 1.0403037809577636e-05, |
| "loss": 0.3581, |
| "num_tokens": 2206731374.0, |
| "step": 8430 |
| }, |
| { |
| "epoch": 3.9324009324009324, |
| "grad_norm": 0.3311994709997141, |
| "learning_rate": 1.0380506357061221e-05, |
| "loss": 0.3695, |
| "num_tokens": 2208042094.0, |
| "step": 8435 |
| }, |
| { |
| "epoch": 3.9347319347319347, |
| "grad_norm": 0.30544971351661804, |
| "learning_rate": 1.03580156006372e-05, |
| "loss": 0.3575, |
| "num_tokens": 2209352814.0, |
| "step": 8440 |
| }, |
| { |
| "epoch": 3.937062937062937, |
| "grad_norm": 0.3163147870974832, |
| "learning_rate": 1.0335565593770102e-05, |
| "loss": 0.3519, |
| "num_tokens": 2210663534.0, |
| "step": 8445 |
| }, |
| { |
| "epoch": 3.9393939393939394, |
| "grad_norm": 0.31310671731601936, |
| "learning_rate": 1.0313156389827596e-05, |
| "loss": 0.3589, |
| "num_tokens": 2211974254.0, |
| "step": 8450 |
| }, |
| { |
| "epoch": 3.941724941724942, |
| "grad_norm": 0.3359729029408067, |
| "learning_rate": 1.0290788042080375e-05, |
| "loss": 0.3617, |
| "num_tokens": 2213279287.0, |
| "step": 8455 |
| }, |
| { |
| "epoch": 3.944055944055944, |
| "grad_norm": 0.33715090925620084, |
| "learning_rate": 1.026846060370199e-05, |
| "loss": 0.3555, |
| "num_tokens": 2214584857.0, |
| "step": 8460 |
| }, |
| { |
| "epoch": 3.9463869463869465, |
| "grad_norm": 0.30902647410730066, |
| "learning_rate": 1.0246174127768738e-05, |
| "loss": 0.3595, |
| "num_tokens": 2215888814.0, |
| "step": 8465 |
| }, |
| { |
| "epoch": 3.948717948717949, |
| "grad_norm": 0.3291193720775341, |
| "learning_rate": 1.0223928667259556e-05, |
| "loss": 0.3673, |
| "num_tokens": 2217199534.0, |
| "step": 8470 |
| }, |
| { |
| "epoch": 3.951048951048951, |
| "grad_norm": 0.34291852278637736, |
| "learning_rate": 1.020172427505588e-05, |
| "loss": 0.3525, |
| "num_tokens": 2218509771.0, |
| "step": 8475 |
| }, |
| { |
| "epoch": 3.9533799533799536, |
| "grad_norm": 0.3561232943784015, |
| "learning_rate": 1.0179561003941507e-05, |
| "loss": 0.3538, |
| "num_tokens": 2219820491.0, |
| "step": 8480 |
| }, |
| { |
| "epoch": 3.9557109557109555, |
| "grad_norm": 0.3157272502198812, |
| "learning_rate": 1.0157438906602487e-05, |
| "loss": 0.3524, |
| "num_tokens": 2221130082.0, |
| "step": 8485 |
| }, |
| { |
| "epoch": 3.958041958041958, |
| "grad_norm": 0.33080441973925323, |
| "learning_rate": 1.0135358035627007e-05, |
| "loss": 0.3614, |
| "num_tokens": 2222424293.0, |
| "step": 8490 |
| }, |
| { |
| "epoch": 3.9603729603729603, |
| "grad_norm": 0.3289328935479798, |
| "learning_rate": 1.0113318443505226e-05, |
| "loss": 0.3659, |
| "num_tokens": 2223735013.0, |
| "step": 8495 |
| }, |
| { |
| "epoch": 3.9627039627039626, |
| "grad_norm": 0.31711304131356893, |
| "learning_rate": 1.0091320182629193e-05, |
| "loss": 0.3653, |
| "num_tokens": 2225045733.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.965034965034965, |
| "grad_norm": 0.323224468581729, |
| "learning_rate": 1.0069363305292708e-05, |
| "loss": 0.3628, |
| "num_tokens": 2226356453.0, |
| "step": 8505 |
| }, |
| { |
| "epoch": 3.9673659673659674, |
| "grad_norm": 0.3399766154632268, |
| "learning_rate": 1.0047447863691175e-05, |
| "loss": 0.3523, |
| "num_tokens": 2227667173.0, |
| "step": 8510 |
| }, |
| { |
| "epoch": 3.9696969696969697, |
| "grad_norm": 0.31305493281081237, |
| "learning_rate": 1.0025573909921515e-05, |
| "loss": 0.3553, |
| "num_tokens": 2228973398.0, |
| "step": 8515 |
| }, |
| { |
| "epoch": 3.972027972027972, |
| "grad_norm": 0.3312572614176095, |
| "learning_rate": 1.0003741495982034e-05, |
| "loss": 0.3563, |
| "num_tokens": 2230272637.0, |
| "step": 8520 |
| }, |
| { |
| "epoch": 3.9743589743589745, |
| "grad_norm": 0.3358237225226056, |
| "learning_rate": 9.981950673772256e-06, |
| "loss": 0.3611, |
| "num_tokens": 2231583357.0, |
| "step": 8525 |
| }, |
| { |
| "epoch": 3.976689976689977, |
| "grad_norm": 0.31567748638452275, |
| "learning_rate": 9.960201495092871e-06, |
| "loss": 0.37, |
| "num_tokens": 2232894077.0, |
| "step": 8530 |
| }, |
| { |
| "epoch": 3.979020979020979, |
| "grad_norm": 0.33864552842513596, |
| "learning_rate": 9.938494011645553e-06, |
| "loss": 0.3614, |
| "num_tokens": 2234204797.0, |
| "step": 8535 |
| }, |
| { |
| "epoch": 3.981351981351981, |
| "grad_norm": 0.3294973703926195, |
| "learning_rate": 9.916828275032868e-06, |
| "loss": 0.3585, |
| "num_tokens": 2235502698.0, |
| "step": 8540 |
| }, |
| { |
| "epoch": 3.983682983682984, |
| "grad_norm": 0.324896447348713, |
| "learning_rate": 9.895204336758132e-06, |
| "loss": 0.3539, |
| "num_tokens": 2236813418.0, |
| "step": 8545 |
| }, |
| { |
| "epoch": 3.986013986013986, |
| "grad_norm": 0.31758055886736325, |
| "learning_rate": 9.87362224822531e-06, |
| "loss": 0.3543, |
| "num_tokens": 2238114298.0, |
| "step": 8550 |
| }, |
| { |
| "epoch": 3.988344988344988, |
| "grad_norm": 0.34580160569991886, |
| "learning_rate": 9.85208206073889e-06, |
| "loss": 0.3552, |
| "num_tokens": 2239417127.0, |
| "step": 8555 |
| }, |
| { |
| "epoch": 3.9906759906759905, |
| "grad_norm": 0.3386420228574512, |
| "learning_rate": 9.830583825503725e-06, |
| "loss": 0.3521, |
| "num_tokens": 2240727847.0, |
| "step": 8560 |
| }, |
| { |
| "epoch": 3.993006993006993, |
| "grad_norm": 0.32993309995325326, |
| "learning_rate": 9.80912759362497e-06, |
| "loss": 0.3504, |
| "num_tokens": 2242038567.0, |
| "step": 8565 |
| }, |
| { |
| "epoch": 3.9953379953379953, |
| "grad_norm": 0.34047566030766635, |
| "learning_rate": 9.787713416107919e-06, |
| "loss": 0.3535, |
| "num_tokens": 2243349287.0, |
| "step": 8570 |
| }, |
| { |
| "epoch": 3.9976689976689976, |
| "grad_norm": 0.32231730842208467, |
| "learning_rate": 9.76634134385788e-06, |
| "loss": 0.3576, |
| "num_tokens": 2244653368.0, |
| "step": 8575 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.3196697658342358, |
| "learning_rate": 9.745011427680106e-06, |
| "loss": 0.3417, |
| "num_tokens": 2245952708.0, |
| "step": 8580 |
| }, |
| { |
| "epoch": 4.002331002331002, |
| "grad_norm": 0.3154639847560265, |
| "learning_rate": 9.723723718279595e-06, |
| "loss": 0.3027, |
| "num_tokens": 2247263428.0, |
| "step": 8585 |
| }, |
| { |
| "epoch": 4.004662004662005, |
| "grad_norm": 0.3627171601549924, |
| "learning_rate": 9.702478266261042e-06, |
| "loss": 0.3105, |
| "num_tokens": 2248564079.0, |
| "step": 8590 |
| }, |
| { |
| "epoch": 4.006993006993007, |
| "grad_norm": 0.36469519862133226, |
| "learning_rate": 9.68127512212868e-06, |
| "loss": 0.3218, |
| "num_tokens": 2249874799.0, |
| "step": 8595 |
| }, |
| { |
| "epoch": 4.0093240093240095, |
| "grad_norm": 0.3315289199193423, |
| "learning_rate": 9.660114336286164e-06, |
| "loss": 0.3212, |
| "num_tokens": 2251185519.0, |
| "step": 8600 |
| }, |
| { |
| "epoch": 4.011655011655011, |
| "grad_norm": 0.35875417884768623, |
| "learning_rate": 9.638995959036456e-06, |
| "loss": 0.3109, |
| "num_tokens": 2252491344.0, |
| "step": 8605 |
| }, |
| { |
| "epoch": 4.013986013986014, |
| "grad_norm": 0.32867156865417013, |
| "learning_rate": 9.617920040581724e-06, |
| "loss": 0.303, |
| "num_tokens": 2253802064.0, |
| "step": 8610 |
| }, |
| { |
| "epoch": 4.016317016317016, |
| "grad_norm": 0.310349653790998, |
| "learning_rate": 9.596886631023169e-06, |
| "loss": 0.3094, |
| "num_tokens": 2255108675.0, |
| "step": 8615 |
| }, |
| { |
| "epoch": 4.018648018648019, |
| "grad_norm": 0.32806845035826643, |
| "learning_rate": 9.575895780360969e-06, |
| "loss": 0.3207, |
| "num_tokens": 2256419395.0, |
| "step": 8620 |
| }, |
| { |
| "epoch": 4.020979020979021, |
| "grad_norm": 0.3217206000624844, |
| "learning_rate": 9.55494753849413e-06, |
| "loss": 0.3088, |
| "num_tokens": 2257730115.0, |
| "step": 8625 |
| }, |
| { |
| "epoch": 4.023310023310024, |
| "grad_norm": 0.33329725907377766, |
| "learning_rate": 9.534041955220353e-06, |
| "loss": 0.309, |
| "num_tokens": 2259040835.0, |
| "step": 8630 |
| }, |
| { |
| "epoch": 4.0256410256410255, |
| "grad_norm": 0.3237761512977047, |
| "learning_rate": 9.513179080235933e-06, |
| "loss": 0.3108, |
| "num_tokens": 2260351555.0, |
| "step": 8635 |
| }, |
| { |
| "epoch": 4.027972027972028, |
| "grad_norm": 0.3301327078317335, |
| "learning_rate": 9.492358963135671e-06, |
| "loss": 0.3075, |
| "num_tokens": 2261662275.0, |
| "step": 8640 |
| }, |
| { |
| "epoch": 4.03030303030303, |
| "grad_norm": 0.3257522642348306, |
| "learning_rate": 9.47158165341269e-06, |
| "loss": 0.3167, |
| "num_tokens": 2262972995.0, |
| "step": 8645 |
| }, |
| { |
| "epoch": 4.032634032634032, |
| "grad_norm": 0.3400399129587691, |
| "learning_rate": 9.450847200458351e-06, |
| "loss": 0.3144, |
| "num_tokens": 2264283715.0, |
| "step": 8650 |
| }, |
| { |
| "epoch": 4.034965034965035, |
| "grad_norm": 0.3279323276345651, |
| "learning_rate": 9.430155653562176e-06, |
| "loss": 0.3138, |
| "num_tokens": 2265594435.0, |
| "step": 8655 |
| }, |
| { |
| "epoch": 4.037296037296037, |
| "grad_norm": 0.34371766089078787, |
| "learning_rate": 9.409507061911648e-06, |
| "loss": 0.3153, |
| "num_tokens": 2266897223.0, |
| "step": 8660 |
| }, |
| { |
| "epoch": 4.03962703962704, |
| "grad_norm": 0.3237833855664639, |
| "learning_rate": 9.38890147459216e-06, |
| "loss": 0.3141, |
| "num_tokens": 2268207943.0, |
| "step": 8665 |
| }, |
| { |
| "epoch": 4.041958041958042, |
| "grad_norm": 0.33288798285203314, |
| "learning_rate": 9.368338940586866e-06, |
| "loss": 0.3144, |
| "num_tokens": 2269518663.0, |
| "step": 8670 |
| }, |
| { |
| "epoch": 4.0442890442890445, |
| "grad_norm": 0.34404924053052394, |
| "learning_rate": 9.347819508776593e-06, |
| "loss": 0.3142, |
| "num_tokens": 2270829383.0, |
| "step": 8675 |
| }, |
| { |
| "epoch": 4.046620046620046, |
| "grad_norm": 0.3366069132240311, |
| "learning_rate": 9.327343227939677e-06, |
| "loss": 0.3118, |
| "num_tokens": 2272140103.0, |
| "step": 8680 |
| }, |
| { |
| "epoch": 4.048951048951049, |
| "grad_norm": 0.33022829494586375, |
| "learning_rate": 9.306910146751903e-06, |
| "loss": 0.3025, |
| "num_tokens": 2273448784.0, |
| "step": 8685 |
| }, |
| { |
| "epoch": 4.051282051282051, |
| "grad_norm": 0.3362236980144924, |
| "learning_rate": 9.286520313786359e-06, |
| "loss": 0.3062, |
| "num_tokens": 2274759504.0, |
| "step": 8690 |
| }, |
| { |
| "epoch": 4.053613053613054, |
| "grad_norm": 0.3375407896276986, |
| "learning_rate": 9.2661737775133e-06, |
| "loss": 0.3115, |
| "num_tokens": 2276070224.0, |
| "step": 8695 |
| }, |
| { |
| "epoch": 4.055944055944056, |
| "grad_norm": 0.3387055817635362, |
| "learning_rate": 9.245870586300086e-06, |
| "loss": 0.3076, |
| "num_tokens": 2277380944.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 4.058275058275059, |
| "grad_norm": 0.33597806666914465, |
| "learning_rate": 9.225610788411028e-06, |
| "loss": 0.3124, |
| "num_tokens": 2278683128.0, |
| "step": 8705 |
| }, |
| { |
| "epoch": 4.0606060606060606, |
| "grad_norm": 0.33166354222510536, |
| "learning_rate": 9.205394432007274e-06, |
| "loss": 0.3195, |
| "num_tokens": 2279993848.0, |
| "step": 8710 |
| }, |
| { |
| "epoch": 4.062937062937063, |
| "grad_norm": 0.340915400738789, |
| "learning_rate": 9.185221565146719e-06, |
| "loss": 0.3129, |
| "num_tokens": 2281304568.0, |
| "step": 8715 |
| }, |
| { |
| "epoch": 4.065268065268065, |
| "grad_norm": 0.324138237680919, |
| "learning_rate": 9.165092235783872e-06, |
| "loss": 0.3026, |
| "num_tokens": 2282615288.0, |
| "step": 8720 |
| }, |
| { |
| "epoch": 4.067599067599067, |
| "grad_norm": 0.32248302399372863, |
| "learning_rate": 9.145006491769734e-06, |
| "loss": 0.3131, |
| "num_tokens": 2283926008.0, |
| "step": 8725 |
| }, |
| { |
| "epoch": 4.06993006993007, |
| "grad_norm": 0.3300129820806774, |
| "learning_rate": 9.124964380851697e-06, |
| "loss": 0.3147, |
| "num_tokens": 2285236728.0, |
| "step": 8730 |
| }, |
| { |
| "epoch": 4.072261072261072, |
| "grad_norm": 0.3281535752862011, |
| "learning_rate": 9.104965950673457e-06, |
| "loss": 0.317, |
| "num_tokens": 2286547448.0, |
| "step": 8735 |
| }, |
| { |
| "epoch": 4.074592074592075, |
| "grad_norm": 0.33825429279702496, |
| "learning_rate": 9.085011248774844e-06, |
| "loss": 0.3056, |
| "num_tokens": 2287858168.0, |
| "step": 8740 |
| }, |
| { |
| "epoch": 4.076923076923077, |
| "grad_norm": 0.33028034320434174, |
| "learning_rate": 9.065100322591735e-06, |
| "loss": 0.3084, |
| "num_tokens": 2289164548.0, |
| "step": 8745 |
| }, |
| { |
| "epoch": 4.0792540792540795, |
| "grad_norm": 0.3353664679588956, |
| "learning_rate": 9.045233219455967e-06, |
| "loss": 0.3257, |
| "num_tokens": 2290475268.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 4.081585081585081, |
| "grad_norm": 0.3315814088066854, |
| "learning_rate": 9.025409986595191e-06, |
| "loss": 0.3131, |
| "num_tokens": 2291785988.0, |
| "step": 8755 |
| }, |
| { |
| "epoch": 4.083916083916084, |
| "grad_norm": 0.32829158816501314, |
| "learning_rate": 9.005630671132767e-06, |
| "loss": 0.3247, |
| "num_tokens": 2293090934.0, |
| "step": 8760 |
| }, |
| { |
| "epoch": 4.086247086247086, |
| "grad_norm": 0.3400820384105229, |
| "learning_rate": 8.985895320087657e-06, |
| "loss": 0.322, |
| "num_tokens": 2294401654.0, |
| "step": 8765 |
| }, |
| { |
| "epoch": 4.088578088578089, |
| "grad_norm": 0.3469249174397503, |
| "learning_rate": 8.96620398037432e-06, |
| "loss": 0.3204, |
| "num_tokens": 2295712374.0, |
| "step": 8770 |
| }, |
| { |
| "epoch": 4.090909090909091, |
| "grad_norm": 0.33092792284960687, |
| "learning_rate": 8.946556698802578e-06, |
| "loss": 0.3171, |
| "num_tokens": 2297023094.0, |
| "step": 8775 |
| }, |
| { |
| "epoch": 4.093240093240094, |
| "grad_norm": 0.34641277690350863, |
| "learning_rate": 8.926953522077528e-06, |
| "loss": 0.314, |
| "num_tokens": 2298333814.0, |
| "step": 8780 |
| }, |
| { |
| "epoch": 4.0955710955710956, |
| "grad_norm": 0.32438963991128505, |
| "learning_rate": 8.907394496799429e-06, |
| "loss": 0.3143, |
| "num_tokens": 2299637704.0, |
| "step": 8785 |
| }, |
| { |
| "epoch": 4.0979020979020975, |
| "grad_norm": 0.33556755134102456, |
| "learning_rate": 8.887879669463562e-06, |
| "loss": 0.3144, |
| "num_tokens": 2300946471.0, |
| "step": 8790 |
| }, |
| { |
| "epoch": 4.1002331002331, |
| "grad_norm": 0.3370515747236785, |
| "learning_rate": 8.868409086460167e-06, |
| "loss": 0.3138, |
| "num_tokens": 2302257191.0, |
| "step": 8795 |
| }, |
| { |
| "epoch": 4.102564102564102, |
| "grad_norm": 0.32539271406101317, |
| "learning_rate": 8.848982794074288e-06, |
| "loss": 0.3013, |
| "num_tokens": 2303567911.0, |
| "step": 8800 |
| }, |
| { |
| "epoch": 4.104895104895105, |
| "grad_norm": 0.3348268658945831, |
| "learning_rate": 8.829600838485691e-06, |
| "loss": 0.3191, |
| "num_tokens": 2304878631.0, |
| "step": 8805 |
| }, |
| { |
| "epoch": 4.107226107226107, |
| "grad_norm": 0.3254305192351051, |
| "learning_rate": 8.810263265768749e-06, |
| "loss": 0.3097, |
| "num_tokens": 2306189351.0, |
| "step": 8810 |
| }, |
| { |
| "epoch": 4.10955710955711, |
| "grad_norm": 0.3528490799980861, |
| "learning_rate": 8.790970121892318e-06, |
| "loss": 0.3144, |
| "num_tokens": 2307500071.0, |
| "step": 8815 |
| }, |
| { |
| "epoch": 4.111888111888112, |
| "grad_norm": 0.34560029308995477, |
| "learning_rate": 8.771721452719644e-06, |
| "loss": 0.329, |
| "num_tokens": 2308800878.0, |
| "step": 8820 |
| }, |
| { |
| "epoch": 4.1142191142191145, |
| "grad_norm": 0.34271459470922583, |
| "learning_rate": 8.752517304008263e-06, |
| "loss": 0.3179, |
| "num_tokens": 2310111598.0, |
| "step": 8825 |
| }, |
| { |
| "epoch": 4.116550116550116, |
| "grad_norm": 0.34444328829999327, |
| "learning_rate": 8.733357721409847e-06, |
| "loss": 0.3074, |
| "num_tokens": 2311422318.0, |
| "step": 8830 |
| }, |
| { |
| "epoch": 4.118881118881119, |
| "grad_norm": 0.3345099910245634, |
| "learning_rate": 8.714242750470155e-06, |
| "loss": 0.3169, |
| "num_tokens": 2312733038.0, |
| "step": 8835 |
| }, |
| { |
| "epoch": 4.121212121212121, |
| "grad_norm": 0.336105428464414, |
| "learning_rate": 8.695172436628885e-06, |
| "loss": 0.3074, |
| "num_tokens": 2314036848.0, |
| "step": 8840 |
| }, |
| { |
| "epoch": 4.123543123543124, |
| "grad_norm": 0.3432184731029786, |
| "learning_rate": 8.676146825219574e-06, |
| "loss": 0.3244, |
| "num_tokens": 2315347568.0, |
| "step": 8845 |
| }, |
| { |
| "epoch": 4.125874125874126, |
| "grad_norm": 0.3514286741500932, |
| "learning_rate": 8.657165961469496e-06, |
| "loss": 0.3122, |
| "num_tokens": 2316647401.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 4.128205128205128, |
| "grad_norm": 0.3464258867521915, |
| "learning_rate": 8.63822989049955e-06, |
| "loss": 0.3121, |
| "num_tokens": 2317958121.0, |
| "step": 8855 |
| }, |
| { |
| "epoch": 4.130536130536131, |
| "grad_norm": 0.33188673347599557, |
| "learning_rate": 8.619338657324167e-06, |
| "loss": 0.308, |
| "num_tokens": 2319268841.0, |
| "step": 8860 |
| }, |
| { |
| "epoch": 4.1328671328671325, |
| "grad_norm": 0.33368696164039957, |
| "learning_rate": 8.600492306851166e-06, |
| "loss": 0.3115, |
| "num_tokens": 2320579561.0, |
| "step": 8865 |
| }, |
| { |
| "epoch": 4.135198135198135, |
| "grad_norm": 0.3233544453202552, |
| "learning_rate": 8.581690883881696e-06, |
| "loss": 0.3185, |
| "num_tokens": 2321890281.0, |
| "step": 8870 |
| }, |
| { |
| "epoch": 4.137529137529137, |
| "grad_norm": 0.33173085265251867, |
| "learning_rate": 8.562934433110101e-06, |
| "loss": 0.3081, |
| "num_tokens": 2323194296.0, |
| "step": 8875 |
| }, |
| { |
| "epoch": 4.13986013986014, |
| "grad_norm": 0.32262540271377654, |
| "learning_rate": 8.544222999123798e-06, |
| "loss": 0.3099, |
| "num_tokens": 2324493603.0, |
| "step": 8880 |
| }, |
| { |
| "epoch": 4.142191142191142, |
| "grad_norm": 0.3267911687013554, |
| "learning_rate": 8.525556626403214e-06, |
| "loss": 0.3149, |
| "num_tokens": 2325804323.0, |
| "step": 8885 |
| }, |
| { |
| "epoch": 4.144522144522145, |
| "grad_norm": 0.3668716182370164, |
| "learning_rate": 8.506935359321655e-06, |
| "loss": 0.317, |
| "num_tokens": 2327101698.0, |
| "step": 8890 |
| }, |
| { |
| "epoch": 4.146853146853147, |
| "grad_norm": 0.3223134667321982, |
| "learning_rate": 8.488359242145182e-06, |
| "loss": 0.3086, |
| "num_tokens": 2328412418.0, |
| "step": 8895 |
| }, |
| { |
| "epoch": 4.1491841491841495, |
| "grad_norm": 0.31791906605050724, |
| "learning_rate": 8.469828319032555e-06, |
| "loss": 0.3112, |
| "num_tokens": 2329705306.0, |
| "step": 8900 |
| }, |
| { |
| "epoch": 4.151515151515151, |
| "grad_norm": 0.3238691360164819, |
| "learning_rate": 8.451342634035081e-06, |
| "loss": 0.312, |
| "num_tokens": 2331002662.0, |
| "step": 8905 |
| }, |
| { |
| "epoch": 4.153846153846154, |
| "grad_norm": 0.32796231037465196, |
| "learning_rate": 8.432902231096532e-06, |
| "loss": 0.318, |
| "num_tokens": 2332313382.0, |
| "step": 8910 |
| }, |
| { |
| "epoch": 4.156177156177156, |
| "grad_norm": 0.33796829228728653, |
| "learning_rate": 8.414507154053038e-06, |
| "loss": 0.309, |
| "num_tokens": 2333624102.0, |
| "step": 8915 |
| }, |
| { |
| "epoch": 4.158508158508159, |
| "grad_norm": 0.32836861040158594, |
| "learning_rate": 8.396157446632985e-06, |
| "loss": 0.3019, |
| "num_tokens": 2334934822.0, |
| "step": 8920 |
| }, |
| { |
| "epoch": 4.160839160839161, |
| "grad_norm": 0.32684570489397824, |
| "learning_rate": 8.3778531524569e-06, |
| "loss": 0.312, |
| "num_tokens": 2336245542.0, |
| "step": 8925 |
| }, |
| { |
| "epoch": 4.163170163170163, |
| "grad_norm": 0.3318787549687187, |
| "learning_rate": 8.359594315037348e-06, |
| "loss": 0.3202, |
| "num_tokens": 2337556262.0, |
| "step": 8930 |
| }, |
| { |
| "epoch": 4.165501165501166, |
| "grad_norm": 0.33111122882180744, |
| "learning_rate": 8.341380977778866e-06, |
| "loss": 0.3155, |
| "num_tokens": 2338853654.0, |
| "step": 8935 |
| }, |
| { |
| "epoch": 4.1678321678321675, |
| "grad_norm": 0.3224813447044692, |
| "learning_rate": 8.323213183977793e-06, |
| "loss": 0.3091, |
| "num_tokens": 2340164374.0, |
| "step": 8940 |
| }, |
| { |
| "epoch": 4.17016317016317, |
| "grad_norm": 0.3456964708575295, |
| "learning_rate": 8.305090976822214e-06, |
| "loss": 0.31, |
| "num_tokens": 2341462721.0, |
| "step": 8945 |
| }, |
| { |
| "epoch": 4.172494172494172, |
| "grad_norm": 0.33107924062669025, |
| "learning_rate": 8.287014399391866e-06, |
| "loss": 0.3207, |
| "num_tokens": 2342762061.0, |
| "step": 8950 |
| }, |
| { |
| "epoch": 4.174825174825175, |
| "grad_norm": 0.33431120526499053, |
| "learning_rate": 8.268983494657993e-06, |
| "loss": 0.3179, |
| "num_tokens": 2344072781.0, |
| "step": 8955 |
| }, |
| { |
| "epoch": 4.177156177156177, |
| "grad_norm": 0.32952875537848103, |
| "learning_rate": 8.250998305483268e-06, |
| "loss": 0.306, |
| "num_tokens": 2345376435.0, |
| "step": 8960 |
| }, |
| { |
| "epoch": 4.17948717948718, |
| "grad_norm": 0.3456893653270103, |
| "learning_rate": 8.233058874621704e-06, |
| "loss": 0.326, |
| "num_tokens": 2346687155.0, |
| "step": 8965 |
| }, |
| { |
| "epoch": 4.181818181818182, |
| "grad_norm": 0.3394277090932486, |
| "learning_rate": 8.215165244718532e-06, |
| "loss": 0.3144, |
| "num_tokens": 2347997875.0, |
| "step": 8970 |
| }, |
| { |
| "epoch": 4.1841491841491845, |
| "grad_norm": 0.33738822320988965, |
| "learning_rate": 8.197317458310092e-06, |
| "loss": 0.3092, |
| "num_tokens": 2349296210.0, |
| "step": 8975 |
| }, |
| { |
| "epoch": 4.186480186480186, |
| "grad_norm": 0.34369988163488063, |
| "learning_rate": 8.179515557823769e-06, |
| "loss": 0.3109, |
| "num_tokens": 2350606930.0, |
| "step": 8980 |
| }, |
| { |
| "epoch": 4.188811188811189, |
| "grad_norm": 0.33185490990267685, |
| "learning_rate": 8.161759585577863e-06, |
| "loss": 0.3222, |
| "num_tokens": 2351905894.0, |
| "step": 8985 |
| }, |
| { |
| "epoch": 4.191142191142191, |
| "grad_norm": 0.33437248830096067, |
| "learning_rate": 8.144049583781475e-06, |
| "loss": 0.313, |
| "num_tokens": 2353216614.0, |
| "step": 8990 |
| }, |
| { |
| "epoch": 4.193473193473194, |
| "grad_norm": 0.32606670299996504, |
| "learning_rate": 8.126385594534448e-06, |
| "loss": 0.3155, |
| "num_tokens": 2354527334.0, |
| "step": 8995 |
| }, |
| { |
| "epoch": 4.195804195804196, |
| "grad_norm": 0.354696304621865, |
| "learning_rate": 8.108767659827245e-06, |
| "loss": 0.3019, |
| "num_tokens": 2355838054.0, |
| "step": 9000 |
| }, |
| { |
| "epoch": 4.198135198135198, |
| "grad_norm": 0.30886261035523754, |
| "learning_rate": 8.09119582154083e-06, |
| "loss": 0.3125, |
| "num_tokens": 2357148774.0, |
| "step": 9005 |
| }, |
| { |
| "epoch": 4.200466200466201, |
| "grad_norm": 0.32186205072669777, |
| "learning_rate": 8.07367012144661e-06, |
| "loss": 0.3102, |
| "num_tokens": 2358459494.0, |
| "step": 9010 |
| }, |
| { |
| "epoch": 4.2027972027972025, |
| "grad_norm": 0.32692083529916577, |
| "learning_rate": 8.05619060120629e-06, |
| "loss": 0.3097, |
| "num_tokens": 2359770023.0, |
| "step": 9015 |
| }, |
| { |
| "epoch": 4.205128205128205, |
| "grad_norm": 0.3536074622682776, |
| "learning_rate": 8.038757302371816e-06, |
| "loss": 0.3124, |
| "num_tokens": 2361080743.0, |
| "step": 9020 |
| }, |
| { |
| "epoch": 4.207459207459207, |
| "grad_norm": 0.3320417618080785, |
| "learning_rate": 8.021370266385257e-06, |
| "loss": 0.3143, |
| "num_tokens": 2362384296.0, |
| "step": 9025 |
| }, |
| { |
| "epoch": 4.20979020979021, |
| "grad_norm": 0.33236316161184004, |
| "learning_rate": 8.004029534578694e-06, |
| "loss": 0.3202, |
| "num_tokens": 2363695016.0, |
| "step": 9030 |
| }, |
| { |
| "epoch": 4.212121212121212, |
| "grad_norm": 0.3282580657954475, |
| "learning_rate": 7.986735148174142e-06, |
| "loss": 0.3102, |
| "num_tokens": 2364989382.0, |
| "step": 9035 |
| }, |
| { |
| "epoch": 4.214452214452215, |
| "grad_norm": 0.3449624050121814, |
| "learning_rate": 7.969487148283451e-06, |
| "loss": 0.3222, |
| "num_tokens": 2366300102.0, |
| "step": 9040 |
| }, |
| { |
| "epoch": 4.216783216783217, |
| "grad_norm": 0.33015411004128303, |
| "learning_rate": 7.95228557590819e-06, |
| "loss": 0.3189, |
| "num_tokens": 2367610822.0, |
| "step": 9045 |
| }, |
| { |
| "epoch": 4.2191142191142195, |
| "grad_norm": 0.3482626079276277, |
| "learning_rate": 7.935130471939572e-06, |
| "loss": 0.319, |
| "num_tokens": 2368919557.0, |
| "step": 9050 |
| }, |
| { |
| "epoch": 4.221445221445221, |
| "grad_norm": 0.3313155428462869, |
| "learning_rate": 7.918021877158333e-06, |
| "loss": 0.3229, |
| "num_tokens": 2370230277.0, |
| "step": 9055 |
| }, |
| { |
| "epoch": 4.223776223776224, |
| "grad_norm": 0.3286320237256984, |
| "learning_rate": 7.900959832234667e-06, |
| "loss": 0.315, |
| "num_tokens": 2371540997.0, |
| "step": 9060 |
| }, |
| { |
| "epoch": 4.226107226107226, |
| "grad_norm": 0.32781843149335, |
| "learning_rate": 7.883944377728091e-06, |
| "loss": 0.3168, |
| "num_tokens": 2372851717.0, |
| "step": 9065 |
| }, |
| { |
| "epoch": 4.228438228438228, |
| "grad_norm": 0.3199378658171041, |
| "learning_rate": 7.866975554087384e-06, |
| "loss": 0.3196, |
| "num_tokens": 2374154156.0, |
| "step": 9070 |
| }, |
| { |
| "epoch": 4.230769230769231, |
| "grad_norm": 0.32318125321218416, |
| "learning_rate": 7.85005340165047e-06, |
| "loss": 0.3109, |
| "num_tokens": 2375464876.0, |
| "step": 9075 |
| }, |
| { |
| "epoch": 4.233100233100233, |
| "grad_norm": 0.32948239675038016, |
| "learning_rate": 7.833177960644318e-06, |
| "loss": 0.3149, |
| "num_tokens": 2376775596.0, |
| "step": 9080 |
| }, |
| { |
| "epoch": 4.235431235431236, |
| "grad_norm": 0.330342035385706, |
| "learning_rate": 7.816349271184873e-06, |
| "loss": 0.3228, |
| "num_tokens": 2378072943.0, |
| "step": 9085 |
| }, |
| { |
| "epoch": 4.2377622377622375, |
| "grad_norm": 0.3343981894720827, |
| "learning_rate": 7.79956737327693e-06, |
| "loss": 0.3272, |
| "num_tokens": 2379383663.0, |
| "step": 9090 |
| }, |
| { |
| "epoch": 4.24009324009324, |
| "grad_norm": 0.3370576768884812, |
| "learning_rate": 7.782832306814055e-06, |
| "loss": 0.3215, |
| "num_tokens": 2380694383.0, |
| "step": 9095 |
| }, |
| { |
| "epoch": 4.242424242424242, |
| "grad_norm": 0.34055936569913503, |
| "learning_rate": 7.766144111578488e-06, |
| "loss": 0.3154, |
| "num_tokens": 2382005103.0, |
| "step": 9100 |
| }, |
| { |
| "epoch": 4.244755244755245, |
| "grad_norm": 0.3250618537313876, |
| "learning_rate": 7.749502827241053e-06, |
| "loss": 0.3054, |
| "num_tokens": 2383315823.0, |
| "step": 9105 |
| }, |
| { |
| "epoch": 4.247086247086247, |
| "grad_norm": 0.33979822723341285, |
| "learning_rate": 7.732908493361054e-06, |
| "loss": 0.3131, |
| "num_tokens": 2384626543.0, |
| "step": 9110 |
| }, |
| { |
| "epoch": 4.24941724941725, |
| "grad_norm": 0.3448739247944029, |
| "learning_rate": 7.716361149386169e-06, |
| "loss": 0.3154, |
| "num_tokens": 2385937263.0, |
| "step": 9115 |
| }, |
| { |
| "epoch": 4.251748251748252, |
| "grad_norm": 0.3513942678682862, |
| "learning_rate": 7.69986083465241e-06, |
| "loss": 0.3161, |
| "num_tokens": 2387247983.0, |
| "step": 9120 |
| }, |
| { |
| "epoch": 4.2540792540792545, |
| "grad_norm": 0.33283021075110353, |
| "learning_rate": 7.68340758838396e-06, |
| "loss": 0.3138, |
| "num_tokens": 2388558703.0, |
| "step": 9125 |
| }, |
| { |
| "epoch": 4.256410256410256, |
| "grad_norm": 0.3241327933794044, |
| "learning_rate": 7.667001449693118e-06, |
| "loss": 0.3073, |
| "num_tokens": 2389869423.0, |
| "step": 9130 |
| }, |
| { |
| "epoch": 4.258741258741258, |
| "grad_norm": 0.33663247742640745, |
| "learning_rate": 7.650642457580216e-06, |
| "loss": 0.3245, |
| "num_tokens": 2391180143.0, |
| "step": 9135 |
| }, |
| { |
| "epoch": 4.261072261072261, |
| "grad_norm": 0.35167952703734645, |
| "learning_rate": 7.634330650933491e-06, |
| "loss": 0.318, |
| "num_tokens": 2392490863.0, |
| "step": 9140 |
| }, |
| { |
| "epoch": 4.263403263403263, |
| "grad_norm": 0.3458772358952002, |
| "learning_rate": 7.618066068529013e-06, |
| "loss": 0.3217, |
| "num_tokens": 2393801583.0, |
| "step": 9145 |
| }, |
| { |
| "epoch": 4.265734265734266, |
| "grad_norm": 0.33672532361513857, |
| "learning_rate": 7.601848749030614e-06, |
| "loss": 0.3153, |
| "num_tokens": 2395112303.0, |
| "step": 9150 |
| }, |
| { |
| "epoch": 4.268065268065268, |
| "grad_norm": 0.3412841871135003, |
| "learning_rate": 7.5856787309897485e-06, |
| "loss": 0.3129, |
| "num_tokens": 2396423023.0, |
| "step": 9155 |
| }, |
| { |
| "epoch": 4.270396270396271, |
| "grad_norm": 0.34739331638218984, |
| "learning_rate": 7.5695560528454335e-06, |
| "loss": 0.3268, |
| "num_tokens": 2397733743.0, |
| "step": 9160 |
| }, |
| { |
| "epoch": 4.2727272727272725, |
| "grad_norm": 0.32838660057211366, |
| "learning_rate": 7.553480752924152e-06, |
| "loss": 0.3176, |
| "num_tokens": 2399044463.0, |
| "step": 9165 |
| }, |
| { |
| "epoch": 4.275058275058275, |
| "grad_norm": 0.34100398627810036, |
| "learning_rate": 7.537452869439773e-06, |
| "loss": 0.3238, |
| "num_tokens": 2400355183.0, |
| "step": 9170 |
| }, |
| { |
| "epoch": 4.277389277389277, |
| "grad_norm": 0.34349089530086746, |
| "learning_rate": 7.521472440493424e-06, |
| "loss": 0.3241, |
| "num_tokens": 2401665903.0, |
| "step": 9175 |
| }, |
| { |
| "epoch": 4.27972027972028, |
| "grad_norm": 0.32237561114424745, |
| "learning_rate": 7.5055395040734375e-06, |
| "loss": 0.31, |
| "num_tokens": 2402964469.0, |
| "step": 9180 |
| }, |
| { |
| "epoch": 4.282051282051282, |
| "grad_norm": 0.34307510366114763, |
| "learning_rate": 7.489654098055261e-06, |
| "loss": 0.3307, |
| "num_tokens": 2404275189.0, |
| "step": 9185 |
| }, |
| { |
| "epoch": 4.284382284382285, |
| "grad_norm": 0.3541210255290945, |
| "learning_rate": 7.473816260201326e-06, |
| "loss": 0.3213, |
| "num_tokens": 2405585909.0, |
| "step": 9190 |
| }, |
| { |
| "epoch": 4.286713286713287, |
| "grad_norm": 0.33830233519487823, |
| "learning_rate": 7.458026028161005e-06, |
| "loss": 0.3171, |
| "num_tokens": 2406871865.0, |
| "step": 9195 |
| }, |
| { |
| "epoch": 4.2890442890442895, |
| "grad_norm": 0.3280850693643302, |
| "learning_rate": 7.442283439470503e-06, |
| "loss": 0.3234, |
| "num_tokens": 2408182585.0, |
| "step": 9200 |
| }, |
| { |
| "epoch": 4.291375291375291, |
| "grad_norm": 0.34250336046966473, |
| "learning_rate": 7.426588531552755e-06, |
| "loss": 0.3188, |
| "num_tokens": 2409488739.0, |
| "step": 9205 |
| }, |
| { |
| "epoch": 4.293706293706293, |
| "grad_norm": 0.33353326558959956, |
| "learning_rate": 7.4109413417173645e-06, |
| "loss": 0.3162, |
| "num_tokens": 2410799459.0, |
| "step": 9210 |
| }, |
| { |
| "epoch": 4.296037296037296, |
| "grad_norm": 0.32332388680356194, |
| "learning_rate": 7.3953419071604965e-06, |
| "loss": 0.3229, |
| "num_tokens": 2412110179.0, |
| "step": 9215 |
| }, |
| { |
| "epoch": 4.298368298368298, |
| "grad_norm": 0.3240810807807097, |
| "learning_rate": 7.379790264964787e-06, |
| "loss": 0.3071, |
| "num_tokens": 2413420899.0, |
| "step": 9220 |
| }, |
| { |
| "epoch": 4.300699300699301, |
| "grad_norm": 0.33571497946632756, |
| "learning_rate": 7.364286452099268e-06, |
| "loss": 0.3247, |
| "num_tokens": 2414731619.0, |
| "step": 9225 |
| }, |
| { |
| "epoch": 4.303030303030303, |
| "grad_norm": 0.3156999850446858, |
| "learning_rate": 7.348830505419266e-06, |
| "loss": 0.3078, |
| "num_tokens": 2416042339.0, |
| "step": 9230 |
| }, |
| { |
| "epoch": 4.305361305361306, |
| "grad_norm": 0.31560747105000503, |
| "learning_rate": 7.333422461666334e-06, |
| "loss": 0.3079, |
| "num_tokens": 2417352288.0, |
| "step": 9235 |
| }, |
| { |
| "epoch": 4.3076923076923075, |
| "grad_norm": 0.31764759996906916, |
| "learning_rate": 7.318062357468133e-06, |
| "loss": 0.3109, |
| "num_tokens": 2418663008.0, |
| "step": 9240 |
| }, |
| { |
| "epoch": 4.31002331002331, |
| "grad_norm": 0.33954512734132447, |
| "learning_rate": 7.302750229338377e-06, |
| "loss": 0.3141, |
| "num_tokens": 2419973728.0, |
| "step": 9245 |
| }, |
| { |
| "epoch": 4.312354312354312, |
| "grad_norm": 0.33905444936733614, |
| "learning_rate": 7.287486113676732e-06, |
| "loss": 0.3055, |
| "num_tokens": 2421284448.0, |
| "step": 9250 |
| }, |
| { |
| "epoch": 4.314685314685315, |
| "grad_norm": 0.3516319488679377, |
| "learning_rate": 7.272270046768719e-06, |
| "loss": 0.3229, |
| "num_tokens": 2422595168.0, |
| "step": 9255 |
| }, |
| { |
| "epoch": 4.317016317016317, |
| "grad_norm": 0.31543886841356666, |
| "learning_rate": 7.257102064785647e-06, |
| "loss": 0.3026, |
| "num_tokens": 2423905888.0, |
| "step": 9260 |
| }, |
| { |
| "epoch": 4.31934731934732, |
| "grad_norm": 0.33654187527085266, |
| "learning_rate": 7.241982203784521e-06, |
| "loss": 0.316, |
| "num_tokens": 2425216608.0, |
| "step": 9265 |
| }, |
| { |
| "epoch": 4.321678321678322, |
| "grad_norm": 0.3453335987898658, |
| "learning_rate": 7.226910499707942e-06, |
| "loss": 0.3213, |
| "num_tokens": 2426527328.0, |
| "step": 9270 |
| }, |
| { |
| "epoch": 4.3240093240093245, |
| "grad_norm": 0.31972916985922056, |
| "learning_rate": 7.211886988384051e-06, |
| "loss": 0.3141, |
| "num_tokens": 2427838048.0, |
| "step": 9275 |
| }, |
| { |
| "epoch": 4.326340326340326, |
| "grad_norm": 0.31446079269320515, |
| "learning_rate": 7.196911705526405e-06, |
| "loss": 0.3161, |
| "num_tokens": 2429148768.0, |
| "step": 9280 |
| }, |
| { |
| "epoch": 4.328671328671328, |
| "grad_norm": 0.31884066154984625, |
| "learning_rate": 7.181984686733929e-06, |
| "loss": 0.3059, |
| "num_tokens": 2430459488.0, |
| "step": 9285 |
| }, |
| { |
| "epoch": 4.331002331002331, |
| "grad_norm": 0.338313943294416, |
| "learning_rate": 7.167105967490818e-06, |
| "loss": 0.3104, |
| "num_tokens": 2431770208.0, |
| "step": 9290 |
| }, |
| { |
| "epoch": 4.333333333333333, |
| "grad_norm": 0.3434119449215798, |
| "learning_rate": 7.1522755831664345e-06, |
| "loss": 0.3128, |
| "num_tokens": 2433080928.0, |
| "step": 9295 |
| }, |
| { |
| "epoch": 4.335664335664336, |
| "grad_norm": 0.31426173762807547, |
| "learning_rate": 7.137493569015252e-06, |
| "loss": 0.3136, |
| "num_tokens": 2434388313.0, |
| "step": 9300 |
| }, |
| { |
| "epoch": 4.337995337995338, |
| "grad_norm": 0.33860929397786954, |
| "learning_rate": 7.122759960176764e-06, |
| "loss": 0.316, |
| "num_tokens": 2435699033.0, |
| "step": 9305 |
| }, |
| { |
| "epoch": 4.340326340326341, |
| "grad_norm": 0.32512758447513196, |
| "learning_rate": 7.108074791675377e-06, |
| "loss": 0.3276, |
| "num_tokens": 2437009753.0, |
| "step": 9310 |
| }, |
| { |
| "epoch": 4.3426573426573425, |
| "grad_norm": 0.3408691365223931, |
| "learning_rate": 7.093438098420364e-06, |
| "loss": 0.3111, |
| "num_tokens": 2438320473.0, |
| "step": 9315 |
| }, |
| { |
| "epoch": 4.344988344988345, |
| "grad_norm": 10.7223619115978, |
| "learning_rate": 7.078849915205761e-06, |
| "loss": 0.3984, |
| "num_tokens": 2439624916.0, |
| "step": 9320 |
| }, |
| { |
| "epoch": 4.347319347319347, |
| "grad_norm": 0.3465031175218188, |
| "learning_rate": 7.06431027671028e-06, |
| "loss": 0.3282, |
| "num_tokens": 2440935636.0, |
| "step": 9325 |
| }, |
| { |
| "epoch": 4.34965034965035, |
| "grad_norm": 0.34298399330314866, |
| "learning_rate": 7.049819217497229e-06, |
| "loss": 0.3151, |
| "num_tokens": 2442246356.0, |
| "step": 9330 |
| }, |
| { |
| "epoch": 4.351981351981352, |
| "grad_norm": 0.32774175918296494, |
| "learning_rate": 7.0353767720144585e-06, |
| "loss": 0.311, |
| "num_tokens": 2443557076.0, |
| "step": 9335 |
| }, |
| { |
| "epoch": 4.354312354312354, |
| "grad_norm": 0.3445401287963301, |
| "learning_rate": 7.020982974594234e-06, |
| "loss": 0.3177, |
| "num_tokens": 2444867796.0, |
| "step": 9340 |
| }, |
| { |
| "epoch": 4.356643356643357, |
| "grad_norm": 0.33840413843763606, |
| "learning_rate": 7.006637859453166e-06, |
| "loss": 0.3175, |
| "num_tokens": 2446166667.0, |
| "step": 9345 |
| }, |
| { |
| "epoch": 4.358974358974359, |
| "grad_norm": 0.3442185853976276, |
| "learning_rate": 6.99234146069218e-06, |
| "loss": 0.3285, |
| "num_tokens": 2447477387.0, |
| "step": 9350 |
| }, |
| { |
| "epoch": 4.361305361305361, |
| "grad_norm": 0.32304618994576395, |
| "learning_rate": 6.978093812296353e-06, |
| "loss": 0.3241, |
| "num_tokens": 2448788107.0, |
| "step": 9355 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 0.3371003210685687, |
| "learning_rate": 6.963894948134886e-06, |
| "loss": 0.3153, |
| "num_tokens": 2450098827.0, |
| "step": 9360 |
| }, |
| { |
| "epoch": 4.365967365967366, |
| "grad_norm": 0.32770902047531997, |
| "learning_rate": 6.949744901961018e-06, |
| "loss": 0.3205, |
| "num_tokens": 2451409547.0, |
| "step": 9365 |
| }, |
| { |
| "epoch": 4.368298368298368, |
| "grad_norm": 0.3316976289164916, |
| "learning_rate": 6.935643707411941e-06, |
| "loss": 0.3181, |
| "num_tokens": 2452715351.0, |
| "step": 9370 |
| }, |
| { |
| "epoch": 4.370629370629371, |
| "grad_norm": 0.3297586670212026, |
| "learning_rate": 6.9215913980087e-06, |
| "loss": 0.3127, |
| "num_tokens": 2454026071.0, |
| "step": 9375 |
| }, |
| { |
| "epoch": 4.372960372960373, |
| "grad_norm": 0.34164744331202934, |
| "learning_rate": 6.907588007156147e-06, |
| "loss": 0.3167, |
| "num_tokens": 2455323641.0, |
| "step": 9380 |
| }, |
| { |
| "epoch": 4.375291375291376, |
| "grad_norm": 0.3325814425613154, |
| "learning_rate": 6.893633568142849e-06, |
| "loss": 0.3115, |
| "num_tokens": 2456634361.0, |
| "step": 9385 |
| }, |
| { |
| "epoch": 4.3776223776223775, |
| "grad_norm": 0.358839160865776, |
| "learning_rate": 6.87972811414099e-06, |
| "loss": 0.3007, |
| "num_tokens": 2457945081.0, |
| "step": 9390 |
| }, |
| { |
| "epoch": 4.37995337995338, |
| "grad_norm": 0.34252531902628414, |
| "learning_rate": 6.865871678206317e-06, |
| "loss": 0.3189, |
| "num_tokens": 2459255801.0, |
| "step": 9395 |
| }, |
| { |
| "epoch": 4.382284382284382, |
| "grad_norm": 0.3241607534033865, |
| "learning_rate": 6.85206429327806e-06, |
| "loss": 0.3063, |
| "num_tokens": 2460566521.0, |
| "step": 9400 |
| }, |
| { |
| "epoch": 4.384615384615385, |
| "grad_norm": 0.32915418617519726, |
| "learning_rate": 6.838305992178824e-06, |
| "loss": 0.3181, |
| "num_tokens": 2461877241.0, |
| "step": 9405 |
| }, |
| { |
| "epoch": 4.386946386946387, |
| "grad_norm": 0.3252694583787816, |
| "learning_rate": 6.824596807614559e-06, |
| "loss": 0.3115, |
| "num_tokens": 2463187961.0, |
| "step": 9410 |
| }, |
| { |
| "epoch": 4.389277389277389, |
| "grad_norm": 0.3293083654300197, |
| "learning_rate": 6.810936772174439e-06, |
| "loss": 0.3235, |
| "num_tokens": 2464495546.0, |
| "step": 9415 |
| }, |
| { |
| "epoch": 4.391608391608392, |
| "grad_norm": 0.3542236093862788, |
| "learning_rate": 6.797325918330806e-06, |
| "loss": 0.3032, |
| "num_tokens": 2465796046.0, |
| "step": 9420 |
| }, |
| { |
| "epoch": 4.393939393939394, |
| "grad_norm": 0.3330424585038265, |
| "learning_rate": 6.783764278439092e-06, |
| "loss": 0.3112, |
| "num_tokens": 2467106766.0, |
| "step": 9425 |
| }, |
| { |
| "epoch": 4.396270396270396, |
| "grad_norm": 0.34014850867321345, |
| "learning_rate": 6.77025188473773e-06, |
| "loss": 0.3108, |
| "num_tokens": 2468417486.0, |
| "step": 9430 |
| }, |
| { |
| "epoch": 4.398601398601398, |
| "grad_norm": 0.3325831324351841, |
| "learning_rate": 6.756788769348103e-06, |
| "loss": 0.3189, |
| "num_tokens": 2469728206.0, |
| "step": 9435 |
| }, |
| { |
| "epoch": 4.400932400932401, |
| "grad_norm": 0.33398045142731836, |
| "learning_rate": 6.743374964274427e-06, |
| "loss": 0.3212, |
| "num_tokens": 2471038926.0, |
| "step": 9440 |
| }, |
| { |
| "epoch": 4.403263403263403, |
| "grad_norm": 0.3149622560196894, |
| "learning_rate": 6.730010501403718e-06, |
| "loss": 0.3103, |
| "num_tokens": 2472349646.0, |
| "step": 9445 |
| }, |
| { |
| "epoch": 4.405594405594406, |
| "grad_norm": 0.3296632544724334, |
| "learning_rate": 6.716695412505688e-06, |
| "loss": 0.3141, |
| "num_tokens": 2473660366.0, |
| "step": 9450 |
| }, |
| { |
| "epoch": 4.407925407925408, |
| "grad_norm": 0.3266017858365408, |
| "learning_rate": 6.703429729232682e-06, |
| "loss": 0.3195, |
| "num_tokens": 2474971086.0, |
| "step": 9455 |
| }, |
| { |
| "epoch": 4.410256410256411, |
| "grad_norm": 0.3373805559516959, |
| "learning_rate": 6.690213483119595e-06, |
| "loss": 0.312, |
| "num_tokens": 2476281806.0, |
| "step": 9460 |
| }, |
| { |
| "epoch": 4.4125874125874125, |
| "grad_norm": 0.3366721023486427, |
| "learning_rate": 6.677046705583806e-06, |
| "loss": 0.3171, |
| "num_tokens": 2477592526.0, |
| "step": 9465 |
| }, |
| { |
| "epoch": 4.414918414918415, |
| "grad_norm": 0.3310996468709432, |
| "learning_rate": 6.663929427925095e-06, |
| "loss": 0.3054, |
| "num_tokens": 2478903246.0, |
| "step": 9470 |
| }, |
| { |
| "epoch": 4.417249417249417, |
| "grad_norm": 0.3237625603637502, |
| "learning_rate": 6.650861681325567e-06, |
| "loss": 0.3063, |
| "num_tokens": 2480204978.0, |
| "step": 9475 |
| }, |
| { |
| "epoch": 4.41958041958042, |
| "grad_norm": 0.35443174214884327, |
| "learning_rate": 6.6378434968495965e-06, |
| "loss": 0.3186, |
| "num_tokens": 2481515698.0, |
| "step": 9480 |
| }, |
| { |
| "epoch": 4.421911421911422, |
| "grad_norm": 0.34643582807007817, |
| "learning_rate": 6.624874905443726e-06, |
| "loss": 0.3104, |
| "num_tokens": 2482810080.0, |
| "step": 9485 |
| }, |
| { |
| "epoch": 4.424242424242424, |
| "grad_norm": 0.3353431088763468, |
| "learning_rate": 6.611955937936619e-06, |
| "loss": 0.3042, |
| "num_tokens": 2484095674.0, |
| "step": 9490 |
| }, |
| { |
| "epoch": 4.426573426573427, |
| "grad_norm": 0.3242056478567321, |
| "learning_rate": 6.599086625038957e-06, |
| "loss": 0.32, |
| "num_tokens": 2485406394.0, |
| "step": 9495 |
| }, |
| { |
| "epoch": 4.428904428904429, |
| "grad_norm": 0.3331241038387844, |
| "learning_rate": 6.586266997343402e-06, |
| "loss": 0.3078, |
| "num_tokens": 2486697670.0, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.431235431235431, |
| "grad_norm": 0.3288847500668807, |
| "learning_rate": 6.5734970853244985e-06, |
| "loss": 0.3095, |
| "num_tokens": 2488008390.0, |
| "step": 9505 |
| }, |
| { |
| "epoch": 4.433566433566433, |
| "grad_norm": 0.3444690042268666, |
| "learning_rate": 6.560776919338599e-06, |
| "loss": 0.3171, |
| "num_tokens": 2489319110.0, |
| "step": 9510 |
| }, |
| { |
| "epoch": 4.435897435897436, |
| "grad_norm": 0.3401045593755526, |
| "learning_rate": 6.5481065296238155e-06, |
| "loss": 0.3233, |
| "num_tokens": 2490621224.0, |
| "step": 9515 |
| }, |
| { |
| "epoch": 4.438228438228438, |
| "grad_norm": 0.32485207168584423, |
| "learning_rate": 6.535485946299927e-06, |
| "loss": 0.3, |
| "num_tokens": 2491931944.0, |
| "step": 9520 |
| }, |
| { |
| "epoch": 4.440559440559441, |
| "grad_norm": 0.3362671229182379, |
| "learning_rate": 6.5229151993683065e-06, |
| "loss": 0.3231, |
| "num_tokens": 2493242664.0, |
| "step": 9525 |
| }, |
| { |
| "epoch": 4.442890442890443, |
| "grad_norm": 0.3188938856125156, |
| "learning_rate": 6.5103943187118654e-06, |
| "loss": 0.3248, |
| "num_tokens": 2494553384.0, |
| "step": 9530 |
| }, |
| { |
| "epoch": 4.445221445221446, |
| "grad_norm": 0.3206644304295667, |
| "learning_rate": 6.49792333409498e-06, |
| "loss": 0.3193, |
| "num_tokens": 2495864104.0, |
| "step": 9535 |
| }, |
| { |
| "epoch": 4.4475524475524475, |
| "grad_norm": 0.32725720854706297, |
| "learning_rate": 6.485502275163401e-06, |
| "loss": 0.3128, |
| "num_tokens": 2497174824.0, |
| "step": 9540 |
| }, |
| { |
| "epoch": 4.449883449883449, |
| "grad_norm": 0.3377685213956361, |
| "learning_rate": 6.473131171444192e-06, |
| "loss": 0.3098, |
| "num_tokens": 2498485544.0, |
| "step": 9545 |
| }, |
| { |
| "epoch": 4.452214452214452, |
| "grad_norm": 0.3267131751428466, |
| "learning_rate": 6.460810052345697e-06, |
| "loss": 0.3122, |
| "num_tokens": 2499796264.0, |
| "step": 9550 |
| }, |
| { |
| "epoch": 4.454545454545454, |
| "grad_norm": 0.3369467611573572, |
| "learning_rate": 6.4485389471574025e-06, |
| "loss": 0.3121, |
| "num_tokens": 2501097144.0, |
| "step": 9555 |
| }, |
| { |
| "epoch": 4.456876456876457, |
| "grad_norm": 0.31471698424983463, |
| "learning_rate": 6.4363178850499115e-06, |
| "loss": 0.3114, |
| "num_tokens": 2502407864.0, |
| "step": 9560 |
| }, |
| { |
| "epoch": 4.459207459207459, |
| "grad_norm": 0.3182422570183859, |
| "learning_rate": 6.424146895074878e-06, |
| "loss": 0.3217, |
| "num_tokens": 2503718584.0, |
| "step": 9565 |
| }, |
| { |
| "epoch": 4.461538461538462, |
| "grad_norm": 0.32824460237041364, |
| "learning_rate": 6.41202600616492e-06, |
| "loss": 0.312, |
| "num_tokens": 2505029304.0, |
| "step": 9570 |
| }, |
| { |
| "epoch": 4.463869463869464, |
| "grad_norm": 0.3276454341355643, |
| "learning_rate": 6.399955247133547e-06, |
| "loss": 0.3233, |
| "num_tokens": 2506340024.0, |
| "step": 9575 |
| }, |
| { |
| "epoch": 4.466200466200466, |
| "grad_norm": 0.35551047742829733, |
| "learning_rate": 6.387934646675109e-06, |
| "loss": 0.3172, |
| "num_tokens": 2507650744.0, |
| "step": 9580 |
| }, |
| { |
| "epoch": 4.468531468531468, |
| "grad_norm": 0.33605570429689574, |
| "learning_rate": 6.375964233364725e-06, |
| "loss": 0.3353, |
| "num_tokens": 2508961464.0, |
| "step": 9585 |
| }, |
| { |
| "epoch": 4.470862470862471, |
| "grad_norm": 0.3172835951855472, |
| "learning_rate": 6.364044035658198e-06, |
| "loss": 0.3063, |
| "num_tokens": 2510272184.0, |
| "step": 9590 |
| }, |
| { |
| "epoch": 4.473193473193473, |
| "grad_norm": 0.3207971543619864, |
| "learning_rate": 6.352174081891969e-06, |
| "loss": 0.3132, |
| "num_tokens": 2511582904.0, |
| "step": 9595 |
| }, |
| { |
| "epoch": 4.475524475524476, |
| "grad_norm": 0.31751145509697243, |
| "learning_rate": 6.340354400283039e-06, |
| "loss": 0.3107, |
| "num_tokens": 2512893624.0, |
| "step": 9600 |
| }, |
| { |
| "epoch": 4.477855477855478, |
| "grad_norm": 0.3384772695617782, |
| "learning_rate": 6.328585018928896e-06, |
| "loss": 0.3239, |
| "num_tokens": 2514204344.0, |
| "step": 9605 |
| }, |
| { |
| "epoch": 4.480186480186481, |
| "grad_norm": 0.34136380265968547, |
| "learning_rate": 6.31686596580746e-06, |
| "loss": 0.3159, |
| "num_tokens": 2515515064.0, |
| "step": 9610 |
| }, |
| { |
| "epoch": 4.4825174825174825, |
| "grad_norm": 0.34265867835608826, |
| "learning_rate": 6.305197268777023e-06, |
| "loss": 0.3232, |
| "num_tokens": 2516804613.0, |
| "step": 9615 |
| }, |
| { |
| "epoch": 4.484848484848484, |
| "grad_norm": 0.3471232349713498, |
| "learning_rate": 6.293578955576149e-06, |
| "loss": 0.3162, |
| "num_tokens": 2518115333.0, |
| "step": 9620 |
| }, |
| { |
| "epoch": 4.487179487179487, |
| "grad_norm": 0.33307127442521534, |
| "learning_rate": 6.28201105382364e-06, |
| "loss": 0.3196, |
| "num_tokens": 2519426053.0, |
| "step": 9625 |
| }, |
| { |
| "epoch": 4.489510489510489, |
| "grad_norm": 0.33359074202000116, |
| "learning_rate": 6.2704935910184785e-06, |
| "loss": 0.3136, |
| "num_tokens": 2520736773.0, |
| "step": 9630 |
| }, |
| { |
| "epoch": 4.491841491841492, |
| "grad_norm": 0.3240069604325885, |
| "learning_rate": 6.259026594539719e-06, |
| "loss": 0.3188, |
| "num_tokens": 2522047493.0, |
| "step": 9635 |
| }, |
| { |
| "epoch": 4.494172494172494, |
| "grad_norm": 0.3388605125051464, |
| "learning_rate": 6.2476100916464585e-06, |
| "loss": 0.3154, |
| "num_tokens": 2523358213.0, |
| "step": 9640 |
| }, |
| { |
| "epoch": 4.496503496503497, |
| "grad_norm": 0.32541913779560644, |
| "learning_rate": 6.236244109477764e-06, |
| "loss": 0.3197, |
| "num_tokens": 2524663045.0, |
| "step": 9645 |
| }, |
| { |
| "epoch": 4.498834498834499, |
| "grad_norm": 0.33945252106405477, |
| "learning_rate": 6.224928675052609e-06, |
| "loss": 0.3211, |
| "num_tokens": 2525973765.0, |
| "step": 9650 |
| }, |
| { |
| "epoch": 4.501165501165501, |
| "grad_norm": 0.3476363419781912, |
| "learning_rate": 6.213663815269794e-06, |
| "loss": 0.3079, |
| "num_tokens": 2527279335.0, |
| "step": 9655 |
| }, |
| { |
| "epoch": 4.503496503496503, |
| "grad_norm": 0.33583724777887775, |
| "learning_rate": 6.202449556907903e-06, |
| "loss": 0.325, |
| "num_tokens": 2528590055.0, |
| "step": 9660 |
| }, |
| { |
| "epoch": 4.505827505827506, |
| "grad_norm": 0.3216459701800872, |
| "learning_rate": 6.191285926625236e-06, |
| "loss": 0.3106, |
| "num_tokens": 2529900775.0, |
| "step": 9665 |
| }, |
| { |
| "epoch": 4.508158508158508, |
| "grad_norm": 0.3346538262362633, |
| "learning_rate": 6.180172950959726e-06, |
| "loss": 0.3161, |
| "num_tokens": 2531211495.0, |
| "step": 9670 |
| }, |
| { |
| "epoch": 4.510489510489511, |
| "grad_norm": 0.33904717924175304, |
| "learning_rate": 6.169110656328905e-06, |
| "loss": 0.3256, |
| "num_tokens": 2532522215.0, |
| "step": 9675 |
| }, |
| { |
| "epoch": 4.512820512820513, |
| "grad_norm": 0.31774716560986643, |
| "learning_rate": 6.158099069029825e-06, |
| "loss": 0.3101, |
| "num_tokens": 2533832935.0, |
| "step": 9680 |
| }, |
| { |
| "epoch": 4.515151515151516, |
| "grad_norm": 0.3219903613779173, |
| "learning_rate": 6.147138215238987e-06, |
| "loss": 0.3175, |
| "num_tokens": 2535143655.0, |
| "step": 9685 |
| }, |
| { |
| "epoch": 4.5174825174825175, |
| "grad_norm": 0.3270086455520368, |
| "learning_rate": 6.136228121012301e-06, |
| "loss": 0.3025, |
| "num_tokens": 2536454375.0, |
| "step": 9690 |
| }, |
| { |
| "epoch": 4.519813519813519, |
| "grad_norm": 0.32790806083662694, |
| "learning_rate": 6.125368812285014e-06, |
| "loss": 0.324, |
| "num_tokens": 2537765095.0, |
| "step": 9695 |
| }, |
| { |
| "epoch": 4.522144522144522, |
| "grad_norm": 0.3374631769629436, |
| "learning_rate": 6.11456031487163e-06, |
| "loss": 0.3113, |
| "num_tokens": 2539075815.0, |
| "step": 9700 |
| }, |
| { |
| "epoch": 4.524475524475524, |
| "grad_norm": 0.3165604361693966, |
| "learning_rate": 6.103802654465887e-06, |
| "loss": 0.3189, |
| "num_tokens": 2540386535.0, |
| "step": 9705 |
| }, |
| { |
| "epoch": 4.526806526806527, |
| "grad_norm": 0.33329264324814467, |
| "learning_rate": 6.093095856640659e-06, |
| "loss": 0.3267, |
| "num_tokens": 2541697255.0, |
| "step": 9710 |
| }, |
| { |
| "epoch": 4.529137529137529, |
| "grad_norm": 0.318747910398136, |
| "learning_rate": 6.082439946847914e-06, |
| "loss": 0.3152, |
| "num_tokens": 2543007975.0, |
| "step": 9715 |
| }, |
| { |
| "epoch": 4.531468531468532, |
| "grad_norm": 0.3395245128133879, |
| "learning_rate": 6.0718349504186596e-06, |
| "loss": 0.3177, |
| "num_tokens": 2544305294.0, |
| "step": 9720 |
| }, |
| { |
| "epoch": 4.533799533799534, |
| "grad_norm": 0.3234654794073021, |
| "learning_rate": 6.061280892562856e-06, |
| "loss": 0.313, |
| "num_tokens": 2545614275.0, |
| "step": 9725 |
| }, |
| { |
| "epoch": 4.536130536130536, |
| "grad_norm": 0.3230472732102473, |
| "learning_rate": 6.050777798369387e-06, |
| "loss": 0.3145, |
| "num_tokens": 2546924995.0, |
| "step": 9730 |
| }, |
| { |
| "epoch": 4.538461538461538, |
| "grad_norm": 0.32753670861955114, |
| "learning_rate": 6.040325692805984e-06, |
| "loss": 0.3119, |
| "num_tokens": 2548235715.0, |
| "step": 9735 |
| }, |
| { |
| "epoch": 4.540792540792541, |
| "grad_norm": 0.3312438687676759, |
| "learning_rate": 6.029924600719165e-06, |
| "loss": 0.3168, |
| "num_tokens": 2549546435.0, |
| "step": 9740 |
| }, |
| { |
| "epoch": 4.543123543123543, |
| "grad_norm": 0.34207396319559835, |
| "learning_rate": 6.019574546834186e-06, |
| "loss": 0.329, |
| "num_tokens": 2550857155.0, |
| "step": 9745 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.32384193950890633, |
| "learning_rate": 6.009275555754967e-06, |
| "loss": 0.3133, |
| "num_tokens": 2552167875.0, |
| "step": 9750 |
| }, |
| { |
| "epoch": 4.547785547785548, |
| "grad_norm": 0.33222986120067743, |
| "learning_rate": 5.999027651964054e-06, |
| "loss": 0.3178, |
| "num_tokens": 2553478595.0, |
| "step": 9755 |
| }, |
| { |
| "epoch": 4.550116550116551, |
| "grad_norm": 0.3394591599285521, |
| "learning_rate": 5.988830859822541e-06, |
| "loss": 0.3106, |
| "num_tokens": 2554789315.0, |
| "step": 9760 |
| }, |
| { |
| "epoch": 4.5524475524475525, |
| "grad_norm": 0.3273894065028813, |
| "learning_rate": 5.978685203570021e-06, |
| "loss": 0.3109, |
| "num_tokens": 2556100035.0, |
| "step": 9765 |
| }, |
| { |
| "epoch": 4.554778554778554, |
| "grad_norm": 0.3441663270198807, |
| "learning_rate": 5.968590707324535e-06, |
| "loss": 0.3214, |
| "num_tokens": 2557410755.0, |
| "step": 9770 |
| }, |
| { |
| "epoch": 4.557109557109557, |
| "grad_norm": 0.3305110868532999, |
| "learning_rate": 5.958547395082498e-06, |
| "loss": 0.3214, |
| "num_tokens": 2558721475.0, |
| "step": 9775 |
| }, |
| { |
| "epoch": 4.559440559440559, |
| "grad_norm": 0.3318319061404925, |
| "learning_rate": 5.948555290718658e-06, |
| "loss": 0.3203, |
| "num_tokens": 2560032195.0, |
| "step": 9780 |
| }, |
| { |
| "epoch": 4.561771561771562, |
| "grad_norm": 0.3143112654235783, |
| "learning_rate": 5.938614417986035e-06, |
| "loss": 0.3238, |
| "num_tokens": 2561342915.0, |
| "step": 9785 |
| }, |
| { |
| "epoch": 4.564102564102564, |
| "grad_norm": 0.33345941184977745, |
| "learning_rate": 5.928724800515848e-06, |
| "loss": 0.3143, |
| "num_tokens": 2562653635.0, |
| "step": 9790 |
| }, |
| { |
| "epoch": 4.566433566433567, |
| "grad_norm": 0.34686197219373827, |
| "learning_rate": 5.91888646181749e-06, |
| "loss": 0.3137, |
| "num_tokens": 2563948407.0, |
| "step": 9795 |
| }, |
| { |
| "epoch": 4.568764568764569, |
| "grad_norm": 0.33117506317785395, |
| "learning_rate": 5.909099425278451e-06, |
| "loss": 0.32, |
| "num_tokens": 2565259127.0, |
| "step": 9800 |
| }, |
| { |
| "epoch": 4.571095571095571, |
| "grad_norm": 0.3435052314461775, |
| "learning_rate": 5.899363714164259e-06, |
| "loss": 0.3148, |
| "num_tokens": 2566569847.0, |
| "step": 9805 |
| }, |
| { |
| "epoch": 4.573426573426573, |
| "grad_norm": 0.3511413949641888, |
| "learning_rate": 5.889679351618435e-06, |
| "loss": 0.3239, |
| "num_tokens": 2567880567.0, |
| "step": 9810 |
| }, |
| { |
| "epoch": 4.575757575757576, |
| "grad_norm": 0.3277981153602279, |
| "learning_rate": 5.880046360662442e-06, |
| "loss": 0.319, |
| "num_tokens": 2569191287.0, |
| "step": 9815 |
| }, |
| { |
| "epoch": 4.578088578088578, |
| "grad_norm": 0.34041898778575075, |
| "learning_rate": 5.870464764195621e-06, |
| "loss": 0.3117, |
| "num_tokens": 2570502007.0, |
| "step": 9820 |
| }, |
| { |
| "epoch": 4.58041958041958, |
| "grad_norm": 0.3243193311955865, |
| "learning_rate": 5.8609345849951275e-06, |
| "loss": 0.312, |
| "num_tokens": 2571812727.0, |
| "step": 9825 |
| }, |
| { |
| "epoch": 4.582750582750583, |
| "grad_norm": 0.3310866370673146, |
| "learning_rate": 5.851455845715912e-06, |
| "loss": 0.3109, |
| "num_tokens": 2573123447.0, |
| "step": 9830 |
| }, |
| { |
| "epoch": 4.585081585081585, |
| "grad_norm": 0.31513987342632316, |
| "learning_rate": 5.842028568890624e-06, |
| "loss": 0.3069, |
| "num_tokens": 2574434167.0, |
| "step": 9835 |
| }, |
| { |
| "epoch": 4.5874125874125875, |
| "grad_norm": 0.3275061402794141, |
| "learning_rate": 5.832652776929576e-06, |
| "loss": 0.3048, |
| "num_tokens": 2575744887.0, |
| "step": 9840 |
| }, |
| { |
| "epoch": 4.589743589743589, |
| "grad_norm": 0.33530078415240044, |
| "learning_rate": 5.823328492120709e-06, |
| "loss": 0.3205, |
| "num_tokens": 2577055607.0, |
| "step": 9845 |
| }, |
| { |
| "epoch": 4.592074592074592, |
| "grad_norm": 0.3482685265210104, |
| "learning_rate": 5.814055736629512e-06, |
| "loss": 0.3222, |
| "num_tokens": 2578366327.0, |
| "step": 9850 |
| }, |
| { |
| "epoch": 4.594405594405594, |
| "grad_norm": 0.3537893890025293, |
| "learning_rate": 5.804834532498973e-06, |
| "loss": 0.3125, |
| "num_tokens": 2579662506.0, |
| "step": 9855 |
| }, |
| { |
| "epoch": 4.596736596736597, |
| "grad_norm": 0.3358652150747448, |
| "learning_rate": 5.795664901649546e-06, |
| "loss": 0.3123, |
| "num_tokens": 2580973226.0, |
| "step": 9860 |
| }, |
| { |
| "epoch": 4.599067599067599, |
| "grad_norm": 0.35598478994424876, |
| "learning_rate": 5.78654686587908e-06, |
| "loss": 0.3333, |
| "num_tokens": 2582283946.0, |
| "step": 9865 |
| }, |
| { |
| "epoch": 4.601398601398602, |
| "grad_norm": 0.331525987147412, |
| "learning_rate": 5.777480446862771e-06, |
| "loss": 0.3199, |
| "num_tokens": 2583594666.0, |
| "step": 9870 |
| }, |
| { |
| "epoch": 4.603729603729604, |
| "grad_norm": 0.32178290426004424, |
| "learning_rate": 5.768465666153116e-06, |
| "loss": 0.3289, |
| "num_tokens": 2584905386.0, |
| "step": 9875 |
| }, |
| { |
| "epoch": 4.606060606060606, |
| "grad_norm": 0.30393892105397075, |
| "learning_rate": 5.759502545179865e-06, |
| "loss": 0.3076, |
| "num_tokens": 2586200553.0, |
| "step": 9880 |
| }, |
| { |
| "epoch": 4.608391608391608, |
| "grad_norm": 0.32968332333326905, |
| "learning_rate": 5.750591105249945e-06, |
| "loss": 0.3105, |
| "num_tokens": 2587511273.0, |
| "step": 9885 |
| }, |
| { |
| "epoch": 4.610722610722611, |
| "grad_norm": 0.3231640929433455, |
| "learning_rate": 5.741731367547445e-06, |
| "loss": 0.3175, |
| "num_tokens": 2588821993.0, |
| "step": 9890 |
| }, |
| { |
| "epoch": 4.613053613053613, |
| "grad_norm": 0.31411918328374727, |
| "learning_rate": 5.732923353133545e-06, |
| "loss": 0.3102, |
| "num_tokens": 2590132713.0, |
| "step": 9895 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 0.335002070080694, |
| "learning_rate": 5.724167082946466e-06, |
| "loss": 0.3225, |
| "num_tokens": 2591443433.0, |
| "step": 9900 |
| }, |
| { |
| "epoch": 4.617715617715618, |
| "grad_norm": 0.3229052741900776, |
| "learning_rate": 5.715462577801427e-06, |
| "loss": 0.3156, |
| "num_tokens": 2592754153.0, |
| "step": 9905 |
| }, |
| { |
| "epoch": 4.62004662004662, |
| "grad_norm": 0.3297660710371224, |
| "learning_rate": 5.706809858390583e-06, |
| "loss": 0.3276, |
| "num_tokens": 2594060378.0, |
| "step": 9910 |
| }, |
| { |
| "epoch": 4.6223776223776225, |
| "grad_norm": 0.3155783908720556, |
| "learning_rate": 5.698208945283e-06, |
| "loss": 0.2992, |
| "num_tokens": 2595371098.0, |
| "step": 9915 |
| }, |
| { |
| "epoch": 4.624708624708624, |
| "grad_norm": 0.3193903370909751, |
| "learning_rate": 5.689659858924586e-06, |
| "loss": 0.3068, |
| "num_tokens": 2596681818.0, |
| "step": 9920 |
| }, |
| { |
| "epoch": 4.627039627039627, |
| "grad_norm": 0.33774842672012795, |
| "learning_rate": 5.6811626196380385e-06, |
| "loss": 0.3121, |
| "num_tokens": 2597992538.0, |
| "step": 9925 |
| }, |
| { |
| "epoch": 4.629370629370629, |
| "grad_norm": 0.33740225064211665, |
| "learning_rate": 5.672717247622816e-06, |
| "loss": 0.3102, |
| "num_tokens": 2599303258.0, |
| "step": 9930 |
| }, |
| { |
| "epoch": 4.631701631701632, |
| "grad_norm": 0.3373159671469985, |
| "learning_rate": 5.664323762955072e-06, |
| "loss": 0.3221, |
| "num_tokens": 2600613978.0, |
| "step": 9935 |
| }, |
| { |
| "epoch": 4.634032634032634, |
| "grad_norm": 0.35331673034205946, |
| "learning_rate": 5.655982185587621e-06, |
| "loss": 0.3184, |
| "num_tokens": 2601924698.0, |
| "step": 9940 |
| }, |
| { |
| "epoch": 4.636363636363637, |
| "grad_norm": 0.34075562342066007, |
| "learning_rate": 5.647692535349884e-06, |
| "loss": 0.3176, |
| "num_tokens": 2603235418.0, |
| "step": 9945 |
| }, |
| { |
| "epoch": 4.638694638694639, |
| "grad_norm": 0.3272741009052787, |
| "learning_rate": 5.6394548319478325e-06, |
| "loss": 0.308, |
| "num_tokens": 2604546138.0, |
| "step": 9950 |
| }, |
| { |
| "epoch": 4.641025641025641, |
| "grad_norm": 0.3353878917152132, |
| "learning_rate": 5.631269094963962e-06, |
| "loss": 0.3132, |
| "num_tokens": 2605856858.0, |
| "step": 9955 |
| }, |
| { |
| "epoch": 4.643356643356643, |
| "grad_norm": 0.335095370367202, |
| "learning_rate": 5.623135343857232e-06, |
| "loss": 0.3179, |
| "num_tokens": 2607167578.0, |
| "step": 9960 |
| }, |
| { |
| "epoch": 4.645687645687646, |
| "grad_norm": 0.3448209805418296, |
| "learning_rate": 5.615053597963018e-06, |
| "loss": 0.3266, |
| "num_tokens": 2608468942.0, |
| "step": 9965 |
| }, |
| { |
| "epoch": 4.648018648018648, |
| "grad_norm": 0.3365763346832491, |
| "learning_rate": 5.607023876493075e-06, |
| "loss": 0.3251, |
| "num_tokens": 2609779662.0, |
| "step": 9970 |
| }, |
| { |
| "epoch": 4.65034965034965, |
| "grad_norm": 0.3371348358061654, |
| "learning_rate": 5.59904619853548e-06, |
| "loss": 0.314, |
| "num_tokens": 2611090382.0, |
| "step": 9975 |
| }, |
| { |
| "epoch": 4.652680652680653, |
| "grad_norm": 0.3211402055694574, |
| "learning_rate": 5.591120583054602e-06, |
| "loss": 0.3172, |
| "num_tokens": 2612401102.0, |
| "step": 9980 |
| }, |
| { |
| "epoch": 4.655011655011655, |
| "grad_norm": 0.32650824383942834, |
| "learning_rate": 5.583247048891042e-06, |
| "loss": 0.3177, |
| "num_tokens": 2613704775.0, |
| "step": 9985 |
| }, |
| { |
| "epoch": 4.6573426573426575, |
| "grad_norm": 0.33015275188111703, |
| "learning_rate": 5.575425614761597e-06, |
| "loss": 0.3105, |
| "num_tokens": 2615005017.0, |
| "step": 9990 |
| }, |
| { |
| "epoch": 4.659673659673659, |
| "grad_norm": 0.3350066530847012, |
| "learning_rate": 5.567656299259212e-06, |
| "loss": 0.3179, |
| "num_tokens": 2616315737.0, |
| "step": 9995 |
| }, |
| { |
| "epoch": 4.662004662004662, |
| "grad_norm": 0.3296673905861499, |
| "learning_rate": 5.559939120852936e-06, |
| "loss": 0.3183, |
| "num_tokens": 2617626457.0, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.664335664335664, |
| "grad_norm": 0.3314605447902211, |
| "learning_rate": 5.552274097887879e-06, |
| "loss": 0.311, |
| "num_tokens": 2618937177.0, |
| "step": 10005 |
| }, |
| { |
| "epoch": 4.666666666666667, |
| "grad_norm": 0.31963246919644556, |
| "learning_rate": 5.544661248585172e-06, |
| "loss": 0.3148, |
| "num_tokens": 2620238809.0, |
| "step": 10010 |
| }, |
| { |
| "epoch": 4.668997668997669, |
| "grad_norm": 0.32606186826503414, |
| "learning_rate": 5.537100591041915e-06, |
| "loss": 0.3197, |
| "num_tokens": 2621537881.0, |
| "step": 10015 |
| }, |
| { |
| "epoch": 4.671328671328672, |
| "grad_norm": 0.3127362641909877, |
| "learning_rate": 5.529592143231142e-06, |
| "loss": 0.3187, |
| "num_tokens": 2622848601.0, |
| "step": 10020 |
| }, |
| { |
| "epoch": 4.673659673659674, |
| "grad_norm": 0.34140879351797326, |
| "learning_rate": 5.522135923001767e-06, |
| "loss": 0.3129, |
| "num_tokens": 2624159321.0, |
| "step": 10025 |
| }, |
| { |
| "epoch": 4.6759906759906755, |
| "grad_norm": 0.32642677770519635, |
| "learning_rate": 5.514731948078565e-06, |
| "loss": 0.3089, |
| "num_tokens": 2625454738.0, |
| "step": 10030 |
| }, |
| { |
| "epoch": 4.678321678321678, |
| "grad_norm": 0.329034389428116, |
| "learning_rate": 5.5073802360621035e-06, |
| "loss": 0.315, |
| "num_tokens": 2626765458.0, |
| "step": 10035 |
| }, |
| { |
| "epoch": 4.680652680652681, |
| "grad_norm": 0.33163073393602516, |
| "learning_rate": 5.50008080442871e-06, |
| "loss": 0.3146, |
| "num_tokens": 2628076178.0, |
| "step": 10040 |
| }, |
| { |
| "epoch": 4.682983682983683, |
| "grad_norm": 0.3272067629094256, |
| "learning_rate": 5.492833670530445e-06, |
| "loss": 0.3173, |
| "num_tokens": 2629386898.0, |
| "step": 10045 |
| }, |
| { |
| "epoch": 4.685314685314685, |
| "grad_norm": 0.3144574455080941, |
| "learning_rate": 5.485638851595033e-06, |
| "loss": 0.3054, |
| "num_tokens": 2630697618.0, |
| "step": 10050 |
| }, |
| { |
| "epoch": 4.687645687645688, |
| "grad_norm": 0.33627454004902, |
| "learning_rate": 5.478496364725844e-06, |
| "loss": 0.3188, |
| "num_tokens": 2632008338.0, |
| "step": 10055 |
| }, |
| { |
| "epoch": 4.68997668997669, |
| "grad_norm": 0.3353988218499314, |
| "learning_rate": 5.471406226901843e-06, |
| "loss": 0.3178, |
| "num_tokens": 2633315147.0, |
| "step": 10060 |
| }, |
| { |
| "epoch": 4.6923076923076925, |
| "grad_norm": 0.34081231503644693, |
| "learning_rate": 5.464368454977559e-06, |
| "loss": 0.3181, |
| "num_tokens": 2634625867.0, |
| "step": 10065 |
| }, |
| { |
| "epoch": 4.694638694638694, |
| "grad_norm": 0.32001347894247395, |
| "learning_rate": 5.457383065683023e-06, |
| "loss": 0.3094, |
| "num_tokens": 2635936587.0, |
| "step": 10070 |
| }, |
| { |
| "epoch": 4.696969696969697, |
| "grad_norm": 0.33209202640372304, |
| "learning_rate": 5.450450075623761e-06, |
| "loss": 0.3203, |
| "num_tokens": 2637233194.0, |
| "step": 10075 |
| }, |
| { |
| "epoch": 4.699300699300699, |
| "grad_norm": 0.3444659685507314, |
| "learning_rate": 5.443569501280724e-06, |
| "loss": 0.3298, |
| "num_tokens": 2638543914.0, |
| "step": 10080 |
| }, |
| { |
| "epoch": 4.701631701631702, |
| "grad_norm": 0.32572981399860407, |
| "learning_rate": 5.436741359010265e-06, |
| "loss": 0.3145, |
| "num_tokens": 2639854634.0, |
| "step": 10085 |
| }, |
| { |
| "epoch": 4.703962703962704, |
| "grad_norm": 0.3265125453077855, |
| "learning_rate": 5.429965665044099e-06, |
| "loss": 0.3113, |
| "num_tokens": 2641165354.0, |
| "step": 10090 |
| }, |
| { |
| "epoch": 4.706293706293707, |
| "grad_norm": 0.32908645785435287, |
| "learning_rate": 5.4232424354892605e-06, |
| "loss": 0.3259, |
| "num_tokens": 2642476074.0, |
| "step": 10095 |
| }, |
| { |
| "epoch": 4.708624708624709, |
| "grad_norm": 0.32471263137765566, |
| "learning_rate": 5.4165716863280626e-06, |
| "loss": 0.3148, |
| "num_tokens": 2643786794.0, |
| "step": 10100 |
| }, |
| { |
| "epoch": 4.7109557109557105, |
| "grad_norm": 0.33045093010828397, |
| "learning_rate": 5.409953433418071e-06, |
| "loss": 0.3265, |
| "num_tokens": 2645097514.0, |
| "step": 10105 |
| }, |
| { |
| "epoch": 4.713286713286713, |
| "grad_norm": 0.3293031597544229, |
| "learning_rate": 5.403387692492053e-06, |
| "loss": 0.312, |
| "num_tokens": 2646390978.0, |
| "step": 10110 |
| }, |
| { |
| "epoch": 4.715617715617715, |
| "grad_norm": 0.33516744422096806, |
| "learning_rate": 5.396874479157943e-06, |
| "loss": 0.3169, |
| "num_tokens": 2647689285.0, |
| "step": 10115 |
| }, |
| { |
| "epoch": 4.717948717948718, |
| "grad_norm": 0.3351308591346485, |
| "learning_rate": 5.39041380889882e-06, |
| "loss": 0.3235, |
| "num_tokens": 2648996984.0, |
| "step": 10120 |
| }, |
| { |
| "epoch": 4.72027972027972, |
| "grad_norm": 0.3345027011060605, |
| "learning_rate": 5.384005697072842e-06, |
| "loss": 0.308, |
| "num_tokens": 2650307704.0, |
| "step": 10125 |
| }, |
| { |
| "epoch": 4.722610722610723, |
| "grad_norm": 0.326143155782565, |
| "learning_rate": 5.377650158913239e-06, |
| "loss": 0.3272, |
| "num_tokens": 2651618424.0, |
| "step": 10130 |
| }, |
| { |
| "epoch": 4.724941724941725, |
| "grad_norm": 0.3382309775663869, |
| "learning_rate": 5.371347209528259e-06, |
| "loss": 0.3201, |
| "num_tokens": 2652929144.0, |
| "step": 10135 |
| }, |
| { |
| "epoch": 4.7272727272727275, |
| "grad_norm": 0.3229354536782934, |
| "learning_rate": 5.365096863901139e-06, |
| "loss": 0.317, |
| "num_tokens": 2654239864.0, |
| "step": 10140 |
| }, |
| { |
| "epoch": 4.729603729603729, |
| "grad_norm": 0.3265549491709372, |
| "learning_rate": 5.3588991368900655e-06, |
| "loss": 0.3197, |
| "num_tokens": 2655550584.0, |
| "step": 10145 |
| }, |
| { |
| "epoch": 4.731934731934732, |
| "grad_norm": 0.3317883087071852, |
| "learning_rate": 5.352754043228138e-06, |
| "loss": 0.3105, |
| "num_tokens": 2656861304.0, |
| "step": 10150 |
| }, |
| { |
| "epoch": 4.734265734265734, |
| "grad_norm": 0.30886908963659704, |
| "learning_rate": 5.346661597523347e-06, |
| "loss": 0.3183, |
| "num_tokens": 2658172024.0, |
| "step": 10155 |
| }, |
| { |
| "epoch": 4.736596736596737, |
| "grad_norm": 0.3265593067229369, |
| "learning_rate": 5.340621814258523e-06, |
| "loss": 0.3113, |
| "num_tokens": 2659482744.0, |
| "step": 10160 |
| }, |
| { |
| "epoch": 4.738927738927739, |
| "grad_norm": 0.34583140436007476, |
| "learning_rate": 5.334634707791303e-06, |
| "loss": 0.3193, |
| "num_tokens": 2660789135.0, |
| "step": 10165 |
| }, |
| { |
| "epoch": 4.741258741258742, |
| "grad_norm": 0.33571097006056455, |
| "learning_rate": 5.328700292354117e-06, |
| "loss": 0.3122, |
| "num_tokens": 2662099855.0, |
| "step": 10170 |
| }, |
| { |
| "epoch": 4.743589743589744, |
| "grad_norm": 0.3233278556899711, |
| "learning_rate": 5.322818582054123e-06, |
| "loss": 0.3159, |
| "num_tokens": 2663410575.0, |
| "step": 10175 |
| }, |
| { |
| "epoch": 4.7459207459207455, |
| "grad_norm": 0.3317303155316829, |
| "learning_rate": 5.316989590873196e-06, |
| "loss": 0.3194, |
| "num_tokens": 2664721295.0, |
| "step": 10180 |
| }, |
| { |
| "epoch": 4.748251748251748, |
| "grad_norm": 0.33625182489617234, |
| "learning_rate": 5.311213332667893e-06, |
| "loss": 0.3163, |
| "num_tokens": 2666032015.0, |
| "step": 10185 |
| }, |
| { |
| "epoch": 4.75058275058275, |
| "grad_norm": 0.3356074033954618, |
| "learning_rate": 5.305489821169408e-06, |
| "loss": 0.3078, |
| "num_tokens": 2667342735.0, |
| "step": 10190 |
| }, |
| { |
| "epoch": 4.752913752913753, |
| "grad_norm": 0.34585034308778323, |
| "learning_rate": 5.2998190699835485e-06, |
| "loss": 0.3257, |
| "num_tokens": 2668653455.0, |
| "step": 10195 |
| }, |
| { |
| "epoch": 4.755244755244755, |
| "grad_norm": 0.3657646741180601, |
| "learning_rate": 5.2942010925907074e-06, |
| "loss": 0.3309, |
| "num_tokens": 2669964175.0, |
| "step": 10200 |
| }, |
| { |
| "epoch": 4.757575757575758, |
| "grad_norm": 0.3300884821473932, |
| "learning_rate": 5.288635902345814e-06, |
| "loss": 0.3172, |
| "num_tokens": 2671274895.0, |
| "step": 10205 |
| }, |
| { |
| "epoch": 4.75990675990676, |
| "grad_norm": 0.3148848991416642, |
| "learning_rate": 5.283123512478321e-06, |
| "loss": 0.3097, |
| "num_tokens": 2672585615.0, |
| "step": 10210 |
| }, |
| { |
| "epoch": 4.7622377622377625, |
| "grad_norm": 0.3248022910063405, |
| "learning_rate": 5.2776639360921664e-06, |
| "loss": 0.3113, |
| "num_tokens": 2673881785.0, |
| "step": 10215 |
| }, |
| { |
| "epoch": 4.764568764568764, |
| "grad_norm": 0.32808387177263987, |
| "learning_rate": 5.272257186165733e-06, |
| "loss": 0.3208, |
| "num_tokens": 2675192505.0, |
| "step": 10220 |
| }, |
| { |
| "epoch": 4.766899766899767, |
| "grad_norm": 0.3269866573020375, |
| "learning_rate": 5.26690327555183e-06, |
| "loss": 0.3149, |
| "num_tokens": 2676503225.0, |
| "step": 10225 |
| }, |
| { |
| "epoch": 4.769230769230769, |
| "grad_norm": 0.332961069358366, |
| "learning_rate": 5.261602216977668e-06, |
| "loss": 0.3145, |
| "num_tokens": 2677813945.0, |
| "step": 10230 |
| }, |
| { |
| "epoch": 4.771561771561771, |
| "grad_norm": 0.33357108039723987, |
| "learning_rate": 5.256354023044799e-06, |
| "loss": 0.324, |
| "num_tokens": 2679124665.0, |
| "step": 10235 |
| }, |
| { |
| "epoch": 4.773892773892774, |
| "grad_norm": 0.3288868211446766, |
| "learning_rate": 5.251158706229117e-06, |
| "loss": 0.318, |
| "num_tokens": 2680435385.0, |
| "step": 10240 |
| }, |
| { |
| "epoch": 4.776223776223777, |
| "grad_norm": 0.3295445331080107, |
| "learning_rate": 5.246016278880824e-06, |
| "loss": 0.3233, |
| "num_tokens": 2681746105.0, |
| "step": 10245 |
| }, |
| { |
| "epoch": 4.778554778554779, |
| "grad_norm": 0.3456197462845134, |
| "learning_rate": 5.240926753224386e-06, |
| "loss": 0.3186, |
| "num_tokens": 2683056825.0, |
| "step": 10250 |
| }, |
| { |
| "epoch": 4.7808857808857805, |
| "grad_norm": 0.3204798461467708, |
| "learning_rate": 5.235890141358512e-06, |
| "loss": 0.3118, |
| "num_tokens": 2684367545.0, |
| "step": 10255 |
| }, |
| { |
| "epoch": 4.783216783216783, |
| "grad_norm": 0.330521485739032, |
| "learning_rate": 5.230906455256126e-06, |
| "loss": 0.319, |
| "num_tokens": 2685678265.0, |
| "step": 10260 |
| }, |
| { |
| "epoch": 4.785547785547785, |
| "grad_norm": 0.32924346560709944, |
| "learning_rate": 5.225975706764347e-06, |
| "loss": 0.3112, |
| "num_tokens": 2686988499.0, |
| "step": 10265 |
| }, |
| { |
| "epoch": 4.787878787878788, |
| "grad_norm": 0.3368638495877885, |
| "learning_rate": 5.221097907604436e-06, |
| "loss": 0.3194, |
| "num_tokens": 2688299219.0, |
| "step": 10270 |
| }, |
| { |
| "epoch": 4.79020979020979, |
| "grad_norm": 0.3263693614554864, |
| "learning_rate": 5.216273069371794e-06, |
| "loss": 0.3189, |
| "num_tokens": 2689609939.0, |
| "step": 10275 |
| }, |
| { |
| "epoch": 4.792540792540793, |
| "grad_norm": 0.334167060120909, |
| "learning_rate": 5.211501203535926e-06, |
| "loss": 0.316, |
| "num_tokens": 2690918769.0, |
| "step": 10280 |
| }, |
| { |
| "epoch": 4.794871794871795, |
| "grad_norm": 0.33700365497537554, |
| "learning_rate": 5.2067823214404076e-06, |
| "loss": 0.3136, |
| "num_tokens": 2692229489.0, |
| "step": 10285 |
| }, |
| { |
| "epoch": 4.7972027972027975, |
| "grad_norm": 0.3550316959254366, |
| "learning_rate": 5.2021164343028615e-06, |
| "loss": 0.3226, |
| "num_tokens": 2693540209.0, |
| "step": 10290 |
| }, |
| { |
| "epoch": 4.799533799533799, |
| "grad_norm": 0.32560252408951285, |
| "learning_rate": 5.1975035532149374e-06, |
| "loss": 0.3153, |
| "num_tokens": 2694850929.0, |
| "step": 10295 |
| }, |
| { |
| "epoch": 4.801864801864802, |
| "grad_norm": 0.33773170010312276, |
| "learning_rate": 5.192943689142276e-06, |
| "loss": 0.3197, |
| "num_tokens": 2696161649.0, |
| "step": 10300 |
| }, |
| { |
| "epoch": 4.804195804195804, |
| "grad_norm": 0.32245870964120255, |
| "learning_rate": 5.188436852924488e-06, |
| "loss": 0.3096, |
| "num_tokens": 2697472369.0, |
| "step": 10305 |
| }, |
| { |
| "epoch": 4.806526806526806, |
| "grad_norm": 0.3157741766261794, |
| "learning_rate": 5.183983055275129e-06, |
| "loss": 0.318, |
| "num_tokens": 2698783089.0, |
| "step": 10310 |
| }, |
| { |
| "epoch": 4.808857808857809, |
| "grad_norm": 0.3230046443910475, |
| "learning_rate": 5.17958230678167e-06, |
| "loss": 0.3038, |
| "num_tokens": 2700093809.0, |
| "step": 10315 |
| }, |
| { |
| "epoch": 4.811188811188811, |
| "grad_norm": 0.32531156650812953, |
| "learning_rate": 5.175234617905471e-06, |
| "loss": 0.3056, |
| "num_tokens": 2701404529.0, |
| "step": 10320 |
| }, |
| { |
| "epoch": 4.813519813519814, |
| "grad_norm": 0.3377022386596855, |
| "learning_rate": 5.170939998981775e-06, |
| "loss": 0.3138, |
| "num_tokens": 2702706568.0, |
| "step": 10325 |
| }, |
| { |
| "epoch": 4.8158508158508155, |
| "grad_norm": 0.3440675929974891, |
| "learning_rate": 5.16669846021965e-06, |
| "loss": 0.3208, |
| "num_tokens": 2704003885.0, |
| "step": 10330 |
| }, |
| { |
| "epoch": 4.818181818181818, |
| "grad_norm": 0.3293134125006892, |
| "learning_rate": 5.162510011701991e-06, |
| "loss": 0.313, |
| "num_tokens": 2705310038.0, |
| "step": 10335 |
| }, |
| { |
| "epoch": 4.82051282051282, |
| "grad_norm": 0.34589478246737115, |
| "learning_rate": 5.15837466338549e-06, |
| "loss": 0.3227, |
| "num_tokens": 2706605004.0, |
| "step": 10340 |
| }, |
| { |
| "epoch": 4.822843822843823, |
| "grad_norm": 0.3250195530849573, |
| "learning_rate": 5.15429242510061e-06, |
| "loss": 0.3095, |
| "num_tokens": 2707915724.0, |
| "step": 10345 |
| }, |
| { |
| "epoch": 4.825174825174825, |
| "grad_norm": 0.33813235349784826, |
| "learning_rate": 5.150263306551556e-06, |
| "loss": 0.3176, |
| "num_tokens": 2709226444.0, |
| "step": 10350 |
| }, |
| { |
| "epoch": 4.827505827505828, |
| "grad_norm": 0.3342791833320397, |
| "learning_rate": 5.146287317316262e-06, |
| "loss": 0.3177, |
| "num_tokens": 2710537164.0, |
| "step": 10355 |
| }, |
| { |
| "epoch": 4.82983682983683, |
| "grad_norm": 0.32303166071349837, |
| "learning_rate": 5.1423644668463695e-06, |
| "loss": 0.3127, |
| "num_tokens": 2711847884.0, |
| "step": 10360 |
| }, |
| { |
| "epoch": 4.8321678321678325, |
| "grad_norm": 0.34342920997262655, |
| "learning_rate": 5.138494764467189e-06, |
| "loss": 0.3207, |
| "num_tokens": 2713158604.0, |
| "step": 10365 |
| }, |
| { |
| "epoch": 4.834498834498834, |
| "grad_norm": 0.3370264711785083, |
| "learning_rate": 5.134678219377695e-06, |
| "loss": 0.3169, |
| "num_tokens": 2714469324.0, |
| "step": 10370 |
| }, |
| { |
| "epoch": 4.836829836829837, |
| "grad_norm": 0.33019492580209653, |
| "learning_rate": 5.1309148406505e-06, |
| "loss": 0.319, |
| "num_tokens": 2715780044.0, |
| "step": 10375 |
| }, |
| { |
| "epoch": 4.839160839160839, |
| "grad_norm": 0.3296889225888387, |
| "learning_rate": 5.127204637231821e-06, |
| "loss": 0.3096, |
| "num_tokens": 2717090764.0, |
| "step": 10380 |
| }, |
| { |
| "epoch": 4.841491841491841, |
| "grad_norm": 0.3170653884338263, |
| "learning_rate": 5.12354761794148e-06, |
| "loss": 0.3151, |
| "num_tokens": 2718401484.0, |
| "step": 10385 |
| }, |
| { |
| "epoch": 4.843822843822844, |
| "grad_norm": 0.3171594099332368, |
| "learning_rate": 5.1199437914728596e-06, |
| "loss": 0.3121, |
| "num_tokens": 2719712204.0, |
| "step": 10390 |
| }, |
| { |
| "epoch": 4.846153846153846, |
| "grad_norm": 0.333775671862216, |
| "learning_rate": 5.116393166392901e-06, |
| "loss": 0.3082, |
| "num_tokens": 2721022924.0, |
| "step": 10395 |
| }, |
| { |
| "epoch": 4.848484848484849, |
| "grad_norm": 0.34403397168216404, |
| "learning_rate": 5.112895751142073e-06, |
| "loss": 0.3231, |
| "num_tokens": 2722333644.0, |
| "step": 10400 |
| }, |
| { |
| "epoch": 4.8508158508158505, |
| "grad_norm": 0.31993219706477866, |
| "learning_rate": 5.109451554034357e-06, |
| "loss": 0.3184, |
| "num_tokens": 2723644364.0, |
| "step": 10405 |
| }, |
| { |
| "epoch": 4.853146853146853, |
| "grad_norm": 0.343409768713205, |
| "learning_rate": 5.1060605832572235e-06, |
| "loss": 0.3171, |
| "num_tokens": 2724955084.0, |
| "step": 10410 |
| }, |
| { |
| "epoch": 4.855477855477855, |
| "grad_norm": 0.32246016600388927, |
| "learning_rate": 5.102722846871616e-06, |
| "loss": 0.3084, |
| "num_tokens": 2726265804.0, |
| "step": 10415 |
| }, |
| { |
| "epoch": 4.857808857808858, |
| "grad_norm": 0.3418584388038729, |
| "learning_rate": 5.099438352811931e-06, |
| "loss": 0.3302, |
| "num_tokens": 2727576524.0, |
| "step": 10420 |
| }, |
| { |
| "epoch": 4.86013986013986, |
| "grad_norm": 0.3242160539074971, |
| "learning_rate": 5.0962071088859935e-06, |
| "loss": 0.3102, |
| "num_tokens": 2728887244.0, |
| "step": 10425 |
| }, |
| { |
| "epoch": 4.862470862470863, |
| "grad_norm": 0.3149813322054902, |
| "learning_rate": 5.093029122775049e-06, |
| "loss": 0.3071, |
| "num_tokens": 2730197964.0, |
| "step": 10430 |
| }, |
| { |
| "epoch": 4.864801864801865, |
| "grad_norm": 0.3334336110289972, |
| "learning_rate": 5.08990440203374e-06, |
| "loss": 0.3203, |
| "num_tokens": 2731508684.0, |
| "step": 10435 |
| }, |
| { |
| "epoch": 4.867132867132867, |
| "grad_norm": 0.32299813348426, |
| "learning_rate": 5.086832954090082e-06, |
| "loss": 0.313, |
| "num_tokens": 2732819404.0, |
| "step": 10440 |
| }, |
| { |
| "epoch": 4.869463869463869, |
| "grad_norm": 0.3364027385588916, |
| "learning_rate": 5.083814786245458e-06, |
| "loss": 0.3179, |
| "num_tokens": 2734130124.0, |
| "step": 10445 |
| }, |
| { |
| "epoch": 4.871794871794872, |
| "grad_norm": 0.3656168760454608, |
| "learning_rate": 5.080849905674588e-06, |
| "loss": 0.3201, |
| "num_tokens": 2735440844.0, |
| "step": 10450 |
| }, |
| { |
| "epoch": 4.874125874125874, |
| "grad_norm": 0.33683461461528713, |
| "learning_rate": 5.077938319425526e-06, |
| "loss": 0.3186, |
| "num_tokens": 2736751564.0, |
| "step": 10455 |
| }, |
| { |
| "epoch": 4.876456876456876, |
| "grad_norm": 0.3300837602076609, |
| "learning_rate": 5.075080034419631e-06, |
| "loss": 0.3262, |
| "num_tokens": 2738062284.0, |
| "step": 10460 |
| }, |
| { |
| "epoch": 4.878787878787879, |
| "grad_norm": 0.348047283341893, |
| "learning_rate": 5.072275057451558e-06, |
| "loss": 0.3164, |
| "num_tokens": 2739356820.0, |
| "step": 10465 |
| }, |
| { |
| "epoch": 4.881118881118881, |
| "grad_norm": 0.32910332567884476, |
| "learning_rate": 5.0695233951892345e-06, |
| "loss": 0.3107, |
| "num_tokens": 2740662089.0, |
| "step": 10470 |
| }, |
| { |
| "epoch": 4.883449883449884, |
| "grad_norm": 0.3200833744056691, |
| "learning_rate": 5.066825054173854e-06, |
| "loss": 0.3117, |
| "num_tokens": 2741972809.0, |
| "step": 10475 |
| }, |
| { |
| "epoch": 4.8857808857808855, |
| "grad_norm": 0.3341587779600143, |
| "learning_rate": 5.064180040819858e-06, |
| "loss": 0.3179, |
| "num_tokens": 2743283529.0, |
| "step": 10480 |
| }, |
| { |
| "epoch": 4.888111888111888, |
| "grad_norm": 0.33148410077095414, |
| "learning_rate": 5.0615883614149136e-06, |
| "loss": 0.3172, |
| "num_tokens": 2744594249.0, |
| "step": 10485 |
| }, |
| { |
| "epoch": 4.89044289044289, |
| "grad_norm": 0.3272484124935763, |
| "learning_rate": 5.059050022119904e-06, |
| "loss": 0.3165, |
| "num_tokens": 2745887369.0, |
| "step": 10490 |
| }, |
| { |
| "epoch": 4.892773892773893, |
| "grad_norm": 0.3209922201282976, |
| "learning_rate": 5.056565028968916e-06, |
| "loss": 0.3154, |
| "num_tokens": 2747198089.0, |
| "step": 10495 |
| }, |
| { |
| "epoch": 4.895104895104895, |
| "grad_norm": 0.3269414783389918, |
| "learning_rate": 5.05413338786922e-06, |
| "loss": 0.312, |
| "num_tokens": 2748508809.0, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.897435897435898, |
| "grad_norm": 0.31618238859555264, |
| "learning_rate": 5.051755104601264e-06, |
| "loss": 0.3143, |
| "num_tokens": 2749819529.0, |
| "step": 10505 |
| }, |
| { |
| "epoch": 4.8997668997669, |
| "grad_norm": 0.32153457363047355, |
| "learning_rate": 5.049430184818651e-06, |
| "loss": 0.3224, |
| "num_tokens": 2751117328.0, |
| "step": 10510 |
| }, |
| { |
| "epoch": 4.902097902097902, |
| "grad_norm": 0.31465978988545573, |
| "learning_rate": 5.047158634048129e-06, |
| "loss": 0.3177, |
| "num_tokens": 2752420296.0, |
| "step": 10515 |
| }, |
| { |
| "epoch": 4.9044289044289044, |
| "grad_norm": 0.31044680611687897, |
| "learning_rate": 5.044940457689581e-06, |
| "loss": 0.3105, |
| "num_tokens": 2753731016.0, |
| "step": 10520 |
| }, |
| { |
| "epoch": 4.906759906759907, |
| "grad_norm": 0.31811560483359963, |
| "learning_rate": 5.042775661016008e-06, |
| "loss": 0.3169, |
| "num_tokens": 2755041736.0, |
| "step": 10525 |
| }, |
| { |
| "epoch": 4.909090909090909, |
| "grad_norm": 0.31742684514452485, |
| "learning_rate": 5.040664249173518e-06, |
| "loss": 0.305, |
| "num_tokens": 2756352456.0, |
| "step": 10530 |
| }, |
| { |
| "epoch": 4.911421911421911, |
| "grad_norm": 0.33606166438372637, |
| "learning_rate": 5.038606227181312e-06, |
| "loss": 0.3182, |
| "num_tokens": 2757663176.0, |
| "step": 10535 |
| }, |
| { |
| "epoch": 4.913752913752914, |
| "grad_norm": 0.3205242654431987, |
| "learning_rate": 5.0366015999316775e-06, |
| "loss": 0.3147, |
| "num_tokens": 2758973413.0, |
| "step": 10540 |
| }, |
| { |
| "epoch": 4.916083916083916, |
| "grad_norm": 0.3427922994912874, |
| "learning_rate": 5.034650372189974e-06, |
| "loss": 0.3125, |
| "num_tokens": 2760284133.0, |
| "step": 10545 |
| }, |
| { |
| "epoch": 4.918414918414919, |
| "grad_norm": 0.3310997932717806, |
| "learning_rate": 5.0327525485946135e-06, |
| "loss": 0.3184, |
| "num_tokens": 2761592831.0, |
| "step": 10550 |
| }, |
| { |
| "epoch": 4.9207459207459205, |
| "grad_norm": 0.3393685987917944, |
| "learning_rate": 5.030908133657063e-06, |
| "loss": 0.3156, |
| "num_tokens": 2762895660.0, |
| "step": 10555 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 0.3322488130315273, |
| "learning_rate": 5.029117131761826e-06, |
| "loss": 0.3213, |
| "num_tokens": 2764206380.0, |
| "step": 10560 |
| }, |
| { |
| "epoch": 4.925407925407925, |
| "grad_norm": 0.35642657501632585, |
| "learning_rate": 5.027379547166436e-06, |
| "loss": 0.3173, |
| "num_tokens": 2765517100.0, |
| "step": 10565 |
| }, |
| { |
| "epoch": 4.927738927738928, |
| "grad_norm": 0.33722387753318867, |
| "learning_rate": 5.025695384001438e-06, |
| "loss": 0.3297, |
| "num_tokens": 2766827820.0, |
| "step": 10570 |
| }, |
| { |
| "epoch": 4.93006993006993, |
| "grad_norm": 0.32350013184482923, |
| "learning_rate": 5.02406464627039e-06, |
| "loss": 0.3141, |
| "num_tokens": 2768138540.0, |
| "step": 10575 |
| }, |
| { |
| "epoch": 4.932400932400933, |
| "grad_norm": 0.31985329238680343, |
| "learning_rate": 5.0224873378498475e-06, |
| "loss": 0.3103, |
| "num_tokens": 2769449260.0, |
| "step": 10580 |
| }, |
| { |
| "epoch": 4.934731934731935, |
| "grad_norm": 0.33320437693468646, |
| "learning_rate": 5.0209634624893535e-06, |
| "loss": 0.316, |
| "num_tokens": 2770759980.0, |
| "step": 10585 |
| }, |
| { |
| "epoch": 4.937062937062937, |
| "grad_norm": 0.34199930181650334, |
| "learning_rate": 5.0194930238114344e-06, |
| "loss": 0.3165, |
| "num_tokens": 2772070700.0, |
| "step": 10590 |
| }, |
| { |
| "epoch": 4.9393939393939394, |
| "grad_norm": 0.33840903211030815, |
| "learning_rate": 5.01807602531158e-06, |
| "loss": 0.3279, |
| "num_tokens": 2773381420.0, |
| "step": 10595 |
| }, |
| { |
| "epoch": 4.941724941724941, |
| "grad_norm": 0.33970146942955454, |
| "learning_rate": 5.016712470358254e-06, |
| "loss": 0.3243, |
| "num_tokens": 2774692140.0, |
| "step": 10600 |
| }, |
| { |
| "epoch": 4.944055944055944, |
| "grad_norm": 0.32417078423146617, |
| "learning_rate": 5.015402362192865e-06, |
| "loss": 0.3095, |
| "num_tokens": 2776002860.0, |
| "step": 10605 |
| }, |
| { |
| "epoch": 4.946386946386946, |
| "grad_norm": 0.31477911467606806, |
| "learning_rate": 5.0141457039297765e-06, |
| "loss": 0.3152, |
| "num_tokens": 2777313580.0, |
| "step": 10610 |
| }, |
| { |
| "epoch": 4.948717948717949, |
| "grad_norm": 0.3382420127269151, |
| "learning_rate": 5.012942498556292e-06, |
| "loss": 0.3145, |
| "num_tokens": 2778624300.0, |
| "step": 10615 |
| }, |
| { |
| "epoch": 4.951048951048951, |
| "grad_norm": 0.31746762640712656, |
| "learning_rate": 5.011792748932641e-06, |
| "loss": 0.3067, |
| "num_tokens": 2779935020.0, |
| "step": 10620 |
| }, |
| { |
| "epoch": 4.953379953379954, |
| "grad_norm": 0.3281133514868718, |
| "learning_rate": 5.010696457791986e-06, |
| "loss": 0.3132, |
| "num_tokens": 2781245740.0, |
| "step": 10625 |
| }, |
| { |
| "epoch": 4.9557109557109555, |
| "grad_norm": 0.3239412806584807, |
| "learning_rate": 5.009653627740407e-06, |
| "loss": 0.3212, |
| "num_tokens": 2782556460.0, |
| "step": 10630 |
| }, |
| { |
| "epoch": 4.958041958041958, |
| "grad_norm": 0.3368010167927629, |
| "learning_rate": 5.008664261256898e-06, |
| "loss": 0.3145, |
| "num_tokens": 2783867180.0, |
| "step": 10635 |
| }, |
| { |
| "epoch": 4.96037296037296, |
| "grad_norm": 0.34491073030538105, |
| "learning_rate": 5.007728360693355e-06, |
| "loss": 0.3176, |
| "num_tokens": 2785177900.0, |
| "step": 10640 |
| }, |
| { |
| "epoch": 4.962703962703963, |
| "grad_norm": 0.3510601956916241, |
| "learning_rate": 5.006845928274586e-06, |
| "loss": 0.3187, |
| "num_tokens": 2786488620.0, |
| "step": 10645 |
| }, |
| { |
| "epoch": 4.965034965034965, |
| "grad_norm": 0.3337835159331755, |
| "learning_rate": 5.006016966098288e-06, |
| "loss": 0.314, |
| "num_tokens": 2787799340.0, |
| "step": 10650 |
| }, |
| { |
| "epoch": 4.967365967365968, |
| "grad_norm": 0.3172585463158374, |
| "learning_rate": 5.005241476135051e-06, |
| "loss": 0.317, |
| "num_tokens": 2789110060.0, |
| "step": 10655 |
| }, |
| { |
| "epoch": 4.96969696969697, |
| "grad_norm": 0.32635822297017036, |
| "learning_rate": 5.004519460228356e-06, |
| "loss": 0.3102, |
| "num_tokens": 2790420780.0, |
| "step": 10660 |
| }, |
| { |
| "epoch": 4.972027972027972, |
| "grad_norm": 0.34214637134938164, |
| "learning_rate": 5.003850920094564e-06, |
| "loss": 0.3127, |
| "num_tokens": 2791731500.0, |
| "step": 10665 |
| }, |
| { |
| "epoch": 4.9743589743589745, |
| "grad_norm": 0.31863256082976954, |
| "learning_rate": 5.00323585732291e-06, |
| "loss": 0.3037, |
| "num_tokens": 2793042220.0, |
| "step": 10670 |
| }, |
| { |
| "epoch": 4.976689976689976, |
| "grad_norm": 0.33066360911088155, |
| "learning_rate": 5.00267427337551e-06, |
| "loss": 0.3155, |
| "num_tokens": 2794352940.0, |
| "step": 10675 |
| }, |
| { |
| "epoch": 4.979020979020979, |
| "grad_norm": 0.32620437111284734, |
| "learning_rate": 5.002166169587351e-06, |
| "loss": 0.3171, |
| "num_tokens": 2795657814.0, |
| "step": 10680 |
| }, |
| { |
| "epoch": 4.981351981351981, |
| "grad_norm": 0.33430055011282034, |
| "learning_rate": 5.001711547166285e-06, |
| "loss": 0.3189, |
| "num_tokens": 2796968534.0, |
| "step": 10685 |
| }, |
| { |
| "epoch": 4.983682983682984, |
| "grad_norm": 0.3224437309450024, |
| "learning_rate": 5.001310407193031e-06, |
| "loss": 0.3223, |
| "num_tokens": 2798279254.0, |
| "step": 10690 |
| }, |
| { |
| "epoch": 4.986013986013986, |
| "grad_norm": 0.32410671370691807, |
| "learning_rate": 5.000962750621168e-06, |
| "loss": 0.3311, |
| "num_tokens": 2799589974.0, |
| "step": 10695 |
| }, |
| { |
| "epoch": 4.988344988344989, |
| "grad_norm": 0.3235106959855654, |
| "learning_rate": 5.0006685782771445e-06, |
| "loss": 0.3132, |
| "num_tokens": 2800887285.0, |
| "step": 10700 |
| }, |
| { |
| "epoch": 4.9906759906759905, |
| "grad_norm": 0.3305908727408153, |
| "learning_rate": 5.000427890860252e-06, |
| "loss": 0.3113, |
| "num_tokens": 2802198005.0, |
| "step": 10705 |
| }, |
| { |
| "epoch": 4.993006993006993, |
| "grad_norm": 0.33403825665973846, |
| "learning_rate": 5.000240688942652e-06, |
| "loss": 0.3186, |
| "num_tokens": 2803508725.0, |
| "step": 10710 |
| }, |
| { |
| "epoch": 4.995337995337995, |
| "grad_norm": 0.3278267015029189, |
| "learning_rate": 5.000106972969358e-06, |
| "loss": 0.3166, |
| "num_tokens": 2804819445.0, |
| "step": 10715 |
| }, |
| { |
| "epoch": 4.997668997668997, |
| "grad_norm": 0.3295223914621982, |
| "learning_rate": 5.000026743258234e-06, |
| "loss": 0.3119, |
| "num_tokens": 2806130165.0, |
| "step": 10720 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.3242625515113468, |
| "learning_rate": 5e-06, |
| "loss": 0.3108, |
| "num_tokens": 2807440885.0, |
| "step": 10725 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 10725, |
| "total_flos": 2444245755494400.0, |
| "train_loss": 0.42360519842668015, |
| "train_runtime": 82605.1286, |
| "train_samples_per_second": 2.077, |
| "train_steps_per_second": 0.13 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 10725, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2444245755494400.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |