| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.15151515151515152, | |
| "eval_steps": 50000, | |
| "global_step": 90000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0003367003367003367, | |
| "grad_norm": 0.3848826289176941, | |
| "learning_rate": 4.998316498316499e-05, | |
| "loss": 0.483, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0006734006734006734, | |
| "grad_norm": 0.28108373284339905, | |
| "learning_rate": 4.9966329966329964e-05, | |
| "loss": 0.4907, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.00101010101010101, | |
| "grad_norm": 1.1632909774780273, | |
| "learning_rate": 4.994949494949495e-05, | |
| "loss": 0.4656, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0013468013468013469, | |
| "grad_norm": 0.38611674308776855, | |
| "learning_rate": 4.993265993265993e-05, | |
| "loss": 0.4801, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0016835016835016834, | |
| "grad_norm": 0.44287604093551636, | |
| "learning_rate": 4.991582491582492e-05, | |
| "loss": 0.5212, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.00202020202020202, | |
| "grad_norm": 0.3867124617099762, | |
| "learning_rate": 4.9899074074074075e-05, | |
| "loss": 0.4678, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0023569023569023568, | |
| "grad_norm": 0.5485501885414124, | |
| "learning_rate": 4.988223905723906e-05, | |
| "loss": 0.4865, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.0026936026936026937, | |
| "grad_norm": 0.20481592416763306, | |
| "learning_rate": 4.986548821548822e-05, | |
| "loss": 0.5055, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.0030303030303030303, | |
| "grad_norm": 0.2336668223142624, | |
| "learning_rate": 4.98486531986532e-05, | |
| "loss": 0.4695, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.003367003367003367, | |
| "grad_norm": 0.5381026268005371, | |
| "learning_rate": 4.9831818181818186e-05, | |
| "loss": 0.4399, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.003703703703703704, | |
| "grad_norm": 0.7313366532325745, | |
| "learning_rate": 4.981498316498317e-05, | |
| "loss": 0.4459, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.00404040404040404, | |
| "grad_norm": 0.36844003200531006, | |
| "learning_rate": 4.979814814814815e-05, | |
| "loss": 0.4606, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.004377104377104377, | |
| "grad_norm": 0.3787059187889099, | |
| "learning_rate": 4.978131313131313e-05, | |
| "loss": 0.5135, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.0047138047138047135, | |
| "grad_norm": 0.1680217981338501, | |
| "learning_rate": 4.9764478114478116e-05, | |
| "loss": 0.4913, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.005050505050505051, | |
| "grad_norm": 0.2730850875377655, | |
| "learning_rate": 4.97476430976431e-05, | |
| "loss": 0.4262, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.0053872053872053875, | |
| "grad_norm": 0.9977230429649353, | |
| "learning_rate": 4.9730808080808085e-05, | |
| "loss": 0.4774, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.005723905723905724, | |
| "grad_norm": 0.38187840580940247, | |
| "learning_rate": 4.971397306397307e-05, | |
| "loss": 0.4642, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.006060606060606061, | |
| "grad_norm": 0.33152151107788086, | |
| "learning_rate": 4.969713804713805e-05, | |
| "loss": 0.4542, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.006397306397306397, | |
| "grad_norm": 0.4819263815879822, | |
| "learning_rate": 4.968030303030303e-05, | |
| "loss": 0.456, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.006734006734006734, | |
| "grad_norm": 0.7936732172966003, | |
| "learning_rate": 4.9663468013468016e-05, | |
| "loss": 0.5043, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.007070707070707071, | |
| "grad_norm": 0.15946243703365326, | |
| "learning_rate": 4.9646632996632993e-05, | |
| "loss": 0.4722, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.007407407407407408, | |
| "grad_norm": 0.32496747374534607, | |
| "learning_rate": 4.9629797979797985e-05, | |
| "loss": 0.5135, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.007744107744107744, | |
| "grad_norm": 0.4165472388267517, | |
| "learning_rate": 4.961304713804714e-05, | |
| "loss": 0.5593, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.00808080808080808, | |
| "grad_norm": 0.25771287083625793, | |
| "learning_rate": 4.959621212121212e-05, | |
| "loss": 0.4484, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.008417508417508417, | |
| "grad_norm": 0.3406078815460205, | |
| "learning_rate": 4.9579377104377104e-05, | |
| "loss": 0.4823, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.008754208754208754, | |
| "grad_norm": 8.295658111572266, | |
| "learning_rate": 4.956254208754209e-05, | |
| "loss": 0.4635, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.00909090909090909, | |
| "grad_norm": 0.44405078887939453, | |
| "learning_rate": 4.954570707070707e-05, | |
| "loss": 0.4985, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.009427609427609427, | |
| "grad_norm": 0.4546993374824524, | |
| "learning_rate": 4.952887205387206e-05, | |
| "loss": 0.5113, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.009764309764309764, | |
| "grad_norm": 0.5952478647232056, | |
| "learning_rate": 4.951203703703704e-05, | |
| "loss": 0.514, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.010101010101010102, | |
| "grad_norm": 0.27716225385665894, | |
| "learning_rate": 4.9495202020202026e-05, | |
| "loss": 0.5375, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.010437710437710438, | |
| "grad_norm": 0.7038645148277283, | |
| "learning_rate": 4.9478367003367004e-05, | |
| "loss": 0.4905, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.010774410774410775, | |
| "grad_norm": 0.48266687989234924, | |
| "learning_rate": 4.946153198653199e-05, | |
| "loss": 0.4924, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": 0.5230734944343567, | |
| "learning_rate": 4.944469696969697e-05, | |
| "loss": 0.533, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.011447811447811448, | |
| "grad_norm": 1.7186241149902344, | |
| "learning_rate": 4.942794612794613e-05, | |
| "loss": 0.4852, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.011784511784511785, | |
| "grad_norm": 0.5641151666641235, | |
| "learning_rate": 4.9411111111111114e-05, | |
| "loss": 0.492, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.012121212121212121, | |
| "grad_norm": 0.18955977261066437, | |
| "learning_rate": 4.93942760942761e-05, | |
| "loss": 0.4994, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.012457912457912458, | |
| "grad_norm": 0.4500541687011719, | |
| "learning_rate": 4.9377441077441076e-05, | |
| "loss": 0.4761, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.012794612794612794, | |
| "grad_norm": 0.5736501812934875, | |
| "learning_rate": 4.936060606060606e-05, | |
| "loss": 0.4835, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.013131313131313131, | |
| "grad_norm": 0.12210117280483246, | |
| "learning_rate": 4.9343771043771045e-05, | |
| "loss": 0.4753, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.013468013468013467, | |
| "grad_norm": 0.6027535796165466, | |
| "learning_rate": 4.932693602693603e-05, | |
| "loss": 0.442, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.013804713804713804, | |
| "grad_norm": 0.6002740859985352, | |
| "learning_rate": 4.9310101010101014e-05, | |
| "loss": 0.4668, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.014141414141414142, | |
| "grad_norm": 0.45654693245887756, | |
| "learning_rate": 4.9293265993266e-05, | |
| "loss": 0.4792, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.014478114478114479, | |
| "grad_norm": 0.5119714736938477, | |
| "learning_rate": 4.9276430976430976e-05, | |
| "loss": 0.4767, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.014814814814814815, | |
| "grad_norm": 0.22278934717178345, | |
| "learning_rate": 4.925959595959596e-05, | |
| "loss": 0.4826, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.015151515151515152, | |
| "grad_norm": 0.265720933675766, | |
| "learning_rate": 4.9242845117845124e-05, | |
| "loss": 0.5267, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.015488215488215488, | |
| "grad_norm": 0.4445594847202301, | |
| "learning_rate": 4.922601010101011e-05, | |
| "loss": 0.507, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.015824915824915825, | |
| "grad_norm": 0.6113983988761902, | |
| "learning_rate": 4.9209175084175086e-05, | |
| "loss": 0.5192, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.01616161616161616, | |
| "grad_norm": 0.2967589497566223, | |
| "learning_rate": 4.919234006734007e-05, | |
| "loss": 0.4728, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.016498316498316498, | |
| "grad_norm": 1.5599192380905151, | |
| "learning_rate": 4.9175505050505055e-05, | |
| "loss": 0.5145, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.016835016835016835, | |
| "grad_norm": 0.6804638504981995, | |
| "learning_rate": 4.915867003367003e-05, | |
| "loss": 0.4293, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.01717171717171717, | |
| "grad_norm": 0.5212819576263428, | |
| "learning_rate": 4.914183501683502e-05, | |
| "loss": 0.5484, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.017508417508417508, | |
| "grad_norm": 0.5872311592102051, | |
| "learning_rate": 4.9125e-05, | |
| "loss": 0.502, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.017845117845117844, | |
| "grad_norm": 0.2819989323616028, | |
| "learning_rate": 4.9108164983164986e-05, | |
| "loss": 0.474, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.01818181818181818, | |
| "grad_norm": 0.2001451998949051, | |
| "learning_rate": 4.909132996632997e-05, | |
| "loss": 0.4348, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.018518518518518517, | |
| "grad_norm": 2.0528833866119385, | |
| "learning_rate": 4.9074494949494955e-05, | |
| "loss": 0.4569, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.018855218855218854, | |
| "grad_norm": 0.7602177858352661, | |
| "learning_rate": 4.905765993265993e-05, | |
| "loss": 0.4319, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.01919191919191919, | |
| "grad_norm": 0.2091585099697113, | |
| "learning_rate": 4.904090909090909e-05, | |
| "loss": 0.4193, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.019528619528619527, | |
| "grad_norm": 0.20664581656455994, | |
| "learning_rate": 4.902407407407408e-05, | |
| "loss": 0.493, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.019865319865319864, | |
| "grad_norm": 0.40701425075531006, | |
| "learning_rate": 4.900723905723906e-05, | |
| "loss": 0.5205, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.020202020202020204, | |
| "grad_norm": 1.0059823989868164, | |
| "learning_rate": 4.899040404040404e-05, | |
| "loss": 0.4822, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.02053872053872054, | |
| "grad_norm": 0.2437160164117813, | |
| "learning_rate": 4.897356902356903e-05, | |
| "loss": 0.4796, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.020875420875420877, | |
| "grad_norm": 0.3996870219707489, | |
| "learning_rate": 4.8956734006734005e-05, | |
| "loss": 0.4243, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.021212121212121213, | |
| "grad_norm": 1.2844949960708618, | |
| "learning_rate": 4.893989898989899e-05, | |
| "loss": 0.4628, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.02154882154882155, | |
| "grad_norm": 0.5382771492004395, | |
| "learning_rate": 4.8923063973063974e-05, | |
| "loss": 0.5122, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.021885521885521887, | |
| "grad_norm": 1.4352107048034668, | |
| "learning_rate": 4.890622895622896e-05, | |
| "loss": 0.4959, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": 0.3689097464084625, | |
| "learning_rate": 4.888939393939394e-05, | |
| "loss": 0.4711, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.02255892255892256, | |
| "grad_norm": 0.13188982009887695, | |
| "learning_rate": 4.887255892255893e-05, | |
| "loss": 0.4761, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.022895622895622896, | |
| "grad_norm": 0.18019753694534302, | |
| "learning_rate": 4.885572390572391e-05, | |
| "loss": 0.4604, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.023232323232323233, | |
| "grad_norm": 0.4621998071670532, | |
| "learning_rate": 4.883888888888889e-05, | |
| "loss": 0.5366, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.02356902356902357, | |
| "grad_norm": 0.21382929384708405, | |
| "learning_rate": 4.882205387205387e-05, | |
| "loss": 0.4864, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.023905723905723906, | |
| "grad_norm": 0.2051325887441635, | |
| "learning_rate": 4.880521885521886e-05, | |
| "loss": 0.458, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.024242424242424242, | |
| "grad_norm": 0.3620564341545105, | |
| "learning_rate": 4.8788468013468015e-05, | |
| "loss": 0.499, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.02457912457912458, | |
| "grad_norm": 0.9814438223838806, | |
| "learning_rate": 4.8771632996633e-05, | |
| "loss": 0.442, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.024915824915824916, | |
| "grad_norm": 0.46618032455444336, | |
| "learning_rate": 4.8754797979797984e-05, | |
| "loss": 0.4635, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.025252525252525252, | |
| "grad_norm": 0.2610645592212677, | |
| "learning_rate": 4.873796296296296e-05, | |
| "loss": 0.458, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.02558922558922559, | |
| "grad_norm": 0.3188152611255646, | |
| "learning_rate": 4.8721127946127946e-05, | |
| "loss": 0.4726, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.025925925925925925, | |
| "grad_norm": 0.3566981852054596, | |
| "learning_rate": 4.870429292929293e-05, | |
| "loss": 0.4847, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.026262626262626262, | |
| "grad_norm": 1.0731638669967651, | |
| "learning_rate": 4.8687457912457914e-05, | |
| "loss": 0.4912, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.0265993265993266, | |
| "grad_norm": 0.4343542456626892, | |
| "learning_rate": 4.86706228956229e-05, | |
| "loss": 0.471, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.026936026936026935, | |
| "grad_norm": 0.37956860661506653, | |
| "learning_rate": 4.865378787878788e-05, | |
| "loss": 0.4797, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.02727272727272727, | |
| "grad_norm": 0.49000558257102966, | |
| "learning_rate": 4.863695286195287e-05, | |
| "loss": 0.5084, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.027609427609427608, | |
| "grad_norm": 0.28972625732421875, | |
| "learning_rate": 4.8620117845117845e-05, | |
| "loss": 0.493, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.027946127946127945, | |
| "grad_norm": 0.5928806662559509, | |
| "learning_rate": 4.860328282828283e-05, | |
| "loss": 0.5001, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.028282828282828285, | |
| "grad_norm": 0.4121922552585602, | |
| "learning_rate": 4.8586447811447814e-05, | |
| "loss": 0.448, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.02861952861952862, | |
| "grad_norm": 0.3214101195335388, | |
| "learning_rate": 4.85696127946128e-05, | |
| "loss": 0.5098, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.028956228956228958, | |
| "grad_norm": 2.317594289779663, | |
| "learning_rate": 4.855277777777778e-05, | |
| "loss": 0.4628, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.029292929292929294, | |
| "grad_norm": 0.46101972460746765, | |
| "learning_rate": 4.853594276094277e-05, | |
| "loss": 0.4556, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.02962962962962963, | |
| "grad_norm": 0.24499452114105225, | |
| "learning_rate": 4.8519107744107745e-05, | |
| "loss": 0.4103, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.029966329966329967, | |
| "grad_norm": 0.21861068904399872, | |
| "learning_rate": 4.850227272727273e-05, | |
| "loss": 0.4887, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.030303030303030304, | |
| "grad_norm": 0.6664220094680786, | |
| "learning_rate": 4.848543771043771e-05, | |
| "loss": 0.4822, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.03063973063973064, | |
| "grad_norm": 0.5134005546569824, | |
| "learning_rate": 4.846860269360269e-05, | |
| "loss": 0.4432, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.030976430976430977, | |
| "grad_norm": 0.31726887822151184, | |
| "learning_rate": 4.8451851851851855e-05, | |
| "loss": 0.4757, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.031313131313131314, | |
| "grad_norm": 0.5236911177635193, | |
| "learning_rate": 4.843501683501684e-05, | |
| "loss": 0.4522, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.03164983164983165, | |
| "grad_norm": 0.359935998916626, | |
| "learning_rate": 4.841818181818182e-05, | |
| "loss": 0.4901, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.03198653198653199, | |
| "grad_norm": 0.5292563438415527, | |
| "learning_rate": 4.84013468013468e-05, | |
| "loss": 0.4962, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.03232323232323232, | |
| "grad_norm": 0.5163784623146057, | |
| "learning_rate": 4.8384511784511786e-05, | |
| "loss": 0.4427, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.03265993265993266, | |
| "grad_norm": 0.19916895031929016, | |
| "learning_rate": 4.836767676767677e-05, | |
| "loss": 0.4778, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.032996632996632996, | |
| "grad_norm": 0.16879796981811523, | |
| "learning_rate": 4.8350841750841755e-05, | |
| "loss": 0.4561, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": 0.40591439604759216, | |
| "learning_rate": 4.833400673400674e-05, | |
| "loss": 0.532, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.03367003367003367, | |
| "grad_norm": 0.27528542280197144, | |
| "learning_rate": 4.8317171717171723e-05, | |
| "loss": 0.5181, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.034006734006734006, | |
| "grad_norm": 0.46540895104408264, | |
| "learning_rate": 4.83003367003367e-05, | |
| "loss": 0.5005, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.03434343434343434, | |
| "grad_norm": 0.4676566421985626, | |
| "learning_rate": 4.8283501683501685e-05, | |
| "loss": 0.4752, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.03468013468013468, | |
| "grad_norm": 0.5396921038627625, | |
| "learning_rate": 4.826666666666667e-05, | |
| "loss": 0.4566, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.035016835016835016, | |
| "grad_norm": 0.1875556856393814, | |
| "learning_rate": 4.824983164983165e-05, | |
| "loss": 0.4705, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.03535353535353535, | |
| "grad_norm": 0.5470389723777771, | |
| "learning_rate": 4.823299663299664e-05, | |
| "loss": 0.5035, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.03569023569023569, | |
| "grad_norm": 0.2772787809371948, | |
| "learning_rate": 4.821616161616162e-05, | |
| "loss": 0.4857, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.036026936026936025, | |
| "grad_norm": 0.43938860297203064, | |
| "learning_rate": 4.81993265993266e-05, | |
| "loss": 0.5107, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.03636363636363636, | |
| "grad_norm": 0.2839397192001343, | |
| "learning_rate": 4.818257575757576e-05, | |
| "loss": 0.451, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.0367003367003367, | |
| "grad_norm": 0.46151599287986755, | |
| "learning_rate": 4.816574074074074e-05, | |
| "loss": 0.4984, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 0.4271756410598755, | |
| "learning_rate": 4.814890572390573e-05, | |
| "loss": 0.4462, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.03737373737373737, | |
| "grad_norm": 0.20119212567806244, | |
| "learning_rate": 4.813207070707071e-05, | |
| "loss": 0.4914, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.03771043771043771, | |
| "grad_norm": 0.7174796462059021, | |
| "learning_rate": 4.8115235690235696e-05, | |
| "loss": 0.4468, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.038047138047138045, | |
| "grad_norm": 0.25557178258895874, | |
| "learning_rate": 4.809840067340067e-05, | |
| "loss": 0.4969, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.03838383838383838, | |
| "grad_norm": 0.2094777226448059, | |
| "learning_rate": 4.808156565656566e-05, | |
| "loss": 0.4552, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.03872053872053872, | |
| "grad_norm": 0.2642809748649597, | |
| "learning_rate": 4.806473063973064e-05, | |
| "loss": 0.4741, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.039057239057239054, | |
| "grad_norm": 0.9128819704055786, | |
| "learning_rate": 4.8047895622895626e-05, | |
| "loss": 0.4964, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.03939393939393939, | |
| "grad_norm": 0.6326189637184143, | |
| "learning_rate": 4.8031060606060604e-05, | |
| "loss": 0.4749, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.03973063973063973, | |
| "grad_norm": 0.6259990334510803, | |
| "learning_rate": 4.8014225589225595e-05, | |
| "loss": 0.4915, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.040067340067340064, | |
| "grad_norm": 0.22890082001686096, | |
| "learning_rate": 4.799739057239058e-05, | |
| "loss": 0.4941, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.04040404040404041, | |
| "grad_norm": 0.28990963101387024, | |
| "learning_rate": 4.798063973063973e-05, | |
| "loss": 0.4874, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.040740740740740744, | |
| "grad_norm": 0.21455387771129608, | |
| "learning_rate": 4.7963804713804715e-05, | |
| "loss": 0.5352, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.04107744107744108, | |
| "grad_norm": 0.17958062887191772, | |
| "learning_rate": 4.79469696969697e-05, | |
| "loss": 0.421, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.04141414141414142, | |
| "grad_norm": 0.36474618315696716, | |
| "learning_rate": 4.793013468013468e-05, | |
| "loss": 0.4513, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.041750841750841754, | |
| "grad_norm": 0.4638340175151825, | |
| "learning_rate": 4.791329966329967e-05, | |
| "loss": 0.5075, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.04208754208754209, | |
| "grad_norm": 0.2026415318250656, | |
| "learning_rate": 4.789646464646465e-05, | |
| "loss": 0.4807, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.04242424242424243, | |
| "grad_norm": 0.3414445221424103, | |
| "learning_rate": 4.787962962962963e-05, | |
| "loss": 0.4731, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.04276094276094276, | |
| "grad_norm": 0.20735152065753937, | |
| "learning_rate": 4.7862794612794614e-05, | |
| "loss": 0.5116, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.0430976430976431, | |
| "grad_norm": 0.3263112008571625, | |
| "learning_rate": 4.78459595959596e-05, | |
| "loss": 0.4675, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.043434343434343436, | |
| "grad_norm": 1.1240352392196655, | |
| "learning_rate": 4.7829124579124576e-05, | |
| "loss": 0.4651, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.04377104377104377, | |
| "grad_norm": 0.47903600335121155, | |
| "learning_rate": 4.781228956228957e-05, | |
| "loss": 0.4696, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.04410774410774411, | |
| "grad_norm": 2.3555760383605957, | |
| "learning_rate": 4.779545454545455e-05, | |
| "loss": 0.4826, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 0.22116072475910187, | |
| "learning_rate": 4.777861952861953e-05, | |
| "loss": 0.483, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.04478114478114478, | |
| "grad_norm": 0.7030754685401917, | |
| "learning_rate": 4.7761784511784514e-05, | |
| "loss": 0.4317, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.04511784511784512, | |
| "grad_norm": 0.3827281594276428, | |
| "learning_rate": 4.774503367003367e-05, | |
| "loss": 0.4292, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 0.8839394450187683, | |
| "learning_rate": 4.7728198653198655e-05, | |
| "loss": 0.4646, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.04579124579124579, | |
| "grad_norm": 0.5600021481513977, | |
| "learning_rate": 4.771136363636364e-05, | |
| "loss": 0.5081, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.04612794612794613, | |
| "grad_norm": 0.23018187284469604, | |
| "learning_rate": 4.7694528619528624e-05, | |
| "loss": 0.4908, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.046464646464646465, | |
| "grad_norm": 0.456559419631958, | |
| "learning_rate": 4.767769360269361e-05, | |
| "loss": 0.4872, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.0468013468013468, | |
| "grad_norm": 0.12121502310037613, | |
| "learning_rate": 4.7660858585858586e-05, | |
| "loss": 0.4979, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.04713804713804714, | |
| "grad_norm": 0.38946759700775146, | |
| "learning_rate": 4.764402356902357e-05, | |
| "loss": 0.4651, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.047474747474747475, | |
| "grad_norm": 2.142538547515869, | |
| "learning_rate": 4.7627188552188555e-05, | |
| "loss": 0.4288, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.04781144781144781, | |
| "grad_norm": 0.15298590064048767, | |
| "learning_rate": 4.761035353535353e-05, | |
| "loss": 0.4365, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.04814814814814815, | |
| "grad_norm": 0.6363445520401001, | |
| "learning_rate": 4.7593518518518524e-05, | |
| "loss": 0.4428, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.048484848484848485, | |
| "grad_norm": 0.5169795751571655, | |
| "learning_rate": 4.757668350168351e-05, | |
| "loss": 0.4666, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.04882154882154882, | |
| "grad_norm": 0.2661610245704651, | |
| "learning_rate": 4.7559848484848486e-05, | |
| "loss": 0.4492, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.04915824915824916, | |
| "grad_norm": 0.4399496018886566, | |
| "learning_rate": 4.754301346801347e-05, | |
| "loss": 0.4504, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.049494949494949494, | |
| "grad_norm": 0.31160300970077515, | |
| "learning_rate": 4.7526178451178454e-05, | |
| "loss": 0.4527, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.04983164983164983, | |
| "grad_norm": 0.6169541478157043, | |
| "learning_rate": 4.750934343434343e-05, | |
| "loss": 0.4995, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.05016835016835017, | |
| "grad_norm": 0.43009576201438904, | |
| "learning_rate": 4.7492508417508416e-05, | |
| "loss": 0.4681, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.050505050505050504, | |
| "grad_norm": 0.3901723623275757, | |
| "learning_rate": 4.74756734006734e-05, | |
| "loss": 0.4509, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.05084175084175084, | |
| "grad_norm": 0.2859044373035431, | |
| "learning_rate": 4.745883838383839e-05, | |
| "loss": 0.4262, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.05117845117845118, | |
| "grad_norm": 0.29008200764656067, | |
| "learning_rate": 4.744208754208754e-05, | |
| "loss": 0.434, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.051515151515151514, | |
| "grad_norm": 0.8030261397361755, | |
| "learning_rate": 4.742525252525253e-05, | |
| "loss": 0.5066, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.05185185185185185, | |
| "grad_norm": 0.6552255153656006, | |
| "learning_rate": 4.740841750841751e-05, | |
| "loss": 0.4888, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.05218855218855219, | |
| "grad_norm": 0.47479531168937683, | |
| "learning_rate": 4.7391582491582496e-05, | |
| "loss": 0.4886, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.052525252525252523, | |
| "grad_norm": 0.6294977068901062, | |
| "learning_rate": 4.737474747474748e-05, | |
| "loss": 0.4144, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.05286195286195286, | |
| "grad_norm": 0.346327006816864, | |
| "learning_rate": 4.7357912457912465e-05, | |
| "loss": 0.517, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.0531986531986532, | |
| "grad_norm": 0.4449813961982727, | |
| "learning_rate": 4.734107744107744e-05, | |
| "loss": 0.4681, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.05353535353535353, | |
| "grad_norm": 0.4756247401237488, | |
| "learning_rate": 4.7324242424242426e-05, | |
| "loss": 0.4772, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.05387205387205387, | |
| "grad_norm": 0.5156170129776001, | |
| "learning_rate": 4.730740740740741e-05, | |
| "loss": 0.456, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.054208754208754206, | |
| "grad_norm": 1.6009584665298462, | |
| "learning_rate": 4.729057239057239e-05, | |
| "loss": 0.4483, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.05454545454545454, | |
| "grad_norm": 0.4179598391056061, | |
| "learning_rate": 4.727373737373737e-05, | |
| "loss": 0.4438, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.05488215488215488, | |
| "grad_norm": 0.09701373428106308, | |
| "learning_rate": 4.725690235690236e-05, | |
| "loss": 0.4746, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.055218855218855216, | |
| "grad_norm": 0.4205819368362427, | |
| "learning_rate": 4.724006734006734e-05, | |
| "loss": 0.4459, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 0.5670439004898071, | |
| "learning_rate": 4.7223232323232326e-05, | |
| "loss": 0.5025, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.05589225589225589, | |
| "grad_norm": 0.47377878427505493, | |
| "learning_rate": 4.720639730639731e-05, | |
| "loss": 0.4746, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.056228956228956226, | |
| "grad_norm": 0.254245400428772, | |
| "learning_rate": 4.718956228956229e-05, | |
| "loss": 0.4926, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.05656565656565657, | |
| "grad_norm": 0.299713671207428, | |
| "learning_rate": 4.717272727272727e-05, | |
| "loss": 0.4748, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.056902356902356906, | |
| "grad_norm": 0.16089321672916412, | |
| "learning_rate": 4.715589225589226e-05, | |
| "loss": 0.4813, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.05723905723905724, | |
| "grad_norm": 0.27492621541023254, | |
| "learning_rate": 4.713914141414142e-05, | |
| "loss": 0.473, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.05757575757575758, | |
| "grad_norm": 0.8170735239982605, | |
| "learning_rate": 4.71223063973064e-05, | |
| "loss": 0.4251, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.057912457912457915, | |
| "grad_norm": 0.41072168946266174, | |
| "learning_rate": 4.710547138047138e-05, | |
| "loss": 0.4692, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.05824915824915825, | |
| "grad_norm": 0.37332773208618164, | |
| "learning_rate": 4.708863636363637e-05, | |
| "loss": 0.4289, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.05858585858585859, | |
| "grad_norm": 0.3257604241371155, | |
| "learning_rate": 4.7071801346801345e-05, | |
| "loss": 0.4623, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.058922558922558925, | |
| "grad_norm": 0.23426009714603424, | |
| "learning_rate": 4.705496632996633e-05, | |
| "loss": 0.5082, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.05925925925925926, | |
| "grad_norm": 0.28719109296798706, | |
| "learning_rate": 4.703813131313132e-05, | |
| "loss": 0.4767, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.0595959595959596, | |
| "grad_norm": 0.35480618476867676, | |
| "learning_rate": 4.70212962962963e-05, | |
| "loss": 0.5006, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.059932659932659935, | |
| "grad_norm": 0.5050226449966431, | |
| "learning_rate": 4.700446127946128e-05, | |
| "loss": 0.4809, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.06026936026936027, | |
| "grad_norm": 0.2631937265396118, | |
| "learning_rate": 4.698762626262627e-05, | |
| "loss": 0.4508, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 0.32295939326286316, | |
| "learning_rate": 4.6970791245791244e-05, | |
| "loss": 0.4697, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.060942760942760944, | |
| "grad_norm": 0.34667742252349854, | |
| "learning_rate": 4.695404040404041e-05, | |
| "loss": 0.4692, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.06127946127946128, | |
| "grad_norm": 0.6987492442131042, | |
| "learning_rate": 4.693720538720539e-05, | |
| "loss": 0.4596, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.06161616161616162, | |
| "grad_norm": 0.4795779883861542, | |
| "learning_rate": 4.692037037037037e-05, | |
| "loss": 0.4361, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.061952861952861954, | |
| "grad_norm": 0.5291064381599426, | |
| "learning_rate": 4.6903535353535355e-05, | |
| "loss": 0.4408, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.06228956228956229, | |
| "grad_norm": 0.46040576696395874, | |
| "learning_rate": 4.688670033670034e-05, | |
| "loss": 0.4976, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.06262626262626263, | |
| "grad_norm": 2.0511856079101562, | |
| "learning_rate": 4.6869865319865324e-05, | |
| "loss": 0.4747, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.06296296296296296, | |
| "grad_norm": 0.6845996975898743, | |
| "learning_rate": 4.68530303030303e-05, | |
| "loss": 0.4145, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.0632996632996633, | |
| "grad_norm": 0.2830463945865631, | |
| "learning_rate": 4.6836195286195286e-05, | |
| "loss": 0.4888, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.06363636363636363, | |
| "grad_norm": 0.4033803343772888, | |
| "learning_rate": 4.681936026936028e-05, | |
| "loss": 0.4584, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.06397306397306397, | |
| "grad_norm": 0.26968395709991455, | |
| "learning_rate": 4.6802525252525255e-05, | |
| "loss": 0.4246, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.0643097643097643, | |
| "grad_norm": 0.14037840068340302, | |
| "learning_rate": 4.678569023569024e-05, | |
| "loss": 0.4047, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.06464646464646465, | |
| "grad_norm": 0.2874729335308075, | |
| "learning_rate": 4.676885521885522e-05, | |
| "loss": 0.4445, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.06498316498316499, | |
| "grad_norm": 0.2633935213088989, | |
| "learning_rate": 4.67520202020202e-05, | |
| "loss": 0.4435, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.06531986531986532, | |
| "grad_norm": 0.4510101079940796, | |
| "learning_rate": 4.6735185185185185e-05, | |
| "loss": 0.4724, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.06565656565656566, | |
| "grad_norm": 0.20095351338386536, | |
| "learning_rate": 4.671835016835017e-05, | |
| "loss": 0.5128, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.06599326599326599, | |
| "grad_norm": 0.4433535635471344, | |
| "learning_rate": 4.670159932659933e-05, | |
| "loss": 0.4581, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.06632996632996634, | |
| "grad_norm": 0.5821954607963562, | |
| "learning_rate": 4.668476430976431e-05, | |
| "loss": 0.4985, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 1.3577245473861694, | |
| "learning_rate": 4.6667929292929296e-05, | |
| "loss": 0.5283, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.06700336700336701, | |
| "grad_norm": 0.37699806690216064, | |
| "learning_rate": 4.6651094276094274e-05, | |
| "loss": 0.4825, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.06734006734006734, | |
| "grad_norm": 0.41804903745651245, | |
| "learning_rate": 4.663425925925926e-05, | |
| "loss": 0.4907, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.06767676767676768, | |
| "grad_norm": 0.243534654378891, | |
| "learning_rate": 4.661742424242425e-05, | |
| "loss": 0.5059, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.06801346801346801, | |
| "grad_norm": 0.1186649277806282, | |
| "learning_rate": 4.660058922558923e-05, | |
| "loss": 0.4808, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.06835016835016836, | |
| "grad_norm": 0.30161020159721375, | |
| "learning_rate": 4.658375420875421e-05, | |
| "loss": 0.4816, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.06868686868686869, | |
| "grad_norm": 0.2418268918991089, | |
| "learning_rate": 4.6566919191919195e-05, | |
| "loss": 0.4371, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.06902356902356903, | |
| "grad_norm": 0.2833971381187439, | |
| "learning_rate": 4.655008417508418e-05, | |
| "loss": 0.4686, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.06936026936026936, | |
| "grad_norm": 0.2797035276889801, | |
| "learning_rate": 4.653324915824916e-05, | |
| "loss": 0.4599, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.0696969696969697, | |
| "grad_norm": 0.3841836154460907, | |
| "learning_rate": 4.651641414141414e-05, | |
| "loss": 0.4111, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.07003367003367003, | |
| "grad_norm": 0.6590111255645752, | |
| "learning_rate": 4.6499579124579126e-05, | |
| "loss": 0.4631, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.07037037037037037, | |
| "grad_norm": 0.29527220129966736, | |
| "learning_rate": 4.648274410774411e-05, | |
| "loss": 0.4963, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.0707070707070707, | |
| "grad_norm": 0.7178300619125366, | |
| "learning_rate": 4.6465909090909095e-05, | |
| "loss": 0.4694, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.07104377104377105, | |
| "grad_norm": 0.38491058349609375, | |
| "learning_rate": 4.644907407407408e-05, | |
| "loss": 0.4437, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.07138047138047138, | |
| "grad_norm": 0.3037305772304535, | |
| "learning_rate": 4.643223905723906e-05, | |
| "loss": 0.4635, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.07171717171717172, | |
| "grad_norm": 0.4430043697357178, | |
| "learning_rate": 4.641540404040404e-05, | |
| "loss": 0.4623, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.07205387205387205, | |
| "grad_norm": 0.3590750992298126, | |
| "learning_rate": 4.6398569023569026e-05, | |
| "loss": 0.4827, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.0723905723905724, | |
| "grad_norm": 0.6161913275718689, | |
| "learning_rate": 4.638181818181818e-05, | |
| "loss": 0.4796, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.07272727272727272, | |
| "grad_norm": 0.3607730269432068, | |
| "learning_rate": 4.636498316498317e-05, | |
| "loss": 0.4801, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.07306397306397307, | |
| "grad_norm": 0.07056716829538345, | |
| "learning_rate": 4.634814814814815e-05, | |
| "loss": 0.4738, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.0734006734006734, | |
| "grad_norm": 0.09327512234449387, | |
| "learning_rate": 4.633131313131313e-05, | |
| "loss": 0.4885, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.07373737373737374, | |
| "grad_norm": 0.2519952952861786, | |
| "learning_rate": 4.6314478114478114e-05, | |
| "loss": 0.4541, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 0.4618964195251465, | |
| "learning_rate": 4.62976430976431e-05, | |
| "loss": 0.4593, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.07441077441077441, | |
| "grad_norm": 0.4683738946914673, | |
| "learning_rate": 4.628080808080808e-05, | |
| "loss": 0.4536, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.07474747474747474, | |
| "grad_norm": 0.2552854120731354, | |
| "learning_rate": 4.626397306397307e-05, | |
| "loss": 0.4702, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.07508417508417509, | |
| "grad_norm": 0.33385610580444336, | |
| "learning_rate": 4.624713804713805e-05, | |
| "loss": 0.4623, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.07542087542087542, | |
| "grad_norm": 0.17833998799324036, | |
| "learning_rate": 4.6230303030303036e-05, | |
| "loss": 0.419, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.07575757575757576, | |
| "grad_norm": 0.14885468780994415, | |
| "learning_rate": 4.621346801346801e-05, | |
| "loss": 0.5132, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.07609427609427609, | |
| "grad_norm": 0.4861992597579956, | |
| "learning_rate": 4.6196632996633e-05, | |
| "loss": 0.4914, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.07643097643097643, | |
| "grad_norm": 0.20314612984657288, | |
| "learning_rate": 4.617979797979798e-05, | |
| "loss": 0.4535, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.07676767676767676, | |
| "grad_norm": 0.7097423076629639, | |
| "learning_rate": 4.616304713804714e-05, | |
| "loss": 0.4343, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.0771043771043771, | |
| "grad_norm": 0.23547014594078064, | |
| "learning_rate": 4.6146212121212124e-05, | |
| "loss": 0.449, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.07744107744107744, | |
| "grad_norm": 0.25944817066192627, | |
| "learning_rate": 4.612937710437711e-05, | |
| "loss": 0.4535, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.07777777777777778, | |
| "grad_norm": 0.6145304441452026, | |
| "learning_rate": 4.6112542087542086e-05, | |
| "loss": 0.4536, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.07811447811447811, | |
| "grad_norm": 1.2527995109558105, | |
| "learning_rate": 4.609570707070707e-05, | |
| "loss": 0.4616, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.07845117845117845, | |
| "grad_norm": 0.9534751772880554, | |
| "learning_rate": 4.6078872053872055e-05, | |
| "loss": 0.5009, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.07878787878787878, | |
| "grad_norm": 0.552191436290741, | |
| "learning_rate": 4.606203703703704e-05, | |
| "loss": 0.4738, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.07912457912457913, | |
| "grad_norm": 0.28889888525009155, | |
| "learning_rate": 4.6045202020202023e-05, | |
| "loss": 0.4721, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.07946127946127945, | |
| "grad_norm": 0.4266869127750397, | |
| "learning_rate": 4.602845117845118e-05, | |
| "loss": 0.4695, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.0797979797979798, | |
| "grad_norm": 0.5105581879615784, | |
| "learning_rate": 4.601161616161616e-05, | |
| "loss": 0.4739, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.08013468013468013, | |
| "grad_norm": 0.4175490736961365, | |
| "learning_rate": 4.599478114478114e-05, | |
| "loss": 0.4456, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.08047138047138047, | |
| "grad_norm": 0.3257778584957123, | |
| "learning_rate": 4.5977946127946134e-05, | |
| "loss": 0.4808, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.08080808080808081, | |
| "grad_norm": 0.3000372648239136, | |
| "learning_rate": 4.596111111111112e-05, | |
| "loss": 0.4635, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.08114478114478114, | |
| "grad_norm": 0.32268643379211426, | |
| "learning_rate": 4.5944276094276096e-05, | |
| "loss": 0.4925, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.08148148148148149, | |
| "grad_norm": 0.5290645956993103, | |
| "learning_rate": 4.592744107744108e-05, | |
| "loss": 0.4711, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.08181818181818182, | |
| "grad_norm": 0.29082873463630676, | |
| "learning_rate": 4.5910606060606065e-05, | |
| "loss": 0.4728, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.08215488215488216, | |
| "grad_norm": 0.6704333424568176, | |
| "learning_rate": 4.589377104377104e-05, | |
| "loss": 0.4779, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.08249158249158249, | |
| "grad_norm": 0.27797549962997437, | |
| "learning_rate": 4.587693602693603e-05, | |
| "loss": 0.4529, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.08282828282828283, | |
| "grad_norm": 0.1398361176252365, | |
| "learning_rate": 4.586010101010101e-05, | |
| "loss": 0.4379, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.08316498316498316, | |
| "grad_norm": 0.31926196813583374, | |
| "learning_rate": 4.5843265993265996e-05, | |
| "loss": 0.457, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.08350168350168351, | |
| "grad_norm": 0.17603324353694916, | |
| "learning_rate": 4.582643097643098e-05, | |
| "loss": 0.452, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.08383838383838384, | |
| "grad_norm": 0.4734348654747009, | |
| "learning_rate": 4.5809595959595964e-05, | |
| "loss": 0.4489, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.08417508417508418, | |
| "grad_norm": 0.2849540710449219, | |
| "learning_rate": 4.579276094276094e-05, | |
| "loss": 0.4448, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.08417508417508418, | |
| "eval_loss": 0.4674188494682312, | |
| "eval_runtime": 400.4812, | |
| "eval_samples_per_second": 14.982, | |
| "eval_steps_per_second": 14.982, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.08451178451178451, | |
| "grad_norm": 0.3861866295337677, | |
| "learning_rate": 4.5775925925925926e-05, | |
| "loss": 0.4691, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.08484848484848485, | |
| "grad_norm": 0.23927472531795502, | |
| "learning_rate": 4.575909090909091e-05, | |
| "loss": 0.519, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.08518518518518518, | |
| "grad_norm": 0.2663820683956146, | |
| "learning_rate": 4.5742255892255895e-05, | |
| "loss": 0.4394, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.08552188552188553, | |
| "grad_norm": 0.0782080739736557, | |
| "learning_rate": 4.572542087542088e-05, | |
| "loss": 0.4181, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.08585858585858586, | |
| "grad_norm": 0.32413387298583984, | |
| "learning_rate": 4.5708585858585864e-05, | |
| "loss": 0.4643, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.0861952861952862, | |
| "grad_norm": 0.18156534433364868, | |
| "learning_rate": 4.569175084175085e-05, | |
| "loss": 0.4511, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.08653198653198653, | |
| "grad_norm": 0.298673152923584, | |
| "learning_rate": 4.5674915824915826e-05, | |
| "loss": 0.4424, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.08686868686868687, | |
| "grad_norm": 0.24452580511569977, | |
| "learning_rate": 4.565808080808081e-05, | |
| "loss": 0.4556, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.0872053872053872, | |
| "grad_norm": 0.2959561049938202, | |
| "learning_rate": 4.564132996632997e-05, | |
| "loss": 0.4543, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.08754208754208755, | |
| "grad_norm": 0.6213822960853577, | |
| "learning_rate": 4.5624579124579125e-05, | |
| "loss": 0.5136, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.08787878787878788, | |
| "grad_norm": 0.5385012030601501, | |
| "learning_rate": 4.560774410774411e-05, | |
| "loss": 0.4602, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.08821548821548822, | |
| "grad_norm": 0.6063356995582581, | |
| "learning_rate": 4.5590909090909094e-05, | |
| "loss": 0.4367, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.08855218855218855, | |
| "grad_norm": 0.43720120191574097, | |
| "learning_rate": 4.557407407407407e-05, | |
| "loss": 0.4902, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 0.4334559738636017, | |
| "learning_rate": 4.555723905723906e-05, | |
| "loss": 0.4585, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.08922558922558922, | |
| "grad_norm": 0.2874049246311188, | |
| "learning_rate": 4.554040404040405e-05, | |
| "loss": 0.4907, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.08956228956228957, | |
| "grad_norm": 0.19016990065574646, | |
| "learning_rate": 4.5523569023569025e-05, | |
| "loss": 0.4561, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.0898989898989899, | |
| "grad_norm": 0.7278497815132141, | |
| "learning_rate": 4.550673400673401e-05, | |
| "loss": 0.4702, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.09023569023569024, | |
| "grad_norm": 0.28533700108528137, | |
| "learning_rate": 4.5489898989898993e-05, | |
| "loss": 0.5139, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.09057239057239057, | |
| "grad_norm": 0.6488041281700134, | |
| "learning_rate": 4.547306397306397e-05, | |
| "loss": 0.4553, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 0.3091227412223816, | |
| "learning_rate": 4.5456228956228955e-05, | |
| "loss": 0.4779, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.09124579124579124, | |
| "grad_norm": 0.3282964825630188, | |
| "learning_rate": 4.543939393939394e-05, | |
| "loss": 0.5117, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.09158249158249158, | |
| "grad_norm": 0.473143070936203, | |
| "learning_rate": 4.5422558922558924e-05, | |
| "loss": 0.4791, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.09191919191919191, | |
| "grad_norm": 0.5263796448707581, | |
| "learning_rate": 4.540572390572391e-05, | |
| "loss": 0.4686, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.09225589225589226, | |
| "grad_norm": 0.4568365216255188, | |
| "learning_rate": 4.538888888888889e-05, | |
| "loss": 0.4936, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.09259259259259259, | |
| "grad_norm": 0.9846563935279846, | |
| "learning_rate": 4.537205387205388e-05, | |
| "loss": 0.4968, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.09292929292929293, | |
| "grad_norm": 0.15145862102508545, | |
| "learning_rate": 4.5355218855218855e-05, | |
| "loss": 0.5163, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.09326599326599326, | |
| "grad_norm": 0.428117573261261, | |
| "learning_rate": 4.533838383838384e-05, | |
| "loss": 0.4646, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.0936026936026936, | |
| "grad_norm": 0.3261561393737793, | |
| "learning_rate": 4.5321548821548824e-05, | |
| "loss": 0.4697, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.09393939393939393, | |
| "grad_norm": 0.34254854917526245, | |
| "learning_rate": 4.530471380471381e-05, | |
| "loss": 0.4331, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.09427609427609428, | |
| "grad_norm": 0.15681512653827667, | |
| "learning_rate": 4.528787878787879e-05, | |
| "loss": 0.4765, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.0946127946127946, | |
| "grad_norm": 0.16159813106060028, | |
| "learning_rate": 4.527104377104378e-05, | |
| "loss": 0.4775, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.09494949494949495, | |
| "grad_norm": 0.6212481260299683, | |
| "learning_rate": 4.5254208754208754e-05, | |
| "loss": 0.5314, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.09528619528619528, | |
| "grad_norm": 0.3396393656730652, | |
| "learning_rate": 4.523737373737374e-05, | |
| "loss": 0.4898, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.09562289562289562, | |
| "grad_norm": 0.32701626420021057, | |
| "learning_rate": 4.5220622895622896e-05, | |
| "loss": 0.4421, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.09595959595959595, | |
| "grad_norm": 0.15523914992809296, | |
| "learning_rate": 4.520378787878788e-05, | |
| "loss": 0.4415, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.0962962962962963, | |
| "grad_norm": 0.5103595852851868, | |
| "learning_rate": 4.5186952861952865e-05, | |
| "loss": 0.453, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.09663299663299663, | |
| "grad_norm": 0.42163121700286865, | |
| "learning_rate": 4.517011784511785e-05, | |
| "loss": 0.4738, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.09696969696969697, | |
| "grad_norm": 0.9396620392799377, | |
| "learning_rate": 4.515328282828283e-05, | |
| "loss": 0.473, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.09730639730639731, | |
| "grad_norm": 0.4714924991130829, | |
| "learning_rate": 4.513644781144781e-05, | |
| "loss": 0.5031, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.09764309764309764, | |
| "grad_norm": 0.24718382954597473, | |
| "learning_rate": 4.5119612794612796e-05, | |
| "loss": 0.4592, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.09797979797979799, | |
| "grad_norm": 0.3186817169189453, | |
| "learning_rate": 4.510277777777778e-05, | |
| "loss": 0.4451, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.09831649831649832, | |
| "grad_norm": 0.34213390946388245, | |
| "learning_rate": 4.5085942760942764e-05, | |
| "loss": 0.4646, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.09865319865319866, | |
| "grad_norm": 0.29326021671295166, | |
| "learning_rate": 4.506910774410775e-05, | |
| "loss": 0.4825, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.09898989898989899, | |
| "grad_norm": 0.8425318598747253, | |
| "learning_rate": 4.505227272727273e-05, | |
| "loss": 0.4899, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.09932659932659933, | |
| "grad_norm": 0.23540657758712769, | |
| "learning_rate": 4.503543771043771e-05, | |
| "loss": 0.4654, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.09966329966329966, | |
| "grad_norm": 0.44379663467407227, | |
| "learning_rate": 4.5018602693602695e-05, | |
| "loss": 0.4722, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.23975303769111633, | |
| "learning_rate": 4.500176767676768e-05, | |
| "loss": 0.4589, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.10033670033670034, | |
| "grad_norm": 0.29341402649879456, | |
| "learning_rate": 4.498501683501684e-05, | |
| "loss": 0.5041, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.10067340067340068, | |
| "grad_norm": 0.34747016429901123, | |
| "learning_rate": 4.496818181818182e-05, | |
| "loss": 0.4647, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 0.8880186080932617, | |
| "learning_rate": 4.4951346801346806e-05, | |
| "loss": 0.4497, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.10134680134680135, | |
| "grad_norm": 0.6821927428245544, | |
| "learning_rate": 4.4934511784511783e-05, | |
| "loss": 0.4716, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.10168350168350168, | |
| "grad_norm": 0.4481610953807831, | |
| "learning_rate": 4.491767676767677e-05, | |
| "loss": 0.4576, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.10202020202020202, | |
| "grad_norm": 0.6106315851211548, | |
| "learning_rate": 4.490084175084175e-05, | |
| "loss": 0.4386, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.10235690235690235, | |
| "grad_norm": 0.3058004081249237, | |
| "learning_rate": 4.4884006734006737e-05, | |
| "loss": 0.4914, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.1026936026936027, | |
| "grad_norm": 0.4685909152030945, | |
| "learning_rate": 4.486717171717172e-05, | |
| "loss": 0.4707, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.10303030303030303, | |
| "grad_norm": 0.643690288066864, | |
| "learning_rate": 4.4850336700336705e-05, | |
| "loss": 0.5428, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.10336700336700337, | |
| "grad_norm": 0.44329872727394104, | |
| "learning_rate": 4.483350168350168e-05, | |
| "loss": 0.4279, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.1037037037037037, | |
| "grad_norm": 0.21025417745113373, | |
| "learning_rate": 4.481666666666667e-05, | |
| "loss": 0.458, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.10404040404040404, | |
| "grad_norm": 0.24184706807136536, | |
| "learning_rate": 4.479983164983165e-05, | |
| "loss": 0.4964, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.10437710437710437, | |
| "grad_norm": 0.4054918587207794, | |
| "learning_rate": 4.4782996632996636e-05, | |
| "loss": 0.4427, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.10471380471380472, | |
| "grad_norm": 0.4844823181629181, | |
| "learning_rate": 4.476616161616162e-05, | |
| "loss": 0.4529, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.10505050505050505, | |
| "grad_norm": 0.39470815658569336, | |
| "learning_rate": 4.4749326599326605e-05, | |
| "loss": 0.4441, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.10538720538720539, | |
| "grad_norm": 0.47343677282333374, | |
| "learning_rate": 4.473249158249159e-05, | |
| "loss": 0.4253, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.10572390572390572, | |
| "grad_norm": 0.21908357739448547, | |
| "learning_rate": 4.471565656565657e-05, | |
| "loss": 0.4445, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.10606060606060606, | |
| "grad_norm": 0.4983006715774536, | |
| "learning_rate": 4.469882154882155e-05, | |
| "loss": 0.4567, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.1063973063973064, | |
| "grad_norm": 0.4615258276462555, | |
| "learning_rate": 4.4681986531986536e-05, | |
| "loss": 0.489, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.10673400673400674, | |
| "grad_norm": 0.30304470658302307, | |
| "learning_rate": 4.466523569023569e-05, | |
| "loss": 0.4748, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.10707070707070707, | |
| "grad_norm": 0.1221388503909111, | |
| "learning_rate": 4.464840067340068e-05, | |
| "loss": 0.4529, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.10740740740740741, | |
| "grad_norm": 0.36816734075546265, | |
| "learning_rate": 4.463156565656566e-05, | |
| "loss": 0.4518, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.10774410774410774, | |
| "grad_norm": 0.13719257712364197, | |
| "learning_rate": 4.461473063973064e-05, | |
| "loss": 0.4314, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.10808080808080808, | |
| "grad_norm": 0.45440635085105896, | |
| "learning_rate": 4.4597895622895624e-05, | |
| "loss": 0.5005, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.10841750841750841, | |
| "grad_norm": 0.4109625816345215, | |
| "learning_rate": 4.458106060606061e-05, | |
| "loss": 0.4847, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.10875420875420876, | |
| "grad_norm": 0.43106183409690857, | |
| "learning_rate": 4.4564225589225586e-05, | |
| "loss": 0.4607, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.10909090909090909, | |
| "grad_norm": 0.3796352744102478, | |
| "learning_rate": 4.454739057239058e-05, | |
| "loss": 0.4793, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.10942760942760943, | |
| "grad_norm": 0.5014599561691284, | |
| "learning_rate": 4.453055555555556e-05, | |
| "loss": 0.4848, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.10976430976430976, | |
| "grad_norm": 0.3484991192817688, | |
| "learning_rate": 4.4513720538720546e-05, | |
| "loss": 0.5129, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.1101010101010101, | |
| "grad_norm": 0.2991756200790405, | |
| "learning_rate": 4.449688552188552e-05, | |
| "loss": 0.4927, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.11043771043771043, | |
| "grad_norm": 0.37985363602638245, | |
| "learning_rate": 4.448005050505051e-05, | |
| "loss": 0.4372, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.11077441077441078, | |
| "grad_norm": 0.4508950114250183, | |
| "learning_rate": 4.446321548821549e-05, | |
| "loss": 0.4182, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 0.7306589484214783, | |
| "learning_rate": 4.444646464646465e-05, | |
| "loss": 0.5217, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.11144781144781145, | |
| "grad_norm": 0.17345421016216278, | |
| "learning_rate": 4.4429629629629634e-05, | |
| "loss": 0.4653, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.11178451178451178, | |
| "grad_norm": 0.36997854709625244, | |
| "learning_rate": 4.441279461279462e-05, | |
| "loss": 0.4631, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.11212121212121212, | |
| "grad_norm": 0.31563735008239746, | |
| "learning_rate": 4.4395959595959596e-05, | |
| "loss": 0.4606, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.11245791245791245, | |
| "grad_norm": 0.23173430562019348, | |
| "learning_rate": 4.437912457912458e-05, | |
| "loss": 0.4868, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.1127946127946128, | |
| "grad_norm": 0.336233526468277, | |
| "learning_rate": 4.4362289562289565e-05, | |
| "loss": 0.4809, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.11313131313131314, | |
| "grad_norm": 0.3722301423549652, | |
| "learning_rate": 4.434545454545454e-05, | |
| "loss": 0.4396, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.11346801346801347, | |
| "grad_norm": 0.5491744875907898, | |
| "learning_rate": 4.432861952861953e-05, | |
| "loss": 0.3938, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.11380471380471381, | |
| "grad_norm": 0.2742317020893097, | |
| "learning_rate": 4.431178451178452e-05, | |
| "loss": 0.4782, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.11414141414141414, | |
| "grad_norm": 0.22197793424129486, | |
| "learning_rate": 4.4294949494949495e-05, | |
| "loss": 0.4233, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.11447811447811448, | |
| "grad_norm": 0.2449079006910324, | |
| "learning_rate": 4.427811447811448e-05, | |
| "loss": 0.4889, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.11481481481481481, | |
| "grad_norm": 0.25308141112327576, | |
| "learning_rate": 4.4261279461279464e-05, | |
| "loss": 0.4275, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.11515151515151516, | |
| "grad_norm": 0.22254426777362823, | |
| "learning_rate": 4.424444444444444e-05, | |
| "loss": 0.487, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.11548821548821549, | |
| "grad_norm": 0.4978940784931183, | |
| "learning_rate": 4.4227609427609426e-05, | |
| "loss": 0.4709, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.11582491582491583, | |
| "grad_norm": 0.2366330623626709, | |
| "learning_rate": 4.421077441077441e-05, | |
| "loss": 0.487, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.11616161616161616, | |
| "grad_norm": 0.38192349672317505, | |
| "learning_rate": 4.41939393939394e-05, | |
| "loss": 0.4751, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.1164983164983165, | |
| "grad_norm": 0.4711579382419586, | |
| "learning_rate": 4.417710437710438e-05, | |
| "loss": 0.4694, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.11683501683501683, | |
| "grad_norm": 0.7776811122894287, | |
| "learning_rate": 4.4160269360269364e-05, | |
| "loss": 0.505, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.11717171717171718, | |
| "grad_norm": 0.22125215828418732, | |
| "learning_rate": 4.414343434343435e-05, | |
| "loss": 0.4599, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.1175084175084175, | |
| "grad_norm": 0.3384982943534851, | |
| "learning_rate": 4.4126683501683505e-05, | |
| "loss": 0.4825, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.11784511784511785, | |
| "grad_norm": 0.35308724641799927, | |
| "learning_rate": 4.410984848484849e-05, | |
| "loss": 0.4698, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.11818181818181818, | |
| "grad_norm": 0.3890261650085449, | |
| "learning_rate": 4.4093013468013474e-05, | |
| "loss": 0.4414, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.11851851851851852, | |
| "grad_norm": 0.6729969382286072, | |
| "learning_rate": 4.407617845117845e-05, | |
| "loss": 0.5287, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.11885521885521885, | |
| "grad_norm": 0.18775266408920288, | |
| "learning_rate": 4.4059343434343436e-05, | |
| "loss": 0.4809, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.1191919191919192, | |
| "grad_norm": 0.13612866401672363, | |
| "learning_rate": 4.404250841750842e-05, | |
| "loss": 0.4591, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.11952861952861953, | |
| "grad_norm": 0.25239524245262146, | |
| "learning_rate": 4.40256734006734e-05, | |
| "loss": 0.4514, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.11986531986531987, | |
| "grad_norm": 0.5928908586502075, | |
| "learning_rate": 4.400883838383838e-05, | |
| "loss": 0.4437, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.1202020202020202, | |
| "grad_norm": 0.3206656575202942, | |
| "learning_rate": 4.3992003367003374e-05, | |
| "loss": 0.4193, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.12053872053872054, | |
| "grad_norm": 0.1375039964914322, | |
| "learning_rate": 4.397516835016835e-05, | |
| "loss": 0.4642, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.12087542087542087, | |
| "grad_norm": 0.31986692547798157, | |
| "learning_rate": 4.3958333333333336e-05, | |
| "loss": 0.4771, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 0.3976145088672638, | |
| "learning_rate": 4.394158249158249e-05, | |
| "loss": 0.5199, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.12154882154882155, | |
| "grad_norm": 0.5115092992782593, | |
| "learning_rate": 4.392474747474747e-05, | |
| "loss": 0.4639, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.12188552188552189, | |
| "grad_norm": 0.6884472370147705, | |
| "learning_rate": 4.390791245791246e-05, | |
| "loss": 0.5004, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.12222222222222222, | |
| "grad_norm": 0.35599565505981445, | |
| "learning_rate": 4.3891077441077446e-05, | |
| "loss": 0.4125, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.12255892255892256, | |
| "grad_norm": 0.31085407733917236, | |
| "learning_rate": 4.387424242424243e-05, | |
| "loss": 0.4276, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.12289562289562289, | |
| "grad_norm": 0.4173491299152374, | |
| "learning_rate": 4.385740740740741e-05, | |
| "loss": 0.5051, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.12323232323232323, | |
| "grad_norm": 0.857441782951355, | |
| "learning_rate": 4.384057239057239e-05, | |
| "loss": 0.4494, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.12356902356902356, | |
| "grad_norm": 0.2960607707500458, | |
| "learning_rate": 4.382382154882155e-05, | |
| "loss": 0.5233, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.12390572390572391, | |
| "grad_norm": 0.38231462240219116, | |
| "learning_rate": 4.3806986531986535e-05, | |
| "loss": 0.5003, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.12424242424242424, | |
| "grad_norm": 0.20170505344867706, | |
| "learning_rate": 4.379015151515152e-05, | |
| "loss": 0.4543, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.12457912457912458, | |
| "grad_norm": 0.49870565533638, | |
| "learning_rate": 4.37733164983165e-05, | |
| "loss": 0.4931, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.12491582491582491, | |
| "grad_norm": 0.21166172623634338, | |
| "learning_rate": 4.375648148148148e-05, | |
| "loss": 0.4956, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.12525252525252525, | |
| "grad_norm": 1.7191145420074463, | |
| "learning_rate": 4.3739646464646465e-05, | |
| "loss": 0.5209, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.12558922558922558, | |
| "grad_norm": 0.15442191064357758, | |
| "learning_rate": 4.372281144781145e-05, | |
| "loss": 0.4723, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.1259259259259259, | |
| "grad_norm": 0.6088646650314331, | |
| "learning_rate": 4.3705976430976434e-05, | |
| "loss": 0.4756, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.12626262626262627, | |
| "grad_norm": 0.20357204973697662, | |
| "learning_rate": 4.368914141414142e-05, | |
| "loss": 0.4358, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.1265993265993266, | |
| "grad_norm": 0.23374512791633606, | |
| "learning_rate": 4.36723063973064e-05, | |
| "loss": 0.4873, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.12693602693602693, | |
| "grad_norm": 0.28036201000213623, | |
| "learning_rate": 4.365547138047138e-05, | |
| "loss": 0.4627, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.12727272727272726, | |
| "grad_norm": 0.4876658320426941, | |
| "learning_rate": 4.3638636363636365e-05, | |
| "loss": 0.4865, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.12760942760942762, | |
| "grad_norm": 0.24980993568897247, | |
| "learning_rate": 4.362180134680135e-05, | |
| "loss": 0.4715, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.12794612794612795, | |
| "grad_norm": 0.5056689977645874, | |
| "learning_rate": 4.3604966329966334e-05, | |
| "loss": 0.5066, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.12828282828282828, | |
| "grad_norm": 0.3562251329421997, | |
| "learning_rate": 4.358813131313131e-05, | |
| "loss": 0.4561, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.1286195286195286, | |
| "grad_norm": 0.5188980102539062, | |
| "learning_rate": 4.35712962962963e-05, | |
| "loss": 0.4451, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.12895622895622896, | |
| "grad_norm": 0.24979503452777863, | |
| "learning_rate": 4.355446127946129e-05, | |
| "loss": 0.4977, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.1292929292929293, | |
| "grad_norm": 0.2918744385242462, | |
| "learning_rate": 4.3537626262626264e-05, | |
| "loss": 0.4465, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.12962962962962962, | |
| "grad_norm": 0.11484523117542267, | |
| "learning_rate": 4.352079124579125e-05, | |
| "loss": 0.5008, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.12996632996632998, | |
| "grad_norm": 0.13835379481315613, | |
| "learning_rate": 4.350395622895623e-05, | |
| "loss": 0.4469, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.1303030303030303, | |
| "grad_norm": 0.17499831318855286, | |
| "learning_rate": 4.348712121212121e-05, | |
| "loss": 0.4558, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.13063973063973064, | |
| "grad_norm": 0.14153020083904266, | |
| "learning_rate": 4.3470286195286195e-05, | |
| "loss": 0.4764, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.13097643097643097, | |
| "grad_norm": 0.32110026478767395, | |
| "learning_rate": 4.345353535353536e-05, | |
| "loss": 0.4496, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.13131313131313133, | |
| "grad_norm": 0.347741961479187, | |
| "learning_rate": 4.343670033670034e-05, | |
| "loss": 0.4365, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.13164983164983166, | |
| "grad_norm": 1.8142364025115967, | |
| "learning_rate": 4.341986531986532e-05, | |
| "loss": 0.4963, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.13198653198653199, | |
| "grad_norm": 0.18966235220432281, | |
| "learning_rate": 4.3403030303030306e-05, | |
| "loss": 0.4641, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.13232323232323231, | |
| "grad_norm": 0.7899078726768494, | |
| "learning_rate": 4.338619528619528e-05, | |
| "loss": 0.42, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.13265993265993267, | |
| "grad_norm": 0.11921744793653488, | |
| "learning_rate": 4.336936026936027e-05, | |
| "loss": 0.4489, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.132996632996633, | |
| "grad_norm": 0.3183203935623169, | |
| "learning_rate": 4.335252525252526e-05, | |
| "loss": 0.4128, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 0.9126468896865845, | |
| "learning_rate": 4.3335690235690236e-05, | |
| "loss": 0.4063, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.13367003367003366, | |
| "grad_norm": 0.30544015765190125, | |
| "learning_rate": 4.331885521885522e-05, | |
| "loss": 0.4363, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.13400673400673402, | |
| "grad_norm": 0.32495343685150146, | |
| "learning_rate": 4.3302020202020205e-05, | |
| "loss": 0.4311, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.13434343434343435, | |
| "grad_norm": 0.5995136499404907, | |
| "learning_rate": 4.328518518518519e-05, | |
| "loss": 0.4304, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.13468013468013468, | |
| "grad_norm": 0.09273191541433334, | |
| "learning_rate": 4.326835016835017e-05, | |
| "loss": 0.44, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.135016835016835, | |
| "grad_norm": 0.5083215832710266, | |
| "learning_rate": 4.325151515151515e-05, | |
| "loss": 0.4484, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.13535353535353536, | |
| "grad_norm": 0.4086732566356659, | |
| "learning_rate": 4.3234680134680136e-05, | |
| "loss": 0.4431, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.1356902356902357, | |
| "grad_norm": 0.25825831294059753, | |
| "learning_rate": 4.321784511784512e-05, | |
| "loss": 0.4903, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.13602693602693602, | |
| "grad_norm": 0.22938190400600433, | |
| "learning_rate": 4.3201010101010105e-05, | |
| "loss": 0.446, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 4.104939937591553, | |
| "learning_rate": 4.318417508417509e-05, | |
| "loss": 0.5158, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.1367003367003367, | |
| "grad_norm": 0.6533791422843933, | |
| "learning_rate": 4.3167340067340067e-05, | |
| "loss": 0.4411, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.13703703703703704, | |
| "grad_norm": 0.3710763156414032, | |
| "learning_rate": 4.315050505050505e-05, | |
| "loss": 0.4504, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.13737373737373737, | |
| "grad_norm": 0.32174888253211975, | |
| "learning_rate": 4.3133670033670035e-05, | |
| "loss": 0.4494, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.1377104377104377, | |
| "grad_norm": 0.47536543011665344, | |
| "learning_rate": 4.311683501683502e-05, | |
| "loss": 0.4547, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.13804713804713806, | |
| "grad_norm": 0.5591254830360413, | |
| "learning_rate": 4.310008417508418e-05, | |
| "loss": 0.4306, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.1383838383838384, | |
| "grad_norm": 0.26102516055107117, | |
| "learning_rate": 4.308324915824916e-05, | |
| "loss": 0.4377, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.13872053872053872, | |
| "grad_norm": 0.540073812007904, | |
| "learning_rate": 4.306641414141414e-05, | |
| "loss": 0.4913, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.13905723905723905, | |
| "grad_norm": 0.8017529249191284, | |
| "learning_rate": 4.3049579124579124e-05, | |
| "loss": 0.4382, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.1393939393939394, | |
| "grad_norm": 0.6620075106620789, | |
| "learning_rate": 4.303274410774411e-05, | |
| "loss": 0.3944, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.13973063973063973, | |
| "grad_norm": 0.42346033453941345, | |
| "learning_rate": 4.301590909090909e-05, | |
| "loss": 0.4149, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.14006734006734006, | |
| "grad_norm": 0.259355366230011, | |
| "learning_rate": 4.299907407407408e-05, | |
| "loss": 0.4737, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.1404040404040404, | |
| "grad_norm": 0.10005613416433334, | |
| "learning_rate": 4.298223905723906e-05, | |
| "loss": 0.4642, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.14074074074074075, | |
| "grad_norm": 0.40637290477752686, | |
| "learning_rate": 4.2965404040404045e-05, | |
| "loss": 0.4251, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.14107744107744108, | |
| "grad_norm": 0.5488855838775635, | |
| "learning_rate": 4.294856902356902e-05, | |
| "loss": 0.4669, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.1414141414141414, | |
| "grad_norm": 0.36019712686538696, | |
| "learning_rate": 4.293173400673401e-05, | |
| "loss": 0.4493, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.14175084175084174, | |
| "grad_norm": 0.20330995321273804, | |
| "learning_rate": 4.291498316498317e-05, | |
| "loss": 0.4969, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.1420875420875421, | |
| "grad_norm": 0.23681996762752533, | |
| "learning_rate": 4.289814814814815e-05, | |
| "loss": 0.4774, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.14242424242424243, | |
| "grad_norm": 0.1740342527627945, | |
| "learning_rate": 4.2881313131313134e-05, | |
| "loss": 0.4915, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.14276094276094276, | |
| "grad_norm": 0.4355227053165436, | |
| "learning_rate": 4.286447811447812e-05, | |
| "loss": 0.4681, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.14309764309764308, | |
| "grad_norm": 0.295913964509964, | |
| "learning_rate": 4.2847643097643096e-05, | |
| "loss": 0.4818, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.14343434343434344, | |
| "grad_norm": 0.17617417871952057, | |
| "learning_rate": 4.283080808080808e-05, | |
| "loss": 0.4661, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.14377104377104377, | |
| "grad_norm": 0.36346206068992615, | |
| "learning_rate": 4.2813973063973064e-05, | |
| "loss": 0.456, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.1441077441077441, | |
| "grad_norm": 0.5108135342597961, | |
| "learning_rate": 4.279713804713805e-05, | |
| "loss": 0.4311, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.14444444444444443, | |
| "grad_norm": 0.6109139919281006, | |
| "learning_rate": 4.278030303030303e-05, | |
| "loss": 0.4398, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.1447811447811448, | |
| "grad_norm": 0.35451897978782654, | |
| "learning_rate": 4.276346801346802e-05, | |
| "loss": 0.4674, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.14511784511784512, | |
| "grad_norm": 0.4733004570007324, | |
| "learning_rate": 4.2746632996632995e-05, | |
| "loss": 0.4275, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.14545454545454545, | |
| "grad_norm": 0.30051594972610474, | |
| "learning_rate": 4.272979797979798e-05, | |
| "loss": 0.4493, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.1457912457912458, | |
| "grad_norm": 0.5513753294944763, | |
| "learning_rate": 4.2712962962962964e-05, | |
| "loss": 0.4803, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.14612794612794613, | |
| "grad_norm": 0.5906115770339966, | |
| "learning_rate": 4.269612794612795e-05, | |
| "loss": 0.4502, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.14646464646464646, | |
| "grad_norm": 0.662507176399231, | |
| "learning_rate": 4.267929292929293e-05, | |
| "loss": 0.4247, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.1468013468013468, | |
| "grad_norm": 0.5167519450187683, | |
| "learning_rate": 4.266254208754209e-05, | |
| "loss": 0.477, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.14713804713804715, | |
| "grad_norm": 0.3927953243255615, | |
| "learning_rate": 4.2645707070707075e-05, | |
| "loss": 0.461, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.14747474747474748, | |
| "grad_norm": 0.3797866106033325, | |
| "learning_rate": 4.262895622895623e-05, | |
| "loss": 0.4923, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.1478114478114478, | |
| "grad_norm": 0.3852689862251282, | |
| "learning_rate": 4.2612121212121216e-05, | |
| "loss": 0.4254, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 0.4846220016479492, | |
| "learning_rate": 4.25952861952862e-05, | |
| "loss": 0.4837, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.1484848484848485, | |
| "grad_norm": 0.7036873698234558, | |
| "learning_rate": 4.257845117845118e-05, | |
| "loss": 0.4854, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.14882154882154883, | |
| "grad_norm": 0.5932942032814026, | |
| "learning_rate": 4.256161616161616e-05, | |
| "loss": 0.4509, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.14915824915824916, | |
| "grad_norm": 0.2926032841205597, | |
| "learning_rate": 4.254478114478115e-05, | |
| "loss": 0.4805, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.1494949494949495, | |
| "grad_norm": 0.16851143538951874, | |
| "learning_rate": 4.2527946127946125e-05, | |
| "loss": 0.419, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.14983164983164984, | |
| "grad_norm": 0.1768457293510437, | |
| "learning_rate": 4.2511111111111116e-05, | |
| "loss": 0.4432, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.15016835016835017, | |
| "grad_norm": 1.1971328258514404, | |
| "learning_rate": 4.24942760942761e-05, | |
| "loss": 0.542, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.1505050505050505, | |
| "grad_norm": 0.19443285465240479, | |
| "learning_rate": 4.247744107744108e-05, | |
| "loss": 0.4485, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.15084175084175083, | |
| "grad_norm": 0.2966189682483673, | |
| "learning_rate": 4.246060606060606e-05, | |
| "loss": 0.4376, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.1511784511784512, | |
| "grad_norm": 0.3715890645980835, | |
| "learning_rate": 4.244385521885522e-05, | |
| "loss": 0.4862, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.15151515151515152, | |
| "grad_norm": 0.2697141468524933, | |
| "learning_rate": 4.2427020202020204e-05, | |
| "loss": 0.4746, | |
| "step": 90000 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 594000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.8214306373632e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |