| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9928400954653938, | |
| "eval_steps": 500, | |
| "global_step": 418, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00477326968973747, | |
| "grad_norm": 6.016661782287111, | |
| "learning_rate": 9.523809523809523e-08, | |
| "loss": 1.0606, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00954653937947494, | |
| "grad_norm": 6.0953583965166676, | |
| "learning_rate": 1.9047619047619045e-07, | |
| "loss": 1.0205, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.014319809069212411, | |
| "grad_norm": 5.545005089565625, | |
| "learning_rate": 2.857142857142857e-07, | |
| "loss": 1.0092, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01909307875894988, | |
| "grad_norm": 6.013893802513984, | |
| "learning_rate": 3.809523809523809e-07, | |
| "loss": 0.9935, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02386634844868735, | |
| "grad_norm": 5.376025361134291, | |
| "learning_rate": 4.761904761904761e-07, | |
| "loss": 1.0184, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.028639618138424822, | |
| "grad_norm": 5.360630219441705, | |
| "learning_rate": 5.714285714285714e-07, | |
| "loss": 1.0072, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03341288782816229, | |
| "grad_norm": 5.135874756495844, | |
| "learning_rate": 6.666666666666666e-07, | |
| "loss": 0.9987, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.03818615751789976, | |
| "grad_norm": 4.183937042855387, | |
| "learning_rate": 7.619047619047618e-07, | |
| "loss": 0.9639, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04295942720763723, | |
| "grad_norm": 4.099248159117762, | |
| "learning_rate": 8.57142857142857e-07, | |
| "loss": 0.9497, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0477326968973747, | |
| "grad_norm": 4.049680347020253, | |
| "learning_rate": 9.523809523809522e-07, | |
| "loss": 0.9488, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05250596658711217, | |
| "grad_norm": 3.3413136627880506, | |
| "learning_rate": 1.0476190476190476e-06, | |
| "loss": 0.925, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.057279236276849645, | |
| "grad_norm": 3.0774268853711955, | |
| "learning_rate": 1.1428571428571428e-06, | |
| "loss": 0.9231, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06205250596658711, | |
| "grad_norm": 2.911361629869161, | |
| "learning_rate": 1.238095238095238e-06, | |
| "loss": 0.9177, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.06682577565632458, | |
| "grad_norm": 2.428528787361087, | |
| "learning_rate": 1.3333333333333332e-06, | |
| "loss": 0.8811, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07159904534606205, | |
| "grad_norm": 1.8195746682720535, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.8533, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.07637231503579953, | |
| "grad_norm": 1.947836891057091, | |
| "learning_rate": 1.5238095238095236e-06, | |
| "loss": 0.8706, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.081145584725537, | |
| "grad_norm": 1.8462478481431221, | |
| "learning_rate": 1.619047619047619e-06, | |
| "loss": 0.8208, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.08591885441527446, | |
| "grad_norm": 2.137106522697402, | |
| "learning_rate": 1.714285714285714e-06, | |
| "loss": 0.8181, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09069212410501193, | |
| "grad_norm": 1.9201958176581, | |
| "learning_rate": 1.8095238095238095e-06, | |
| "loss": 0.8245, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0954653937947494, | |
| "grad_norm": 1.523269971944646, | |
| "learning_rate": 1.9047619047619045e-06, | |
| "loss": 0.7545, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10023866348448687, | |
| "grad_norm": 1.8071688523978784, | |
| "learning_rate": 2e-06, | |
| "loss": 0.776, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.10501193317422435, | |
| "grad_norm": 1.5489776099138406, | |
| "learning_rate": 1.9999686897547167e-06, | |
| "loss": 0.7445, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.10978520286396182, | |
| "grad_norm": 1.483755076083143, | |
| "learning_rate": 1.9998747609795305e-06, | |
| "loss": 0.7351, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.11455847255369929, | |
| "grad_norm": 1.4689369885238521, | |
| "learning_rate": 1.999718219556307e-06, | |
| "loss": 0.7332, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.11933174224343675, | |
| "grad_norm": 1.4297615546988631, | |
| "learning_rate": 1.999499075287747e-06, | |
| "loss": 0.738, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.12410501193317422, | |
| "grad_norm": 1.1356908706873299, | |
| "learning_rate": 1.999217341896772e-06, | |
| "loss": 0.7089, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1288782816229117, | |
| "grad_norm": 1.2895783555833555, | |
| "learning_rate": 1.998873037025665e-06, | |
| "loss": 0.6936, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.13365155131264916, | |
| "grad_norm": 1.1235754706376115, | |
| "learning_rate": 1.9984661822349665e-06, | |
| "loss": 0.6785, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.13842482100238662, | |
| "grad_norm": 1.0872843150821465, | |
| "learning_rate": 1.997996803002123e-06, | |
| "loss": 0.6978, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1431980906921241, | |
| "grad_norm": 1.0898740583426263, | |
| "learning_rate": 1.9974649287198914e-06, | |
| "loss": 0.669, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14797136038186157, | |
| "grad_norm": 1.050118078989169, | |
| "learning_rate": 1.9968705926945013e-06, | |
| "loss": 0.6674, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.15274463007159905, | |
| "grad_norm": 0.8999107812930152, | |
| "learning_rate": 1.9962138321435656e-06, | |
| "loss": 0.6765, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1575178997613365, | |
| "grad_norm": 0.9612552915492341, | |
| "learning_rate": 1.9954946881937524e-06, | |
| "loss": 0.6745, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.162291169451074, | |
| "grad_norm": 0.8921943607730816, | |
| "learning_rate": 1.994713205878208e-06, | |
| "loss": 0.6546, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.16706443914081145, | |
| "grad_norm": 1.1285406074360596, | |
| "learning_rate": 1.9938694341337393e-06, | |
| "loss": 0.6612, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1718377088305489, | |
| "grad_norm": 0.9414713249176138, | |
| "learning_rate": 1.9929634257977467e-06, | |
| "loss": 0.6525, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1766109785202864, | |
| "grad_norm": 0.8006257830637218, | |
| "learning_rate": 1.991995237604916e-06, | |
| "loss": 0.6465, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.18138424821002386, | |
| "grad_norm": 0.9526156911755369, | |
| "learning_rate": 1.9909649301836674e-06, | |
| "loss": 0.6581, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.18615751789976134, | |
| "grad_norm": 0.9928551887252647, | |
| "learning_rate": 1.9898725680523566e-06, | |
| "loss": 0.6623, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1909307875894988, | |
| "grad_norm": 0.9455779960744521, | |
| "learning_rate": 1.9887182196152367e-06, | |
| "loss": 0.6527, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1957040572792363, | |
| "grad_norm": 0.8607850079912805, | |
| "learning_rate": 1.9875019571581726e-06, | |
| "loss": 0.6148, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.20047732696897375, | |
| "grad_norm": 1.050584563468554, | |
| "learning_rate": 1.9862238568441165e-06, | |
| "loss": 0.6288, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.2052505966587112, | |
| "grad_norm": 0.9453773840363461, | |
| "learning_rate": 1.9848839987083364e-06, | |
| "loss": 0.6373, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2100238663484487, | |
| "grad_norm": 0.8585148243018186, | |
| "learning_rate": 1.983482466653407e-06, | |
| "loss": 0.6401, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.21479713603818615, | |
| "grad_norm": 0.9814711262628284, | |
| "learning_rate": 1.982019348443952e-06, | |
| "loss": 0.6274, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.21957040572792363, | |
| "grad_norm": 0.9528618373675226, | |
| "learning_rate": 1.9804947357011523e-06, | |
| "loss": 0.6694, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2243436754176611, | |
| "grad_norm": 0.7974828002980384, | |
| "learning_rate": 1.978908723897005e-06, | |
| "loss": 0.6236, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.22911694510739858, | |
| "grad_norm": 0.8409814486609728, | |
| "learning_rate": 1.9772614123483485e-06, | |
| "loss": 0.6408, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.23389021479713604, | |
| "grad_norm": 0.9308103385624037, | |
| "learning_rate": 1.9755529042106393e-06, | |
| "loss": 0.5919, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2386634844868735, | |
| "grad_norm": 0.8733866970343211, | |
| "learning_rate": 1.973783306471495e-06, | |
| "loss": 0.5969, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.24343675417661098, | |
| "grad_norm": 0.785222837947662, | |
| "learning_rate": 1.971952729943994e-06, | |
| "loss": 0.5973, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.24821002386634844, | |
| "grad_norm": 0.7734659209134986, | |
| "learning_rate": 1.9700612892597372e-06, | |
| "loss": 0.6098, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2529832935560859, | |
| "grad_norm": 0.7186573988976016, | |
| "learning_rate": 1.9681091028616676e-06, | |
| "loss": 0.5991, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.2577565632458234, | |
| "grad_norm": 0.7687713083483249, | |
| "learning_rate": 1.966096292996655e-06, | |
| "loss": 0.612, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.26252983293556087, | |
| "grad_norm": 0.8621825025712473, | |
| "learning_rate": 1.9640229857078413e-06, | |
| "loss": 0.5949, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.26730310262529833, | |
| "grad_norm": 0.8257565967386995, | |
| "learning_rate": 1.9618893108267454e-06, | |
| "loss": 0.6103, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2720763723150358, | |
| "grad_norm": 0.8446529899139308, | |
| "learning_rate": 1.9596954019651354e-06, | |
| "loss": 0.5788, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.27684964200477324, | |
| "grad_norm": 0.7679525180581322, | |
| "learning_rate": 1.95744139650666e-06, | |
| "loss": 0.6069, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.28162291169451076, | |
| "grad_norm": 2.820852049381465, | |
| "learning_rate": 1.955127435598247e-06, | |
| "loss": 0.5987, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.2863961813842482, | |
| "grad_norm": 0.814858484667224, | |
| "learning_rate": 1.9527536641412637e-06, | |
| "loss": 0.6112, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2911694510739857, | |
| "grad_norm": 0.8763795811271151, | |
| "learning_rate": 1.950320230782443e-06, | |
| "loss": 0.5827, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.29594272076372313, | |
| "grad_norm": 0.7856677046080051, | |
| "learning_rate": 1.9478272879045763e-06, | |
| "loss": 0.596, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.30071599045346065, | |
| "grad_norm": 0.922334054326887, | |
| "learning_rate": 1.9452749916169685e-06, | |
| "loss": 0.6131, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3054892601431981, | |
| "grad_norm": 0.9217839433257945, | |
| "learning_rate": 1.942663501745666e-06, | |
| "loss": 0.6014, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.31026252983293556, | |
| "grad_norm": 0.7832581576619595, | |
| "learning_rate": 1.939992981823445e-06, | |
| "loss": 0.599, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.315035799522673, | |
| "grad_norm": 0.8859458814902181, | |
| "learning_rate": 1.9372635990795744e-06, | |
| "loss": 0.5606, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3198090692124105, | |
| "grad_norm": 0.8263334442045513, | |
| "learning_rate": 1.934475524429339e-06, | |
| "loss": 0.5845, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.324582338902148, | |
| "grad_norm": 0.7750993438508201, | |
| "learning_rate": 1.9316289324633416e-06, | |
| "loss": 0.5938, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.32935560859188545, | |
| "grad_norm": 0.8557785609879223, | |
| "learning_rate": 1.928724001436568e-06, | |
| "loss": 0.5971, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3341288782816229, | |
| "grad_norm": 0.8226562290018226, | |
| "learning_rate": 1.925760913257224e-06, | |
| "loss": 0.5896, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.33890214797136037, | |
| "grad_norm": 0.7875386984949746, | |
| "learning_rate": 1.922739853475345e-06, | |
| "loss": 0.5957, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3436754176610978, | |
| "grad_norm": 0.7466977444466465, | |
| "learning_rate": 1.919661011271176e-06, | |
| "loss": 0.5782, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.34844868735083534, | |
| "grad_norm": 0.8961994451430955, | |
| "learning_rate": 1.916524579443327e-06, | |
| "loss": 0.5912, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3532219570405728, | |
| "grad_norm": 0.80271020509165, | |
| "learning_rate": 1.9133307543966972e-06, | |
| "loss": 0.568, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.35799522673031026, | |
| "grad_norm": 9.459451148135054, | |
| "learning_rate": 1.910079736130178e-06, | |
| "loss": 0.5831, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3627684964200477, | |
| "grad_norm": 0.8049759872673024, | |
| "learning_rate": 1.9067717282241275e-06, | |
| "loss": 0.5752, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.36754176610978523, | |
| "grad_norm": 0.9365222380955207, | |
| "learning_rate": 1.9034069378276248e-06, | |
| "loss": 0.6037, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.3723150357995227, | |
| "grad_norm": 0.7813871400804118, | |
| "learning_rate": 1.8999855756454943e-06, | |
| "loss": 0.5814, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.37708830548926014, | |
| "grad_norm": 0.8403752789759832, | |
| "learning_rate": 1.8965078559251141e-06, | |
| "loss": 0.5864, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.3818615751789976, | |
| "grad_norm": 0.8051967980548511, | |
| "learning_rate": 1.892973996443e-06, | |
| "loss": 0.5872, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.38663484486873506, | |
| "grad_norm": 0.8042594188373205, | |
| "learning_rate": 1.8893842184911652e-06, | |
| "loss": 0.5763, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3914081145584726, | |
| "grad_norm": 0.9020174309993688, | |
| "learning_rate": 1.8857387468632673e-06, | |
| "loss": 0.5663, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.39618138424821003, | |
| "grad_norm": 0.7886287092080712, | |
| "learning_rate": 1.8820378098405269e-06, | |
| "loss": 0.5749, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4009546539379475, | |
| "grad_norm": 0.7891386094058271, | |
| "learning_rate": 1.878281639177437e-06, | |
| "loss": 0.5791, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.40572792362768495, | |
| "grad_norm": 0.8638559742903111, | |
| "learning_rate": 1.874470470087246e-06, | |
| "loss": 0.594, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4105011933174224, | |
| "grad_norm": 0.8722054176885525, | |
| "learning_rate": 1.8706045412272329e-06, | |
| "loss": 0.5958, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4152744630071599, | |
| "grad_norm": 0.8861516356836725, | |
| "learning_rate": 1.8666840946837588e-06, | |
| "loss": 0.5831, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4200477326968974, | |
| "grad_norm": 1.1646833402992178, | |
| "learning_rate": 1.8627093759571097e-06, | |
| "loss": 0.5773, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.42482100238663484, | |
| "grad_norm": 1.015546055180046, | |
| "learning_rate": 1.8586806339461223e-06, | |
| "loss": 0.567, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.4295942720763723, | |
| "grad_norm": 0.9466071623549958, | |
| "learning_rate": 1.8545981209325974e-06, | |
| "loss": 0.5859, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4343675417661098, | |
| "grad_norm": 0.7761872762176855, | |
| "learning_rate": 1.850462092565503e-06, | |
| "loss": 0.5786, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.43914081145584727, | |
| "grad_norm": 0.7212954328261074, | |
| "learning_rate": 1.846272807844964e-06, | |
| "loss": 0.5643, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4439140811455847, | |
| "grad_norm": 0.9652990021129971, | |
| "learning_rate": 1.8420305291060453e-06, | |
| "loss": 0.5772, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.4486873508353222, | |
| "grad_norm": 3.476043998914064, | |
| "learning_rate": 1.837735522002322e-06, | |
| "loss": 0.5973, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.45346062052505964, | |
| "grad_norm": 0.9648957060855661, | |
| "learning_rate": 1.8333880554892465e-06, | |
| "loss": 0.5683, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.45823389021479716, | |
| "grad_norm": 0.8226895202723103, | |
| "learning_rate": 1.828988401807304e-06, | |
| "loss": 0.5631, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.4630071599045346, | |
| "grad_norm": 0.8353418687299229, | |
| "learning_rate": 1.8245368364649672e-06, | |
| "loss": 0.5478, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4677804295942721, | |
| "grad_norm": 0.7861845701165756, | |
| "learning_rate": 1.8200336382214404e-06, | |
| "loss": 0.5814, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.47255369928400953, | |
| "grad_norm": 0.7869818557092823, | |
| "learning_rate": 1.815479089069208e-06, | |
| "loss": 0.5831, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.477326968973747, | |
| "grad_norm": 1.0793699054838668, | |
| "learning_rate": 1.8108734742163714e-06, | |
| "loss": 0.5711, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4821002386634845, | |
| "grad_norm": 0.9191351283369057, | |
| "learning_rate": 1.8062170820687923e-06, | |
| "loss": 0.5829, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.48687350835322196, | |
| "grad_norm": 0.8555793060148964, | |
| "learning_rate": 1.8015102042120314e-06, | |
| "loss": 0.5651, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4916467780429594, | |
| "grad_norm": 0.8381062392654873, | |
| "learning_rate": 1.796753135393089e-06, | |
| "loss": 0.578, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.4964200477326969, | |
| "grad_norm": 0.9192300787533598, | |
| "learning_rate": 1.791946173501948e-06, | |
| "loss": 0.549, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5011933174224343, | |
| "grad_norm": 0.8307533286502056, | |
| "learning_rate": 1.7870896195529204e-06, | |
| "loss": 0.5427, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5059665871121718, | |
| "grad_norm": 0.7905696548307439, | |
| "learning_rate": 1.7821837776657967e-06, | |
| "loss": 0.5765, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5107398568019093, | |
| "grad_norm": 0.8311340345264336, | |
| "learning_rate": 1.777228955046803e-06, | |
| "loss": 0.5627, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5155131264916468, | |
| "grad_norm": 1.1408460136923761, | |
| "learning_rate": 1.7722254619693617e-06, | |
| "loss": 0.5615, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5202863961813843, | |
| "grad_norm": 0.9215940982960842, | |
| "learning_rate": 1.7671736117546643e-06, | |
| "loss": 0.559, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5250596658711217, | |
| "grad_norm": 0.9073194364535173, | |
| "learning_rate": 1.7620737207520498e-06, | |
| "loss": 0.5675, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5298329355608592, | |
| "grad_norm": 0.9064733521778133, | |
| "learning_rate": 1.756926108319194e-06, | |
| "loss": 0.564, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5346062052505967, | |
| "grad_norm": 0.8006367733355821, | |
| "learning_rate": 1.751731096802113e-06, | |
| "loss": 0.5697, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5393794749403341, | |
| "grad_norm": 0.7703477827683232, | |
| "learning_rate": 1.7464890115149759e-06, | |
| "loss": 0.5556, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.5441527446300716, | |
| "grad_norm": 0.7808625090724881, | |
| "learning_rate": 1.7412001807197361e-06, | |
| "loss": 0.5699, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.548926014319809, | |
| "grad_norm": 0.7891354086520267, | |
| "learning_rate": 1.735864935605572e-06, | |
| "loss": 0.5535, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5536992840095465, | |
| "grad_norm": 0.8559410057738829, | |
| "learning_rate": 1.7304836102681493e-06, | |
| "loss": 0.5456, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.5584725536992841, | |
| "grad_norm": 1.0113045114994854, | |
| "learning_rate": 1.7250565416887015e-06, | |
| "loss": 0.5724, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.5632458233890215, | |
| "grad_norm": 0.8876991951748312, | |
| "learning_rate": 1.719584069712925e-06, | |
| "loss": 0.568, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.568019093078759, | |
| "grad_norm": 0.8642199309829095, | |
| "learning_rate": 1.7140665370296992e-06, | |
| "loss": 0.5501, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5727923627684964, | |
| "grad_norm": 0.7976943947559357, | |
| "learning_rate": 1.708504289149628e-06, | |
| "loss": 0.586, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5775656324582339, | |
| "grad_norm": 0.8256312101115841, | |
| "learning_rate": 1.702897674383402e-06, | |
| "loss": 0.5533, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5823389021479713, | |
| "grad_norm": 1.0090990785205396, | |
| "learning_rate": 1.697247043819988e-06, | |
| "loss": 0.5662, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5871121718377088, | |
| "grad_norm": 0.9155456337094188, | |
| "learning_rate": 1.6915527513046443e-06, | |
| "loss": 0.5683, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5918854415274463, | |
| "grad_norm": 0.8131468025811117, | |
| "learning_rate": 1.6858151534167616e-06, | |
| "loss": 0.5621, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5966587112171837, | |
| "grad_norm": 0.8064567687343521, | |
| "learning_rate": 1.6800346094475346e-06, | |
| "loss": 0.5596, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6014319809069213, | |
| "grad_norm": 0.7492395201342102, | |
| "learning_rate": 1.6742114813774618e-06, | |
| "loss": 0.5531, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6062052505966588, | |
| "grad_norm": 0.7647965464540142, | |
| "learning_rate": 1.6683461338536798e-06, | |
| "loss": 0.5832, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6109785202863962, | |
| "grad_norm": 0.7808066517921948, | |
| "learning_rate": 1.6624389341671278e-06, | |
| "loss": 0.5541, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6157517899761337, | |
| "grad_norm": 0.8430152851631113, | |
| "learning_rate": 1.656490252229548e-06, | |
| "loss": 0.5528, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6205250596658711, | |
| "grad_norm": 0.799740321239669, | |
| "learning_rate": 1.6505004605503223e-06, | |
| "loss": 0.5754, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6252983293556086, | |
| "grad_norm": 0.8524369396059758, | |
| "learning_rate": 1.6444699342131428e-06, | |
| "loss": 0.5659, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.630071599045346, | |
| "grad_norm": 0.8594592125322017, | |
| "learning_rate": 1.638399050852528e-06, | |
| "loss": 0.5468, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6348448687350835, | |
| "grad_norm": 0.8710890648276657, | |
| "learning_rate": 1.632288190630172e-06, | |
| "loss": 0.5547, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.639618138424821, | |
| "grad_norm": 1.3695399621239903, | |
| "learning_rate": 1.6261377362111396e-06, | |
| "loss": 0.5475, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.6443914081145584, | |
| "grad_norm": 0.9119912953537386, | |
| "learning_rate": 1.6199480727399032e-06, | |
| "loss": 0.5622, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.649164677804296, | |
| "grad_norm": 0.8174877663301265, | |
| "learning_rate": 1.6137195878162267e-06, | |
| "loss": 0.5646, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.6539379474940334, | |
| "grad_norm": 0.9968710402813645, | |
| "learning_rate": 1.607452671470891e-06, | |
| "loss": 0.5524, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.6587112171837709, | |
| "grad_norm": 0.7838173267581942, | |
| "learning_rate": 1.601147716141272e-06, | |
| "loss": 0.5517, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.6634844868735084, | |
| "grad_norm": 0.8600041378892647, | |
| "learning_rate": 1.5948051166467657e-06, | |
| "loss": 0.5664, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.6682577565632458, | |
| "grad_norm": 0.7393813982622772, | |
| "learning_rate": 1.5884252701640634e-06, | |
| "loss": 0.5611, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6730310262529833, | |
| "grad_norm": 0.8312116599801993, | |
| "learning_rate": 1.5820085762022823e-06, | |
| "loss": 0.5609, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.6778042959427207, | |
| "grad_norm": 0.782610924284724, | |
| "learning_rate": 1.5755554365779455e-06, | |
| "loss": 0.5586, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6825775656324582, | |
| "grad_norm": 0.7869375949652244, | |
| "learning_rate": 1.5690662553898222e-06, | |
| "loss": 0.5557, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.6873508353221957, | |
| "grad_norm": 0.7871275055021261, | |
| "learning_rate": 1.5625414389936218e-06, | |
| "loss": 0.5379, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6921241050119332, | |
| "grad_norm": 0.7978567113817064, | |
| "learning_rate": 1.555981395976548e-06, | |
| "loss": 0.5459, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6968973747016707, | |
| "grad_norm": 0.8678454065910531, | |
| "learning_rate": 1.5493865371317123e-06, | |
| "loss": 0.5538, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7016706443914081, | |
| "grad_norm": 0.8640558568867235, | |
| "learning_rate": 1.542757275432411e-06, | |
| "loss": 0.5511, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7064439140811456, | |
| "grad_norm": 0.8257539417151866, | |
| "learning_rate": 1.5360940260062635e-06, | |
| "loss": 0.5395, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.711217183770883, | |
| "grad_norm": 0.7735477084244853, | |
| "learning_rate": 1.5293972061092185e-06, | |
| "loss": 0.5487, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7159904534606205, | |
| "grad_norm": 2.21607832896325, | |
| "learning_rate": 1.522667235099422e-06, | |
| "loss": 0.5313, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.720763723150358, | |
| "grad_norm": 0.8260305997634725, | |
| "learning_rate": 1.515904534410961e-06, | |
| "loss": 0.548, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.7255369928400954, | |
| "grad_norm": 0.9282281415854876, | |
| "learning_rate": 1.5091095275274699e-06, | |
| "loss": 0.5366, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.7303102625298329, | |
| "grad_norm": 0.835392664470487, | |
| "learning_rate": 1.5022826399556133e-06, | |
| "loss": 0.5365, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.7350835322195705, | |
| "grad_norm": 1.0014547232970634, | |
| "learning_rate": 1.4954242991984396e-06, | |
| "loss": 0.5601, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.7398568019093079, | |
| "grad_norm": 0.7999358357306402, | |
| "learning_rate": 1.4885349347286115e-06, | |
| "loss": 0.549, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.7446300715990454, | |
| "grad_norm": 0.7456244196208853, | |
| "learning_rate": 1.4816149779615126e-06, | |
| "loss": 0.5516, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.7494033412887828, | |
| "grad_norm": 0.7568817924270603, | |
| "learning_rate": 1.474664862228229e-06, | |
| "loss": 0.5572, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.7541766109785203, | |
| "grad_norm": 0.9329993871672655, | |
| "learning_rate": 1.467685022748419e-06, | |
| "loss": 0.5617, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.7589498806682577, | |
| "grad_norm": 0.7402702977169047, | |
| "learning_rate": 1.4606758966030534e-06, | |
| "loss": 0.5426, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.7637231503579952, | |
| "grad_norm": 0.7912657849322988, | |
| "learning_rate": 1.4536379227070509e-06, | |
| "loss": 0.544, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7684964200477327, | |
| "grad_norm": 0.8280839624728757, | |
| "learning_rate": 1.4465715417817888e-06, | |
| "loss": 0.5435, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.7732696897374701, | |
| "grad_norm": 0.7376680395132865, | |
| "learning_rate": 1.4394771963275076e-06, | |
| "loss": 0.5199, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.7780429594272077, | |
| "grad_norm": 0.7984252215551224, | |
| "learning_rate": 1.4323553305955997e-06, | |
| "loss": 0.5479, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.7828162291169452, | |
| "grad_norm": 0.788726316639838, | |
| "learning_rate": 1.4252063905607909e-06, | |
| "loss": 0.5219, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.7875894988066826, | |
| "grad_norm": 0.7350598897520126, | |
| "learning_rate": 1.4180308238932135e-06, | |
| "loss": 0.531, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.7923627684964201, | |
| "grad_norm": 0.7786806805958749, | |
| "learning_rate": 1.410829079930372e-06, | |
| "loss": 0.5481, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7971360381861575, | |
| "grad_norm": 0.9607237271282482, | |
| "learning_rate": 1.4036016096490064e-06, | |
| "loss": 0.5478, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.801909307875895, | |
| "grad_norm": 0.7782148550862285, | |
| "learning_rate": 1.3963488656368517e-06, | |
| "loss": 0.535, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8066825775656324, | |
| "grad_norm": 0.8100946646751193, | |
| "learning_rate": 1.389071302064295e-06, | |
| "loss": 0.5277, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8114558472553699, | |
| "grad_norm": 0.7502947220609039, | |
| "learning_rate": 1.381769374655938e-06, | |
| "loss": 0.5553, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8162291169451074, | |
| "grad_norm": 0.9124000354997026, | |
| "learning_rate": 1.374443540662057e-06, | |
| "loss": 0.5518, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8210023866348448, | |
| "grad_norm": 0.8409623949497625, | |
| "learning_rate": 1.3670942588299705e-06, | |
| "loss": 0.5294, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.8257756563245824, | |
| "grad_norm": 0.8018568702519514, | |
| "learning_rate": 1.3597219893753117e-06, | |
| "loss": 0.5121, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.8305489260143198, | |
| "grad_norm": 0.9262097539109866, | |
| "learning_rate": 1.352327193953211e-06, | |
| "loss": 0.5259, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.8353221957040573, | |
| "grad_norm": 0.7289872898963717, | |
| "learning_rate": 1.3449103356293852e-06, | |
| "loss": 0.5601, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.8400954653937948, | |
| "grad_norm": 0.7836398407929648, | |
| "learning_rate": 1.337471878851141e-06, | |
| "loss": 0.5359, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.8448687350835322, | |
| "grad_norm": 0.8058359597234802, | |
| "learning_rate": 1.3300122894182909e-06, | |
| "loss": 0.5485, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.8496420047732697, | |
| "grad_norm": 0.9118002301436436, | |
| "learning_rate": 1.3225320344539842e-06, | |
| "loss": 0.5562, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.8544152744630071, | |
| "grad_norm": 0.7609979767002807, | |
| "learning_rate": 1.315031582375457e-06, | |
| "loss": 0.5485, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.8591885441527446, | |
| "grad_norm": 0.7105869344115592, | |
| "learning_rate": 1.3075114028646974e-06, | |
| "loss": 0.5444, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.863961813842482, | |
| "grad_norm": 0.8004311294692876, | |
| "learning_rate": 1.299971966839036e-06, | |
| "loss": 0.5481, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.8687350835322196, | |
| "grad_norm": 0.7667234252631754, | |
| "learning_rate": 1.292413746421655e-06, | |
| "loss": 0.5345, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.8735083532219571, | |
| "grad_norm": 0.7709523318159157, | |
| "learning_rate": 1.2848372149120246e-06, | |
| "loss": 0.512, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.8782816229116945, | |
| "grad_norm": 0.8742048693859581, | |
| "learning_rate": 1.2772428467562651e-06, | |
| "loss": 0.55, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.883054892601432, | |
| "grad_norm": 0.8768649061250284, | |
| "learning_rate": 1.2696311175174357e-06, | |
| "loss": 0.5365, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.8878281622911695, | |
| "grad_norm": 0.8468420712736167, | |
| "learning_rate": 1.2620025038457554e-06, | |
| "loss": 0.5421, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.8926014319809069, | |
| "grad_norm": 0.725877140171063, | |
| "learning_rate": 1.254357483448755e-06, | |
| "loss": 0.519, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.8973747016706444, | |
| "grad_norm": 0.7168188099187686, | |
| "learning_rate": 1.2466965350613615e-06, | |
| "loss": 0.5651, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9021479713603818, | |
| "grad_norm": 0.8993966404570418, | |
| "learning_rate": 1.2390201384159219e-06, | |
| "loss": 0.5603, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9069212410501193, | |
| "grad_norm": 0.741646072361816, | |
| "learning_rate": 1.231328774212159e-06, | |
| "loss": 0.5157, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9116945107398569, | |
| "grad_norm": 0.7741706595084717, | |
| "learning_rate": 1.223622924087073e-06, | |
| "loss": 0.5367, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.9164677804295943, | |
| "grad_norm": 0.760645151447744, | |
| "learning_rate": 1.215903070584779e-06, | |
| "loss": 0.5401, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.9212410501193318, | |
| "grad_norm": 0.7462809840684769, | |
| "learning_rate": 1.2081696971262903e-06, | |
| "loss": 0.5458, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.9260143198090692, | |
| "grad_norm": 0.867349599337623, | |
| "learning_rate": 1.2004232879792464e-06, | |
| "loss": 0.5398, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.9307875894988067, | |
| "grad_norm": 0.7728255267176583, | |
| "learning_rate": 1.1926643282275882e-06, | |
| "loss": 0.5343, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.9355608591885441, | |
| "grad_norm": 0.7946709962404823, | |
| "learning_rate": 1.1848933037411825e-06, | |
| "loss": 0.5181, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.9403341288782816, | |
| "grad_norm": 0.7159173523126642, | |
| "learning_rate": 1.1771107011453933e-06, | |
| "loss": 0.5442, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.9451073985680191, | |
| "grad_norm": 0.8493976289870552, | |
| "learning_rate": 1.1693170077906143e-06, | |
| "loss": 0.5467, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.9498806682577565, | |
| "grad_norm": 0.7390118080756048, | |
| "learning_rate": 1.1615127117217463e-06, | |
| "loss": 0.5251, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.954653937947494, | |
| "grad_norm": 0.7595495597083671, | |
| "learning_rate": 1.1536983016476373e-06, | |
| "loss": 0.5368, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.9594272076372315, | |
| "grad_norm": 0.7399505119485492, | |
| "learning_rate": 1.1458742669104803e-06, | |
| "loss": 0.514, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.964200477326969, | |
| "grad_norm": 0.7693531287817772, | |
| "learning_rate": 1.1380410974551682e-06, | |
| "loss": 0.5327, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.9689737470167065, | |
| "grad_norm": 0.7361655101073081, | |
| "learning_rate": 1.130199283798615e-06, | |
| "loss": 0.5152, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.9737470167064439, | |
| "grad_norm": 0.8174253218643999, | |
| "learning_rate": 1.1223493169990391e-06, | |
| "loss": 0.5376, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.9785202863961814, | |
| "grad_norm": 0.7646163527785592, | |
| "learning_rate": 1.1144916886252124e-06, | |
| "loss": 0.5198, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.9832935560859188, | |
| "grad_norm": 0.7600726494815581, | |
| "learning_rate": 1.1066268907256782e-06, | |
| "loss": 0.5358, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.9880668257756563, | |
| "grad_norm": 0.8292480992474258, | |
| "learning_rate": 1.098755415797939e-06, | |
| "loss": 0.5319, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.9928400954653938, | |
| "grad_norm": 0.7584975382780693, | |
| "learning_rate": 1.0908777567576168e-06, | |
| "loss": 0.5453, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.9976133651551312, | |
| "grad_norm": 0.7360353406613074, | |
| "learning_rate": 1.0829944069075847e-06, | |
| "loss": 0.5398, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7360353406613074, | |
| "learning_rate": 1.0751058599070781e-06, | |
| "loss": 0.2683, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.0047732696897376, | |
| "grad_norm": 0.7735348980384088, | |
| "learning_rate": 1.0672126097407795e-06, | |
| "loss": 0.4862, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.009546539379475, | |
| "grad_norm": 0.6892850244639656, | |
| "learning_rate": 1.0593151506878865e-06, | |
| "loss": 0.4886, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.0143198090692125, | |
| "grad_norm": 0.7416432308937427, | |
| "learning_rate": 1.0514139772911597e-06, | |
| "loss": 0.4755, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.0190930787589498, | |
| "grad_norm": 0.6788376232914372, | |
| "learning_rate": 1.043509584325953e-06, | |
| "loss": 0.4643, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.0238663484486874, | |
| "grad_norm": 0.7328906073842687, | |
| "learning_rate": 1.0356024667692314e-06, | |
| "loss": 0.4934, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.0286396181384247, | |
| "grad_norm": 0.7697429459150121, | |
| "learning_rate": 1.0276931197685753e-06, | |
| "loss": 0.4976, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.0334128878281623, | |
| "grad_norm": 0.7939705310040335, | |
| "learning_rate": 1.0197820386111737e-06, | |
| "loss": 0.4897, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.0381861575178997, | |
| "grad_norm": 0.9752936792347606, | |
| "learning_rate": 1.0118697186928105e-06, | |
| "loss": 0.4632, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.0429594272076372, | |
| "grad_norm": 0.810300278966379, | |
| "learning_rate": 1.0039566554868392e-06, | |
| "loss": 0.4667, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.0477326968973748, | |
| "grad_norm": 0.7651633767231123, | |
| "learning_rate": 9.960433445131607e-07, | |
| "loss": 0.4913, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.0525059665871122, | |
| "grad_norm": 0.7783544485209318, | |
| "learning_rate": 9.881302813071896e-07, | |
| "loss": 0.485, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.0572792362768497, | |
| "grad_norm": 0.7728747490030172, | |
| "learning_rate": 9.802179613888262e-07, | |
| "loss": 0.4663, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.062052505966587, | |
| "grad_norm": 0.7199803548701269, | |
| "learning_rate": 9.723068802314246e-07, | |
| "loss": 0.4724, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.0668257756563246, | |
| "grad_norm": 0.8173682429078198, | |
| "learning_rate": 9.643975332307687e-07, | |
| "loss": 0.4777, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.071599045346062, | |
| "grad_norm": 0.9029276240129886, | |
| "learning_rate": 9.564904156740471e-07, | |
| "loss": 0.4664, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.0763723150357996, | |
| "grad_norm": 0.7595074592495551, | |
| "learning_rate": 9.485860227088405e-07, | |
| "loss": 0.4808, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.081145584725537, | |
| "grad_norm": 0.8019805756491788, | |
| "learning_rate": 9.406848493121134e-07, | |
| "loss": 0.4764, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.0859188544152745, | |
| "grad_norm": 0.7750922258239085, | |
| "learning_rate": 9.327873902592205e-07, | |
| "loss": 0.4711, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.0906921241050118, | |
| "grad_norm": 0.7272348247085987, | |
| "learning_rate": 9.248941400929222e-07, | |
| "loss": 0.4753, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.0954653937947494, | |
| "grad_norm": 0.8135968715591004, | |
| "learning_rate": 9.17005593092415e-07, | |
| "loss": 0.49, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.100238663484487, | |
| "grad_norm": 0.784517413630989, | |
| "learning_rate": 9.09122243242383e-07, | |
| "loss": 0.4636, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.1050119331742243, | |
| "grad_norm": 0.7967633635464352, | |
| "learning_rate": 9.01244584202061e-07, | |
| "loss": 0.4638, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.1097852028639619, | |
| "grad_norm": 0.7347125585892648, | |
| "learning_rate": 8.933731092743219e-07, | |
| "loss": 0.4951, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.1145584725536992, | |
| "grad_norm": 0.762506543894173, | |
| "learning_rate": 8.855083113747875e-07, | |
| "loss": 0.4715, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.1193317422434368, | |
| "grad_norm": 0.7657159811972606, | |
| "learning_rate": 8.776506830009607e-07, | |
| "loss": 0.4792, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.1241050119331741, | |
| "grad_norm": 0.7746280343348994, | |
| "learning_rate": 8.698007162013849e-07, | |
| "loss": 0.4734, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.1288782816229117, | |
| "grad_norm": 0.7287940319917965, | |
| "learning_rate": 8.619589025448318e-07, | |
| "loss": 0.4899, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.1336515513126493, | |
| "grad_norm": 0.7283506274833321, | |
| "learning_rate": 8.541257330895197e-07, | |
| "loss": 0.461, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.1384248210023866, | |
| "grad_norm": 1.109020964160513, | |
| "learning_rate": 8.463016983523627e-07, | |
| "loss": 0.4789, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.1431980906921242, | |
| "grad_norm": 0.8916069268430648, | |
| "learning_rate": 8.384872882782541e-07, | |
| "loss": 0.4951, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.1479713603818615, | |
| "grad_norm": 0.7832561259348029, | |
| "learning_rate": 8.306829922093857e-07, | |
| "loss": 0.4666, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.152744630071599, | |
| "grad_norm": 0.7246823419762234, | |
| "learning_rate": 8.228892988546067e-07, | |
| "loss": 0.475, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.1575178997613365, | |
| "grad_norm": 0.705366097498364, | |
| "learning_rate": 8.15106696258818e-07, | |
| "loss": 0.4727, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.162291169451074, | |
| "grad_norm": 0.7563603316000965, | |
| "learning_rate": 8.073356717724115e-07, | |
| "loss": 0.4779, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.1670644391408114, | |
| "grad_norm": 0.7463996376621957, | |
| "learning_rate": 7.995767120207536e-07, | |
| "loss": 0.4647, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.171837708830549, | |
| "grad_norm": 0.7117618711530662, | |
| "learning_rate": 7.918303028737096e-07, | |
| "loss": 0.4712, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.1766109785202863, | |
| "grad_norm": 0.7445420769436453, | |
| "learning_rate": 7.840969294152211e-07, | |
| "loss": 0.4747, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.1813842482100239, | |
| "grad_norm": 0.7339272409779617, | |
| "learning_rate": 7.763770759129269e-07, | |
| "loss": 0.4732, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.1861575178997614, | |
| "grad_norm": 0.7680499628702099, | |
| "learning_rate": 7.68671225787841e-07, | |
| "loss": 0.4677, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.1909307875894988, | |
| "grad_norm": 0.7289596879207738, | |
| "learning_rate": 7.609798615840785e-07, | |
| "loss": 0.4788, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.1957040572792363, | |
| "grad_norm": 0.7375098113291024, | |
| "learning_rate": 7.533034649386384e-07, | |
| "loss": 0.456, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.2004773269689737, | |
| "grad_norm": 0.7788484912408599, | |
| "learning_rate": 7.456425165512452e-07, | |
| "loss": 0.4768, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.2052505966587113, | |
| "grad_norm": 0.7545300469644135, | |
| "learning_rate": 7.379974961542447e-07, | |
| "loss": 0.4864, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.2100238663484486, | |
| "grad_norm": 0.8818787967594464, | |
| "learning_rate": 7.303688824825646e-07, | |
| "loss": 0.4768, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.2147971360381862, | |
| "grad_norm": 0.7762788166887581, | |
| "learning_rate": 7.227571532437349e-07, | |
| "loss": 0.4676, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.2195704057279237, | |
| "grad_norm": 0.674374793234199, | |
| "learning_rate": 7.151627850879755e-07, | |
| "loss": 0.4688, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.224343675417661, | |
| "grad_norm": 0.7391271163895584, | |
| "learning_rate": 7.075862535783453e-07, | |
| "loss": 0.4545, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.2291169451073987, | |
| "grad_norm": 0.7377869581736503, | |
| "learning_rate": 7.00028033160964e-07, | |
| "loss": 0.4842, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.233890214797136, | |
| "grad_norm": 0.7182033053068443, | |
| "learning_rate": 6.924885971353026e-07, | |
| "loss": 0.4841, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.2386634844868736, | |
| "grad_norm": 0.7165206421556828, | |
| "learning_rate": 6.849684176245431e-07, | |
| "loss": 0.4485, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.243436754176611, | |
| "grad_norm": 0.8274126483370449, | |
| "learning_rate": 6.774679655460158e-07, | |
| "loss": 0.4632, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.2482100238663485, | |
| "grad_norm": 0.7849668814937834, | |
| "learning_rate": 6.699877105817092e-07, | |
| "loss": 0.4701, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.2529832935560858, | |
| "grad_norm": 0.7246643685451561, | |
| "learning_rate": 6.625281211488591e-07, | |
| "loss": 0.4884, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.2577565632458234, | |
| "grad_norm": 0.7413214893244733, | |
| "learning_rate": 6.55089664370615e-07, | |
| "loss": 0.4821, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.2625298329355608, | |
| "grad_norm": 0.7307541408287506, | |
| "learning_rate": 6.476728060467888e-07, | |
| "loss": 0.4585, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.2673031026252983, | |
| "grad_norm": 0.7439228818529052, | |
| "learning_rate": 6.402780106246884e-07, | |
| "loss": 0.4688, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.272076372315036, | |
| "grad_norm": 0.7075632105234686, | |
| "learning_rate": 6.329057411700298e-07, | |
| "loss": 0.4813, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.2768496420047732, | |
| "grad_norm": 0.757650326028371, | |
| "learning_rate": 6.255564593379429e-07, | |
| "loss": 0.4878, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.2816229116945108, | |
| "grad_norm": 0.729712295017678, | |
| "learning_rate": 6.182306253440619e-07, | |
| "loss": 0.4629, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.2863961813842482, | |
| "grad_norm": 0.8230987908171445, | |
| "learning_rate": 6.109286979357051e-07, | |
| "loss": 0.4842, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.2911694510739857, | |
| "grad_norm": 0.7878144207218812, | |
| "learning_rate": 6.036511343631488e-07, | |
| "loss": 0.4588, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.295942720763723, | |
| "grad_norm": 0.7162555025211284, | |
| "learning_rate": 5.963983903509935e-07, | |
| "loss": 0.4817, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.3007159904534606, | |
| "grad_norm": 0.7352227500252277, | |
| "learning_rate": 5.89170920069628e-07, | |
| "loss": 0.4781, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.3054892601431982, | |
| "grad_norm": 0.7097358431174013, | |
| "learning_rate": 5.819691761067865e-07, | |
| "loss": 0.46, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.3102625298329356, | |
| "grad_norm": 1.146161188184777, | |
| "learning_rate": 5.747936094392089e-07, | |
| "loss": 0.4647, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.315035799522673, | |
| "grad_norm": 0.7072592435264768, | |
| "learning_rate": 5.676446694044002e-07, | |
| "loss": 0.4639, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.3198090692124105, | |
| "grad_norm": 0.7215149618117556, | |
| "learning_rate": 5.605228036724927e-07, | |
| "loss": 0.4652, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.324582338902148, | |
| "grad_norm": 0.670785774408122, | |
| "learning_rate": 5.534284582182114e-07, | |
| "loss": 0.4717, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.3293556085918854, | |
| "grad_norm": 0.747767864677791, | |
| "learning_rate": 5.463620772929494e-07, | |
| "loss": 0.4536, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.334128878281623, | |
| "grad_norm": 0.8516514509018951, | |
| "learning_rate": 5.393241033969466e-07, | |
| "loss": 0.4649, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.3389021479713603, | |
| "grad_norm": 0.8138001829719436, | |
| "learning_rate": 5.323149772515812e-07, | |
| "loss": 0.4668, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.3436754176610979, | |
| "grad_norm": 0.7576171145048753, | |
| "learning_rate": 5.253351377717706e-07, | |
| "loss": 0.4761, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.3484486873508352, | |
| "grad_norm": 0.8613520066962265, | |
| "learning_rate": 5.183850220384873e-07, | |
| "loss": 0.469, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.3532219570405728, | |
| "grad_norm": 0.766228885306893, | |
| "learning_rate": 5.114650652713884e-07, | |
| "loss": 0.4802, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.3579952267303104, | |
| "grad_norm": 0.7068637893292556, | |
| "learning_rate": 5.045757008015606e-07, | |
| "loss": 0.4773, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.3627684964200477, | |
| "grad_norm": 0.8429657657602729, | |
| "learning_rate": 4.977173600443868e-07, | |
| "loss": 0.4605, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.3675417661097853, | |
| "grad_norm": 0.7007932505507933, | |
| "learning_rate": 4.908904724725299e-07, | |
| "loss": 0.4767, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.3723150357995226, | |
| "grad_norm": 0.7671222670718428, | |
| "learning_rate": 4.840954655890391e-07, | |
| "loss": 0.4682, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.3770883054892602, | |
| "grad_norm": 0.694265618019185, | |
| "learning_rate": 4.773327649005777e-07, | |
| "loss": 0.4855, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.3818615751789975, | |
| "grad_norm": 0.7519150028535938, | |
| "learning_rate": 4.7060279389078184e-07, | |
| "loss": 0.4761, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.3866348448687351, | |
| "grad_norm": 0.7486630511459641, | |
| "learning_rate": 4.6390597399373644e-07, | |
| "loss": 0.4565, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.3914081145584727, | |
| "grad_norm": 0.7422555751664944, | |
| "learning_rate": 4.5724272456758907e-07, | |
| "loss": 0.4826, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.39618138424821, | |
| "grad_norm": 0.77856112043872, | |
| "learning_rate": 4.506134628682877e-07, | |
| "loss": 0.4763, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.4009546539379474, | |
| "grad_norm": 0.7684572854516972, | |
| "learning_rate": 4.440186040234524e-07, | |
| "loss": 0.4672, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.405727923627685, | |
| "grad_norm": 0.7665847058665568, | |
| "learning_rate": 4.3745856100637834e-07, | |
| "loss": 0.4656, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.4105011933174225, | |
| "grad_norm": 0.733469970387663, | |
| "learning_rate": 4.3093374461017785e-07, | |
| "loss": 0.4676, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.4152744630071599, | |
| "grad_norm": 0.8421640257156171, | |
| "learning_rate": 4.244445634220545e-07, | |
| "loss": 0.4843, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.4200477326968974, | |
| "grad_norm": 0.8009564109297522, | |
| "learning_rate": 4.1799142379771766e-07, | |
| "loss": 0.4809, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.4248210023866348, | |
| "grad_norm": 0.7033349702559853, | |
| "learning_rate": 4.115747298359363e-07, | |
| "loss": 0.464, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.4295942720763724, | |
| "grad_norm": 0.7437100788001662, | |
| "learning_rate": 4.0519488335323415e-07, | |
| "loss": 0.4851, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.4343675417661097, | |
| "grad_norm": 0.7732697984175376, | |
| "learning_rate": 3.9885228385872806e-07, | |
| "loss": 0.4594, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.4391408114558473, | |
| "grad_norm": 0.7940793070581448, | |
| "learning_rate": 3.925473285291091e-07, | |
| "loss": 0.4661, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.4439140811455848, | |
| "grad_norm": 0.7351909971969558, | |
| "learning_rate": 3.862804121837733e-07, | |
| "loss": 0.4757, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.4486873508353222, | |
| "grad_norm": 0.781207875542895, | |
| "learning_rate": 3.8005192726009663e-07, | |
| "loss": 0.4787, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.4534606205250595, | |
| "grad_norm": 0.7991516861553173, | |
| "learning_rate": 3.738622637888608e-07, | |
| "loss": 0.4668, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.458233890214797, | |
| "grad_norm": 0.8987252432386614, | |
| "learning_rate": 3.677118093698278e-07, | |
| "loss": 0.4606, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.4630071599045347, | |
| "grad_norm": 0.698103668533834, | |
| "learning_rate": 3.61600949147472e-07, | |
| "loss": 0.4683, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.467780429594272, | |
| "grad_norm": 0.7560261667555234, | |
| "learning_rate": 3.5553006578685706e-07, | |
| "loss": 0.4519, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.4725536992840096, | |
| "grad_norm": 0.7382407678980342, | |
| "learning_rate": 3.494995394496778e-07, | |
| "loss": 0.469, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.477326968973747, | |
| "grad_norm": 0.720898348204588, | |
| "learning_rate": 3.435097477704517e-07, | |
| "loss": 0.449, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.4821002386634845, | |
| "grad_norm": 0.7319822241837816, | |
| "learning_rate": 3.3756106583287205e-07, | |
| "loss": 0.4745, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.4868735083532219, | |
| "grad_norm": 0.7518826329514531, | |
| "learning_rate": 3.316538661463204e-07, | |
| "loss": 0.4918, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.4916467780429594, | |
| "grad_norm": 0.8013086574909619, | |
| "learning_rate": 3.2578851862253796e-07, | |
| "loss": 0.4846, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.496420047732697, | |
| "grad_norm": 0.7101861238945232, | |
| "learning_rate": 3.199653905524654e-07, | |
| "loss": 0.4604, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.5011933174224343, | |
| "grad_norm": 0.7204781171906866, | |
| "learning_rate": 3.1418484658323806e-07, | |
| "loss": 0.4772, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.5059665871121717, | |
| "grad_norm": 0.73033687450555, | |
| "learning_rate": 3.0844724869535577e-07, | |
| "loss": 0.468, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.5107398568019093, | |
| "grad_norm": 0.7700114197888783, | |
| "learning_rate": 3.027529561800117e-07, | |
| "loss": 0.4808, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.5155131264916468, | |
| "grad_norm": 0.8599415830432524, | |
| "learning_rate": 2.971023256165983e-07, | |
| "loss": 0.469, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.5202863961813842, | |
| "grad_norm": 0.7490557961852297, | |
| "learning_rate": 2.9149571085037215e-07, | |
| "loss": 0.4758, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.5250596658711217, | |
| "grad_norm": 0.6911043116400506, | |
| "learning_rate": 2.8593346297030073e-07, | |
| "loss": 0.4662, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.5298329355608593, | |
| "grad_norm": 0.7444306144257443, | |
| "learning_rate": 2.804159302870751e-07, | |
| "loss": 0.4638, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.5346062052505967, | |
| "grad_norm": 0.6930295325600317, | |
| "learning_rate": 2.7494345831129837e-07, | |
| "loss": 0.4584, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.539379474940334, | |
| "grad_norm": 0.7461580524158721, | |
| "learning_rate": 2.6951638973185073e-07, | |
| "loss": 0.4757, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.5441527446300716, | |
| "grad_norm": 0.7678530858976563, | |
| "learning_rate": 2.64135064394428e-07, | |
| "loss": 0.4807, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.5489260143198091, | |
| "grad_norm": 1.8144860255245707, | |
| "learning_rate": 2.587998192802638e-07, | |
| "loss": 0.4605, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.5536992840095465, | |
| "grad_norm": 0.7200319691236525, | |
| "learning_rate": 2.5351098848502386e-07, | |
| "loss": 0.474, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.558472553699284, | |
| "grad_norm": 0.7134577877268367, | |
| "learning_rate": 2.482689031978872e-07, | |
| "loss": 0.4715, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.5632458233890216, | |
| "grad_norm": 0.9468756981275396, | |
| "learning_rate": 2.4307389168080606e-07, | |
| "loss": 0.4656, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.568019093078759, | |
| "grad_norm": 0.6688722309384391, | |
| "learning_rate": 2.3792627924795038e-07, | |
| "loss": 0.4922, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.5727923627684963, | |
| "grad_norm": 0.7125789762828182, | |
| "learning_rate": 2.3282638824533529e-07, | |
| "loss": 0.4692, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.577565632458234, | |
| "grad_norm": 0.8844333458882234, | |
| "learning_rate": 2.277745380306383e-07, | |
| "loss": 0.4876, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.5823389021479715, | |
| "grad_norm": 0.7950308834961601, | |
| "learning_rate": 2.227710449531971e-07, | |
| "loss": 0.4918, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.5871121718377088, | |
| "grad_norm": 0.796382860942759, | |
| "learning_rate": 2.178162223342035e-07, | |
| "loss": 0.4641, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.5918854415274462, | |
| "grad_norm": 0.7285520770077796, | |
| "learning_rate": 2.1291038044707965e-07, | |
| "loss": 0.4661, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.5966587112171837, | |
| "grad_norm": 0.6921820001369808, | |
| "learning_rate": 2.0805382649805225e-07, | |
| "loss": 0.4681, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.6014319809069213, | |
| "grad_norm": 0.7552481890637776, | |
| "learning_rate": 2.032468646069112e-07, | |
| "loss": 0.4672, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.6062052505966586, | |
| "grad_norm": 0.7155745101307224, | |
| "learning_rate": 1.9848979578796865e-07, | |
| "loss": 0.4767, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.6109785202863962, | |
| "grad_norm": 0.6993076336434562, | |
| "learning_rate": 1.937829179312076e-07, | |
| "loss": 0.4822, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.6157517899761338, | |
| "grad_norm": 0.7530303728674003, | |
| "learning_rate": 1.8912652578362853e-07, | |
| "loss": 0.4709, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.6205250596658711, | |
| "grad_norm": 0.7510327363849882, | |
| "learning_rate": 1.8452091093079215e-07, | |
| "loss": 0.4604, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.6252983293556085, | |
| "grad_norm": 0.7282910633876013, | |
| "learning_rate": 1.7996636177855928e-07, | |
| "loss": 0.4984, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.630071599045346, | |
| "grad_norm": 0.7524297400825809, | |
| "learning_rate": 1.75463163535033e-07, | |
| "loss": 0.4823, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.6348448687350836, | |
| "grad_norm": 0.7049222733481684, | |
| "learning_rate": 1.7101159819269583e-07, | |
| "loss": 0.4635, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.639618138424821, | |
| "grad_norm": 1.1034453594616451, | |
| "learning_rate": 1.6661194451075345e-07, | |
| "loss": 0.4765, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.6443914081145583, | |
| "grad_norm": 0.83013391018154, | |
| "learning_rate": 1.6226447799767772e-07, | |
| "loss": 0.4533, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.649164677804296, | |
| "grad_norm": 2.858030289791699, | |
| "learning_rate": 1.5796947089395475e-07, | |
| "loss": 0.4691, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.6539379474940334, | |
| "grad_norm": 0.7332905568570133, | |
| "learning_rate": 1.5372719215503582e-07, | |
| "loss": 0.4544, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.6587112171837708, | |
| "grad_norm": 0.7481224605220782, | |
| "learning_rate": 1.4953790743449702e-07, | |
| "loss": 0.4806, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.6634844868735084, | |
| "grad_norm": 0.9099408876904721, | |
| "learning_rate": 1.4540187906740241e-07, | |
| "loss": 0.4569, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.668257756563246, | |
| "grad_norm": 0.6921320546034447, | |
| "learning_rate": 1.4131936605387762e-07, | |
| "loss": 0.4897, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.6730310262529833, | |
| "grad_norm": 0.7172188028374827, | |
| "learning_rate": 1.3729062404289017e-07, | |
| "loss": 0.4799, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.6778042959427206, | |
| "grad_norm": 0.7348308299387173, | |
| "learning_rate": 1.3331590531624115e-07, | |
| "loss": 0.4714, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.6825775656324582, | |
| "grad_norm": 0.7524117454719962, | |
| "learning_rate": 1.2939545877276726e-07, | |
| "loss": 0.4679, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.6873508353221958, | |
| "grad_norm": 0.7609980327732692, | |
| "learning_rate": 1.25529529912754e-07, | |
| "loss": 0.4678, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.692124105011933, | |
| "grad_norm": 0.7906234591099575, | |
| "learning_rate": 1.2171836082256316e-07, | |
| "loss": 0.4754, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.6968973747016707, | |
| "grad_norm": 0.7519337557814546, | |
| "learning_rate": 1.1796219015947285e-07, | |
| "loss": 0.4803, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.7016706443914082, | |
| "grad_norm": 0.6859134821445197, | |
| "learning_rate": 1.1426125313673285e-07, | |
| "loss": 0.4939, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.7064439140811456, | |
| "grad_norm": 0.8229493204752176, | |
| "learning_rate": 1.1061578150883444e-07, | |
| "loss": 0.4372, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.711217183770883, | |
| "grad_norm": 0.692317996696451, | |
| "learning_rate": 1.070260035570002e-07, | |
| "loss": 0.4792, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.7159904534606205, | |
| "grad_norm": 0.7390705342617898, | |
| "learning_rate": 1.0349214407488571e-07, | |
| "loss": 0.4719, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.720763723150358, | |
| "grad_norm": 0.7057263439063961, | |
| "learning_rate": 1.000144243545058e-07, | |
| "loss": 0.4724, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7255369928400954, | |
| "grad_norm": 0.707795857913463, | |
| "learning_rate": 9.659306217237517e-08, | |
| "loss": 0.4717, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.7303102625298328, | |
| "grad_norm": 0.7912536951606031, | |
| "learning_rate": 9.322827177587212e-08, | |
| "loss": 0.4623, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.7350835322195706, | |
| "grad_norm": 0.746736598206851, | |
| "learning_rate": 8.992026386982221e-08, | |
| "loss": 0.4735, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.739856801909308, | |
| "grad_norm": 0.6948885657819285, | |
| "learning_rate": 8.66692456033029e-08, | |
| "loss": 0.4825, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.7446300715990453, | |
| "grad_norm": 0.7262491961744311, | |
| "learning_rate": 8.347542055667311e-08, | |
| "loss": 0.4699, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.7494033412887828, | |
| "grad_norm": 0.7863038143235231, | |
| "learning_rate": 8.033898872882394e-08, | |
| "loss": 0.4679, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.7541766109785204, | |
| "grad_norm": 0.6727626949269937, | |
| "learning_rate": 7.726014652465507e-08, | |
| "loss": 0.4421, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.7589498806682577, | |
| "grad_norm": 0.6867145980818331, | |
| "learning_rate": 7.423908674277579e-08, | |
| "loss": 0.4778, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.763723150357995, | |
| "grad_norm": 1.4213029472300538, | |
| "learning_rate": 7.127599856343192e-08, | |
| "loss": 0.4727, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.7684964200477327, | |
| "grad_norm": 0.692012937763345, | |
| "learning_rate": 6.837106753665823e-08, | |
| "loss": 0.4741, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.7732696897374702, | |
| "grad_norm": 0.7092148893859065, | |
| "learning_rate": 6.552447557066109e-08, | |
| "loss": 0.4697, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.7780429594272076, | |
| "grad_norm": 0.6973356829898804, | |
| "learning_rate": 6.273640092042575e-08, | |
| "loss": 0.4544, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.7828162291169452, | |
| "grad_norm": 1.5448551643686548, | |
| "learning_rate": 6.000701817655474e-08, | |
| "loss": 0.4523, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.7875894988066827, | |
| "grad_norm": 1.4827724692081619, | |
| "learning_rate": 5.733649825433384e-08, | |
| "loss": 0.4551, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.79236276849642, | |
| "grad_norm": 0.7790516793749164, | |
| "learning_rate": 5.47250083830314e-08, | |
| "loss": 0.494, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.7971360381861574, | |
| "grad_norm": 0.7365514384441436, | |
| "learning_rate": 5.217271209542384e-08, | |
| "loss": 0.4735, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.801909307875895, | |
| "grad_norm": 0.7707502808832377, | |
| "learning_rate": 4.967976921755679e-08, | |
| "loss": 0.4501, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8066825775656326, | |
| "grad_norm": 0.7176835200739754, | |
| "learning_rate": 4.724633585873627e-08, | |
| "loss": 0.4686, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.81145584725537, | |
| "grad_norm": 0.6889468337016494, | |
| "learning_rate": 4.487256440175291e-08, | |
| "loss": 0.4771, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.8162291169451072, | |
| "grad_norm": 1.0649529564643607, | |
| "learning_rate": 4.255860349334006e-08, | |
| "loss": 0.4661, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.8210023866348448, | |
| "grad_norm": 1.1333041301606328, | |
| "learning_rate": 4.030459803486464e-08, | |
| "loss": 0.4606, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.8257756563245824, | |
| "grad_norm": 0.765268616008849, | |
| "learning_rate": 3.811068917325444e-08, | |
| "loss": 0.4442, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.8305489260143197, | |
| "grad_norm": 0.701547689578903, | |
| "learning_rate": 3.5977014292158495e-08, | |
| "loss": 0.4739, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.8353221957040573, | |
| "grad_norm": 0.7141975076446941, | |
| "learning_rate": 3.3903707003344774e-08, | |
| "loss": 0.4719, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.8400954653937949, | |
| "grad_norm": 0.6918753885495199, | |
| "learning_rate": 3.189089713833226e-08, | |
| "loss": 0.4772, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.8448687350835322, | |
| "grad_norm": 0.714964202433507, | |
| "learning_rate": 2.9938710740262884e-08, | |
| "loss": 0.4561, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.8496420047732696, | |
| "grad_norm": 0.7838822438811583, | |
| "learning_rate": 2.8047270056005934e-08, | |
| "loss": 0.4565, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.8544152744630071, | |
| "grad_norm": 0.7061577623995287, | |
| "learning_rate": 2.6216693528505195e-08, | |
| "loss": 0.4648, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.8591885441527447, | |
| "grad_norm": 0.9071757882196184, | |
| "learning_rate": 2.4447095789360884e-08, | |
| "loss": 0.4711, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.863961813842482, | |
| "grad_norm": 0.765845128347514, | |
| "learning_rate": 2.2738587651651487e-08, | |
| "loss": 0.4577, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.8687350835322196, | |
| "grad_norm": 0.7650600946027074, | |
| "learning_rate": 2.109127610299466e-08, | |
| "loss": 0.4679, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.8735083532219572, | |
| "grad_norm": 0.6957819402359949, | |
| "learning_rate": 1.950526429884769e-08, | |
| "loss": 0.4559, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.8782816229116945, | |
| "grad_norm": 0.7430172728751436, | |
| "learning_rate": 1.7980651556048e-08, | |
| "loss": 0.4732, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.8830548926014319, | |
| "grad_norm": 0.6767278663023139, | |
| "learning_rate": 1.6517533346593226e-08, | |
| "loss": 0.4758, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.8878281622911695, | |
| "grad_norm": 0.7619777582419104, | |
| "learning_rate": 1.5116001291663462e-08, | |
| "loss": 0.4932, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.892601431980907, | |
| "grad_norm": 0.790159743362526, | |
| "learning_rate": 1.3776143155883491e-08, | |
| "loss": 0.4558, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.8973747016706444, | |
| "grad_norm": 0.7261843559497824, | |
| "learning_rate": 1.2498042841827317e-08, | |
| "loss": 0.4595, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.9021479713603817, | |
| "grad_norm": 0.7017669980294373, | |
| "learning_rate": 1.128178038476324e-08, | |
| "loss": 0.4625, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.9069212410501193, | |
| "grad_norm": 0.6784318458229694, | |
| "learning_rate": 1.0127431947643316e-08, | |
| "loss": 0.4671, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9116945107398569, | |
| "grad_norm": 0.675130035717692, | |
| "learning_rate": 9.035069816332619e-09, | |
| "loss": 0.464, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.9164677804295942, | |
| "grad_norm": 0.8221120490850481, | |
| "learning_rate": 8.004762395083963e-09, | |
| "loss": 0.4537, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.9212410501193318, | |
| "grad_norm": 0.7612136405972405, | |
| "learning_rate": 7.036574202253343e-09, | |
| "loss": 0.4914, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.9260143198090693, | |
| "grad_norm": 0.8291594902189451, | |
| "learning_rate": 6.130565866260484e-09, | |
| "loss": 0.4727, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.9307875894988067, | |
| "grad_norm": 1.0329364100399496, | |
| "learning_rate": 5.286794121791782e-09, | |
| "loss": 0.4767, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.935560859188544, | |
| "grad_norm": 0.8758229910700595, | |
| "learning_rate": 4.5053118062478025e-09, | |
| "loss": 0.4501, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.9403341288782816, | |
| "grad_norm": 0.7067697193260255, | |
| "learning_rate": 3.786167856434375e-09, | |
| "loss": 0.4747, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.9451073985680192, | |
| "grad_norm": 0.7459961970155857, | |
| "learning_rate": 3.1294073054987102e-09, | |
| "loss": 0.4605, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.9498806682577565, | |
| "grad_norm": 0.7585534385150827, | |
| "learning_rate": 2.5350712801084363e-09, | |
| "loss": 0.4528, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.9546539379474939, | |
| "grad_norm": 0.6767868247269999, | |
| "learning_rate": 2.003196997877099e-09, | |
| "loss": 0.4585, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.9594272076372317, | |
| "grad_norm": 0.7126370902337825, | |
| "learning_rate": 1.5338177650332517e-09, | |
| "loss": 0.4591, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.964200477326969, | |
| "grad_norm": 0.7172728813954358, | |
| "learning_rate": 1.1269629743346777e-09, | |
| "loss": 0.4589, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.9689737470167064, | |
| "grad_norm": 0.8158860106123756, | |
| "learning_rate": 7.826581032279734e-10, | |
| "loss": 0.4601, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.973747016706444, | |
| "grad_norm": 0.8261699459606863, | |
| "learning_rate": 5.00924712252937e-10, | |
| "loss": 0.4731, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.9785202863961815, | |
| "grad_norm": 0.7168072767819187, | |
| "learning_rate": 2.8178044369286945e-10, | |
| "loss": 0.4657, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.9832935560859188, | |
| "grad_norm": 0.6783006404134123, | |
| "learning_rate": 1.2523902046934763e-10, | |
| "loss": 0.452, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.9880668257756562, | |
| "grad_norm": 0.7080089156985594, | |
| "learning_rate": 3.131024528302273e-11, | |
| "loss": 0.4737, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.9928400954653938, | |
| "grad_norm": 0.7031897431837284, | |
| "learning_rate": 0.0, | |
| "loss": 0.4817, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.9928400954653938, | |
| "step": 418, | |
| "total_flos": 3166299160051712.0, | |
| "train_loss": 0.5405693022828353, | |
| "train_runtime": 17864.3337, | |
| "train_samples_per_second": 2.995, | |
| "train_steps_per_second": 0.023 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 418, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3166299160051712.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |