{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9949177877428999,
  "eval_steps": 500,
  "global_step": 418,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004783258594917788,
      "grad_norm": 0.709558174057161,
      "learning_rate": 9.523809523809523e-08,
      "loss": 1.2153,
      "step": 1
    },
    {
      "epoch": 0.009566517189835576,
      "grad_norm": 0.7136115199943648,
      "learning_rate": 1.9047619047619045e-07,
      "loss": 1.1666,
      "step": 2
    },
    {
      "epoch": 0.014349775784753363,
      "grad_norm": 0.675693723617585,
      "learning_rate": 2.857142857142857e-07,
      "loss": 1.1641,
      "step": 3
    },
    {
      "epoch": 0.019133034379671152,
      "grad_norm": 0.6920682930548318,
      "learning_rate": 3.809523809523809e-07,
      "loss": 1.1522,
      "step": 4
    },
    {
      "epoch": 0.02391629297458894,
      "grad_norm": 0.7102565482472595,
      "learning_rate": 4.761904761904761e-07,
      "loss": 1.1849,
      "step": 5
    },
    {
      "epoch": 0.028699551569506727,
      "grad_norm": 0.7063832004098226,
      "learning_rate": 5.714285714285714e-07,
      "loss": 1.1832,
      "step": 6
    },
    {
      "epoch": 0.03348281016442452,
      "grad_norm": 0.7273064919061014,
      "learning_rate": 6.666666666666666e-07,
      "loss": 1.1646,
      "step": 7
    },
    {
      "epoch": 0.038266068759342305,
      "grad_norm": 0.6515215643931184,
      "learning_rate": 7.619047619047618e-07,
      "loss": 1.1492,
      "step": 8
    },
    {
      "epoch": 0.04304932735426009,
      "grad_norm": 0.662055704210126,
      "learning_rate": 8.57142857142857e-07,
      "loss": 1.1356,
      "step": 9
    },
    {
      "epoch": 0.04783258594917788,
      "grad_norm": 0.6717882944749636,
      "learning_rate": 9.523809523809522e-07,
      "loss": 1.1373,
      "step": 10
    },
    {
      "epoch": 0.052615844544095666,
      "grad_norm": 0.5448741661939914,
      "learning_rate": 1.0476190476190476e-06,
      "loss": 1.1133,
      "step": 11
    },
    {
      "epoch": 0.05739910313901345,
      "grad_norm": 0.5462157636865493,
      "learning_rate": 1.1428571428571428e-06,
      "loss": 1.1025,
      "step": 12
    },
    {
      "epoch": 0.06218236173393124,
      "grad_norm": 0.5384420206966651,
      "learning_rate": 1.238095238095238e-06,
      "loss": 1.1102,
      "step": 13
    },
    {
      "epoch": 0.06696562032884903,
      "grad_norm": 0.5199166835309963,
      "learning_rate": 1.3333333333333332e-06,
      "loss": 1.0716,
      "step": 14
    },
    {
      "epoch": 0.07174887892376682,
      "grad_norm": 0.3132434598969462,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 1.0198,
      "step": 15
    },
    {
      "epoch": 0.07653213751868461,
      "grad_norm": 0.2906366968418868,
      "learning_rate": 1.5238095238095236e-06,
      "loss": 1.0402,
      "step": 16
    },
    {
      "epoch": 0.08131539611360239,
      "grad_norm": 0.27762786913003945,
      "learning_rate": 1.619047619047619e-06,
      "loss": 1.0027,
      "step": 17
    },
    {
      "epoch": 0.08609865470852018,
      "grad_norm": 0.27399342611565175,
      "learning_rate": 1.714285714285714e-06,
      "loss": 1.0022,
      "step": 18
    },
    {
      "epoch": 0.09088191330343796,
      "grad_norm": 0.24537662923792491,
      "learning_rate": 1.8095238095238095e-06,
      "loss": 1.0272,
      "step": 19
    },
    {
      "epoch": 0.09566517189835576,
      "grad_norm": 0.20010147123002528,
      "learning_rate": 1.9047619047619045e-06,
      "loss": 0.9569,
      "step": 20
    },
    {
      "epoch": 0.10044843049327354,
      "grad_norm": 0.25587399010113915,
      "learning_rate": 2e-06,
      "loss": 0.9848,
      "step": 21
    },
    {
      "epoch": 0.10523168908819133,
      "grad_norm": 0.3007962756012024,
      "learning_rate": 1.9999686897547167e-06,
      "loss": 0.9581,
      "step": 22
    },
    {
      "epoch": 0.11001494768310911,
      "grad_norm": 0.3006848188189002,
      "learning_rate": 1.9998747609795305e-06,
      "loss": 0.9478,
      "step": 23
    },
    {
      "epoch": 0.1147982062780269,
      "grad_norm": 0.30781981272131237,
      "learning_rate": 1.999718219556307e-06,
      "loss": 0.9834,
      "step": 24
    },
    {
      "epoch": 0.11958146487294469,
      "grad_norm": 0.3118837626745979,
      "learning_rate": 1.999499075287747e-06,
      "loss": 0.9852,
      "step": 25
    },
    {
      "epoch": 0.12436472346786248,
      "grad_norm": 0.26545213805928825,
      "learning_rate": 1.999217341896772e-06,
      "loss": 0.9549,
      "step": 26
    },
    {
      "epoch": 0.12914798206278028,
      "grad_norm": 0.28369668711343804,
      "learning_rate": 1.998873037025665e-06,
      "loss": 0.9395,
      "step": 27
    },
    {
      "epoch": 0.13393124065769807,
      "grad_norm": 0.228056096803738,
      "learning_rate": 1.9984661822349665e-06,
      "loss": 0.9124,
      "step": 28
    },
    {
      "epoch": 0.13871449925261584,
      "grad_norm": 0.23393624794141885,
      "learning_rate": 1.997996803002123e-06,
      "loss": 0.9306,
      "step": 29
    },
    {
      "epoch": 0.14349775784753363,
      "grad_norm": 0.20376330319941563,
      "learning_rate": 1.9974649287198914e-06,
      "loss": 0.8882,
      "step": 30
    },
    {
      "epoch": 0.14828101644245142,
      "grad_norm": 0.19033042713450593,
      "learning_rate": 1.9968705926945013e-06,
      "loss": 0.8699,
      "step": 31
    },
    {
      "epoch": 0.15306427503736922,
      "grad_norm": 0.20517384186395837,
      "learning_rate": 1.9962138321435656e-06,
      "loss": 0.8919,
      "step": 32
    },
    {
      "epoch": 0.15784753363228698,
      "grad_norm": 0.19219397333283395,
      "learning_rate": 1.9954946881937524e-06,
      "loss": 0.8985,
      "step": 33
    },
    {
      "epoch": 0.16263079222720478,
      "grad_norm": 0.18095506989716384,
      "learning_rate": 1.994713205878208e-06,
      "loss": 0.8504,
      "step": 34
    },
    {
      "epoch": 0.16741405082212257,
      "grad_norm": 0.1722529909885032,
      "learning_rate": 1.9938694341337393e-06,
      "loss": 0.8743,
      "step": 35
    },
    {
      "epoch": 0.17219730941704037,
      "grad_norm": 0.16508567356320156,
      "learning_rate": 1.9929634257977467e-06,
      "loss": 0.857,
      "step": 36
    },
    {
      "epoch": 0.17698056801195813,
      "grad_norm": 0.15380307949646846,
      "learning_rate": 1.991995237604916e-06,
      "loss": 0.8487,
      "step": 37
    },
    {
      "epoch": 0.18176382660687593,
      "grad_norm": 0.14856130486975244,
      "learning_rate": 1.9909649301836674e-06,
      "loss": 0.8692,
      "step": 38
    },
    {
      "epoch": 0.18654708520179372,
      "grad_norm": 0.1518842900714723,
      "learning_rate": 1.9898725680523566e-06,
      "loss": 0.8679,
      "step": 39
    },
    {
      "epoch": 0.19133034379671152,
      "grad_norm": 0.1443106182213824,
      "learning_rate": 1.9887182196152367e-06,
      "loss": 0.8504,
      "step": 40
    },
    {
      "epoch": 0.1961136023916293,
      "grad_norm": 0.14664015617981188,
      "learning_rate": 1.9875019571581726e-06,
      "loss": 0.8125,
      "step": 41
    },
    {
      "epoch": 0.20089686098654708,
      "grad_norm": 0.14692793192413753,
      "learning_rate": 1.9862238568441165e-06,
      "loss": 0.8257,
      "step": 42
    },
    {
      "epoch": 0.20568011958146487,
      "grad_norm": 0.13896889627771705,
      "learning_rate": 1.9848839987083364e-06,
      "loss": 0.8329,
      "step": 43
    },
    {
      "epoch": 0.21046337817638266,
      "grad_norm": 0.14943974659921427,
      "learning_rate": 1.983482466653407e-06,
      "loss": 0.8409,
      "step": 44
    },
    {
      "epoch": 0.21524663677130046,
      "grad_norm": 0.138210028938997,
      "learning_rate": 1.982019348443952e-06,
      "loss": 0.8323,
      "step": 45
    },
    {
      "epoch": 0.22002989536621823,
      "grad_norm": 0.1250406305407292,
      "learning_rate": 1.9804947357011523e-06,
      "loss": 0.8673,
      "step": 46
    },
    {
      "epoch": 0.22481315396113602,
      "grad_norm": 0.12719252526959784,
      "learning_rate": 1.978908723897005e-06,
      "loss": 0.8192,
      "step": 47
    },
    {
      "epoch": 0.2295964125560538,
      "grad_norm": 0.10853106729801387,
      "learning_rate": 1.9772614123483485e-06,
      "loss": 0.8384,
      "step": 48
    },
    {
      "epoch": 0.2343796711509716,
      "grad_norm": 0.11375286279894396,
      "learning_rate": 1.9755529042106393e-06,
      "loss": 0.7854,
      "step": 49
    },
    {
      "epoch": 0.23916292974588937,
      "grad_norm": 0.11326113932314119,
      "learning_rate": 1.973783306471495e-06,
      "loss": 0.795,
      "step": 50
    },
    {
      "epoch": 0.24394618834080717,
      "grad_norm": 0.12664705711535487,
      "learning_rate": 1.971952729943994e-06,
      "loss": 0.783,
      "step": 51
    },
    {
      "epoch": 0.24872944693572496,
      "grad_norm": 0.11119059988645158,
      "learning_rate": 1.9700612892597372e-06,
      "loss": 0.8059,
      "step": 52
    },
    {
      "epoch": 0.25351270553064276,
      "grad_norm": 0.10545114737351395,
      "learning_rate": 1.9681091028616676e-06,
      "loss": 0.7885,
      "step": 53
    },
    {
      "epoch": 0.25829596412556055,
      "grad_norm": 0.11679452392637804,
      "learning_rate": 1.966096292996655e-06,
      "loss": 0.8031,
      "step": 54
    },
    {
      "epoch": 0.26307922272047835,
      "grad_norm": 0.11363287552532539,
      "learning_rate": 1.9640229857078413e-06,
      "loss": 0.7774,
      "step": 55
    },
    {
      "epoch": 0.26786248131539614,
      "grad_norm": 0.1164225509000403,
      "learning_rate": 1.9618893108267454e-06,
      "loss": 0.7949,
      "step": 56
    },
    {
      "epoch": 0.2726457399103139,
      "grad_norm": 0.11077425052933487,
      "learning_rate": 1.9596954019651354e-06,
      "loss": 0.7674,
      "step": 57
    },
    {
      "epoch": 0.27742899850523167,
      "grad_norm": 0.10576177825898277,
      "learning_rate": 1.95744139650666e-06,
      "loss": 0.7953,
      "step": 58
    },
    {
      "epoch": 0.28221225710014947,
      "grad_norm": 0.10359885133841641,
      "learning_rate": 1.955127435598247e-06,
      "loss": 0.7881,
      "step": 59
    },
    {
      "epoch": 0.28699551569506726,
      "grad_norm": 0.10586032252156977,
      "learning_rate": 1.9527536641412637e-06,
      "loss": 0.7984,
      "step": 60
    },
    {
      "epoch": 0.29177877428998505,
      "grad_norm": 0.10642116844371083,
      "learning_rate": 1.950320230782443e-06,
      "loss": 0.7666,
      "step": 61
    },
    {
      "epoch": 0.29656203288490285,
      "grad_norm": 0.11202675632435576,
      "learning_rate": 1.9478272879045763e-06,
      "loss": 0.7809,
      "step": 62
    },
    {
      "epoch": 0.30134529147982064,
      "grad_norm": 0.10728322195233368,
      "learning_rate": 1.9452749916169685e-06,
      "loss": 0.7948,
      "step": 63
    },
    {
      "epoch": 0.30612855007473844,
      "grad_norm": 0.10427886124668943,
      "learning_rate": 1.942663501745666e-06,
      "loss": 0.7843,
      "step": 64
    },
    {
      "epoch": 0.3109118086696562,
      "grad_norm": 0.09150641957182463,
      "learning_rate": 1.939992981823445e-06,
      "loss": 0.7713,
      "step": 65
    },
    {
      "epoch": 0.31569506726457397,
      "grad_norm": 0.10652939965487439,
      "learning_rate": 1.9372635990795744e-06,
      "loss": 0.7338,
      "step": 66
    },
    {
      "epoch": 0.32047832585949176,
      "grad_norm": 0.12224668990837938,
      "learning_rate": 1.934475524429339e-06,
      "loss": 0.7651,
      "step": 67
    },
    {
      "epoch": 0.32526158445440956,
      "grad_norm": 0.09554788331952155,
      "learning_rate": 1.9316289324633416e-06,
      "loss": 0.7743,
      "step": 68
    },
    {
      "epoch": 0.33004484304932735,
      "grad_norm": 0.10311314948775388,
      "learning_rate": 1.928724001436568e-06,
      "loss": 0.7818,
      "step": 69
    },
    {
      "epoch": 0.33482810164424515,
      "grad_norm": 0.11402809897006772,
      "learning_rate": 1.925760913257224e-06,
      "loss": 0.7738,
      "step": 70
    },
    {
      "epoch": 0.33961136023916294,
      "grad_norm": 0.10099702778225672,
      "learning_rate": 1.922739853475345e-06,
      "loss": 0.7694,
      "step": 71
    },
    {
      "epoch": 0.34439461883408073,
      "grad_norm": 0.09669133625846159,
      "learning_rate": 1.919661011271176e-06,
      "loss": 0.7695,
      "step": 72
    },
    {
      "epoch": 0.34917787742899853,
      "grad_norm": 0.10013746372306316,
      "learning_rate": 1.916524579443327e-06,
      "loss": 0.7762,
      "step": 73
    },
    {
      "epoch": 0.35396113602391627,
      "grad_norm": 0.09840254254939616,
      "learning_rate": 1.9133307543966972e-06,
      "loss": 0.7465,
      "step": 74
    },
    {
      "epoch": 0.35874439461883406,
      "grad_norm": 0.10348087475535427,
      "learning_rate": 1.910079736130178e-06,
      "loss": 0.7591,
      "step": 75
    },
    {
      "epoch": 0.36352765321375186,
      "grad_norm": 0.09831488128647803,
      "learning_rate": 1.9067717282241275e-06,
      "loss": 0.7473,
      "step": 76
    },
    {
      "epoch": 0.36831091180866965,
      "grad_norm": 0.10747256347092367,
      "learning_rate": 1.9034069378276248e-06,
      "loss": 0.7899,
      "step": 77
    },
    {
      "epoch": 0.37309417040358744,
      "grad_norm": 0.10145726153107046,
      "learning_rate": 1.8999855756454943e-06,
      "loss": 0.759,
      "step": 78
    },
    {
      "epoch": 0.37787742899850524,
      "grad_norm": 0.09521749859691808,
      "learning_rate": 1.8965078559251141e-06,
      "loss": 0.765,
      "step": 79
    },
    {
      "epoch": 0.38266068759342303,
      "grad_norm": 0.09559204768504546,
      "learning_rate": 1.892973996443e-06,
      "loss": 0.7653,
      "step": 80
    },
    {
      "epoch": 0.3874439461883408,
      "grad_norm": 0.09893961689958143,
      "learning_rate": 1.8893842184911652e-06,
      "loss": 0.7585,
      "step": 81
    },
    {
      "epoch": 0.3922272047832586,
      "grad_norm": 0.10469293200053865,
      "learning_rate": 1.8857387468632673e-06,
      "loss": 0.7396,
      "step": 82
    },
    {
      "epoch": 0.39701046337817636,
      "grad_norm": 0.09881168266263542,
      "learning_rate": 1.8820378098405269e-06,
      "loss": 0.7449,
      "step": 83
    },
    {
      "epoch": 0.40179372197309415,
      "grad_norm": 0.09472923155314936,
      "learning_rate": 1.878281639177437e-06,
      "loss": 0.7536,
      "step": 84
    },
    {
      "epoch": 0.40657698056801195,
      "grad_norm": 0.09940252508830999,
      "learning_rate": 1.874470470087246e-06,
      "loss": 0.7695,
      "step": 85
    },
    {
      "epoch": 0.41136023916292974,
      "grad_norm": 0.10835992130612712,
      "learning_rate": 1.8706045412272329e-06,
      "loss": 0.7804,
      "step": 86
    },
    {
      "epoch": 0.41614349775784754,
      "grad_norm": 0.09850260645852206,
      "learning_rate": 1.8666840946837588e-06,
      "loss": 0.7581,
      "step": 87
    },
    {
      "epoch": 0.42092675635276533,
      "grad_norm": 0.10663807706116737,
      "learning_rate": 1.8627093759571097e-06,
      "loss": 0.7486,
      "step": 88
    },
    {
      "epoch": 0.4257100149476831,
      "grad_norm": 0.09576966700987803,
      "learning_rate": 1.8586806339461223e-06,
      "loss": 0.7393,
      "step": 89
    },
    {
      "epoch": 0.4304932735426009,
      "grad_norm": 0.13616509255793824,
      "learning_rate": 1.8545981209325974e-06,
      "loss": 0.7412,
      "step": 90
    },
    {
      "epoch": 0.43527653213751866,
      "grad_norm": 0.10078747049635026,
      "learning_rate": 1.850462092565503e-06,
      "loss": 0.7522,
      "step": 91
    },
    {
      "epoch": 0.44005979073243645,
      "grad_norm": 0.09590506182617801,
      "learning_rate": 1.846272807844964e-06,
      "loss": 0.7361,
      "step": 92
    },
    {
      "epoch": 0.44484304932735425,
      "grad_norm": 0.09599938671410663,
      "learning_rate": 1.8420305291060453e-06,
      "loss": 0.7454,
      "step": 93
    },
    {
      "epoch": 0.44962630792227204,
      "grad_norm": 0.10175459960116054,
      "learning_rate": 1.837735522002322e-06,
      "loss": 0.7776,
      "step": 94
    },
    {
      "epoch": 0.45440956651718983,
      "grad_norm": 0.10921604960602464,
      "learning_rate": 1.8333880554892465e-06,
      "loss": 0.7284,
      "step": 95
    },
    {
      "epoch": 0.4591928251121076,
      "grad_norm": 0.10701793438795469,
      "learning_rate": 1.828988401807304e-06,
      "loss": 0.7275,
      "step": 96
    },
    {
      "epoch": 0.4639760837070254,
      "grad_norm": 0.10671158442373065,
      "learning_rate": 1.8245368364649672e-06,
      "loss": 0.7176,
      "step": 97
    },
    {
      "epoch": 0.4687593423019432,
      "grad_norm": 0.09323865008012455,
      "learning_rate": 1.8200336382214404e-06,
      "loss": 0.7558,
      "step": 98
    },
    {
      "epoch": 0.473542600896861,
      "grad_norm": 0.09924243426975013,
      "learning_rate": 1.815479089069208e-06,
      "loss": 0.7477,
      "step": 99
    },
    {
      "epoch": 0.47832585949177875,
      "grad_norm": 0.10034019533981096,
      "learning_rate": 1.8108734742163714e-06,
      "loss": 0.7302,
      "step": 100
    },
    {
      "epoch": 0.48310911808669654,
      "grad_norm": 0.09289950458176202,
      "learning_rate": 1.8062170820687923e-06,
      "loss": 0.7461,
      "step": 101
    },
    {
      "epoch": 0.48789237668161434,
      "grad_norm": 0.10063821105969947,
      "learning_rate": 1.8015102042120314e-06,
      "loss": 0.7374,
      "step": 102
    },
    {
      "epoch": 0.49267563527653213,
      "grad_norm": 0.10431764482912426,
      "learning_rate": 1.796753135393089e-06,
      "loss": 0.753,
      "step": 103
    },
    {
      "epoch": 0.4974588938714499,
      "grad_norm": 0.09777703419526715,
      "learning_rate": 1.791946173501948e-06,
      "loss": 0.7172,
      "step": 104
    },
    {
      "epoch": 0.5022421524663677,
      "grad_norm": 0.09880039694565383,
      "learning_rate": 1.7870896195529204e-06,
      "loss": 0.7157,
      "step": 105
    },
    {
      "epoch": 0.5070254110612855,
      "grad_norm": 0.10103523012523379,
      "learning_rate": 1.7821837776657967e-06,
      "loss": 0.7522,
      "step": 106
    },
    {
      "epoch": 0.5118086696562033,
      "grad_norm": 0.09953632352625874,
      "learning_rate": 1.777228955046803e-06,
      "loss": 0.7215,
      "step": 107
    },
    {
      "epoch": 0.5165919282511211,
      "grad_norm": 0.09448842637214858,
      "learning_rate": 1.7722254619693617e-06,
      "loss": 0.7311,
      "step": 108
    },
    {
      "epoch": 0.5213751868460389,
      "grad_norm": 0.09926544596139777,
      "learning_rate": 1.7671736117546643e-06,
      "loss": 0.7242,
      "step": 109
    },
    {
      "epoch": 0.5261584454409567,
      "grad_norm": 0.09420983432319698,
      "learning_rate": 1.7620737207520498e-06,
      "loss": 0.7302,
      "step": 110
    },
    {
      "epoch": 0.5309417040358745,
      "grad_norm": 0.09391867567605319,
      "learning_rate": 1.756926108319194e-06,
      "loss": 0.7222,
      "step": 111
    },
    {
      "epoch": 0.5357249626307923,
      "grad_norm": 0.09479652603956866,
      "learning_rate": 1.751731096802113e-06,
      "loss": 0.7361,
      "step": 112
    },
    {
      "epoch": 0.54050822122571,
      "grad_norm": 0.09440230389077435,
      "learning_rate": 1.7464890115149759e-06,
      "loss": 0.7183,
      "step": 113
    },
    {
      "epoch": 0.5452914798206278,
      "grad_norm": 0.09514244364363002,
      "learning_rate": 1.7412001807197361e-06,
      "loss": 0.7342,
      "step": 114
    },
    {
      "epoch": 0.5500747384155455,
      "grad_norm": 0.10939831006494534,
      "learning_rate": 1.735864935605572e-06,
      "loss": 0.7251,
      "step": 115
    },
    {
      "epoch": 0.5548579970104633,
      "grad_norm": 0.10066676165355973,
      "learning_rate": 1.7304836102681493e-06,
      "loss": 0.7081,
      "step": 116
    },
    {
      "epoch": 0.5596412556053811,
      "grad_norm": 0.10100361164339053,
      "learning_rate": 1.7250565416887015e-06,
      "loss": 0.742,
      "step": 117
    },
    {
      "epoch": 0.5644245142002989,
      "grad_norm": 0.09740229601345607,
      "learning_rate": 1.719584069712925e-06,
      "loss": 0.7314,
      "step": 118
    },
    {
      "epoch": 0.5692077727952167,
      "grad_norm": 0.1012821496567702,
      "learning_rate": 1.7140665370296992e-06,
      "loss": 0.7167,
      "step": 119
    },
    {
      "epoch": 0.5739910313901345,
      "grad_norm": 0.09994075838359362,
      "learning_rate": 1.708504289149628e-06,
      "loss": 0.7421,
      "step": 120
    },
    {
      "epoch": 0.5787742899850523,
      "grad_norm": 0.09513046173828367,
      "learning_rate": 1.702897674383402e-06,
      "loss": 0.7067,
      "step": 121
    },
    {
      "epoch": 0.5835575485799701,
      "grad_norm": 0.10488877885042427,
      "learning_rate": 1.697247043819988e-06,
      "loss": 0.7283,
      "step": 122
    },
    {
      "epoch": 0.5883408071748879,
      "grad_norm": 0.10017563354892535,
      "learning_rate": 1.6915527513046443e-06,
      "loss": 0.7289,
      "step": 123
    },
    {
      "epoch": 0.5931240657698057,
      "grad_norm": 0.09910676006320021,
      "learning_rate": 1.6858151534167616e-06,
      "loss": 0.7258,
      "step": 124
    },
    {
      "epoch": 0.5979073243647235,
      "grad_norm": 0.10226756484228856,
      "learning_rate": 1.6800346094475346e-06,
      "loss": 0.7294,
      "step": 125
    },
    {
      "epoch": 0.6026905829596413,
      "grad_norm": 0.0941277312513867,
      "learning_rate": 1.6742114813774618e-06,
      "loss": 0.7059,
      "step": 126
    },
    {
      "epoch": 0.6074738415545591,
      "grad_norm": 0.10468386708851042,
      "learning_rate": 1.6683461338536798e-06,
      "loss": 0.76,
      "step": 127
    },
    {
      "epoch": 0.6122571001494769,
      "grad_norm": 0.09546912003315239,
      "learning_rate": 1.6624389341671278e-06,
      "loss": 0.7199,
      "step": 128
    },
    {
      "epoch": 0.6170403587443947,
      "grad_norm": 0.09278710008849092,
      "learning_rate": 1.656490252229548e-06,
      "loss": 0.71,
      "step": 129
    },
    {
      "epoch": 0.6218236173393124,
      "grad_norm": 0.09629578223078193,
      "learning_rate": 1.6505004605503223e-06,
      "loss": 0.7297,
      "step": 130
    },
    {
      "epoch": 0.6266068759342301,
      "grad_norm": 0.10564515959559177,
      "learning_rate": 1.6444699342131428e-06,
      "loss": 0.7323,
      "step": 131
    },
    {
      "epoch": 0.6313901345291479,
      "grad_norm": 0.11359024419098725,
      "learning_rate": 1.638399050852528e-06,
      "loss": 0.7091,
      "step": 132
    },
    {
      "epoch": 0.6361733931240657,
      "grad_norm": 0.11261022540293862,
      "learning_rate": 1.632288190630172e-06,
      "loss": 0.7092,
      "step": 133
    },
    {
      "epoch": 0.6409566517189835,
      "grad_norm": 0.11356374624941931,
      "learning_rate": 1.6261377362111396e-06,
      "loss": 0.7226,
      "step": 134
    },
    {
      "epoch": 0.6457399103139013,
      "grad_norm": 0.09628738165774237,
      "learning_rate": 1.6199480727399032e-06,
      "loss": 0.7313,
      "step": 135
    },
    {
      "epoch": 0.6505231689088191,
      "grad_norm": 0.09955265729242128,
      "learning_rate": 1.6137195878162267e-06,
      "loss": 0.7264,
      "step": 136
    },
    {
      "epoch": 0.6553064275037369,
      "grad_norm": 0.10088157860044299,
      "learning_rate": 1.607452671470891e-06,
      "loss": 0.72,
      "step": 137
    },
    {
      "epoch": 0.6600896860986547,
      "grad_norm": 0.09316854100471951,
      "learning_rate": 1.601147716141272e-06,
      "loss": 0.7043,
      "step": 138
    },
    {
      "epoch": 0.6648729446935725,
      "grad_norm": 0.09866104920600266,
      "learning_rate": 1.5948051166467657e-06,
      "loss": 0.7314,
      "step": 139
    },
    {
      "epoch": 0.6696562032884903,
      "grad_norm": 0.09908667617176863,
      "learning_rate": 1.5884252701640634e-06,
      "loss": 0.7223,
      "step": 140
    },
    {
      "epoch": 0.6744394618834081,
      "grad_norm": 0.10108043693556777,
      "learning_rate": 1.5820085762022823e-06,
      "loss": 0.7145,
      "step": 141
    },
    {
      "epoch": 0.6792227204783259,
      "grad_norm": 0.09483321797525981,
      "learning_rate": 1.5755554365779455e-06,
      "loss": 0.712,
      "step": 142
    },
    {
      "epoch": 0.6840059790732437,
      "grad_norm": 0.09772063438530315,
      "learning_rate": 1.5690662553898222e-06,
      "loss": 0.7262,
      "step": 143
    },
    {
      "epoch": 0.6887892376681615,
      "grad_norm": 0.09547210509162248,
      "learning_rate": 1.5625414389936218e-06,
      "loss": 0.6881,
      "step": 144
    },
    {
      "epoch": 0.6935724962630793,
      "grad_norm": 0.10198333563773951,
      "learning_rate": 1.555981395976548e-06,
      "loss": 0.7023,
      "step": 145
    },
    {
      "epoch": 0.6983557548579971,
      "grad_norm": 0.0960216671080163,
      "learning_rate": 1.5493865371317123e-06,
      "loss": 0.7041,
      "step": 146
    },
    {
      "epoch": 0.7031390134529149,
      "grad_norm": 0.10811878950887173,
      "learning_rate": 1.542757275432411e-06,
      "loss": 0.7121,
      "step": 147
    },
    {
      "epoch": 0.7079222720478325,
      "grad_norm": 0.09745342759060693,
      "learning_rate": 1.5360940260062635e-06,
      "loss": 0.7,
      "step": 148
    },
    {
      "epoch": 0.7127055306427503,
      "grad_norm": 0.10002068890855158,
      "learning_rate": 1.5293972061092185e-06,
      "loss": 0.7174,
      "step": 149
    },
    {
      "epoch": 0.7174887892376681,
      "grad_norm": 0.094440761646848,
      "learning_rate": 1.522667235099422e-06,
      "loss": 0.6842,
      "step": 150
    },
    {
      "epoch": 0.7222720478325859,
      "grad_norm": 0.09714805521617614,
      "learning_rate": 1.515904534410961e-06,
      "loss": 0.6917,
      "step": 151
    },
    {
      "epoch": 0.7270553064275037,
      "grad_norm": 0.09206634939711936,
      "learning_rate": 1.5091095275274699e-06,
      "loss": 0.6807,
      "step": 152
    },
    {
      "epoch": 0.7318385650224215,
      "grad_norm": 0.09811924963451824,
      "learning_rate": 1.5022826399556133e-06,
      "loss": 0.6938,
      "step": 153
    },
    {
      "epoch": 0.7366218236173393,
      "grad_norm": 0.09469018906462104,
      "learning_rate": 1.4954242991984396e-06,
      "loss": 0.7262,
      "step": 154
    },
    {
      "epoch": 0.7414050822122571,
      "grad_norm": 0.09900495842570976,
      "learning_rate": 1.4885349347286115e-06,
      "loss": 0.6928,
      "step": 155
    },
    {
      "epoch": 0.7461883408071749,
      "grad_norm": 0.09813499443182924,
      "learning_rate": 1.4816149779615126e-06,
      "loss": 0.7041,
      "step": 156
    },
    {
      "epoch": 0.7509715994020927,
      "grad_norm": 0.09285509032551069,
      "learning_rate": 1.474664862228229e-06,
      "loss": 0.7157,
      "step": 157
    },
    {
      "epoch": 0.7557548579970105,
      "grad_norm": 0.09930227957877516,
      "learning_rate": 1.467685022748419e-06,
      "loss": 0.7077,
      "step": 158
    },
    {
      "epoch": 0.7605381165919283,
      "grad_norm": 0.09336816965151891,
      "learning_rate": 1.4606758966030534e-06,
      "loss": 0.6905,
      "step": 159
    },
    {
      "epoch": 0.7653213751868461,
      "grad_norm": 0.09584860785157516,
      "learning_rate": 1.4536379227070509e-06,
      "loss": 0.704,
      "step": 160
    },
    {
      "epoch": 0.7701046337817639,
      "grad_norm": 0.09906164552724124,
      "learning_rate": 1.4465715417817888e-06,
      "loss": 0.7014,
      "step": 161
    },
    {
      "epoch": 0.7748878923766817,
      "grad_norm": 0.09920929186360831,
      "learning_rate": 1.4394771963275076e-06,
      "loss": 0.6711,
      "step": 162
    },
    {
      "epoch": 0.7796711509715994,
      "grad_norm": 0.09312914704123235,
      "learning_rate": 1.4323553305955997e-06,
      "loss": 0.704,
      "step": 163
    },
    {
      "epoch": 0.7844544095665172,
      "grad_norm": 0.09380001375870357,
      "learning_rate": 1.4252063905607909e-06,
      "loss": 0.6769,
      "step": 164
    },
    {
      "epoch": 0.7892376681614349,
      "grad_norm": 0.09383108087011895,
      "learning_rate": 1.4180308238932135e-06,
      "loss": 0.6903,
      "step": 165
    },
    {
      "epoch": 0.7940209267563527,
      "grad_norm": 0.09761627284743495,
      "learning_rate": 1.410829079930372e-06,
      "loss": 0.7126,
      "step": 166
    },
    {
      "epoch": 0.7988041853512705,
      "grad_norm": 0.09591926993818495,
      "learning_rate": 1.4036016096490064e-06,
      "loss": 0.6936,
      "step": 167
    },
    {
      "epoch": 0.8035874439461883,
      "grad_norm": 0.09463907898930997,
      "learning_rate": 1.3963488656368517e-06,
      "loss": 0.6918,
      "step": 168
    },
    {
      "epoch": 0.8083707025411061,
      "grad_norm": 0.10314575539858357,
      "learning_rate": 1.389071302064295e-06,
      "loss": 0.6837,
      "step": 169
    },
    {
      "epoch": 0.8131539611360239,
      "grad_norm": 0.0964154089668258,
      "learning_rate": 1.381769374655938e-06,
      "loss": 0.7087,
      "step": 170
    },
    {
      "epoch": 0.8179372197309417,
      "grad_norm": 0.10458955759891816,
      "learning_rate": 1.374443540662057e-06,
      "loss": 0.7132,
      "step": 171
    },
    {
      "epoch": 0.8227204783258595,
      "grad_norm": 0.11118113052583456,
      "learning_rate": 1.3670942588299705e-06,
      "loss": 0.689,
      "step": 172
    },
    {
      "epoch": 0.8275037369207773,
      "grad_norm": 0.09430050647819165,
      "learning_rate": 1.3597219893753117e-06,
      "loss": 0.6669,
      "step": 173
    },
    {
      "epoch": 0.8322869955156951,
      "grad_norm": 0.10018122520539552,
      "learning_rate": 1.352327193953211e-06,
      "loss": 0.675,
      "step": 174
    },
    {
      "epoch": 0.8370702541106129,
      "grad_norm": 0.1036112926787395,
      "learning_rate": 1.3449103356293852e-06,
      "loss": 0.7151,
      "step": 175
    },
    {
      "epoch": 0.8418535127055307,
      "grad_norm": 0.09652117392718416,
      "learning_rate": 1.337471878851141e-06,
      "loss": 0.6819,
      "step": 176
    },
    {
      "epoch": 0.8466367713004485,
      "grad_norm": 0.11467070226240633,
      "learning_rate": 1.3300122894182909e-06,
      "loss": 0.7063,
      "step": 177
    },
    {
      "epoch": 0.8514200298953662,
      "grad_norm": 0.0974406950357686,
      "learning_rate": 1.3225320344539842e-06,
      "loss": 0.7154,
      "step": 178
    },
    {
      "epoch": 0.856203288490284,
      "grad_norm": 0.10056923973958724,
      "learning_rate": 1.315031582375457e-06,
      "loss": 0.7119,
      "step": 179
    },
    {
      "epoch": 0.8609865470852018,
      "grad_norm": 0.10289512917324216,
      "learning_rate": 1.3075114028646974e-06,
      "loss": 0.6872,
      "step": 180
    },
    {
      "epoch": 0.8657698056801196,
      "grad_norm": 0.10284996024746469,
      "learning_rate": 1.299971966839036e-06,
      "loss": 0.6995,
      "step": 181
    },
    {
      "epoch": 0.8705530642750373,
      "grad_norm": 0.09442402879665361,
      "learning_rate": 1.292413746421655e-06,
      "loss": 0.6788,
      "step": 182
    },
    {
      "epoch": 0.8753363228699551,
      "grad_norm": 0.09221585066528634,
      "learning_rate": 1.2848372149120246e-06,
      "loss": 0.6625,
      "step": 183
    },
    {
      "epoch": 0.8801195814648729,
      "grad_norm": 0.09614590670948946,
      "learning_rate": 1.2772428467562651e-06,
      "loss": 0.6993,
      "step": 184
    },
    {
      "epoch": 0.8849028400597907,
      "grad_norm": 0.09884964743533457,
      "learning_rate": 1.2696311175174357e-06,
      "loss": 0.6826,
      "step": 185
    },
    {
      "epoch": 0.8896860986547085,
      "grad_norm": 0.10049262287084837,
      "learning_rate": 1.2620025038457554e-06,
      "loss": 0.6875,
      "step": 186
    },
    {
      "epoch": 0.8944693572496263,
      "grad_norm": 0.0951319815934962,
      "learning_rate": 1.254357483448755e-06,
      "loss": 0.6763,
      "step": 187
    },
    {
      "epoch": 0.8992526158445441,
      "grad_norm": 0.0935897850203258,
      "learning_rate": 1.2466965350613615e-06,
      "loss": 0.7191,
      "step": 188
    },
    {
      "epoch": 0.9040358744394619,
      "grad_norm": 0.10488228598924217,
      "learning_rate": 1.2390201384159219e-06,
      "loss": 0.7031,
      "step": 189
    },
    {
      "epoch": 0.9088191330343797,
      "grad_norm": 0.09803611282531831,
      "learning_rate": 1.231328774212159e-06,
      "loss": 0.6596,
      "step": 190
    },
    {
      "epoch": 0.9136023916292975,
      "grad_norm": 0.10982924572402691,
      "learning_rate": 1.223622924087073e-06,
      "loss": 0.685,
      "step": 191
    },
    {
      "epoch": 0.9183856502242153,
      "grad_norm": 0.0990057467989385,
      "learning_rate": 1.215903070584779e-06,
      "loss": 0.6905,
      "step": 192
    },
    {
      "epoch": 0.923168908819133,
      "grad_norm": 0.09806799076875558,
      "learning_rate": 1.2081696971262903e-06,
      "loss": 0.6888,
      "step": 193
    },
    {
      "epoch": 0.9279521674140508,
      "grad_norm": 0.09725950749183558,
      "learning_rate": 1.2004232879792464e-06,
      "loss": 0.6897,
      "step": 194
    },
    {
      "epoch": 0.9327354260089686,
      "grad_norm": 0.09998658118754998,
      "learning_rate": 1.1926643282275882e-06,
      "loss": 0.6808,
      "step": 195
    },
    {
      "epoch": 0.9375186846038864,
      "grad_norm": 0.09991311679692257,
      "learning_rate": 1.1848933037411825e-06,
      "loss": 0.6721,
      "step": 196
    },
    {
      "epoch": 0.9423019431988042,
      "grad_norm": 0.09570773453199784,
      "learning_rate": 1.1771107011453933e-06,
      "loss": 0.6943,
      "step": 197
    },
    {
      "epoch": 0.947085201793722,
      "grad_norm": 0.09891331359398514,
      "learning_rate": 1.1693170077906143e-06,
      "loss": 0.6989,
      "step": 198
    },
    {
      "epoch": 0.9518684603886398,
      "grad_norm": 0.09162536810525922,
      "learning_rate": 1.1615127117217463e-06,
      "loss": 0.6705,
      "step": 199
    },
    {
      "epoch": 0.9566517189835575,
      "grad_norm": 0.08903988395053124,
      "learning_rate": 1.1536983016476373e-06,
      "loss": 0.679,
      "step": 200
    },
    {
      "epoch": 0.9614349775784753,
      "grad_norm": 0.09042806424104788,
      "learning_rate": 1.1458742669104803e-06,
      "loss": 0.6652,
      "step": 201
    },
    {
      "epoch": 0.9662182361733931,
      "grad_norm": 0.10347050843667145,
      "learning_rate": 1.1380410974551682e-06,
      "loss": 0.6891,
      "step": 202
    },
    {
      "epoch": 0.9710014947683109,
      "grad_norm": 0.0937785288147842,
      "learning_rate": 1.130199283798615e-06,
      "loss": 0.662,
      "step": 203
    },
    {
      "epoch": 0.9757847533632287,
      "grad_norm": 0.10125646071292,
      "learning_rate": 1.1223493169990391e-06,
      "loss": 0.6857,
      "step": 204
    },
    {
      "epoch": 0.9805680119581465,
      "grad_norm": 0.09552098120941739,
      "learning_rate": 1.1144916886252124e-06,
      "loss": 0.6693,
      "step": 205
    },
    {
      "epoch": 0.9853512705530643,
      "grad_norm": 0.0939464203547695,
      "learning_rate": 1.1066268907256782e-06,
      "loss": 0.689,
      "step": 206
    },
    {
      "epoch": 0.9901345291479821,
      "grad_norm": 0.1083244661837491,
      "learning_rate": 1.098755415797939e-06,
      "loss": 0.6795,
      "step": 207
    },
    {
      "epoch": 0.9949177877428999,
      "grad_norm": 0.09671011359258122,
      "learning_rate": 1.0908777567576168e-06,
      "loss": 0.697,
      "step": 208
    },
    {
      "epoch": 0.9997010463378176,
      "grad_norm": 0.09491067631505212,
      "learning_rate": 1.0829944069075847e-06,
      "loss": 0.6913,
      "step": 209
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.09491067631505212,
      "learning_rate": 1.0751058599070781e-06,
      "loss": 0.0398,
      "step": 210
    },
    {
      "epoch": 1.0047832585949177,
      "grad_norm": 0.09568291564665689,
      "learning_rate": 1.0672126097407795e-06,
      "loss": 0.6558,
      "step": 211
    },
    {
      "epoch": 1.0095665171898356,
      "grad_norm": 0.0890899262566247,
      "learning_rate": 1.0593151506878865e-06,
      "loss": 0.6742,
      "step": 212
    },
    {
      "epoch": 1.0143497757847533,
      "grad_norm": 0.08951496407842846,
      "learning_rate": 1.0514139772911597e-06,
      "loss": 0.6589,
      "step": 213
    },
    {
      "epoch": 1.0191330343796712,
      "grad_norm": 0.09303979677050327,
      "learning_rate": 1.043509584325953e-06,
      "loss": 0.6526,
      "step": 214
    },
    {
      "epoch": 1.0239162929745889,
      "grad_norm": 0.10551892280989528,
      "learning_rate": 1.0356024667692314e-06,
      "loss": 0.6849,
      "step": 215
    },
    {
      "epoch": 1.0286995515695068,
      "grad_norm": 0.10560698057117009,
      "learning_rate": 1.0276931197685753e-06,
      "loss": 0.6947,
      "step": 216
    },
    {
      "epoch": 1.0334828101644244,
      "grad_norm": 0.09055248425609617,
      "learning_rate": 1.0197820386111737e-06,
      "loss": 0.6692,
      "step": 217
    },
    {
      "epoch": 1.0382660687593424,
      "grad_norm": 0.08952534903326591,
      "learning_rate": 1.0118697186928105e-06,
      "loss": 0.6481,
      "step": 218
    },
    {
      "epoch": 1.04304932735426,
      "grad_norm": 0.0949207133753394,
      "learning_rate": 1.0039566554868392e-06,
      "loss": 0.6561,
      "step": 219
    },
    {
      "epoch": 1.047832585949178,
      "grad_norm": 0.09247582314260705,
      "learning_rate": 9.960433445131607e-07,
      "loss": 0.6727,
      "step": 220
    },
    {
      "epoch": 1.0526158445440956,
      "grad_norm": 0.0922431854223743,
      "learning_rate": 9.881302813071896e-07,
      "loss": 0.6786,
      "step": 221
    },
    {
      "epoch": 1.0573991031390135,
      "grad_norm": 0.09921340856730206,
      "learning_rate": 9.802179613888262e-07,
      "loss": 0.6492,
      "step": 222
    },
    {
      "epoch": 1.0621823617339312,
      "grad_norm": 0.09405904196612806,
      "learning_rate": 9.723068802314246e-07,
      "loss": 0.6435,
      "step": 223
    },
    {
      "epoch": 1.0669656203288491,
      "grad_norm": 0.10252064804861775,
      "learning_rate": 9.643975332307687e-07,
      "loss": 0.6693,
      "step": 224
    },
    {
      "epoch": 1.0717488789237668,
      "grad_norm": 0.09137882604103069,
      "learning_rate": 9.564904156740471e-07,
      "loss": 0.6554,
      "step": 225
    },
    {
      "epoch": 1.0765321375186847,
      "grad_norm": 0.09506143141231545,
      "learning_rate": 9.485860227088405e-07,
      "loss": 0.6524,
      "step": 226
    },
    {
      "epoch": 1.0813153961136024,
      "grad_norm": 0.09471266291722098,
      "learning_rate": 9.406848493121134e-07,
      "loss": 0.6598,
      "step": 227
    },
    {
      "epoch": 1.08609865470852,
      "grad_norm": 0.09374158444399681,
      "learning_rate": 9.327873902592205e-07,
      "loss": 0.6546,
      "step": 228
    },
    {
      "epoch": 1.090881913303438,
      "grad_norm": 0.0988485463507574,
      "learning_rate": 9.248941400929222e-07,
      "loss": 0.6659,
      "step": 229
    },
    {
      "epoch": 1.0956651718983557,
      "grad_norm": 0.09989186431558944,
      "learning_rate": 9.17005593092415e-07,
      "loss": 0.6789,
      "step": 230
    },
    {
      "epoch": 1.1004484304932736,
      "grad_norm": 0.09577210416129449,
      "learning_rate": 9.09122243242383e-07,
      "loss": 0.6395,
      "step": 231
    },
    {
      "epoch": 1.1052316890881912,
      "grad_norm": 0.09417460653116495,
      "learning_rate": 9.01244584202061e-07,
      "loss": 0.6351,
      "step": 232
    },
    {
      "epoch": 1.1100149476831092,
      "grad_norm": 0.1060296134876217,
      "learning_rate": 8.933731092743219e-07,
      "loss": 0.6843,
      "step": 233
    },
    {
      "epoch": 1.1147982062780268,
      "grad_norm": 0.1015156854708665,
      "learning_rate": 8.855083113747875e-07,
      "loss": 0.6533,
      "step": 234
    },
    {
      "epoch": 1.1195814648729447,
      "grad_norm": 0.09252864648733664,
      "learning_rate": 8.776506830009607e-07,
      "loss": 0.6529,
      "step": 235
    },
    {
      "epoch": 1.1243647234678624,
      "grad_norm": 0.09810040579156247,
      "learning_rate": 8.698007162013849e-07,
      "loss": 0.6622,
      "step": 236
    },
    {
      "epoch": 1.1291479820627803,
      "grad_norm": 0.10333456832019272,
      "learning_rate": 8.619589025448318e-07,
      "loss": 0.6698,
      "step": 237
    },
    {
      "epoch": 1.133931240657698,
      "grad_norm": 0.09369526359642345,
      "learning_rate": 8.541257330895197e-07,
      "loss": 0.6397,
      "step": 238
    },
    {
      "epoch": 1.138714499252616,
      "grad_norm": 0.0934070849673633,
      "learning_rate": 8.463016983523627e-07,
      "loss": 0.6724,
      "step": 239
    },
    {
      "epoch": 1.1434977578475336,
      "grad_norm": 0.0968568071003159,
      "learning_rate": 8.384872882782541e-07,
      "loss": 0.6651,
      "step": 240
    },
    {
      "epoch": 1.1482810164424515,
      "grad_norm": 0.09218848184783551,
      "learning_rate": 8.306829922093857e-07,
      "loss": 0.6482,
      "step": 241
    },
    {
      "epoch": 1.1530642750373692,
      "grad_norm": 0.09367162146496326,
      "learning_rate": 8.228892988546067e-07,
      "loss": 0.6532,
      "step": 242
    },
    {
      "epoch": 1.157847533632287,
      "grad_norm": 0.09179870741014423,
      "learning_rate": 8.15106696258818e-07,
      "loss": 0.6458,
      "step": 243
    },
    {
      "epoch": 1.1626307922272048,
      "grad_norm": 0.10425982157218257,
      "learning_rate": 8.073356717724115e-07,
      "loss": 0.6476,
      "step": 244
    },
    {
      "epoch": 1.1674140508221225,
      "grad_norm": 0.10785978296392415,
      "learning_rate": 7.995767120207536e-07,
      "loss": 0.6542,
      "step": 245
    },
    {
      "epoch": 1.1721973094170404,
      "grad_norm": 0.09053925155843066,
      "learning_rate": 7.918303028737096e-07,
      "loss": 0.6444,
      "step": 246
    },
    {
      "epoch": 1.176980568011958,
      "grad_norm": 0.11054671698924359,
      "learning_rate": 7.840969294152211e-07,
      "loss": 0.6546,
      "step": 247
    },
    {
      "epoch": 1.181763826606876,
      "grad_norm": 0.09190168624229306,
      "learning_rate": 7.763770759129269e-07,
      "loss": 0.6483,
      "step": 248
    },
    {
      "epoch": 1.1865470852017936,
      "grad_norm": 0.10112895278117082,
      "learning_rate": 7.68671225787841e-07,
      "loss": 0.6607,
      "step": 249
    },
    {
      "epoch": 1.1913303437967115,
      "grad_norm": 0.09521368142452571,
      "learning_rate": 7.609798615840785e-07,
      "loss": 0.6632,
      "step": 250
    },
    {
      "epoch": 1.1961136023916292,
      "grad_norm": 0.09631678500828386,
      "learning_rate": 7.533034649386384e-07,
      "loss": 0.6271,
      "step": 251
    },
    {
      "epoch": 1.2008968609865471,
      "grad_norm": 0.09402110237205977,
      "learning_rate": 7.456425165512452e-07,
      "loss": 0.649,
      "step": 252
    },
    {
      "epoch": 1.2056801195814648,
      "grad_norm": 0.10452266128761932,
      "learning_rate": 7.379974961542447e-07,
      "loss": 0.6744,
      "step": 253
    },
    {
      "epoch": 1.2104633781763827,
      "grad_norm": 0.09522707743392524,
      "learning_rate": 7.303688824825646e-07,
      "loss": 0.6608,
      "step": 254
    },
    {
      "epoch": 1.2152466367713004,
      "grad_norm": 0.09573208889216732,
      "learning_rate": 7.227571532437349e-07,
      "loss": 0.652,
      "step": 255
    },
    {
      "epoch": 1.2200298953662183,
      "grad_norm": 0.08917908293059873,
      "learning_rate": 7.151627850879755e-07,
      "loss": 0.6543,
      "step": 256
    },
    {
      "epoch": 1.224813153961136,
      "grad_norm": 0.09616438435062312,
      "learning_rate": 7.075862535783453e-07,
      "loss": 0.6337,
      "step": 257
    },
    {
      "epoch": 1.229596412556054,
      "grad_norm": 0.09640367364080155,
      "learning_rate": 7.00028033160964e-07,
      "loss": 0.6839,
      "step": 258
    },
    {
      "epoch": 1.2343796711509716,
      "grad_norm": 0.09586353497663917,
      "learning_rate": 6.924885971353026e-07,
      "loss": 0.6669,
      "step": 259
    },
    {
      "epoch": 1.2391629297458895,
      "grad_norm": 0.09267059238961081,
      "learning_rate": 6.849684176245431e-07,
      "loss": 0.6314,
      "step": 260
    },
    {
      "epoch": 1.2439461883408072,
      "grad_norm": 0.09031407329588002,
      "learning_rate": 6.774679655460158e-07,
      "loss": 0.6449,
      "step": 261
    },
    {
      "epoch": 1.2487294469357249,
      "grad_norm": 0.09470627715876291,
      "learning_rate": 6.699877105817092e-07,
      "loss": 0.6502,
      "step": 262
    },
    {
      "epoch": 1.2535127055306428,
      "grad_norm": 0.10074811226580811,
      "learning_rate": 6.625281211488591e-07,
      "loss": 0.6686,
      "step": 263
    },
    {
      "epoch": 1.2582959641255607,
      "grad_norm": 0.10063396201285223,
      "learning_rate": 6.55089664370615e-07,
      "loss": 0.6695,
      "step": 264
    },
    {
      "epoch": 1.2630792227204783,
      "grad_norm": 0.0918463846096307,
      "learning_rate": 6.476728060467888e-07,
      "loss": 0.6451,
      "step": 265
    },
    {
      "epoch": 1.267862481315396,
      "grad_norm": 0.09328601851356563,
      "learning_rate": 6.402780106246884e-07,
      "loss": 0.6532,
      "step": 266
    },
    {
      "epoch": 1.272645739910314,
      "grad_norm": 0.09424847785405825,
      "learning_rate": 6.329057411700298e-07,
      "loss": 0.6673,
      "step": 267
    },
    {
      "epoch": 1.2774289985052316,
      "grad_norm": 0.10008134051501576,
      "learning_rate": 6.255564593379429e-07,
      "loss": 0.6672,
      "step": 268
    },
    {
      "epoch": 1.2822122571001495,
      "grad_norm": 0.09294984655524738,
      "learning_rate": 6.182306253440619e-07,
      "loss": 0.6395,
      "step": 269
    },
    {
      "epoch": 1.2869955156950672,
      "grad_norm": 0.10285895388747343,
      "learning_rate": 6.109286979357051e-07,
      "loss": 0.6637,
      "step": 270
    },
    {
      "epoch": 1.291778774289985,
      "grad_norm": 0.11139784795321246,
      "learning_rate": 6.036511343631488e-07,
      "loss": 0.6455,
      "step": 271
    },
    {
      "epoch": 1.2965620328849028,
      "grad_norm": 0.09212296328590026,
      "learning_rate": 5.963983903509935e-07,
      "loss": 0.6638,
      "step": 272
    },
    {
      "epoch": 1.3013452914798207,
      "grad_norm": 0.0949968377343012,
      "learning_rate": 5.89170920069628e-07,
      "loss": 0.6548,
      "step": 273
    },
    {
      "epoch": 1.3061285500747384,
      "grad_norm": 0.09690303299554558,
      "learning_rate": 5.819691761067865e-07,
      "loss": 0.6388,
      "step": 274
    },
    {
      "epoch": 1.310911808669656,
      "grad_norm": 0.09255296263795812,
      "learning_rate": 5.747936094392089e-07,
      "loss": 0.6435,
      "step": 275
    },
    {
      "epoch": 1.315695067264574,
      "grad_norm": 0.09503263182638313,
      "learning_rate": 5.676446694044002e-07,
      "loss": 0.638,
      "step": 276
    },
    {
      "epoch": 1.3204783258594919,
      "grad_norm": 0.09478054996201758,
      "learning_rate": 5.605228036724927e-07,
      "loss": 0.6502,
      "step": 277
    },
    {
      "epoch": 1.3252615844544096,
      "grad_norm": 0.0933411883471192,
      "learning_rate": 5.534284582182114e-07,
      "loss": 0.6511,
      "step": 278
    },
    {
      "epoch": 1.3300448430493272,
      "grad_norm": 0.09944351370813859,
      "learning_rate": 5.463620772929494e-07,
      "loss": 0.6325,
      "step": 279
    },
    {
      "epoch": 1.3348281016442451,
      "grad_norm": 0.10023032726854744,
      "learning_rate": 5.393241033969466e-07,
      "loss": 0.6418,
      "step": 280
    },
    {
      "epoch": 1.339611360239163,
      "grad_norm": 0.09729398494948012,
      "learning_rate": 5.323149772515812e-07,
      "loss": 0.6372,
      "step": 281
    },
    {
      "epoch": 1.3443946188340807,
      "grad_norm": 0.09323209082587747,
      "learning_rate": 5.253351377717706e-07,
      "loss": 0.6504,
      "step": 282
    },
    {
      "epoch": 1.3491778774289984,
      "grad_norm": 0.08940562070783202,
      "learning_rate": 5.183850220384873e-07,
      "loss": 0.6461,
      "step": 283
    },
    {
      "epoch": 1.3539611360239163,
      "grad_norm": 0.09092518318025446,
      "learning_rate": 5.114650652713884e-07,
      "loss": 0.6542,
      "step": 284
    },
    {
      "epoch": 1.358744394618834,
      "grad_norm": 0.0957083892879257,
      "learning_rate": 5.045757008015606e-07,
      "loss": 0.6627,
      "step": 285
    },
    {
      "epoch": 1.363527653213752,
      "grad_norm": 0.09918131125769998,
      "learning_rate": 4.977173600443868e-07,
      "loss": 0.6447,
      "step": 286
    },
    {
      "epoch": 1.3683109118086696,
      "grad_norm": 0.09079455495976413,
      "learning_rate": 4.908904724725299e-07,
      "loss": 0.651,
      "step": 287
    },
    {
      "epoch": 1.3730941704035875,
      "grad_norm": 0.09533039778556848,
      "learning_rate": 4.840954655890391e-07,
      "loss": 0.6518,
      "step": 288
    },
    {
      "epoch": 1.3778774289985052,
      "grad_norm": 0.09328409620590697,
      "learning_rate": 4.773327649005777e-07,
      "loss": 0.6712,
      "step": 289
    },
    {
      "epoch": 1.382660687593423,
      "grad_norm": 0.10546886430926707,
      "learning_rate": 4.7060279389078184e-07,
      "loss": 0.6594,
      "step": 290
    },
    {
      "epoch": 1.3874439461883408,
      "grad_norm": 0.09513157037379577,
      "learning_rate": 4.6390597399373644e-07,
      "loss": 0.6311,
      "step": 291
    },
    {
      "epoch": 1.3922272047832587,
      "grad_norm": 0.0910714399276055,
      "learning_rate": 4.5724272456758907e-07,
      "loss": 0.6524,
      "step": 292
    },
    {
      "epoch": 1.3970104633781764,
      "grad_norm": 0.08960044994197404,
      "learning_rate": 4.506134628682877e-07,
      "loss": 0.6515,
      "step": 293
    },
    {
      "epoch": 1.4017937219730943,
      "grad_norm": 0.0939439987196228,
      "learning_rate": 4.440186040234524e-07,
      "loss": 0.6487,
      "step": 294
    },
    {
      "epoch": 1.406576980568012,
      "grad_norm": 0.10645194425387064,
      "learning_rate": 4.3745856100637834e-07,
      "loss": 0.629,
      "step": 295
    },
    {
      "epoch": 1.4113602391629296,
      "grad_norm": 0.1047763121754449,
      "learning_rate": 4.3093374461017785e-07,
      "loss": 0.6466,
      "step": 296
    },
    {
      "epoch": 1.4161434977578475,
      "grad_norm": 0.09982639743024341,
      "learning_rate": 4.244445634220545e-07,
      "loss": 0.6504,
      "step": 297
    },
    {
      "epoch": 1.4209267563527654,
      "grad_norm": 0.094704337085837,
      "learning_rate": 4.1799142379771766e-07,
      "loss": 0.6675,
      "step": 298
    },
    {
      "epoch": 1.4257100149476831,
      "grad_norm": 0.09542340607816273,
      "learning_rate": 4.115747298359363e-07,
      "loss": 0.6379,
      "step": 299
    },
    {
      "epoch": 1.4304932735426008,
      "grad_norm": 0.09975848410849608,
      "learning_rate": 4.0519488335323415e-07,
      "loss": 0.6684,
      "step": 300
    },
    {
      "epoch": 1.4352765321375187,
      "grad_norm": 0.09564133208363568,
      "learning_rate": 3.9885228385872806e-07,
      "loss": 0.6345,
      "step": 301
    },
    {
      "epoch": 1.4400597907324364,
      "grad_norm": 0.0955432935737647,
      "learning_rate": 3.925473285291091e-07,
      "loss": 0.6419,
      "step": 302
    },
    {
      "epoch": 1.4448430493273543,
      "grad_norm": 0.0971708074341661,
      "learning_rate": 3.862804121837733e-07,
      "loss": 0.6568,
      "step": 303
    },
    {
      "epoch": 1.449626307922272,
      "grad_norm": 0.09654206097129785,
      "learning_rate": 3.8005192726009663e-07,
      "loss": 0.6526,
      "step": 304
    },
    {
      "epoch": 1.45440956651719,
      "grad_norm": 0.1047844291301578,
      "learning_rate": 3.738622637888608e-07,
      "loss": 0.6554,
      "step": 305
    },
    {
      "epoch": 1.4591928251121076,
      "grad_norm": 0.10495835343403974,
      "learning_rate": 3.677118093698278e-07,
      "loss": 0.639,
      "step": 306
    },
    {
      "epoch": 1.4639760837070255,
      "grad_norm": 0.09312185978330073,
      "learning_rate": 3.61600949147472e-07,
      "loss": 0.6534,
      "step": 307
    },
    {
      "epoch": 1.4687593423019432,
      "grad_norm": 0.0914400067851364,
      "learning_rate": 3.5553006578685706e-07,
      "loss": 0.6364,
      "step": 308
    },
    {
      "epoch": 1.473542600896861,
      "grad_norm": 0.10168751711517944,
      "learning_rate": 3.494995394496778e-07,
      "loss": 0.6438,
      "step": 309
    },
    {
      "epoch": 1.4783258594917787,
      "grad_norm": 0.08777082505313431,
      "learning_rate": 3.435097477704517e-07,
      "loss": 0.6159,
      "step": 310
    },
    {
      "epoch": 1.4831091180866967,
      "grad_norm": 0.0992483436164171,
      "learning_rate": 3.3756106583287205e-07,
      "loss": 0.6692,
      "step": 311
    },
    {
      "epoch": 1.4878923766816143,
      "grad_norm": 0.09763140125702534,
      "learning_rate": 3.316538661463204e-07,
      "loss": 0.6704,
      "step": 312
    },
    {
      "epoch": 1.492675635276532,
      "grad_norm": 0.103958466638517,
      "learning_rate": 3.2578851862253796e-07,
      "loss": 0.6582,
      "step": 313
    },
    {
      "epoch": 1.49745889387145,
      "grad_norm": 0.09058417960194183,
      "learning_rate": 3.199653905524654e-07,
      "loss": 0.6353,
      "step": 314
    },
    {
      "epoch": 1.5022421524663678,
      "grad_norm": 0.10131403619552605,
      "learning_rate": 3.1418484658323806e-07,
      "loss": 0.6566,
      "step": 315
    },
    {
      "epoch": 1.5070254110612855,
      "grad_norm": 0.09681513597634411,
      "learning_rate": 3.0844724869535577e-07,
      "loss": 0.6437,
      "step": 316
    },
    {
      "epoch": 1.5118086696562032,
      "grad_norm": 0.10073309195120103,
      "learning_rate": 3.027529561800117e-07,
      "loss": 0.6541,
      "step": 317
    },
    {
      "epoch": 1.516591928251121,
      "grad_norm": 0.09187767379862512,
      "learning_rate": 2.971023256165983e-07,
      "loss": 0.6429,
      "step": 318
    },
    {
      "epoch": 1.521375186846039,
      "grad_norm": 0.09322468814151724,
      "learning_rate": 2.9149571085037215e-07,
      "loss": 0.6536,
      "step": 319
    },
    {
      "epoch": 1.5261584454409567,
      "grad_norm": 0.09535864278016615,
      "learning_rate": 2.8593346297030073e-07,
      "loss": 0.6448,
      "step": 320
    },
    {
      "epoch": 1.5309417040358744,
      "grad_norm": 0.09853757658051235,
      "learning_rate": 2.804159302870751e-07,
      "loss": 0.6361,
      "step": 321
    },
    {
      "epoch": 1.5357249626307923,
      "grad_norm": 0.08652865663588583,
      "learning_rate": 2.7494345831129837e-07,
      "loss": 0.6275,
      "step": 322
    },
    {
      "epoch": 1.54050822122571,
      "grad_norm": 0.09209381258321075,
      "learning_rate": 2.6951638973185073e-07,
      "loss": 0.6528,
      "step": 323
    },
    {
      "epoch": 1.5452914798206279,
      "grad_norm": 0.09568385273192681,
      "learning_rate": 2.64135064394428e-07,
      "loss": 0.6632,
      "step": 324
    },
    {
      "epoch": 1.5500747384155455,
      "grad_norm": 0.0947277435093391,
      "learning_rate": 2.587998192802638e-07,
      "loss": 0.6306,
      "step": 325
    },
    {
      "epoch": 1.5548579970104632,
      "grad_norm": 0.0985703474276344,
      "learning_rate": 2.5351098848502386e-07,
      "loss": 0.6511,
      "step": 326
    },
    {
      "epoch": 1.5596412556053811,
      "grad_norm": 0.09427610648180619,
      "learning_rate": 2.482689031978872e-07,
      "loss": 0.6533,
      "step": 327
    },
    {
      "epoch": 1.564424514200299,
      "grad_norm": 0.09520925811802433,
      "learning_rate": 2.4307389168080606e-07,
      "loss": 0.6603,
      "step": 328
    },
    {
      "epoch": 1.5692077727952167,
      "grad_norm": 0.0907369263004915,
      "learning_rate": 2.3792627924795038e-07,
      "loss": 0.6818,
      "step": 329
    },
    {
      "epoch": 1.5739910313901344,
      "grad_norm": 0.09440279581013306,
      "learning_rate": 2.3282638824533529e-07,
      "loss": 0.6531,
      "step": 330
    },
    {
      "epoch": 1.5787742899850523,
      "grad_norm": 0.09614745051429147,
      "learning_rate": 2.277745380306383e-07,
      "loss": 0.6795,
      "step": 331
    },
    {
      "epoch": 1.5835575485799702,
      "grad_norm": 0.09778941686336041,
      "learning_rate": 2.227710449531971e-07,
      "loss": 0.6778,
      "step": 332
    },
    {
      "epoch": 1.588340807174888,
      "grad_norm": 0.09575250682717351,
      "learning_rate": 2.178162223342035e-07,
      "loss": 0.6404,
      "step": 333
    },
    {
      "epoch": 1.5931240657698056,
      "grad_norm": 0.09627217057571222,
      "learning_rate": 2.1291038044707965e-07,
      "loss": 0.6528,
      "step": 334
    },
    {
      "epoch": 1.5979073243647235,
      "grad_norm": 0.09572743591446818,
      "learning_rate": 2.0805382649805225e-07,
      "loss": 0.6461,
      "step": 335
    },
    {
      "epoch": 1.6026905829596414,
      "grad_norm": 0.09528928099830879,
      "learning_rate": 2.032468646069112e-07,
      "loss": 0.6425,
      "step": 336
    },
    {
      "epoch": 1.607473841554559,
      "grad_norm": 0.09652866769512121,
      "learning_rate": 1.9848979578796865e-07,
      "loss": 0.6548,
      "step": 337
    },
    {
      "epoch": 1.6122571001494768,
      "grad_norm": 0.0954083836089715,
      "learning_rate": 1.937829179312076e-07,
      "loss": 0.6633,
      "step": 338
    },
    {
      "epoch": 1.6170403587443947,
| "grad_norm": 0.09389212828330971, | |
| "learning_rate": 1.8912652578362853e-07, | |
| "loss": 0.653, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.6218236173393124, | |
| "grad_norm": 0.09323975661872334, | |
| "learning_rate": 1.8452091093079215e-07, | |
| "loss": 0.6405, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.6266068759342303, | |
| "grad_norm": 0.1030124431981675, | |
| "learning_rate": 1.7996636177855928e-07, | |
| "loss": 0.6776, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.631390134529148, | |
| "grad_norm": 0.09627742650338285, | |
| "learning_rate": 1.75463163535033e-07, | |
| "loss": 0.6579, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.6361733931240656, | |
| "grad_norm": 0.09724021609427144, | |
| "learning_rate": 1.7101159819269583e-07, | |
| "loss": 0.6432, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.6409566517189835, | |
| "grad_norm": 0.09615121849981347, | |
| "learning_rate": 1.6661194451075345e-07, | |
| "loss": 0.6628, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.6457399103139014, | |
| "grad_norm": 0.11302849698050037, | |
| "learning_rate": 1.6226447799767772e-07, | |
| "loss": 0.6306, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.6505231689088191, | |
| "grad_norm": 0.10400127614773519, | |
| "learning_rate": 1.5796947089395475e-07, | |
| "loss": 0.6462, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.6553064275037368, | |
| "grad_norm": 0.08798479350296001, | |
| "learning_rate": 1.5372719215503582e-07, | |
| "loss": 0.6309, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.6600896860986547, | |
| "grad_norm": 0.09514870211869147, | |
| "learning_rate": 1.4953790743449702e-07, | |
| "loss": 0.6631, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.6648729446935726, | |
| "grad_norm": 0.09749807157916107, | |
| "learning_rate": 1.4540187906740241e-07, | |
| "loss": 0.6285, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.6696562032884903, | |
| "grad_norm": 0.0901583318721974, | |
| "learning_rate": 1.4131936605387762e-07, | |
| "loss": 0.6731, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.674439461883408, | |
| "grad_norm": 0.09526536450165937, | |
| "learning_rate": 1.3729062404289017e-07, | |
| "loss": 0.6729, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.6792227204783259, | |
| "grad_norm": 0.09836491336123554, | |
| "learning_rate": 1.3331590531624115e-07, | |
| "loss": 0.6515, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.6840059790732438, | |
| "grad_norm": 0.10075181987095727, | |
| "learning_rate": 1.2939545877276726e-07, | |
| "loss": 0.6452, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.6887892376681615, | |
| "grad_norm": 0.09365016014154177, | |
| "learning_rate": 1.25529529912754e-07, | |
| "loss": 0.6477, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.6935724962630792, | |
| "grad_norm": 0.09704957910910289, | |
| "learning_rate": 1.2171836082256316e-07, | |
| "loss": 0.6678, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.698355754857997, | |
| "grad_norm": 0.0902657671425916, | |
| "learning_rate": 1.1796219015947285e-07, | |
| "loss": 0.6515, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.703139013452915, | |
| "grad_norm": 0.09237650202510098, | |
| "learning_rate": 1.1426125313673285e-07, | |
| "loss": 0.6645, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.7079222720478326, | |
| "grad_norm": 0.09196231975892524, | |
| "learning_rate": 1.1061578150883444e-07, | |
| "loss": 0.6092, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.7127055306427503, | |
| "grad_norm": 0.10378820492061246, | |
| "learning_rate": 1.070260035570002e-07, | |
| "loss": 0.6539, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.717488789237668, | |
| "grad_norm": 0.09091589756400278, | |
| "learning_rate": 1.0349214407488571e-07, | |
| "loss": 0.6454, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.722272047832586, | |
| "grad_norm": 0.09881444337923977, | |
| "learning_rate": 1.000144243545058e-07, | |
| "loss": 0.6486, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.7270553064275038, | |
| "grad_norm": 0.09311309771551186, | |
| "learning_rate": 9.659306217237517e-08, | |
| "loss": 0.6402, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.7318385650224215, | |
| "grad_norm": 0.09631340848121332, | |
| "learning_rate": 9.322827177587212e-08, | |
| "loss": 0.6469, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.7366218236173392, | |
| "grad_norm": 0.08882699558772723, | |
| "learning_rate": 8.992026386982221e-08, | |
| "loss": 0.6535, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.741405082212257, | |
| "grad_norm": 0.09280206311141305, | |
| "learning_rate": 8.66692456033029e-08, | |
| "loss": 0.648, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.746188340807175, | |
| "grad_norm": 0.0909402496845187, | |
| "learning_rate": 8.347542055667311e-08, | |
| "loss": 0.6529, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.7509715994020927, | |
| "grad_norm": 0.09512784479004122, | |
| "learning_rate": 8.033898872882394e-08, | |
| "loss": 0.6383, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.7557548579970104, | |
| "grad_norm": 0.09252600518424785, | |
| "learning_rate": 7.726014652465507e-08, | |
| "loss": 0.6202, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.7605381165919283, | |
| "grad_norm": 0.09450252582803388, | |
| "learning_rate": 7.423908674277579e-08, | |
| "loss": 0.6494, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.7653213751868462, | |
| "grad_norm": 0.09089301547199258, | |
| "learning_rate": 7.127599856343192e-08, | |
| "loss": 0.6583, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.7701046337817639, | |
| "grad_norm": 0.0917284963739844, | |
| "learning_rate": 6.837106753665823e-08, | |
| "loss": 0.666, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.7748878923766815, | |
| "grad_norm": 0.09493041895710681, | |
| "learning_rate": 6.552447557066109e-08, | |
| "loss": 0.6464, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.7796711509715994, | |
| "grad_norm": 0.08941486424509316, | |
| "learning_rate": 6.273640092042575e-08, | |
| "loss": 0.6367, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.7844544095665174, | |
| "grad_norm": 0.08812104207206783, | |
| "learning_rate": 6.000701817655474e-08, | |
| "loss": 0.6259, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.789237668161435, | |
| "grad_norm": 0.09772722276760373, | |
| "learning_rate": 5.733649825433384e-08, | |
| "loss": 0.6316, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.7940209267563527, | |
| "grad_norm": 0.09550366242600927, | |
| "learning_rate": 5.47250083830314e-08, | |
| "loss": 0.6764, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.7988041853512704, | |
| "grad_norm": 0.09529244067030168, | |
| "learning_rate": 5.217271209542384e-08, | |
| "loss": 0.6581, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.8035874439461883, | |
| "grad_norm": 0.09484969927499808, | |
| "learning_rate": 4.967976921755679e-08, | |
| "loss": 0.6238, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.8083707025411062, | |
| "grad_norm": 0.0922584352432481, | |
| "learning_rate": 4.724633585873627e-08, | |
| "loss": 0.6417, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.813153961136024, | |
| "grad_norm": 0.09178466251978876, | |
| "learning_rate": 4.487256440175291e-08, | |
| "loss": 0.6563, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.8179372197309416, | |
| "grad_norm": 0.0945223759439494, | |
| "learning_rate": 4.255860349334006e-08, | |
| "loss": 0.6479, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.8227204783258595, | |
| "grad_norm": 0.08929357609354767, | |
| "learning_rate": 4.030459803486464e-08, | |
| "loss": 0.6378, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.8275037369207774, | |
| "grad_norm": 0.08950252320624025, | |
| "learning_rate": 3.811068917325444e-08, | |
| "loss": 0.6128, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.832286995515695, | |
| "grad_norm": 0.09959763380863362, | |
| "learning_rate": 3.5977014292158495e-08, | |
| "loss": 0.6493, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.8370702541106128, | |
| "grad_norm": 0.09877239003895597, | |
| "learning_rate": 3.3903707003344774e-08, | |
| "loss": 0.6453, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.8418535127055307, | |
| "grad_norm": 0.09253710326481404, | |
| "learning_rate": 3.189089713833226e-08, | |
| "loss": 0.6564, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.8466367713004486, | |
| "grad_norm": 0.09295026609135121, | |
| "learning_rate": 2.9938710740262884e-08, | |
| "loss": 0.6286, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.8514200298953662, | |
| "grad_norm": 0.0931563883337063, | |
| "learning_rate": 2.8047270056005934e-08, | |
| "loss": 0.6431, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.856203288490284, | |
| "grad_norm": 0.10071203031568553, | |
| "learning_rate": 2.6216693528505195e-08, | |
| "loss": 0.6419, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.8609865470852018, | |
| "grad_norm": 0.0926672982724561, | |
| "learning_rate": 2.4447095789360884e-08, | |
| "loss": 0.6426, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.8657698056801197, | |
| "grad_norm": 0.10839157436286975, | |
| "learning_rate": 2.2738587651651487e-08, | |
| "loss": 0.6418, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.8705530642750374, | |
| "grad_norm": 0.09452841812388145, | |
| "learning_rate": 2.109127610299466e-08, | |
| "loss": 0.6534, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.875336322869955, | |
| "grad_norm": 0.09059164967961951, | |
| "learning_rate": 1.950526429884769e-08, | |
| "loss": 0.6385, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.8801195814648728, | |
| "grad_norm": 0.09541292286319235, | |
| "learning_rate": 1.7980651556048e-08, | |
| "loss": 0.6533, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.8849028400597907, | |
| "grad_norm": 0.09352871341544354, | |
| "learning_rate": 1.6517533346593226e-08, | |
| "loss": 0.6533, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.8896860986547086, | |
| "grad_norm": 0.09830540898676399, | |
| "learning_rate": 1.5116001291663462e-08, | |
| "loss": 0.686, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.8944693572496263, | |
| "grad_norm": 0.09186784336874675, | |
| "learning_rate": 1.3776143155883491e-08, | |
| "loss": 0.6265, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.899252615844544, | |
| "grad_norm": 0.0903805903035563, | |
| "learning_rate": 1.2498042841827317e-08, | |
| "loss": 0.6444, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.9040358744394619, | |
| "grad_norm": 0.09251729842752435, | |
| "learning_rate": 1.128178038476324e-08, | |
| "loss": 0.643, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.9088191330343798, | |
| "grad_norm": 0.08909847951509034, | |
| "learning_rate": 1.0127431947643316e-08, | |
| "loss": 0.643, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.9136023916292975, | |
| "grad_norm": 0.09779029431433935, | |
| "learning_rate": 9.035069816332619e-09, | |
| "loss": 0.6312, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.9183856502242151, | |
| "grad_norm": 0.09602092233428558, | |
| "learning_rate": 8.004762395083963e-09, | |
| "loss": 0.629, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.923168908819133, | |
| "grad_norm": 0.09003448698278545, | |
| "learning_rate": 7.036574202253343e-09, | |
| "loss": 0.6706, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.927952167414051, | |
| "grad_norm": 0.09531787472090986, | |
| "learning_rate": 6.130565866260484e-09, | |
| "loss": 0.65, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.9327354260089686, | |
| "grad_norm": 0.09179251340184746, | |
| "learning_rate": 5.286794121791782e-09, | |
| "loss": 0.6574, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.9375186846038863, | |
| "grad_norm": 0.09493544791044316, | |
| "learning_rate": 4.5053118062478025e-09, | |
| "loss": 0.6322, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.9423019431988042, | |
| "grad_norm": 0.09306468796228341, | |
| "learning_rate": 3.786167856434375e-09, | |
| "loss": 0.6634, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.9470852017937221, | |
| "grad_norm": 0.09006826318963117, | |
| "learning_rate": 3.1294073054987102e-09, | |
| "loss": 0.6418, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.9518684603886398, | |
| "grad_norm": 0.09638156976673805, | |
| "learning_rate": 2.5350712801084363e-09, | |
| "loss": 0.631, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.9566517189835575, | |
| "grad_norm": 0.09585138354438733, | |
| "learning_rate": 2.003196997877099e-09, | |
| "loss": 0.6405, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.9614349775784752, | |
| "grad_norm": 0.0982765637161277, | |
| "learning_rate": 1.5338177650332517e-09, | |
| "loss": 0.631, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.966218236173393, | |
| "grad_norm": 0.0924075594922873, | |
| "learning_rate": 1.1269629743346777e-09, | |
| "loss": 0.6433, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.971001494768311, | |
| "grad_norm": 0.09407079001673903, | |
| "learning_rate": 7.826581032279734e-10, | |
| "loss": 0.6422, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.9757847533632287, | |
| "grad_norm": 0.09103323653600585, | |
| "learning_rate": 5.00924712252937e-10, | |
| "loss": 0.6645, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.9805680119581464, | |
| "grad_norm": 0.09999729799669839, | |
| "learning_rate": 2.8178044369286945e-10, | |
| "loss": 0.6495, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.9853512705530643, | |
| "grad_norm": 0.0958229669734574, | |
| "learning_rate": 1.2523902046934763e-10, | |
| "loss": 0.6238, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.9901345291479822, | |
| "grad_norm": 0.08983387781419207, | |
| "learning_rate": 3.131024528302273e-11, | |
| "loss": 0.6478, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.9949177877428999, | |
| "grad_norm": 0.09621386225221452, | |
| "learning_rate": 0.0, | |
| "loss": 0.6557, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.9949177877428999, | |
| "step": 418, | |
| "total_flos": 862605439369216.0, | |
| "train_loss": 0.715426175948678, | |
| "train_runtime": 10328.0995, | |
| "train_samples_per_second": 5.181, | |
| "train_steps_per_second": 0.04 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 418, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 862605439369216.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
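
The state above is the `trainer_state.json` that the Hugging Face `Trainer` writes alongside each checkpoint. A minimal sketch of how one might load it and summarize the logged run follows; the file path is an assumption, and the split between per-step entries (which carry `"loss"`) and the trailing summary entry (which carries `"train_loss"`, `"train_runtime"`, etc.) matches the structure shown above.

```python
import json

# Assumed path: the Trainer writes this file inside each checkpoint directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step log entries carry "loss"; the final entry of log_history is a
# run summary carrying aggregate stats instead.
steps = [entry for entry in state["log_history"] if "loss" in entry]
summary = state["log_history"][-1]

print(f"logged steps: {len(steps)}")
print(f"first/last loss: {steps[0]['loss']:.4f} -> {steps[-1]['loss']:.4f}")
print(f"reported mean train loss: {summary['train_loss']:.4f}")
print(f"runtime: {summary['train_runtime']:.1f}s "
      f"({summary['train_samples_per_second']:.2f} samples/s)")
```

For this run, such a script would report 418 logged steps, a loss moving from roughly 1.22 at step 1 to about 0.66 at step 418, and the Trainer-reported mean train loss of about 0.7154.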