{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 7106,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00028145229383619476,
      "grad_norm": 1.9831818342208862,
      "learning_rate": 2.8129395218002816e-07,
      "loss": 2.1254,
      "step": 1
    },
    {
      "epoch": 0.0014072614691809737,
      "grad_norm": 1.082022786140442,
      "learning_rate": 1.4064697609001406e-06,
      "loss": 1.2669,
      "step": 5
    },
    {
      "epoch": 0.0028145229383619475,
      "grad_norm": 1.065866231918335,
      "learning_rate": 2.8129395218002813e-06,
      "loss": 1.3653,
      "step": 10
    },
    {
      "epoch": 0.004221784407542921,
      "grad_norm": 2.5712316036224365,
      "learning_rate": 4.219409282700422e-06,
      "loss": 1.53,
      "step": 15
    },
    {
      "epoch": 0.005629045876723895,
      "grad_norm": 2.248335838317871,
      "learning_rate": 5.6258790436005626e-06,
      "loss": 1.4474,
      "step": 20
    },
    {
      "epoch": 0.007036307345904869,
      "grad_norm": 1.0500571727752686,
      "learning_rate": 7.032348804500703e-06,
      "loss": 1.3372,
      "step": 25
    },
    {
      "epoch": 0.008443568815085843,
      "grad_norm": 1.867329716682434,
      "learning_rate": 8.438818565400844e-06,
      "loss": 1.2333,
      "step": 30
    },
    {
      "epoch": 0.009850830284266816,
      "grad_norm": 3.1149637699127197,
      "learning_rate": 9.845288326300985e-06,
      "loss": 1.2918,
      "step": 35
    },
    {
      "epoch": 0.01125809175344779,
      "grad_norm": 1.9895963668823242,
      "learning_rate": 1.1251758087201125e-05,
      "loss": 1.3152,
      "step": 40
    },
    {
      "epoch": 0.012665353222628765,
      "grad_norm": 2.0947887897491455,
      "learning_rate": 1.2658227848101267e-05,
      "loss": 1.3231,
      "step": 45
    },
    {
      "epoch": 0.014072614691809739,
      "grad_norm": 1.4856278896331787,
      "learning_rate": 1.4064697609001406e-05,
      "loss": 1.2376,
      "step": 50
    },
    {
      "epoch": 0.015479876160990712,
      "grad_norm": 1.2920206785202026,
      "learning_rate": 1.547116736990155e-05,
      "loss": 1.1083,
      "step": 55
    },
    {
      "epoch": 0.016887137630171686,
      "grad_norm": 1.3694531917572021,
      "learning_rate": 1.6877637130801688e-05,
      "loss": 0.9554,
      "step": 60
    },
    {
      "epoch": 0.01829439909935266,
      "grad_norm": 1.335752248764038,
      "learning_rate": 1.828410689170183e-05,
      "loss": 0.8074,
      "step": 65
    },
    {
      "epoch": 0.019701660568533633,
      "grad_norm": 0.8360928297042847,
      "learning_rate": 1.969057665260197e-05,
      "loss": 0.9003,
      "step": 70
    },
    {
      "epoch": 0.021108922037714608,
      "grad_norm": 1.4033712148666382,
      "learning_rate": 2.1097046413502112e-05,
      "loss": 1.0069,
      "step": 75
    },
    {
      "epoch": 0.02251618350689558,
      "grad_norm": 3.524489164352417,
      "learning_rate": 2.250351617440225e-05,
      "loss": 0.8655,
      "step": 80
    },
    {
      "epoch": 0.023923444976076555,
      "grad_norm": 5.065525054931641,
      "learning_rate": 2.3909985935302392e-05,
      "loss": 0.8884,
      "step": 85
    },
    {
      "epoch": 0.02533070644525753,
      "grad_norm": 1.1002086400985718,
      "learning_rate": 2.5316455696202533e-05,
      "loss": 0.7538,
      "step": 90
    },
    {
      "epoch": 0.026737967914438502,
      "grad_norm": 1.5529321432113647,
      "learning_rate": 2.672292545710267e-05,
      "loss": 0.9944,
      "step": 95
    },
    {
      "epoch": 0.028145229383619477,
      "grad_norm": 2.5230774879455566,
      "learning_rate": 2.8129395218002813e-05,
      "loss": 0.742,
      "step": 100
    },
    {
      "epoch": 0.02955249085280045,
      "grad_norm": 1.8407468795776367,
      "learning_rate": 2.9535864978902954e-05,
      "loss": 0.6106,
      "step": 105
    },
    {
      "epoch": 0.030959752321981424,
      "grad_norm": 1.8544448614120483,
      "learning_rate": 3.09423347398031e-05,
      "loss": 0.7784,
      "step": 110
    },
    {
      "epoch": 0.0323670137911624,
      "grad_norm": 1.5393428802490234,
      "learning_rate": 3.234880450070324e-05,
      "loss": 0.6225,
      "step": 115
    },
    {
      "epoch": 0.03377427526034337,
      "grad_norm": 0.9650129675865173,
      "learning_rate": 3.3755274261603375e-05,
      "loss": 0.466,
      "step": 120
    },
    {
      "epoch": 0.03518153672952434,
      "grad_norm": 1.2199194431304932,
      "learning_rate": 3.516174402250352e-05,
      "loss": 0.7885,
      "step": 125
    },
    {
      "epoch": 0.03658879819870532,
      "grad_norm": 3.1491034030914307,
      "learning_rate": 3.656821378340366e-05,
      "loss": 0.642,
      "step": 130
    },
    {
      "epoch": 0.037996059667886294,
      "grad_norm": 1.015199899673462,
      "learning_rate": 3.79746835443038e-05,
      "loss": 1.0289,
      "step": 135
    },
    {
      "epoch": 0.039403321137067265,
      "grad_norm": 1.211543321609497,
      "learning_rate": 3.938115330520394e-05,
      "loss": 0.8134,
      "step": 140
    },
    {
      "epoch": 0.040810582606248244,
      "grad_norm": 1.6816538572311401,
      "learning_rate": 4.078762306610408e-05,
      "loss": 0.8157,
      "step": 145
    },
    {
      "epoch": 0.042217844075429216,
      "grad_norm": 1.9145057201385498,
      "learning_rate": 4.2194092827004224e-05,
      "loss": 0.6091,
      "step": 150
    },
    {
      "epoch": 0.04362510554461019,
      "grad_norm": 1.168205976486206,
      "learning_rate": 4.3600562587904366e-05,
      "loss": 0.5557,
      "step": 155
    },
    {
      "epoch": 0.04503236701379116,
      "grad_norm": 0.8458957076072693,
      "learning_rate": 4.50070323488045e-05,
      "loss": 0.4024,
      "step": 160
    },
    {
      "epoch": 0.04643962848297214,
      "grad_norm": 1.442372441291809,
      "learning_rate": 4.641350210970464e-05,
      "loss": 0.8014,
      "step": 165
    },
    {
      "epoch": 0.04784688995215311,
      "grad_norm": 1.6391854286193848,
      "learning_rate": 4.7819971870604783e-05,
      "loss": 1.012,
      "step": 170
    },
    {
      "epoch": 0.04925415142133408,
      "grad_norm": 1.334926724433899,
      "learning_rate": 4.9226441631504925e-05,
      "loss": 0.6832,
      "step": 175
    },
    {
      "epoch": 0.05066141289051506,
      "grad_norm": 1.0498499870300293,
      "learning_rate": 5.0632911392405066e-05,
      "loss": 0.6351,
      "step": 180
    },
    {
      "epoch": 0.05206867435969603,
      "grad_norm": 2.0023510456085205,
      "learning_rate": 5.203938115330521e-05,
      "loss": 0.6497,
      "step": 185
    },
    {
      "epoch": 0.053475935828877004,
      "grad_norm": 1.9690536260604858,
      "learning_rate": 5.344585091420534e-05,
      "loss": 0.7836,
      "step": 190
    },
    {
      "epoch": 0.054883197298057976,
      "grad_norm": 1.4102208614349365,
      "learning_rate": 5.4852320675105484e-05,
      "loss": 0.5955,
      "step": 195
    },
    {
      "epoch": 0.056290458767238954,
      "grad_norm": 0.9214100241661072,
      "learning_rate": 5.6258790436005626e-05,
      "loss": 0.7519,
      "step": 200
    },
    {
      "epoch": 0.057697720236419926,
      "grad_norm": 1.3210060596466064,
      "learning_rate": 5.766526019690577e-05,
      "loss": 0.5468,
      "step": 205
    },
    {
      "epoch": 0.0591049817056009,
      "grad_norm": 1.723496437072754,
      "learning_rate": 5.907172995780591e-05,
      "loss": 0.4599,
      "step": 210
    },
    {
      "epoch": 0.06051224317478188,
      "grad_norm": 1.1883797645568848,
      "learning_rate": 6.047819971870605e-05,
      "loss": 0.7824,
      "step": 215
    },
    {
      "epoch": 0.06191950464396285,
      "grad_norm": 1.0189827680587769,
      "learning_rate": 6.18846694796062e-05,
      "loss": 0.5021,
      "step": 220
    },
    {
      "epoch": 0.06332676611314382,
      "grad_norm": 1.1384845972061157,
      "learning_rate": 6.329113924050633e-05,
      "loss": 0.7703,
      "step": 225
    },
    {
      "epoch": 0.0647340275823248,
      "grad_norm": 2.097339391708374,
      "learning_rate": 6.469760900140648e-05,
      "loss": 1.0998,
      "step": 230
    },
    {
      "epoch": 0.06614128905150576,
      "grad_norm": 2.55668044090271,
      "learning_rate": 6.610407876230662e-05,
      "loss": 0.5333,
      "step": 235
    },
    {
      "epoch": 0.06754855052068674,
      "grad_norm": 1.1277037858963013,
      "learning_rate": 6.751054852320675e-05,
      "loss": 0.6361,
      "step": 240
    },
    {
      "epoch": 0.06895581198986772,
      "grad_norm": 2.0660481452941895,
      "learning_rate": 6.89170182841069e-05,
      "loss": 0.7486,
      "step": 245
    },
    {
      "epoch": 0.07036307345904869,
      "grad_norm": 2.1117303371429443,
      "learning_rate": 7.032348804500703e-05,
      "loss": 0.7103,
      "step": 250
    },
    {
      "epoch": 0.07177033492822966,
      "grad_norm": 1.1796034574508667,
      "learning_rate": 7.172995780590718e-05,
      "loss": 0.6379,
      "step": 255
    },
    {
      "epoch": 0.07317759639741064,
      "grad_norm": 1.470502257347107,
      "learning_rate": 7.313642756680732e-05,
      "loss": 0.4737,
      "step": 260
    },
    {
      "epoch": 0.07458485786659161,
      "grad_norm": 1.443248987197876,
      "learning_rate": 7.454289732770746e-05,
      "loss": 0.812,
      "step": 265
    },
    {
      "epoch": 0.07599211933577259,
      "grad_norm": 3.0095481872558594,
      "learning_rate": 7.59493670886076e-05,
      "loss": 0.4456,
      "step": 270
    },
    {
      "epoch": 0.07739938080495357,
      "grad_norm": 1.157353401184082,
      "learning_rate": 7.735583684950773e-05,
      "loss": 0.524,
      "step": 275
    },
    {
      "epoch": 0.07880664227413453,
      "grad_norm": 1.1761438846588135,
      "learning_rate": 7.876230661040788e-05,
      "loss": 0.7222,
      "step": 280
    },
    {
      "epoch": 0.08021390374331551,
      "grad_norm": 0.64066082239151,
      "learning_rate": 8.016877637130802e-05,
      "loss": 0.5886,
      "step": 285
    },
    {
      "epoch": 0.08162116521249649,
      "grad_norm": 0.9376239776611328,
      "learning_rate": 8.157524613220817e-05,
      "loss": 0.6901,
      "step": 290
    },
    {
      "epoch": 0.08302842668167745,
      "grad_norm": 0.9339331388473511,
      "learning_rate": 8.29817158931083e-05,
      "loss": 0.389,
      "step": 295
    },
    {
      "epoch": 0.08443568815085843,
      "grad_norm": 1.1914637088775635,
      "learning_rate": 8.438818565400845e-05,
      "loss": 0.535,
      "step": 300
    },
    {
      "epoch": 0.0858429496200394,
      "grad_norm": 1.1882398128509521,
      "learning_rate": 8.579465541490858e-05,
      "loss": 0.3909,
      "step": 305
    },
    {
      "epoch": 0.08725021108922038,
      "grad_norm": 1.5186290740966797,
      "learning_rate": 8.720112517580873e-05,
      "loss": 0.6317,
      "step": 310
    },
    {
      "epoch": 0.08865747255840135,
      "grad_norm": 1.509944200515747,
      "learning_rate": 8.860759493670887e-05,
      "loss": 0.4739,
      "step": 315
    },
    {
      "epoch": 0.09006473402758232,
      "grad_norm": 1.4957388639450073,
      "learning_rate": 9.0014064697609e-05,
      "loss": 0.6078,
      "step": 320
    },
    {
      "epoch": 0.0914719954967633,
      "grad_norm": 1.8821747303009033,
      "learning_rate": 9.142053445850915e-05,
      "loss": 0.8152,
      "step": 325
    },
    {
      "epoch": 0.09287925696594428,
      "grad_norm": 0.9399609565734863,
      "learning_rate": 9.282700421940928e-05,
      "loss": 0.6356,
      "step": 330
    },
    {
      "epoch": 0.09428651843512524,
      "grad_norm": 1.4053034782409668,
      "learning_rate": 9.423347398030943e-05,
      "loss": 0.7405,
      "step": 335
    },
    {
      "epoch": 0.09569377990430622,
      "grad_norm": 0.9742883443832397,
      "learning_rate": 9.563994374120957e-05,
      "loss": 0.7251,
      "step": 340
    },
    {
      "epoch": 0.0971010413734872,
      "grad_norm": 3.047891616821289,
      "learning_rate": 9.704641350210972e-05,
      "loss": 0.7387,
      "step": 345
    },
    {
      "epoch": 0.09850830284266816,
      "grad_norm": 0.8324292898178101,
      "learning_rate": 9.845288326300985e-05,
      "loss": 0.584,
      "step": 350
    },
    {
      "epoch": 0.09991556431184914,
      "grad_norm": 1.0198436975479126,
      "learning_rate": 9.985935302391e-05,
      "loss": 0.4691,
      "step": 355
    },
    {
      "epoch": 0.10132282578103012,
      "grad_norm": 3.0640432834625244,
      "learning_rate": 0.00010126582278481013,
      "loss": 0.4508,
      "step": 360
    },
    {
      "epoch": 0.10273008725021109,
      "grad_norm": 0.9727720022201538,
      "learning_rate": 0.00010267229254571027,
      "loss": 0.4544,
      "step": 365
    },
    {
      "epoch": 0.10413734871939206,
      "grad_norm": 1.4771376848220825,
      "learning_rate": 0.00010407876230661042,
      "loss": 0.5085,
      "step": 370
    },
    {
      "epoch": 0.10554461018857304,
      "grad_norm": 1.5016095638275146,
      "learning_rate": 0.00010548523206751055,
      "loss": 0.5482,
      "step": 375
    },
    {
      "epoch": 0.10695187165775401,
      "grad_norm": 1.5180020332336426,
      "learning_rate": 0.00010689170182841069,
      "loss": 0.7243,
      "step": 380
    },
    {
      "epoch": 0.10835913312693499,
      "grad_norm": 1.8111554384231567,
      "learning_rate": 0.00010829817158931083,
      "loss": 0.5539,
      "step": 385
    },
    {
      "epoch": 0.10976639459611595,
      "grad_norm": 1.488231897354126,
      "learning_rate": 0.00010970464135021097,
      "loss": 0.4533,
      "step": 390
    },
    {
      "epoch": 0.11117365606529693,
      "grad_norm": 1.7389737367630005,
      "learning_rate": 0.00011111111111111112,
      "loss": 0.6554,
      "step": 395
    },
    {
      "epoch": 0.11258091753447791,
      "grad_norm": 0.9282882213592529,
      "learning_rate": 0.00011251758087201125,
      "loss": 0.5665,
      "step": 400
    },
    {
      "epoch": 0.11398817900365887,
      "grad_norm": 1.2808202505111694,
      "learning_rate": 0.0001139240506329114,
      "loss": 0.8137,
      "step": 405
    },
    {
      "epoch": 0.11539544047283985,
      "grad_norm": 1.520807147026062,
      "learning_rate": 0.00011533052039381153,
      "loss": 0.7432,
      "step": 410
    },
    {
      "epoch": 0.11680270194202083,
      "grad_norm": 1.4392223358154297,
      "learning_rate": 0.0001167369901547117,
      "loss": 0.478,
      "step": 415
    },
    {
      "epoch": 0.1182099634112018,
      "grad_norm": 0.8880683779716492,
      "learning_rate": 0.00011814345991561182,
      "loss": 0.4246,
      "step": 420
    },
    {
      "epoch": 0.11961722488038277,
      "grad_norm": 0.832594633102417,
      "learning_rate": 0.00011954992967651195,
      "loss": 0.5505,
      "step": 425
    },
    {
      "epoch": 0.12102448634956375,
      "grad_norm": 0.4944693148136139,
      "learning_rate": 0.0001209563994374121,
      "loss": 0.4342,
      "step": 430
    },
    {
      "epoch": 0.12243174781874472,
      "grad_norm": 0.8733665943145752,
      "learning_rate": 0.00012236286919831225,
      "loss": 0.5839,
      "step": 435
    },
    {
      "epoch": 0.1238390092879257,
      "grad_norm": 1.1832093000411987,
      "learning_rate": 0.0001237693389592124,
      "loss": 0.6976,
      "step": 440
    },
    {
      "epoch": 0.12524627075710668,
      "grad_norm": 1.0406477451324463,
      "learning_rate": 0.00012517580872011252,
      "loss": 0.6353,
      "step": 445
    },
    {
      "epoch": 0.12665353222628764,
      "grad_norm": 0.788364827632904,
      "learning_rate": 0.00012658227848101267,
      "loss": 0.3272,
      "step": 450
    },
    {
      "epoch": 0.1280607936954686,
      "grad_norm": 1.2941433191299438,
      "learning_rate": 0.00012798874824191281,
      "loss": 0.7372,
      "step": 455
    },
    {
      "epoch": 0.1294680551646496,
      "grad_norm": 0.9147971272468567,
      "learning_rate": 0.00012939521800281296,
      "loss": 0.5474,
      "step": 460
    },
    {
      "epoch": 0.13087531663383056,
      "grad_norm": 1.0644923448562622,
      "learning_rate": 0.00013080168776371308,
      "loss": 0.6286,
      "step": 465
    },
    {
      "epoch": 0.13228257810301153,
      "grad_norm": 0.8214511275291443,
      "learning_rate": 0.00013220815752461323,
      "loss": 0.3655,
      "step": 470
    },
    {
      "epoch": 0.13368983957219252,
      "grad_norm": 0.7348743677139282,
      "learning_rate": 0.00013361462728551338,
      "loss": 0.5278,
      "step": 475
    },
    {
      "epoch": 0.13509710104137349,
      "grad_norm": 1.0437523126602173,
      "learning_rate": 0.0001350210970464135,
      "loss": 0.4665,
      "step": 480
    },
    {
      "epoch": 0.13650436251055445,
      "grad_norm": 1.6613603830337524,
      "learning_rate": 0.00013642756680731365,
      "loss": 0.7575,
      "step": 485
    },
    {
      "epoch": 0.13791162397973544,
      "grad_norm": 1.0844550132751465,
      "learning_rate": 0.0001378340365682138,
      "loss": 0.6744,
      "step": 490
    },
    {
      "epoch": 0.1393188854489164,
      "grad_norm": 1.3651305437088013,
      "learning_rate": 0.00013924050632911395,
      "loss": 0.8377,
      "step": 495
    },
    {
      "epoch": 0.14072614691809737,
      "grad_norm": 1.256631851196289,
      "learning_rate": 0.00014064697609001407,
      "loss": 0.6523,
      "step": 500
    },
    {
      "epoch": 0.14213340838727836,
      "grad_norm": 1.7894726991653442,
      "learning_rate": 0.0001420534458509142,
      "loss": 0.5191,
      "step": 505
    },
    {
      "epoch": 0.14354066985645933,
      "grad_norm": 0.8206887245178223,
      "learning_rate": 0.00014345991561181436,
      "loss": 0.378,
      "step": 510
    },
    {
      "epoch": 0.1449479313256403,
      "grad_norm": 1.6677026748657227,
      "learning_rate": 0.00014486638537271449,
      "loss": 0.326,
      "step": 515
    },
    {
      "epoch": 0.1463551927948213,
      "grad_norm": 1.4679995775222778,
      "learning_rate": 0.00014627285513361463,
      "loss": 0.6619,
      "step": 520
    },
    {
      "epoch": 0.14776245426400225,
      "grad_norm": 0.829093337059021,
      "learning_rate": 0.00014767932489451478,
      "loss": 0.7372,
      "step": 525
    },
    {
      "epoch": 0.14916971573318322,
      "grad_norm": 1.6188422441482544,
      "learning_rate": 0.00014908579465541493,
      "loss": 0.5666,
      "step": 530
    },
    {
      "epoch": 0.1505769772023642,
      "grad_norm": 1.319091558456421,
      "learning_rate": 0.00015049226441631505,
      "loss": 0.8461,
      "step": 535
    },
    {
      "epoch": 0.15198423867154517,
      "grad_norm": 1.7154995203018188,
      "learning_rate": 0.0001518987341772152,
      "loss": 0.6463,
      "step": 540
    },
    {
      "epoch": 0.15339150014072614,
      "grad_norm": 1.4643100500106812,
      "learning_rate": 0.00015330520393811535,
      "loss": 0.6149,
      "step": 545
    },
    {
      "epoch": 0.15479876160990713,
      "grad_norm": 1.554081916809082,
      "learning_rate": 0.00015471167369901547,
      "loss": 0.7509,
      "step": 550
    },
    {
      "epoch": 0.1562060230790881,
      "grad_norm": 1.040045976638794,
      "learning_rate": 0.00015611814345991562,
      "loss": 0.6607,
      "step": 555
    },
    {
      "epoch": 0.15761328454826906,
      "grad_norm": 1.9093159437179565,
      "learning_rate": 0.00015752461322081577,
      "loss": 0.6108,
      "step": 560
    },
    {
      "epoch": 0.15902054601745005,
      "grad_norm": 0.8650393486022949,
      "learning_rate": 0.0001589310829817159,
      "loss": 0.629,
      "step": 565
    },
    {
      "epoch": 0.16042780748663102,
      "grad_norm": 1.011257529258728,
      "learning_rate": 0.00016033755274261603,
      "loss": 0.2586,
      "step": 570
    },
    {
      "epoch": 0.16183506895581198,
      "grad_norm": 0.8653711676597595,
      "learning_rate": 0.00016174402250351618,
      "loss": 0.6063,
      "step": 575
    },
    {
      "epoch": 0.16324233042499298,
      "grad_norm": 1.7408281564712524,
      "learning_rate": 0.00016315049226441633,
      "loss": 0.4728,
      "step": 580
    },
    {
      "epoch": 0.16464959189417394,
      "grad_norm": 0.7200327515602112,
      "learning_rate": 0.00016455696202531648,
      "loss": 0.6803,
      "step": 585
    },
    {
      "epoch": 0.1660568533633549,
      "grad_norm": 2.032118320465088,
      "learning_rate": 0.0001659634317862166,
      "loss": 0.6615,
      "step": 590
    },
    {
      "epoch": 0.1674641148325359,
      "grad_norm": 1.1240061521530151,
      "learning_rate": 0.00016736990154711675,
      "loss": 0.4675,
      "step": 595
    },
    {
      "epoch": 0.16887137630171686,
      "grad_norm": 0.8609156012535095,
      "learning_rate": 0.0001687763713080169,
      "loss": 0.6737,
      "step": 600
    },
    {
      "epoch": 0.17027863777089783,
      "grad_norm": 1.4271563291549683,
      "learning_rate": 0.00017018284106891702,
      "loss": 0.6479,
      "step": 605
    },
    {
      "epoch": 0.1716858992400788,
      "grad_norm": 0.8409131765365601,
      "learning_rate": 0.00017158931082981717,
      "loss": 0.5877,
      "step": 610
    },
    {
      "epoch": 0.17309316070925979,
      "grad_norm": 1.002172827720642,
      "learning_rate": 0.00017299578059071731,
      "loss": 0.5572,
      "step": 615
    },
    {
      "epoch": 0.17450042217844075,
      "grad_norm": 0.7729489207267761,
      "learning_rate": 0.00017440225035161746,
      "loss": 0.64,
      "step": 620
    },
    {
      "epoch": 0.17590768364762172,
      "grad_norm": 1.3359206914901733,
      "learning_rate": 0.00017580872011251758,
      "loss": 0.5652,
      "step": 625
    },
    {
      "epoch": 0.1773149451168027,
      "grad_norm": 2.492105722427368,
      "learning_rate": 0.00017721518987341773,
      "loss": 0.584,
      "step": 630
    },
    {
      "epoch": 0.17872220658598367,
      "grad_norm": 1.271020770072937,
      "learning_rate": 0.00017862165963431788,
      "loss": 0.4011,
      "step": 635
    },
    {
      "epoch": 0.18012946805516464,
      "grad_norm": 0.8744266629219055,
      "learning_rate": 0.000180028129395218,
      "loss": 0.616,
      "step": 640
    },
    {
      "epoch": 0.18153672952434563,
      "grad_norm": 1.2818926572799683,
      "learning_rate": 0.00018143459915611815,
      "loss": 0.463,
      "step": 645
    },
    {
      "epoch": 0.1829439909935266,
      "grad_norm": 1.3106176853179932,
      "learning_rate": 0.0001828410689170183,
      "loss": 0.4851,
      "step": 650
    },
    {
      "epoch": 0.18435125246270756,
      "grad_norm": 1.068864345550537,
      "learning_rate": 0.00018424753867791845,
      "loss": 0.6297,
      "step": 655
    },
    {
      "epoch": 0.18575851393188855,
      "grad_norm": 1.879895567893982,
      "learning_rate": 0.00018565400843881857,
      "loss": 0.6638,
      "step": 660
    },
    {
      "epoch": 0.18716577540106952,
      "grad_norm": 1.4671173095703125,
      "learning_rate": 0.00018706047819971872,
      "loss": 0.7588,
      "step": 665
    },
    {
      "epoch": 0.18857303687025048,
      "grad_norm": 1.5851764678955078,
      "learning_rate": 0.00018846694796061886,
      "loss": 0.7505,
      "step": 670
    },
    {
      "epoch": 0.18998029833943147,
      "grad_norm": 0.7149075269699097,
      "learning_rate": 0.00018987341772151899,
      "loss": 0.3806,
      "step": 675
    },
    {
      "epoch": 0.19138755980861244,
      "grad_norm": 1.049310326576233,
      "learning_rate": 0.00019127988748241913,
      "loss": 0.5908,
      "step": 680
    },
    {
      "epoch": 0.1927948212777934,
      "grad_norm": 0.950442373752594,
      "learning_rate": 0.00019268635724331928,
      "loss": 0.6755,
      "step": 685
    },
    {
      "epoch": 0.1942020827469744,
      "grad_norm": 0.9287855625152588,
      "learning_rate": 0.00019409282700421943,
      "loss": 0.5703,
      "step": 690
    },
    {
      "epoch": 0.19560934421615536,
      "grad_norm": 0.7228776216506958,
      "learning_rate": 0.00019549929676511955,
      "loss": 0.5971,
      "step": 695
    },
    {
      "epoch": 0.19701660568533633,
      "grad_norm": 1.04582941532135,
      "learning_rate": 0.0001969057665260197,
      "loss": 0.9477,
      "step": 700
    },
    {
      "epoch": 0.19842386715451732,
      "grad_norm": 1.6367225646972656,
      "learning_rate": 0.00019831223628691985,
      "loss": 0.5875,
      "step": 705
    },
    {
      "epoch": 0.19983112862369828,
      "grad_norm": 0.724415123462677,
      "learning_rate": 0.00019971870604782,
      "loss": 0.5531,
      "step": 710
    },
    {
      "epoch": 0.20123839009287925,
      "grad_norm": 1.1167938709259033,
      "learning_rate": 0.00019999980693280142,
      "loss": 0.4568,
      "step": 715
    },
    {
      "epoch": 0.20264565156206024,
      "grad_norm": 3.7291440963745117,
      "learning_rate": 0.00019999902259858484,
      "loss": 0.4796,
      "step": 720
    },
    {
      "epoch": 0.2040529130312412,
      "grad_norm": 1.0626037120819092,
      "learning_rate": 0.00019999763493537887,
      "loss": 0.5454,
      "step": 725
    },
    {
      "epoch": 0.20546017450042217,
      "grad_norm": 1.1673458814620972,
      "learning_rate": 0.00019999564395155577,
      "loss": 0.6261,
      "step": 730
    },
    {
      "epoch": 0.20686743596960316,
      "grad_norm": 1.1592299938201904,
      "learning_rate": 0.00019999304965912784,
      "loss": 0.6726,
      "step": 735
    },
    {
      "epoch": 0.20827469743878413,
      "grad_norm": 1.117803692817688,
      "learning_rate": 0.00019998985207374736,
      "loss": 0.8504,
      "step": 740
    },
    {
      "epoch": 0.2096819589079651,
      "grad_norm": 0.8449244499206543,
      "learning_rate": 0.00019998605121470645,
      "loss": 0.4394,
      "step": 745
    },
    {
      "epoch": 0.2110892203771461,
      "grad_norm": 0.9696683883666992,
      "learning_rate": 0.00019998164710493705,
      "loss": 0.3861,
      "step": 750
    },
    {
      "epoch": 0.21249648184632705,
      "grad_norm": 1.5206379890441895,
      "learning_rate": 0.00019997663977101068,
      "loss": 0.6289,
      "step": 755
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 1.5071372985839844,
      "learning_rate": 0.00019997102924313836,
      "loss": 0.8584,
      "step": 760
    },
    {
      "epoch": 0.215311004784689,
      "grad_norm": 0.9600889086723328,
      "learning_rate": 0.00019996481555517028,
      "loss": 0.3949,
      "step": 765
    },
    {
      "epoch": 0.21671826625386997,
      "grad_norm": 0.8249372839927673,
      "learning_rate": 0.00019995799874459585,
      "loss": 0.559,
      "step": 770
    },
    {
      "epoch": 0.21812552772305094,
      "grad_norm": 1.2509324550628662,
      "learning_rate": 0.00019995057885254333,
      "loss": 0.5327,
      "step": 775
    },
    {
      "epoch": 0.2195327891922319,
      "grad_norm": 0.8242643475532532,
      "learning_rate": 0.00019994255592377936,
      "loss": 0.4605,
      "step": 780
    },
    {
      "epoch": 0.2209400506614129,
      "grad_norm": 0.7586041688919067,
      "learning_rate": 0.00019993393000670916,
      "loss": 0.4722,
      "step": 785
    },
    {
      "epoch": 0.22234731213059386,
      "grad_norm": 1.2805287837982178,
      "learning_rate": 0.00019992470115337592,
      "loss": 0.2861,
      "step": 790
    },
    {
      "epoch": 0.22375457359977483,
      "grad_norm": 1.6375665664672852,
      "learning_rate": 0.00019991486941946048,
      "loss": 0.5846,
      "step": 795
    },
    {
      "epoch": 0.22516183506895582,
      "grad_norm": 0.8348977565765381,
      "learning_rate": 0.00019990443486428118,
      "loss": 0.4657,
      "step": 800
    },
    {
      "epoch": 0.22656909653813678,
      "grad_norm": 1.1735246181488037,
      "learning_rate": 0.0001998933975507933,
      "loss": 0.6255,
      "step": 805
    },
    {
      "epoch": 0.22797635800731775,
      "grad_norm": 1.1627134084701538,
      "learning_rate": 0.00019988175754558874,
      "loss": 0.7479,
      "step": 810
    },
    {
      "epoch": 0.22938361947649874,
      "grad_norm": 1.916646957397461,
      "learning_rate": 0.00019986951491889578,
      "loss": 0.4814,
      "step": 815
    },
    {
      "epoch": 0.2307908809456797,
      "grad_norm": 1.5607751607894897,
      "learning_rate": 0.00019985666974457847,
      "loss": 0.5807,
      "step": 820
    },
    {
      "epoch": 0.23219814241486067,
      "grad_norm": 0.759840726852417,
      "learning_rate": 0.0001998432221001362,
      "loss": 0.5299,
      "step": 825
    },
    {
      "epoch": 0.23360540388404166,
      "grad_norm": 0.8141459226608276,
      "learning_rate": 0.0001998291720667033,
      "loss": 0.584,
      "step": 830
    },
    {
      "epoch": 0.23501266535322263,
      "grad_norm": 1.113457441329956,
      "learning_rate": 0.00019981451972904854,
      "loss": 0.5733,
      "step": 835
    },
    {
      "epoch": 0.2364199268224036,
      "grad_norm": 1.1313204765319824,
      "learning_rate": 0.00019979926517557458,
      "loss": 0.6995,
      "step": 840
    },
    {
      "epoch": 0.23782718829158458,
      "grad_norm": 0.8379271626472473,
      "learning_rate": 0.00019978340849831743,
      "loss": 0.3914,
      "step": 845
    },
    {
      "epoch": 0.23923444976076555,
      "grad_norm": 0.8467435240745544,
      "learning_rate": 0.00019976694979294596,
      "loss": 0.6813,
      "step": 850
    },
    {
      "epoch": 0.24064171122994651,
      "grad_norm": 1.30973219871521,
      "learning_rate": 0.00019974988915876134,
      "loss": 0.4174,
      "step": 855
    },
    {
      "epoch": 0.2420489726991275,
      "grad_norm": 0.9715356230735779,
      "learning_rate": 0.0001997322266986963,
      "loss": 0.4208,
      "step": 860
    },
    {
      "epoch": 0.24345623416830847,
      "grad_norm": 1.0101361274719238,
      "learning_rate": 0.0001997139625193146,
      "loss": 0.602,
      "step": 865
    },
    {
      "epoch": 0.24486349563748944,
      "grad_norm": 0.9341487288475037,
      "learning_rate": 0.0001996950967308104,
      "loss": 0.3989,
      "step": 870
    },
    {
      "epoch": 0.24627075710667043,
      "grad_norm": 1.2196135520935059,
      "learning_rate": 0.00019967562944700763,
      "loss": 0.4883,
      "step": 875
    },
    {
      "epoch": 0.2476780185758514,
      "grad_norm": 1.2374253273010254,
      "learning_rate": 0.00019965556078535917,
      "loss": 0.7397,
      "step": 880
    },
    {
      "epoch": 0.24908528004503236,
      "grad_norm": 0.561997652053833,
      "learning_rate": 0.00019963489086694626,
      "loss": 0.7548,
      "step": 885
    },
    {
      "epoch": 0.25049254151421335,
      "grad_norm": 0.8023036122322083,
      "learning_rate": 0.00019961361981647775,
      "loss": 0.4486,
      "step": 890
    },
    {
      "epoch": 0.2518998029833943,
      "grad_norm": 0.9484225511550903,
      "learning_rate": 0.00019959174776228928,
      "loss": 0.4158,
      "step": 895
    },
    {
      "epoch": 0.2533070644525753,
      "grad_norm": 1.119430661201477,
      "learning_rate": 0.0001995692748363426,
      "loss": 0.7553,
      "step": 900
    },
    {
      "epoch": 0.25471432592175625,
      "grad_norm": 1.4776628017425537,
      "learning_rate": 0.0001995462011742247,
      "loss": 0.2808,
      "step": 905
    },
    {
      "epoch": 0.2561215873909372,
      "grad_norm": 1.370290756225586,
      "learning_rate": 0.00019952252691514706,
      "loss": 0.4522,
      "step": 910
    },
    {
      "epoch": 0.25752884886011823,
      "grad_norm": 1.1513909101486206,
      "learning_rate": 0.00019949825220194468,
      "loss": 0.5382,
      "step": 915
    },
    {
      "epoch": 0.2589361103292992,
      "grad_norm": 1.0892587900161743,
      "learning_rate": 0.00019947337718107547,
      "loss": 0.5407,
      "step": 920
    },
    {
      "epoch": 0.26034337179848016,
      "grad_norm": 1.1014186143875122,
      "learning_rate": 0.00019944790200261903,
      "loss": 0.5723,
      "step": 925
    },
    {
      "epoch": 0.2617506332676611,
      "grad_norm": 1.4293971061706543,
      "learning_rate": 0.000199421826820276,
      "loss": 0.7333,
      "step": 930
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.5284586548805237,
      "learning_rate": 0.00019939515179136713,
      "loss": 0.6351,
      "step": 935
    },
    {
      "epoch": 0.26456515620602306,
      "grad_norm": 0.7904505133628845,
      "learning_rate": 0.0001993678770768321,
      "loss": 0.6792,
      "step": 940
    },
    {
      "epoch": 0.2659724176752041,
      "grad_norm": 0.5654340982437134,
      "learning_rate": 0.0001993400028412288,
      "loss": 0.4223,
      "step": 945
    },
    {
      "epoch": 0.26737967914438504,
      "grad_norm": 0.9616327285766602,
      "learning_rate": 0.00019931152925273225,
      "loss": 0.4585,
      "step": 950
    },
    {
      "epoch": 0.268786940613566,
      "grad_norm": 1.3930063247680664,
      "learning_rate": 0.00019928245648313347,
      "loss": 0.7828,
      "step": 955
    },
    {
      "epoch": 0.27019420208274697,
      "grad_norm": 1.6367273330688477,
      "learning_rate": 0.00019925278470783866,
      "loss": 0.6883,
      "step": 960
    },
    {
      "epoch": 0.27160146355192794,
      "grad_norm": 0.9764294028282166,
      "learning_rate": 0.00019922251410586802,
      "loss": 0.4474,
      "step": 965
    },
    {
      "epoch": 0.2730087250211089,
      "grad_norm": 0.7450019121170044,
      "learning_rate": 0.00019919164485985463,
      "loss": 0.436,
      "step": 970
    },
    {
      "epoch": 0.2744159864902899,
      "grad_norm": 0.774627149105072,
      "learning_rate": 0.0001991601771560434,
      "loss": 0.3708,
      "step": 975
    },
    {
      "epoch": 0.2758232479594709,
      "grad_norm": 1.1829273700714111,
      "learning_rate": 0.00019912811118429,
      "loss": 0.4453,
      "step": 980
    },
    {
      "epoch": 0.27723050942865185,
      "grad_norm": 1.0340484380722046,
      "learning_rate": 0.0001990954471380596,
      "loss": 0.3123,
      "step": 985
    },
    {
      "epoch": 0.2786377708978328,
      "grad_norm": 0.6128121018409729,
      "learning_rate": 0.00019906218521442576,
      "loss": 0.3459,
      "step": 990
    },
    {
      "epoch": 0.2800450323670138,
      "grad_norm": 0.8443979024887085,
      "learning_rate": 0.00019902832561406934,
      "loss": 0.7583,
      "step": 995
    },
    {
      "epoch": 0.28145229383619474,
      "grad_norm": 1.4136847257614136,
      "learning_rate": 0.00019899386854127705,
      "loss": 0.6206,
      "step": 1000
    },
    {
      "epoch": 0.28285955530537576,
      "grad_norm": 0.7922631502151489,
      "learning_rate": 0.00019895881420394052,
      "loss": 0.5676,
      "step": 1005
    },
    {
      "epoch": 0.28426681677455673,
      "grad_norm": 1.7876763343811035,
      "learning_rate": 0.0001989231628135547,
      "loss": 0.5216,
      "step": 1010
    },
    {
      "epoch": 0.2856740782437377,
      "grad_norm": 1.3975410461425781,
      "learning_rate": 0.00019888691458521692,
      "loss": 0.5053,
      "step": 1015
    },
    {
      "epoch": 0.28708133971291866,
      "grad_norm": 1.0760260820388794,
      "learning_rate": 0.00019885006973762535,
      "loss": 0.3415,
      "step": 1020
    },
    {
      "epoch": 0.2884886011820996,
      "grad_norm": 1.2067842483520508,
      "learning_rate": 0.00019881262849307785,
      "loss": 0.4352,
      "step": 1025
    },
    {
      "epoch": 0.2898958626512806,
      "grad_norm": 0.8484588265419006,
      "learning_rate": 0.0001987745910774705,
      "loss": 0.6558,
      "step": 1030
    },
    {
      "epoch": 0.29130312412046155,
      "grad_norm": 1.2854669094085693,
      "learning_rate": 0.00019873595772029628,
      "loss": 0.5144,
      "step": 1035
    },
    {
      "epoch": 0.2927103855896426,
      "grad_norm": 0.8903659582138062,
      "learning_rate": 0.00019869672865464373,
      "loss": 0.7212,
      "step": 1040
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 1.1273301839828491,
      "learning_rate": 0.00019865690411719546,
      "loss": 0.5763,
      "step": 1045
    },
    {
      "epoch": 0.2955249085280045,
      "grad_norm": 1.6163692474365234,
      "learning_rate": 0.00019861648434822687,
      "loss": 0.8076,
      "step": 1050
    },
    {
      "epoch": 0.29693216999718547,
      "grad_norm": 1.0796860456466675,
      "learning_rate": 0.00019857546959160444,
      "loss": 0.8208,
      "step": 1055
    },
    {
      "epoch": 0.29833943146636643,
      "grad_norm": 0.8399056792259216,
      "learning_rate": 0.00019853386009478454,
      "loss": 0.5939,
      "step": 1060
    },
    {
      "epoch": 0.2997466929355474,
      "grad_norm": 1.2428550720214844,
      "learning_rate": 0.0001984916561088118,
      "loss": 0.2594,
      "step": 1065
    },
    {
      "epoch": 0.3011539544047284,
      "grad_norm": 2.2983717918395996,
      "learning_rate": 0.00019844885788831756,
      "loss": 0.7697,
      "step": 1070
    },
    {
      "epoch": 0.3025612158739094,
      "grad_norm": 1.0774344205856323,
      "learning_rate": 0.0001984054656915184,
      "loss": 0.6441,
      "step": 1075
    },
    {
      "epoch": 0.30396847734309035,
      "grad_norm": 0.6637004613876343,
      "learning_rate": 0.00019836147978021467,
      "loss": 0.4219,
      "step": 1080
    },
    {
      "epoch": 0.3053757388122713,
      "grad_norm": 0.9496357440948486,
      "learning_rate": 0.00019831690041978862,
      "loss": 0.6518,
      "step": 1085
    },
    {
      "epoch": 0.3067830002814523,
      "grad_norm": 1.3843315839767456,
      "learning_rate": 0.00019827172787920315,
      "loss": 0.6269,
      "step": 1090
    },
    {
      "epoch": 0.30819026175063324,
      "grad_norm": 0.9899942278862,
      "learning_rate": 0.0001982259624309999,
      "loss": 0.5791,
      "step": 1095
    },
    {
      "epoch": 0.30959752321981426,
      "grad_norm": 0.8998156785964966,
      "learning_rate": 0.00019817960435129778,
      "loss": 0.7362,
      "step": 1100
    },
    {
      "epoch": 0.31100478468899523,
      "grad_norm": 0.615544319152832,
      "learning_rate": 0.00019813265391979137,
      "loss": 0.457,
      "step": 1105
    },
    {
      "epoch": 0.3124120461581762,
      "grad_norm": 1.026685118675232,
      "learning_rate": 0.00019808511141974886,
      "loss": 0.5494,
      "step": 1110
    },
    {
      "epoch": 0.31381930762735716,
      "grad_norm": 1.0256643295288086,
      "learning_rate": 0.00019803697713801084,
      "loss": 0.3588,
      "step": 1115
    },
    {
      "epoch": 0.3152265690965381,
      "grad_norm": 0.8720577359199524,
      "learning_rate": 0.00019798825136498814,
      "loss": 0.5563,
      "step": 1120
    },
    {
      "epoch": 0.3166338305657191,
      "grad_norm": 0.8864659667015076,
      "learning_rate": 0.00019793893439466043,
      "loss": 0.3091,
      "step": 1125
    },
    {
      "epoch": 0.3180410920349001,
      "grad_norm": 1.0853145122528076,
      "learning_rate": 0.00019788902652457412,
      "loss": 0.6204,
      "step": 1130
    },
    {
      "epoch": 0.3194483535040811,
      "grad_norm": 1.6496775150299072,
      "learning_rate": 0.0001978385280558409,
      "loss": 0.4948,
      "step": 1135
    },
    {
      "epoch": 0.32085561497326204,
      "grad_norm": 1.668879508972168,
      "learning_rate": 0.00019778743929313555,
      "loss": 0.7545,
      "step": 1140
    },
    {
      "epoch": 0.322262876442443,
      "grad_norm": 0.7751437425613403,
      "learning_rate": 0.00019773576054469446,
      "loss": 0.4416,
      "step": 1145
    },
    {
      "epoch": 0.32367013791162397,
      "grad_norm": 1.3606644868850708,
      "learning_rate": 0.0001976834921223135,
      "loss": 0.4837,
      "step": 1150
    },
    {
      "epoch": 0.32507739938080493,
      "grad_norm": 0.5276009440422058,
      "learning_rate": 0.0001976306343413463,
      "loss": 0.2264,
      "step": 1155
    },
    {
      "epoch": 0.32648466084998595,
      "grad_norm": 1.034682035446167,
      "learning_rate": 0.00019757718752070239,
      "loss": 0.5388,
      "step": 1160
    },
    {
      "epoch": 0.3278919223191669,
      "grad_norm": 0.9205548763275146,
      "learning_rate": 0.00019752315198284497,
      "loss": 0.7432,
      "step": 1165
    },
    {
      "epoch": 0.3292991837883479,
      "grad_norm": 0.2892135977745056,
      "learning_rate": 0.00019746852805378932,
      "loss": 0.2681,
      "step": 1170
    },
    {
      "epoch": 0.33070644525752885,
      "grad_norm": 1.4844127893447876,
      "learning_rate": 0.0001974133160631007,
      "loss": 0.4837,
      "step": 1175
    },
    {
      "epoch": 0.3321137067267098,
      "grad_norm": 0.7771471738815308,
      "learning_rate": 0.00019735751634389226,
      "loss": 0.7133,
      "step": 1180
    },
    {
      "epoch": 0.3335209681958908,
      "grad_norm": 1.23273766040802,
      "learning_rate": 0.00019730112923282321,
      "loss": 0.789,
      "step": 1185
    },
    {
      "epoch": 0.3349282296650718,
      "grad_norm": 1.751483678817749,
      "learning_rate": 0.0001972441550700966,
      "loss": 0.7569,
      "step": 1190
    },
    {
      "epoch": 0.33633549113425276,
      "grad_norm": 0.31647899746894836,
      "learning_rate": 0.00019718659419945756,
      "loss": 0.4276,
      "step": 1195
    },
    {
      "epoch": 0.3377427526034337,
      "grad_norm": 1.3560551404953003,
      "learning_rate": 0.00019712844696819076,
      "loss": 0.4853,
      "step": 1200
    },
    {
      "epoch": 0.3391500140726147,
      "grad_norm": 1.571906328201294,
      "learning_rate": 0.00019706971372711882,
      "loss": 0.3889,
      "step": 1205
    },
    {
      "epoch": 0.34055727554179566,
      "grad_norm": 1.2469801902770996,
      "learning_rate": 0.00019701039483059981,
      "loss": 0.5063,
      "step": 1210
    },
    {
      "epoch": 0.3419645370109766,
      "grad_norm": 0.660874605178833,
      "learning_rate": 0.00019695049063652543,
      "loss": 0.4789,
      "step": 1215
    },
    {
      "epoch": 0.3433717984801576,
      "grad_norm": 0.9069953560829163,
      "learning_rate": 0.00019689000150631845,
      "loss": 0.393,
      "step": 1220
    },
    {
      "epoch": 0.3447790599493386,
      "grad_norm": 1.9359229803085327,
      "learning_rate": 0.000196828927804931,
      "loss": 0.4297,
      "step": 1225
    },
    {
      "epoch": 0.34618632141851957,
      "grad_norm": 1.063952088356018,
      "learning_rate": 0.00019676726990084195,
      "loss": 0.5455,
      "step": 1230
    },
    {
      "epoch": 0.34759358288770054,
      "grad_norm": 1.7802363634109497,
      "learning_rate": 0.000196705028166055,
      "loss": 0.5684,
      "step": 1235
    },
    {
      "epoch": 0.3490008443568815,
      "grad_norm": 1.1787841320037842,
      "learning_rate": 0.00019664220297609624,
      "loss": 0.6942,
      "step": 1240
    },
    {
      "epoch": 0.35040810582606247,
      "grad_norm": 1.146467924118042,
      "learning_rate": 0.00019657879471001195,
      "loss": 0.6188,
      "step": 1245
    },
    {
      "epoch": 0.35181536729524343,
      "grad_norm": 1.322690486907959,
      "learning_rate": 0.0001965148037503663,
      "loss": 0.5142,
      "step": 1250
    },
    {
      "epoch": 0.35322262876442445,
      "grad_norm": 0.8079725503921509,
      "learning_rate": 0.0001964502304832391,
      "loss": 0.4729,
      "step": 1255
    },
    {
      "epoch": 0.3546298902336054,
      "grad_norm": 1.8152616024017334,
      "learning_rate": 0.0001963850752982234,
      "loss": 0.7246,
      "step": 1260
    },
    {
      "epoch": 0.3560371517027864,
      "grad_norm": 1.4570809602737427,
      "learning_rate": 0.00019631933858842317,
      "loss": 0.8113,
      "step": 1265
    },
    {
      "epoch": 0.35744441317196735,
      "grad_norm": 1.1229805946350098,
      "learning_rate": 0.00019625302075045088,
      "loss": 0.5401,
      "step": 1270
    },
    {
      "epoch": 0.3588516746411483,
      "grad_norm": 0.693499743938446,
      "learning_rate": 0.00019618612218442517,
      "loss": 0.3536,
      "step": 1275
    },
    {
      "epoch": 0.3602589361103293,
      "grad_norm": 1.592119574546814,
      "learning_rate": 0.00019611864329396853,
      "loss": 0.5994,
      "step": 1280
    },
    {
      "epoch": 0.3616661975795103,
      "grad_norm": 1.087098479270935,
      "learning_rate": 0.00019605058448620452,
      "loss": 0.5211,
      "step": 1285
    },
    {
      "epoch": 0.36307345904869126,
      "grad_norm": 1.002854585647583,
      "learning_rate": 0.0001959819461717557,
      "loss": 0.6473,
      "step": 1290
    },
    {
      "epoch": 0.3644807205178722,
      "grad_norm": 1.2526451349258423,
      "learning_rate": 0.00019591272876474106,
      "loss": 0.4721,
      "step": 1295
    },
    {
      "epoch": 0.3658879819870532,
      "grad_norm": 0.9391024112701416,
      "learning_rate": 0.00019584293268277324,
      "loss": 0.5849,
      "step": 1300
    },
    {
      "epoch": 0.36729524345623416,
      "grad_norm": 1.1725986003875732,
      "learning_rate": 0.00019577255834695643,
      "loss": 0.4718,
      "step": 1305
    },
    {
      "epoch": 0.3687025049254151,
      "grad_norm": 1.1449577808380127,
      "learning_rate": 0.00019570160618188353,
      "loss": 0.5429,
      "step": 1310
    },
    {
      "epoch": 0.37010976639459614,
      "grad_norm": 1.8632793426513672,
      "learning_rate": 0.00019563007661563367,
      "loss": 0.5791,
      "step": 1315
    },
    {
      "epoch": 0.3715170278637771,
      "grad_norm": 0.6620994210243225,
      "learning_rate": 0.00019555797007976975,
      "loss": 0.4016,
      "step": 1320
    },
    {
      "epoch": 0.37292428933295807,
      "grad_norm": 1.7540533542633057,
      "learning_rate": 0.00019548528700933559,
      "loss": 0.5039,
      "step": 1325
    },
    {
      "epoch": 0.37433155080213903,
      "grad_norm": 0.9329980611801147,
      "learning_rate": 0.00019541202784285352,
      "loss": 0.403,
      "step": 1330
    },
    {
      "epoch": 0.37573881227132,
      "grad_norm": 0.4586445689201355,
      "learning_rate": 0.00019533819302232168,
      "loss": 0.3944,
      "step": 1335
    },
    {
      "epoch": 0.37714607374050096,
      "grad_norm": 1.575636863708496,
      "learning_rate": 0.00019526378299321127,
      "loss": 0.5372,
      "step": 1340
    },
    {
      "epoch": 0.378553335209682,
      "grad_norm": 1.2038066387176514,
      "learning_rate": 0.00019518879820446398,
      "loss": 0.4409,
      "step": 1345
    },
    {
      "epoch": 0.37996059667886295,
      "grad_norm": 0.9737414121627808,
      "learning_rate": 0.0001951132391084892,
      "loss": 0.7155,
      "step": 1350
    },
    {
      "epoch": 0.3813678581480439,
      "grad_norm": 1.0166410207748413,
      "learning_rate": 0.00019503710616116128,
      "loss": 0.6772,
      "step": 1355
    },
    {
      "epoch": 0.3827751196172249,
      "grad_norm": 1.1660302877426147,
      "learning_rate": 0.0001949603998218169,
      "loss": 0.7076,
      "step": 1360
    },
    {
      "epoch": 0.38418238108640584,
      "grad_norm": 0.576275110244751,
      "learning_rate": 0.0001948831205532521,
      "loss": 0.5392,
      "step": 1365
    },
    {
      "epoch": 0.3855896425555868,
      "grad_norm": 1.453596830368042,
      "learning_rate": 0.00019480526882171976,
      "loss": 0.7963,
      "step": 1370
    },
    {
      "epoch": 0.38699690402476783,
      "grad_norm": 0.7829164862632751,
      "learning_rate": 0.00019472684509692646,
      "loss": 0.3505,
      "step": 1375
    },
    {
      "epoch": 0.3884041654939488,
      "grad_norm": 0.9208312630653381,
      "learning_rate": 0.0001946478498520299,
      "loss": 0.5539,
      "step": 1380
    },
    {
      "epoch": 0.38981142696312976,
      "grad_norm": 1.0814006328582764,
      "learning_rate": 0.00019456828356363598,
      "loss": 0.3839,
      "step": 1385
    },
    {
      "epoch": 0.3912186884323107,
      "grad_norm": 1.592490553855896,
      "learning_rate": 0.00019448814671179585,
      "loss": 0.6688,
      "step": 1390
    },
    {
      "epoch": 0.3926259499014917,
      "grad_norm": 0.880333662033081,
      "learning_rate": 0.00019440743978000312,
      "loss": 0.6542,
      "step": 1395
    },
    {
      "epoch": 0.39403321137067265,
      "grad_norm": 0.516769528388977,
      "learning_rate": 0.00019432616325519084,
      "loss": 0.4571,
      "step": 1400
    },
    {
      "epoch": 0.3954404728398536,
      "grad_norm": 1.1296850442886353,
      "learning_rate": 0.00019424431762772866,
      "loss": 0.5596,
      "step": 1405
    },
    {
      "epoch": 0.39684773430903464,
      "grad_norm": 0.8967404365539551,
      "learning_rate": 0.00019416190339141976,
      "loss": 0.4144,
      "step": 1410
    },
    {
      "epoch": 0.3982549957782156,
      "grad_norm": 1.983446478843689,
      "learning_rate": 0.00019407892104349804,
      "loss": 0.2378,
      "step": 1415
    },
    {
      "epoch": 0.39966225724739657,
      "grad_norm": 0.868871808052063,
      "learning_rate": 0.00019399537108462494,
      "loss": 0.8016,
      "step": 1420
    },
    {
      "epoch": 0.40106951871657753,
      "grad_norm": 1.9956140518188477,
      "learning_rate": 0.00019391125401888644,
      "loss": 0.5541,
      "step": 1425
    },
    {
      "epoch": 0.4024767801857585,
      "grad_norm": 1.437330961227417,
      "learning_rate": 0.00019382657035379026,
      "loss": 0.299,
      "step": 1430
    },
    {
      "epoch": 0.40388404165493946,
      "grad_norm": 1.0055358409881592,
      "learning_rate": 0.00019374132060026242,
      "loss": 0.5419,
      "step": 1435
    },
    {
      "epoch": 0.4052913031241205,
      "grad_norm": 1.3034961223602295,
      "learning_rate": 0.00019365550527264443,
      "loss": 0.7488,
      "step": 1440
    },
    {
      "epoch": 0.40669856459330145,
      "grad_norm": 1.9104148149490356,
      "learning_rate": 0.0001935691248886901,
      "loss": 0.4039,
      "step": 1445
    },
    {
      "epoch": 0.4081058260624824,
      "grad_norm": 1.3824232816696167,
      "learning_rate": 0.00019348217996956245,
      "loss": 0.5864,
      "step": 1450
    },
    {
      "epoch": 0.4095130875316634,
      "grad_norm": 0.18742340803146362,
      "learning_rate": 0.00019339467103983044,
      "loss": 0.3931,
      "step": 1455
    },
    {
      "epoch": 0.41092034900084434,
      "grad_norm": 1.0197157859802246,
      "learning_rate": 0.00019330659862746603,
      "loss": 0.4888,
      "step": 1460
    },
    {
      "epoch": 0.4123276104700253,
      "grad_norm": 1.248344898223877,
      "learning_rate": 0.00019321796326384082,
      "loss": 0.4607,
      "step": 1465
    },
    {
      "epoch": 0.41373487193920633,
      "grad_norm": 0.8360584378242493,
      "learning_rate": 0.00019312876548372286,
      "loss": 0.5113,
      "step": 1470
    },
    {
      "epoch": 0.4151421334083873,
      "grad_norm": 1.7348827123641968,
      "learning_rate": 0.00019303900582527344,
      "loss": 0.511,
      "step": 1475
    },
    {
      "epoch": 0.41654939487756826,
      "grad_norm": 1.2273963689804077,
      "learning_rate": 0.00019294868483004396,
      "loss": 0.3603,
      "step": 1480
    },
    {
      "epoch": 0.4179566563467492,
      "grad_norm": 1.0628288984298706,
      "learning_rate": 0.00019285780304297245,
      "loss": 0.5377,
      "step": 1485
    },
    {
      "epoch": 0.4193639178159302,
      "grad_norm": 1.1135960817337036,
      "learning_rate": 0.00019276636101238045,
      "loss": 0.3928,
      "step": 1490
    },
    {
      "epoch": 0.42077117928511115,
      "grad_norm": 0.8842063546180725,
      "learning_rate": 0.00019267435928996962,
      "loss": 0.4252,
      "step": 1495
    },
    {
      "epoch": 0.4221784407542922,
      "grad_norm": 0.56885826587677,
      "learning_rate": 0.00019258179843081847,
      "loss": 0.5456,
      "step": 1500
    },
    {
      "epoch": 0.42358570222347314,
      "grad_norm": 0.5579463243484497,
      "learning_rate": 0.00019248867899337896,
      "loss": 0.3585,
      "step": 1505
    },
    {
      "epoch": 0.4249929636926541,
      "grad_norm": 1.1640398502349854,
      "learning_rate": 0.00019239500153947305,
      "loss": 0.5048,
      "step": 1510
    },
    {
      "epoch": 0.42640022516183507,
      "grad_norm": 0.8812012672424316,
      "learning_rate": 0.00019230076663428962,
      "loss": 0.4503,
      "step": 1515
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 1.1245768070220947,
      "learning_rate": 0.0001922059748463807,
      "loss": 0.364,
      "step": 1520
    },
    {
      "epoch": 0.429214748100197,
      "grad_norm": 1.0180691480636597,
      "learning_rate": 0.00019211062674765817,
      "loss": 0.4229,
      "step": 1525
    },
    {
      "epoch": 0.430622009569378,
      "grad_norm": 1.3053510189056396,
      "learning_rate": 0.0001920147229133904,
      "loss": 0.4794,
      "step": 1530
    },
    {
      "epoch": 0.432029271038559,
      "grad_norm": 0.8506336808204651,
      "learning_rate": 0.00019191826392219867,
      "loss": 0.5524,
      "step": 1535
    },
    {
      "epoch": 0.43343653250773995,
      "grad_norm": 1.0151127576828003,
      "learning_rate": 0.00019182125035605376,
      "loss": 0.5024,
      "step": 1540
    },
    {
      "epoch": 0.4348437939769209,
      "grad_norm": 1.094344973564148,
      "learning_rate": 0.00019172368280027233,
      "loss": 0.5535,
      "step": 1545
    },
    {
      "epoch": 0.4362510554461019,
      "grad_norm": 1.0190297365188599,
      "learning_rate": 0.00019162556184351348,
      "loss": 0.393,
      "step": 1550
    },
    {
      "epoch": 0.43765831691528284,
      "grad_norm": 1.502398133277893,
      "learning_rate": 0.00019152688807777516,
      "loss": 0.4018,
      "step": 1555
    },
    {
      "epoch": 0.4390655783844638,
      "grad_norm": 0.8518544435501099,
      "learning_rate": 0.00019142766209839064,
      "loss": 0.5682,
      "step": 1560
    },
    {
      "epoch": 0.4404728398536448,
      "grad_norm": 0.42057764530181885,
      "learning_rate": 0.0001913278845040249,
      "loss": 0.2624,
      "step": 1565
    },
    {
      "epoch": 0.4418801013228258,
      "grad_norm": 0.8204036951065063,
      "learning_rate": 0.00019122755589667093,
      "loss": 0.6987,
      "step": 1570
    },
    {
      "epoch": 0.44328736279200676,
      "grad_norm": 1.2145869731903076,
      "learning_rate": 0.00019112667688164626,
      "loss": 0.575,
      "step": 1575
    },
    {
      "epoch": 0.4446946242611877,
      "grad_norm": 1.5361616611480713,
      "learning_rate": 0.0001910252480675891,
      "loss": 0.466,
      "step": 1580
    },
    {
      "epoch": 0.4461018857303687,
      "grad_norm": 1.8853634595870972,
      "learning_rate": 0.00019092327006645497,
      "loss": 0.4938,
      "step": 1585
    },
    {
      "epoch": 0.44750914719954965,
      "grad_norm": 1.2990604639053345,
      "learning_rate": 0.00019082074349351268,
      "loss": 0.5759,
      "step": 1590
    },
    {
      "epoch": 0.44891640866873067,
      "grad_norm": 1.3845807313919067,
      "learning_rate": 0.0001907176689673408,
      "loss": 0.6341,
      "step": 1595
    },
    {
      "epoch": 0.45032367013791164,
      "grad_norm": 0.8449406027793884,
      "learning_rate": 0.0001906140471098239,
      "loss": 0.546,
      "step": 1600
    },
    {
      "epoch": 0.4517309316070926,
      "grad_norm": 1.2000244855880737,
      "learning_rate": 0.00019050987854614886,
      "loss": 0.5149,
      "step": 1605
    },
    {
      "epoch": 0.45313819307627357,
      "grad_norm": 0.8644974827766418,
      "learning_rate": 0.0001904051639048009,
      "loss": 0.5419,
      "step": 1610
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.4699718654155731,
      "learning_rate": 0.00019029990381756002,
      "loss": 0.3501,
      "step": 1615
    },
    {
      "epoch": 0.4559527160146355,
      "grad_norm": 0.6143896579742432,
      "learning_rate": 0.00019019409891949703,
      "loss": 0.4732,
      "step": 1620
    },
    {
      "epoch": 0.4573599774838165,
      "grad_norm": 1.4060841798782349,
      "learning_rate": 0.0001900877498489698,
      "loss": 0.6648,
      "step": 1625
    },
    {
      "epoch": 0.4587672389529975,
      "grad_norm": 1.3622968196868896,
      "learning_rate": 0.00018998085724761935,
      "loss": 0.3465,
      "step": 1630
    },
    {
      "epoch": 0.46017450042217845,
      "grad_norm": 0.6618224382400513,
      "learning_rate": 0.00018987342176036607,
      "loss": 0.5135,
      "step": 1635
    },
    {
      "epoch": 0.4615817618913594,
      "grad_norm": 1.253423810005188,
      "learning_rate": 0.0001897654440354057,
      "loss": 0.5411,
      "step": 1640
    },
    {
      "epoch": 0.4629890233605404,
      "grad_norm": 1.0359442234039307,
      "learning_rate": 0.00018965692472420554,
      "loss": 0.5266,
      "step": 1645
    },
    {
      "epoch": 0.46439628482972134,
      "grad_norm": 1.4265358448028564,
      "learning_rate": 0.00018954786448150047,
      "loss": 0.481,
      "step": 1650
    },
    {
      "epoch": 0.46580354629890236,
      "grad_norm": 0.6981240510940552,
      "learning_rate": 0.00018943826396528897,
      "loss": 0.287,
      "step": 1655
    },
    {
      "epoch": 0.4672108077680833,
      "grad_norm": 0.8274213671684265,
      "learning_rate": 0.00018932812383682917,
      "loss": 0.4081,
      "step": 1660
    },
    {
      "epoch": 0.4686180692372643,
      "grad_norm": 0.7835836410522461,
      "learning_rate": 0.0001892174447606349,
      "loss": 0.344,
      "step": 1665
    },
    {
      "epoch": 0.47002533070644525,
      "grad_norm": 1.9255175590515137,
      "learning_rate": 0.00018910622740447167,
      "loss": 0.6834,
      "step": 1670
    },
    {
      "epoch": 0.4714325921756262,
      "grad_norm": 1.7480101585388184,
      "learning_rate": 0.00018899447243935256,
      "loss": 0.4431,
      "step": 1675
    },
    {
      "epoch": 0.4728398536448072,
      "grad_norm": 0.7691779136657715,
      "learning_rate": 0.00018888218053953425,
      "loss": 0.5831,
      "step": 1680
    },
    {
      "epoch": 0.4742471151139882,
      "grad_norm": 0.6671115756034851,
      "learning_rate": 0.00018876935238251296,
| "loss": 0.3096, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.47565437658316917, | |
| "grad_norm": 0.7756052613258362, | |
| "learning_rate": 0.00018865598864902035, | |
| "loss": 0.4505, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.47706163805235013, | |
| "grad_norm": 0.7612590193748474, | |
| "learning_rate": 0.00018854209002301932, | |
| "loss": 0.5595, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.4784688995215311, | |
| "grad_norm": 0.9925332069396973, | |
| "learning_rate": 0.00018842765719170006, | |
| "loss": 0.3256, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.47987616099071206, | |
| "grad_norm": 1.4211307764053345, | |
| "learning_rate": 0.00018831269084547574, | |
| "loss": 0.3897, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.48128342245989303, | |
| "grad_norm": 0.8699591159820557, | |
| "learning_rate": 0.00018819719167797842, | |
| "loss": 0.348, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.48269068392907405, | |
| "grad_norm": 1.1962676048278809, | |
| "learning_rate": 0.00018808116038605493, | |
| "loss": 0.6022, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.484097945398255, | |
| "grad_norm": 1.0962321758270264, | |
| "learning_rate": 0.00018796459766976247, | |
| "loss": 0.4853, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.485505206867436, | |
| "grad_norm": 1.8502682447433472, | |
| "learning_rate": 0.00018784750423236462, | |
| "loss": 0.5438, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.48691246833661694, | |
| "grad_norm": 0.8780159950256348, | |
| "learning_rate": 0.0001877298807803269, | |
| "loss": 0.4728, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.4883197298057979, | |
| "grad_norm": 1.3143213987350464, | |
| "learning_rate": 0.00018761172802331263, | |
| "loss": 0.648, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.4897269912749789, | |
| "grad_norm": 1.3124626874923706, | |
| "learning_rate": 0.00018749304667417863, | |
| "loss": 0.568, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.49113425274415984, | |
| "grad_norm": 1.2247035503387451, | |
| "learning_rate": 0.0001873738374489709, | |
| "loss": 0.3325, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.49254151421334086, | |
| "grad_norm": 0.8056420683860779, | |
| "learning_rate": 0.00018725410106692025, | |
| "loss": 0.5355, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.4939487756825218, | |
| "grad_norm": 1.782456636428833, | |
| "learning_rate": 0.00018713383825043806, | |
| "loss": 0.3927, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.4953560371517028, | |
| "grad_norm": 0.9671362638473511, | |
| "learning_rate": 0.00018701304972511187, | |
| "loss": 0.4428, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.49676329862088375, | |
| "grad_norm": 0.8646135330200195, | |
| "learning_rate": 0.00018689173621970096, | |
| "loss": 0.396, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.4981705600900647, | |
| "grad_norm": 1.406186580657959, | |
| "learning_rate": 0.00018676989846613205, | |
| "loss": 0.4296, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.4995778215592457, | |
| "grad_norm": 1.2148306369781494, | |
| "learning_rate": 0.00018664753719949478, | |
| "loss": 0.3217, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.5009850830284267, | |
| "grad_norm": 2.317777395248413, | |
| "learning_rate": 0.00018652465315803745, | |
| "loss": 0.5039, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5023923444976076, | |
| "grad_norm": 2.461662530899048, | |
| "learning_rate": 0.00018640124708316225, | |
| "loss": 0.5716, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.5037996059667886, | |
| "grad_norm": 1.3684732913970947, | |
| "learning_rate": 0.0001862773197194211, | |
| "loss": 0.3489, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5052068674359697, | |
| "grad_norm": 0.7968658208847046, | |
| "learning_rate": 0.00018615287181451108, | |
| "loss": 0.4202, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.5066141289051506, | |
| "grad_norm": 1.1133559942245483, | |
| "learning_rate": 0.00018602790411926975, | |
| "loss": 0.4799, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5080213903743316, | |
| "grad_norm": 1.4438867568969727, | |
| "learning_rate": 0.0001859024173876709, | |
| "loss": 0.5841, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.5094286518435125, | |
| "grad_norm": 0.5369459986686707, | |
| "learning_rate": 0.0001857764123768196, | |
| "loss": 0.4793, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5108359133126935, | |
| "grad_norm": 0.7949886918067932, | |
| "learning_rate": 0.0001856498898469482, | |
| "loss": 0.4041, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.5122431747818744, | |
| "grad_norm": 0.5967936515808105, | |
| "learning_rate": 0.00018552285056141124, | |
| "loss": 0.3951, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5136504362510554, | |
| "grad_norm": 0.32833540439605713, | |
| "learning_rate": 0.00018539529528668094, | |
| "loss": 0.2362, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.5150576977202365, | |
| "grad_norm": 0.7846612334251404, | |
| "learning_rate": 0.00018526722479234286, | |
| "loss": 0.4279, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5164649591894174, | |
| "grad_norm": 1.5786385536193848, | |
| "learning_rate": 0.00018513863985109095, | |
| "loss": 0.429, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.5178722206585984, | |
| "grad_norm": 1.2571947574615479, | |
| "learning_rate": 0.00018500954123872303, | |
| "loss": 0.6325, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5192794821277793, | |
| "grad_norm": 0.807839035987854, | |
| "learning_rate": 0.00018487992973413605, | |
| "loss": 0.3732, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.5206867435969603, | |
| "grad_norm": 0.9321346282958984, | |
| "learning_rate": 0.00018474980611932144, | |
| "loss": 0.5329, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5220940050661413, | |
| "grad_norm": 1.1516450643539429, | |
| "learning_rate": 0.0001846191711793604, | |
| "loss": 0.553, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.5235012665353223, | |
| "grad_norm": 1.2552000284194946, | |
| "learning_rate": 0.000184488025702419, | |
| "loss": 0.5088, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5249085280045033, | |
| "grad_norm": 0.7412288188934326, | |
| "learning_rate": 0.00018435637047974375, | |
| "loss": 0.623, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 0.7325606942176819, | |
| "learning_rate": 0.0001842242063056565, | |
| "loss": 0.4663, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5277230509428652, | |
| "grad_norm": 0.7041971683502197, | |
| "learning_rate": 0.0001840915339775498, | |
| "loss": 0.3317, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.5291303124120461, | |
| "grad_norm": 0.8097009062767029, | |
| "learning_rate": 0.00018395835429588215, | |
| "loss": 0.5374, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5305375738812271, | |
| "grad_norm": 0.5471770763397217, | |
| "learning_rate": 0.000183824668064173, | |
| "loss": 0.6708, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.5319448353504082, | |
| "grad_norm": 0.9955052137374878, | |
| "learning_rate": 0.00018369047608899798, | |
| "loss": 0.3958, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5333520968195891, | |
| "grad_norm": 0.980060875415802, | |
| "learning_rate": 0.00018355577917998414, | |
| "loss": 0.5356, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.5347593582887701, | |
| "grad_norm": 0.8592010736465454, | |
| "learning_rate": 0.00018342057814980494, | |
| "loss": 0.5253, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.536166619757951, | |
| "grad_norm": 0.8325905799865723, | |
| "learning_rate": 0.00018328487381417532, | |
| "loss": 0.5743, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.537573881227132, | |
| "grad_norm": 1.0972857475280762, | |
| "learning_rate": 0.00018314866699184687, | |
| "loss": 0.6613, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5389811426963129, | |
| "grad_norm": 0.9051984548568726, | |
| "learning_rate": 0.00018301195850460293, | |
| "loss": 0.5146, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.5403884041654939, | |
| "grad_norm": 0.8490184545516968, | |
| "learning_rate": 0.00018287474917725343, | |
| "loss": 0.6052, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.541795665634675, | |
| "grad_norm": 0.9744853377342224, | |
| "learning_rate": 0.00018273703983763017, | |
| "loss": 0.556, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.5432029271038559, | |
| "grad_norm": 0.9393332600593567, | |
| "learning_rate": 0.0001825988313165816, | |
| "loss": 0.6805, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5446101885730369, | |
| "grad_norm": 0.786738932132721, | |
| "learning_rate": 0.0001824601244479679, | |
| "loss": 0.5313, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.5460174500422178, | |
| "grad_norm": 1.7297477722167969, | |
| "learning_rate": 0.00018232092006865606, | |
| "loss": 0.6627, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5474247115113988, | |
| "grad_norm": 0.8226016759872437, | |
| "learning_rate": 0.00018218121901851468, | |
| "loss": 0.4177, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.5488319729805798, | |
| "grad_norm": 1.1636661291122437, | |
| "learning_rate": 0.0001820410221404089, | |
| "loss": 0.5303, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5502392344497608, | |
| "grad_norm": 1.3004634380340576, | |
| "learning_rate": 0.00018190033028019534, | |
| "loss": 0.5114, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.5516464959189418, | |
| "grad_norm": 1.512581706047058, | |
| "learning_rate": 0.00018175914428671716, | |
| "loss": 0.5918, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5530537573881227, | |
| "grad_norm": 0.7482631206512451, | |
| "learning_rate": 0.0001816174650117987, | |
| "loss": 0.6304, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.5544610188573037, | |
| "grad_norm": 1.3120630979537964, | |
| "learning_rate": 0.00018147529331024044, | |
| "loss": 0.5008, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5558682803264846, | |
| "grad_norm": 0.9526933431625366, | |
| "learning_rate": 0.00018133263003981384, | |
| "loss": 0.6951, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.5572755417956656, | |
| "grad_norm": 0.8142489194869995, | |
| "learning_rate": 0.0001811894760612562, | |
| "loss": 0.478, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5586828032648467, | |
| "grad_norm": 1.5639302730560303, | |
| "learning_rate": 0.0001810458322382654, | |
| "loss": 0.6378, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.5600900647340276, | |
| "grad_norm": 0.6878836154937744, | |
| "learning_rate": 0.00018090169943749476, | |
| "loss": 0.6067, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5614973262032086, | |
| "grad_norm": 1.1296664476394653, | |
| "learning_rate": 0.0001807570785285477, | |
| "loss": 0.6044, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.5629045876723895, | |
| "grad_norm": 0.837823748588562, | |
| "learning_rate": 0.00018061197038397268, | |
| "loss": 0.4684, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5643118491415705, | |
| "grad_norm": 1.2144043445587158, | |
| "learning_rate": 0.0001804663758792577, | |
| "loss": 0.3649, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.5657191106107515, | |
| "grad_norm": 0.8372750878334045, | |
| "learning_rate": 0.00018032029589282525, | |
| "loss": 0.4253, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5671263720799324, | |
| "grad_norm": 0.8684276342391968, | |
| "learning_rate": 0.00018017373130602683, | |
| "loss": 0.3992, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.5685336335491135, | |
| "grad_norm": 0.9675285816192627, | |
| "learning_rate": 0.0001800266830031377, | |
| "loss": 0.5995, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5699408950182944, | |
| "grad_norm": 0.9824860692024231, | |
| "learning_rate": 0.00017987915187135157, | |
| "loss": 0.2531, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.5713481564874754, | |
| "grad_norm": 2.90608549118042, | |
| "learning_rate": 0.0001797311388007753, | |
| "loss": 0.6474, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5727554179566563, | |
| "grad_norm": 0.922585666179657, | |
| "learning_rate": 0.00017958264468442332, | |
| "loss": 0.4685, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.5741626794258373, | |
| "grad_norm": 1.4679278135299683, | |
| "learning_rate": 0.00017943367041821243, | |
| "loss": 0.4786, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5755699408950183, | |
| "grad_norm": 0.8750627040863037, | |
| "learning_rate": 0.00017928421690095636, | |
| "loss": 0.317, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.5769772023641992, | |
| "grad_norm": 1.1974796056747437, | |
| "learning_rate": 0.00017913428503436035, | |
| "loss": 0.496, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5783844638333803, | |
| "grad_norm": 0.8931379914283752, | |
| "learning_rate": 0.00017898387572301563, | |
| "loss": 0.6886, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.5797917253025612, | |
| "grad_norm": 1.0573607683181763, | |
| "learning_rate": 0.00017883298987439404, | |
| "loss": 0.5887, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5811989867717422, | |
| "grad_norm": 1.1087405681610107, | |
| "learning_rate": 0.00017868162839884254, | |
| "loss": 0.5817, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.5826062482409231, | |
| "grad_norm": 0.5602430701255798, | |
| "learning_rate": 0.00017852979220957775, | |
| "loss": 0.4194, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5840135097101041, | |
| "grad_norm": 0.9328368306159973, | |
| "learning_rate": 0.00017837748222268037, | |
| "loss": 0.3816, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.5854207711792851, | |
| "grad_norm": 1.4052832126617432, | |
| "learning_rate": 0.00017822469935708965, | |
| "loss": 0.7981, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5868280326484661, | |
| "grad_norm": 1.0276223421096802, | |
| "learning_rate": 0.00017807144453459793, | |
| "loss": 0.4105, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 1.257156491279602, | |
| "learning_rate": 0.00017791771867984503, | |
| "loss": 0.5565, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.589642555586828, | |
| "grad_norm": 1.0978988409042358, | |
| "learning_rate": 0.00017776352272031264, | |
| "loss": 0.5929, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.591049817056009, | |
| "grad_norm": 0.8809897303581238, | |
| "learning_rate": 0.0001776088575863188, | |
| "loss": 0.3527, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.59245707852519, | |
| "grad_norm": 0.6997563242912292, | |
| "learning_rate": 0.00017745372421101223, | |
| "loss": 0.5211, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.5938643399943709, | |
| "grad_norm": 0.9955636262893677, | |
| "learning_rate": 0.00017729812353036668, | |
| "loss": 0.5267, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.595271601463552, | |
| "grad_norm": 0.8788183927536011, | |
| "learning_rate": 0.00017714205648317535, | |
| "loss": 0.5372, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.5966788629327329, | |
| "grad_norm": 1.0072330236434937, | |
| "learning_rate": 0.00017698552401104517, | |
| "loss": 0.5234, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5980861244019139, | |
| "grad_norm": 1.6254470348358154, | |
| "learning_rate": 0.00017682852705839115, | |
| "loss": 0.4621, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.5994933858710948, | |
| "grad_norm": 1.0389853715896606, | |
| "learning_rate": 0.00017667106657243072, | |
| "loss": 0.5439, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6009006473402758, | |
| "grad_norm": 0.9769371151924133, | |
| "learning_rate": 0.00017651314350317787, | |
| "loss": 0.6171, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.6023079088094568, | |
| "grad_norm": 1.7502343654632568, | |
| "learning_rate": 0.0001763547588034376, | |
| "loss": 0.612, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6037151702786377, | |
| "grad_norm": 1.1023430824279785, | |
| "learning_rate": 0.00017619591342880005, | |
| "loss": 0.4228, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.6051224317478188, | |
| "grad_norm": 2.0511550903320312, | |
| "learning_rate": 0.00017603660833763476, | |
| "loss": 0.3462, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6065296932169997, | |
| "grad_norm": 0.7986024022102356, | |
| "learning_rate": 0.00017587684449108497, | |
| "loss": 0.4616, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.6079369546861807, | |
| "grad_norm": 0.7450430989265442, | |
| "learning_rate": 0.00017571662285306166, | |
| "loss": 0.5481, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6093442161553617, | |
| "grad_norm": 1.1748677492141724, | |
| "learning_rate": 0.00017555594439023787, | |
| "loss": 0.5419, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.6107514776245426, | |
| "grad_norm": 0.7183251976966858, | |
| "learning_rate": 0.0001753948100720429, | |
| "loss": 0.4122, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6121587390937236, | |
| "grad_norm": 0.7296462655067444, | |
| "learning_rate": 0.00017523322087065614, | |
| "loss": 0.3651, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.6135660005629046, | |
| "grad_norm": 0.5904517769813538, | |
| "learning_rate": 0.00017507117776100178, | |
| "loss": 0.3728, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6149732620320856, | |
| "grad_norm": 1.5718715190887451, | |
| "learning_rate": 0.00017490868172074232, | |
| "loss": 0.4729, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.6163805235012665, | |
| "grad_norm": 1.053885579109192, | |
| "learning_rate": 0.00017474573373027315, | |
| "loss": 0.4341, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6177877849704475, | |
| "grad_norm": 0.723726212978363, | |
| "learning_rate": 0.00017458233477271628, | |
| "loss": 0.4755, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.6191950464396285, | |
| "grad_norm": 1.133907437324524, | |
| "learning_rate": 0.00017441848583391463, | |
| "loss": 0.7399, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6206023079088094, | |
| "grad_norm": 0.5922422409057617, | |
| "learning_rate": 0.00017425418790242606, | |
| "loss": 0.4381, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.6220095693779905, | |
| "grad_norm": 0.534817636013031, | |
| "learning_rate": 0.0001740894419695172, | |
| "loss": 0.4668, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6234168308471714, | |
| "grad_norm": 0.5950006246566772, | |
| "learning_rate": 0.00017392424902915786, | |
| "loss": 0.3497, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.6248240923163524, | |
| "grad_norm": 3.878748655319214, | |
| "learning_rate": 0.00017375861007801465, | |
| "loss": 0.2247, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6262313537855334, | |
| "grad_norm": 1.3402066230773926, | |
| "learning_rate": 0.00017359252611544505, | |
| "loss": 0.3214, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.6276386152547143, | |
| "grad_norm": 1.3445652723312378, | |
| "learning_rate": 0.0001734259981434917, | |
| "loss": 0.4757, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6290458767238953, | |
| "grad_norm": 0.801052987575531, | |
| "learning_rate": 0.00017325902716687578, | |
| "loss": 0.542, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.6304531381930762, | |
| "grad_norm": 0.6313127279281616, | |
| "learning_rate": 0.0001730916141929916, | |
| "loss": 0.6026, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6318603996622573, | |
| "grad_norm": 0.7048347592353821, | |
| "learning_rate": 0.00017292376023189996, | |
| "loss": 0.4769, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.6332676611314382, | |
| "grad_norm": 1.3377580642700195, | |
| "learning_rate": 0.00017275546629632235, | |
| "loss": 0.3727, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6346749226006192, | |
| "grad_norm": 1.3854931592941284, | |
| "learning_rate": 0.00017258673340163485, | |
| "loss": 0.4537, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.6360821840698002, | |
| "grad_norm": 1.5850138664245605, | |
| "learning_rate": 0.00017241756256586183, | |
| "loss": 0.5933, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6374894455389811, | |
| "grad_norm": 1.3591883182525635, | |
| "learning_rate": 0.00017224795480967, | |
| "loss": 0.3786, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.6388967070081621, | |
| "grad_norm": 0.685483992099762, | |
| "learning_rate": 0.00017207791115636206, | |
| "loss": 0.3562, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.640303968477343, | |
| "grad_norm": 1.1758111715316772, | |
| "learning_rate": 0.00017190743263187076, | |
| "loss": 0.3506, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.6417112299465241, | |
| "grad_norm": 0.9146699905395508, | |
| "learning_rate": 0.00017173652026475247, | |
| "loss": 0.4753, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.643118491415705, | |
| "grad_norm": 0.6895302534103394, | |
| "learning_rate": 0.00017156517508618116, | |
| "loss": 0.2637, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.644525752884886, | |
| "grad_norm": 1.011983036994934, | |
| "learning_rate": 0.00017139339812994204, | |
| "loss": 0.551, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.645933014354067, | |
| "grad_norm": 1.5470740795135498, | |
| "learning_rate": 0.0001712211904324254, | |
| "loss": 0.6397, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.6473402758232479, | |
| "grad_norm": 0.8334661722183228, | |
| "learning_rate": 0.0001710485530326204, | |
| "loss": 0.3297, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.648747537292429, | |
| "grad_norm": 1.3184936046600342, | |
| "learning_rate": 0.00017087548697210868, | |
| "loss": 0.2933, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.6501547987616099, | |
| "grad_norm": 0.6180691719055176, | |
| "learning_rate": 0.00017070199329505815, | |
| "loss": 0.316, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6515620602307909, | |
| "grad_norm": 1.5314627885818481, | |
| "learning_rate": 0.00017052807304821673, | |
| "loss": 0.4908, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.6529693216999719, | |
| "grad_norm": 0.2867351472377777, | |
| "learning_rate": 0.0001703537272809059, | |
| "loss": 0.4078, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6543765831691528, | |
| "grad_norm": 1.513857126235962, | |
| "learning_rate": 0.00017017895704501447, | |
| "loss": 0.5121, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.6557838446383338, | |
| "grad_norm": 0.7989262938499451, | |
| "learning_rate": 0.00017000376339499233, | |
| "loss": 0.4578, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6571911061075147, | |
| "grad_norm": 1.8081159591674805, | |
| "learning_rate": 0.00016982814738784386, | |
| "loss": 0.3809, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.6585983675766958, | |
| "grad_norm": 1.2163859605789185, | |
| "learning_rate": 0.0001696521100831216, | |
| "loss": 0.3293, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6600056290458767, | |
| "grad_norm": 1.5051732063293457, | |
| "learning_rate": 0.00016947565254292016, | |
| "loss": 0.33, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.6614128905150577, | |
| "grad_norm": 0.6793294548988342, | |
| "learning_rate": 0.00016929877583186936, | |
| "loss": 0.5292, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6628201519842387, | |
| "grad_norm": 1.8864996433258057, | |
| "learning_rate": 0.00016912148101712814, | |
| "loss": 0.1853, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.6642274134534196, | |
| "grad_norm": 1.2697969675064087, | |
| "learning_rate": 0.00016894376916837795, | |
| "loss": 0.4886, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6656346749226006, | |
| "grad_norm": 1.4264556169509888, | |
| "learning_rate": 0.00016876564135781638, | |
| "loss": 0.5061, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.6670419363917816, | |
| "grad_norm": 0.5291624665260315, | |
| "learning_rate": 0.00016858709866015065, | |
| "loss": 0.4241, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6684491978609626, | |
| "grad_norm": 1.5842996835708618, | |
| "learning_rate": 0.00016840814215259112, | |
| "loss": 0.4321, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.6698564593301436, | |
| "grad_norm": 0.7339175939559937, | |
| "learning_rate": 0.0001682287729148449, | |
| "loss": 0.4975, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6712637207993245, | |
| "grad_norm": 0.6193541884422302, | |
| "learning_rate": 0.00016804899202910907, | |
| "loss": 0.1977, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.6726709822685055, | |
| "grad_norm": 1.8930505514144897, | |
| "learning_rate": 0.00016786880058006453, | |
| "loss": 0.6117, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6740782437376864, | |
| "grad_norm": 1.268921971321106, | |
| "learning_rate": 0.0001676881996548691, | |
| "loss": 0.5449, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.6754855052068675, | |
| "grad_norm": 1.5368669033050537, | |
| "learning_rate": 0.00016750719034315121, | |
| "loss": 0.4734, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6768927666760484, | |
| "grad_norm": 0.8705158233642578, | |
| "learning_rate": 0.00016732577373700314, | |
| "loss": 0.4644, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.6783000281452294, | |
| "grad_norm": 0.3128531873226166, | |
| "learning_rate": 0.00016714395093097458, | |
| "loss": 0.4438, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6797072896144104, | |
| "grad_norm": 1.795952558517456, | |
| "learning_rate": 0.00016696172302206597, | |
| "loss": 0.463, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.6811145510835913, | |
| "grad_norm": 0.8031005263328552, | |
| "learning_rate": 0.00016677909110972183, | |
| "loss": 0.727, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6825218125527723, | |
| "grad_norm": 1.083425760269165, | |
| "learning_rate": 0.00016659605629582418, | |
| "loss": 0.6498, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.6839290740219532, | |
| "grad_norm": 0.9262056350708008, | |
| "learning_rate": 0.00016641261968468598, | |
| "loss": 0.3122, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6853363354911343, | |
| "grad_norm": 0.27757611870765686, | |
| "learning_rate": 0.00016622878238304424, | |
| "loss": 0.3477, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.6867435969603152, | |
| "grad_norm": 0.6037611365318298, | |
| "learning_rate": 0.00016604454550005356, | |
| "loss": 0.2896, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6881508584294962, | |
| "grad_norm": 0.7902546525001526, | |
| "learning_rate": 0.00016585991014727932, | |
| "loss": 0.6687, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.6895581198986772, | |
| "grad_norm": 0.8998187184333801, | |
| "learning_rate": 0.000165674877438691, | |
| "loss": 0.5168, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.6909653813678581, | |
| "grad_norm": 0.9715900421142578, | |
| "learning_rate": 0.0001654894484906555, | |
| "loss": 0.6263, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.6923726428370391, | |
| "grad_norm": 1.390411138534546, | |
| "learning_rate": 0.00016530362442193037, | |
| "loss": 0.4905, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.69377990430622, | |
| "grad_norm": 0.8985224366188049, | |
| "learning_rate": 0.00016511740635365705, | |
| "loss": 0.5525, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.6951871657754011, | |
| "grad_norm": 0.8099625110626221, | |
| "learning_rate": 0.00016493079540935406, | |
| "loss": 0.3906, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6965944272445821, | |
| "grad_norm": 1.9844683408737183, | |
| "learning_rate": 0.00016474379271491033, | |
| "loss": 0.5456, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.698001688713763, | |
| "grad_norm": 1.053562045097351, | |
| "learning_rate": 0.00016455639939857842, | |
| "loss": 0.2934, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.699408950182944, | |
| "grad_norm": 1.4200698137283325, | |
| "learning_rate": 0.00016436861659096752, | |
| "loss": 0.6771, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.7008162116521249, | |
| "grad_norm": 0.7813885807991028, | |
| "learning_rate": 0.00016418044542503685, | |
| "loss": 0.357, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.702223473121306, | |
| "grad_norm": 1.131839632987976, | |
| "learning_rate": 0.00016399188703608867, | |
| "loss": 0.528, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.7036307345904869, | |
| "grad_norm": 0.7668808698654175, | |
| "learning_rate": 0.00016380294256176155, | |
| "loss": 0.4434, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7050379960596679, | |
| "grad_norm": 2.0037477016448975, | |
| "learning_rate": 0.00016361361314202343, | |
| "loss": 0.5884, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.7064452575288489, | |
| "grad_norm": 0.726494550704956, | |
| "learning_rate": 0.0001634238999191647, | |
| "loss": 0.4555, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7078525189980298, | |
| "grad_norm": 0.5868455171585083, | |
| "learning_rate": 0.0001632338040377915, | |
| "loss": 0.4513, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.7092597804672108, | |
| "grad_norm": 0.8666847348213196, | |
| "learning_rate": 0.00016304332664481848, | |
| "loss": 0.7028, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7106670419363917, | |
| "grad_norm": 1.0513399839401245, | |
| "learning_rate": 0.00016285246888946234, | |
| "loss": 0.3972, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.7120743034055728, | |
| "grad_norm": 0.765617847442627, | |
| "learning_rate": 0.0001626612319232344, | |
| "loss": 0.4364, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7134815648747538, | |
| "grad_norm": 0.7804258465766907, | |
| "learning_rate": 0.00016246961689993404, | |
| "loss": 0.6756, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.7148888263439347, | |
| "grad_norm": 1.0644882917404175, | |
| "learning_rate": 0.00016227762497564153, | |
| "loss": 0.4398, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7162960878131157, | |
| "grad_norm": 1.0868752002716064, | |
| "learning_rate": 0.0001620852573087111, | |
| "loss": 0.4097, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.7177033492822966, | |
| "grad_norm": 0.877193033695221, | |
| "learning_rate": 0.00016189251505976403, | |
| "loss": 0.4445, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7191106107514776, | |
| "grad_norm": 1.735767126083374, | |
| "learning_rate": 0.00016169939939168155, | |
| "loss": 0.4002, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.7205178722206586, | |
| "grad_norm": 0.679560124874115, | |
| "learning_rate": 0.00016150591146959787, | |
| "loss": 0.4376, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7219251336898396, | |
| "grad_norm": 0.7569028735160828, | |
| "learning_rate": 0.00016131205246089304, | |
| "loss": 0.5988, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.7233323951590206, | |
| "grad_norm": 0.7681282758712769, | |
| "learning_rate": 0.00016111782353518624, | |
| "loss": 0.6736, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7247396566282015, | |
| "grad_norm": 0.9109302759170532, | |
| "learning_rate": 0.0001609232258643282, | |
| "loss": 0.4269, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.7261469180973825, | |
| "grad_norm": 1.033499002456665, | |
| "learning_rate": 0.00016072826062239458, | |
| "loss": 0.4186, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7275541795665634, | |
| "grad_norm": 0.765438437461853, | |
| "learning_rate": 0.00016053292898567876, | |
| "loss": 0.4688, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.7289614410357445, | |
| "grad_norm": 1.352359414100647, | |
| "learning_rate": 0.00016033723213268464, | |
| "loss": 0.4242, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7303687025049254, | |
| "grad_norm": 0.9118134379386902, | |
| "learning_rate": 0.00016014117124411954, | |
| "loss": 0.4915, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.7317759639741064, | |
| "grad_norm": 1.1372839212417603, | |
| "learning_rate": 0.00015994474750288725, | |
| "loss": 0.3128, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7331832254432874, | |
| "grad_norm": 0.23089000582695007, | |
| "learning_rate": 0.00015974796209408071, | |
| "loss": 0.4923, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.7345904869124683, | |
| "grad_norm": 1.543110728263855, | |
| "learning_rate": 0.00015955081620497497, | |
| "loss": 0.5901, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7359977483816493, | |
| "grad_norm": 1.474463939666748, | |
| "learning_rate": 0.00015935331102501994, | |
| "loss": 0.5367, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.7374050098508302, | |
| "grad_norm": 0.7584693431854248, | |
| "learning_rate": 0.00015915544774583324, | |
| "loss": 0.6098, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7388122713200113, | |
| "grad_norm": 0.6778565645217896, | |
| "learning_rate": 0.0001589572275611931, | |
| "loss": 0.4514, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.7402195327891923, | |
| "grad_norm": 0.7713000178337097, | |
| "learning_rate": 0.00015875865166703105, | |
| "loss": 0.2646, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7416267942583732, | |
| "grad_norm": 1.2152999639511108, | |
| "learning_rate": 0.0001585597212614247, | |
| "loss": 0.5909, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.7430340557275542, | |
| "grad_norm": 1.4983125925064087, | |
| "learning_rate": 0.00015836043754459064, | |
| "loss": 0.4621, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7444413171967351, | |
| "grad_norm": 1.0301270484924316, | |
| "learning_rate": 0.000158160801718877, | |
| "loss": 0.2372, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.7458485786659161, | |
| "grad_norm": 1.2305338382720947, | |
| "learning_rate": 0.0001579608149887564, | |
| "loss": 0.3397, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.747255840135097, | |
| "grad_norm": 1.1948976516723633, | |
| "learning_rate": 0.00015776047856081853, | |
| "loss": 0.3388, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.7486631016042781, | |
| "grad_norm": 1.539473295211792, | |
| "learning_rate": 0.00015755979364376295, | |
| "loss": 0.239, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7500703630734591, | |
| "grad_norm": 2.136974811553955, | |
| "learning_rate": 0.0001573587614483918, | |
| "loss": 0.5409, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.75147762454264, | |
| "grad_norm": 1.2603963613510132, | |
| "learning_rate": 0.0001571573831876024, | |
| "loss": 0.3763, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.752884886011821, | |
| "grad_norm": 0.9054425954818726, | |
| "learning_rate": 0.00015695566007638013, | |
| "loss": 0.4531, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.7542921474810019, | |
| "grad_norm": 0.6948245763778687, | |
| "learning_rate": 0.0001567535933317908, | |
| "loss": 0.3894, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.755699408950183, | |
| "grad_norm": 1.3231799602508545, | |
| "learning_rate": 0.00015655118417297366, | |
| "loss": 0.4352, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.757106670419364, | |
| "grad_norm": 0.8093194365501404, | |
| "learning_rate": 0.00015634843382113372, | |
| "loss": 0.5505, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7585139318885449, | |
| "grad_norm": 0.7088418006896973, | |
| "learning_rate": 0.0001561453434995346, | |
| "loss": 0.4232, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.7599211933577259, | |
| "grad_norm": 0.48376569151878357, | |
| "learning_rate": 0.00015594191443349105, | |
| "loss": 0.5123, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7613284548269068, | |
| "grad_norm": 1.2853504419326782, | |
| "learning_rate": 0.00015573814785036164, | |
| "loss": 0.3733, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.7627357162960878, | |
| "grad_norm": 0.7034462690353394, | |
| "learning_rate": 0.00015553404497954117, | |
| "loss": 0.4144, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7641429777652687, | |
| "grad_norm": 1.340484380722046, | |
| "learning_rate": 0.00015532960705245356, | |
| "loss": 0.4388, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.7655502392344498, | |
| "grad_norm": 0.7512633204460144, | |
| "learning_rate": 0.00015512483530254412, | |
| "loss": 0.4672, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7669575007036308, | |
| "grad_norm": 2.1453585624694824, | |
| "learning_rate": 0.00015491973096527217, | |
| "loss": 0.8132, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.7683647621728117, | |
| "grad_norm": 1.0686702728271484, | |
| "learning_rate": 0.00015471429527810383, | |
| "loss": 0.3679, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7697720236419927, | |
| "grad_norm": 1.7490125894546509, | |
| "learning_rate": 0.00015450852948050426, | |
| "loss": 0.3288, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.7711792851111736, | |
| "grad_norm": 1.7581394910812378, | |
| "learning_rate": 0.00015430243481393024, | |
| "loss": 0.6833, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7725865465803546, | |
| "grad_norm": 1.5255379676818848, | |
| "learning_rate": 0.00015409601252182285, | |
| "loss": 0.4711, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.7739938080495357, | |
| "grad_norm": 1.7117855548858643, | |
| "learning_rate": 0.00015388926384959976, | |
| "loss": 0.6609, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7754010695187166, | |
| "grad_norm": 0.5109424591064453, | |
| "learning_rate": 0.00015368219004464786, | |
| "loss": 0.3426, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.7768083309878976, | |
| "grad_norm": 1.3394129276275635, | |
| "learning_rate": 0.0001534747923563156, | |
| "loss": 0.4882, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7782155924570785, | |
| "grad_norm": 1.1809154748916626, | |
| "learning_rate": 0.00015326707203590568, | |
| "loss": 0.262, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.7796228539262595, | |
| "grad_norm": 0.6428471207618713, | |
| "learning_rate": 0.0001530590303366672, | |
| "loss": 0.3657, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7810301153954404, | |
| "grad_norm": 0.5726737976074219, | |
| "learning_rate": 0.0001528506685137883, | |
| "loss": 0.4514, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.7824373768646214, | |
| "grad_norm": 0.589094877243042, | |
| "learning_rate": 0.00015264198782438858, | |
| "loss": 0.5539, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.7838446383338025, | |
| "grad_norm": 0.7207341194152832, | |
| "learning_rate": 0.00015243298952751145, | |
| "loss": 0.3529, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.7852518998029834, | |
| "grad_norm": 1.0593701601028442, | |
| "learning_rate": 0.0001522236748841165, | |
| "loss": 0.317, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.7866591612721644, | |
| "grad_norm": 1.1395798921585083, | |
| "learning_rate": 0.000152014045157072, | |
| "loss": 0.5062, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.7880664227413453, | |
| "grad_norm": 1.3966251611709595, | |
| "learning_rate": 0.00015180410161114724, | |
| "loss": 0.4887, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 0.7492479681968689, | |
| "learning_rate": 0.00015159384551300493, | |
| "loss": 0.3919, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.7908809456797072, | |
| "grad_norm": 1.2680071592330933, | |
| "learning_rate": 0.00015138327813119337, | |
| "loss": 0.3053, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.7922882071488883, | |
| "grad_norm": 1.4319703578948975, | |
| "learning_rate": 0.00015117240073613908, | |
| "loss": 0.3683, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.7936954686180693, | |
| "grad_norm": 1.0931735038757324, | |
| "learning_rate": 0.00015096121460013895, | |
| "loss": 0.5054, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.7951027300872502, | |
| "grad_norm": 0.627133309841156, | |
| "learning_rate": 0.00015074972099735266, | |
| "loss": 0.4424, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.7965099915564312, | |
| "grad_norm": 0.90239417552948, | |
| "learning_rate": 0.00015053792120379476, | |
| "loss": 0.5346, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.7979172530256121, | |
| "grad_norm": 1.3932188749313354, | |
| "learning_rate": 0.0001503258164973274, | |
| "loss": 0.5265, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.7993245144947931, | |
| "grad_norm": 1.2821606397628784, | |
| "learning_rate": 0.0001501134081576523, | |
| "loss": 0.3778, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8007317759639742, | |
| "grad_norm": 0.8399055600166321, | |
| "learning_rate": 0.00014990069746630299, | |
| "loss": 0.5459, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.8021390374331551, | |
| "grad_norm": 2.0415430068969727, | |
| "learning_rate": 0.00014968768570663735, | |
| "loss": 0.534, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8035462989023361, | |
| "grad_norm": 1.1202126741409302, | |
| "learning_rate": 0.00014947437416382956, | |
| "loss": 0.3913, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.804953560371517, | |
| "grad_norm": 1.3579108715057373, | |
| "learning_rate": 0.00014926076412486263, | |
| "loss": 0.3769, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.806360821840698, | |
| "grad_norm": 1.1060523986816406, | |
| "learning_rate": 0.00014904685687852043, | |
| "loss": 0.4045, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.8077680833098789, | |
| "grad_norm": 1.785001277923584, | |
| "learning_rate": 0.00014883265371538, | |
| "loss": 0.4895, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.80917534477906, | |
| "grad_norm": 0.7138920426368713, | |
| "learning_rate": 0.00014861815592780378, | |
| "loss": 0.2431, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.810582606248241, | |
| "grad_norm": 1.0932033061981201, | |
| "learning_rate": 0.00014840336480993172, | |
| "loss": 0.4196, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8119898677174219, | |
| "grad_norm": 1.47943115234375, | |
| "learning_rate": 0.00014818828165767355, | |
| "loss": 0.4288, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.8133971291866029, | |
| "grad_norm": 1.5669611692428589, | |
| "learning_rate": 0.00014797290776870101, | |
| "loss": 0.7103, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.8148043906557838, | |
| "grad_norm": 1.002616047859192, | |
| "learning_rate": 0.0001477572444424399, | |
| "loss": 0.2174, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.8162116521249648, | |
| "grad_norm": 1.2607040405273438, | |
| "learning_rate": 0.00014754129298006228, | |
| "loss": 0.3312, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8176189135941458, | |
| "grad_norm": 1.2113310098648071, | |
| "learning_rate": 0.00014732505468447867, | |
| "loss": 0.309, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.8190261750633268, | |
| "grad_norm": 0.6215373277664185, | |
| "learning_rate": 0.00014710853086033013, | |
| "loss": 0.3802, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8204334365325078, | |
| "grad_norm": 0.9997283220291138, | |
| "learning_rate": 0.00014689172281398042, | |
| "loss": 0.5467, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.8218406980016887, | |
| "grad_norm": 0.7299907803535461, | |
| "learning_rate": 0.0001466746318535082, | |
| "loss": 0.4039, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8232479594708697, | |
| "grad_norm": 0.8940709829330444, | |
| "learning_rate": 0.00014645725928869892, | |
| "loss": 0.282, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.8246552209400506, | |
| "grad_norm": 1.1947124004364014, | |
| "learning_rate": 0.00014623960643103705, | |
| "loss": 0.4364, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.8260624824092316, | |
| "grad_norm": 0.6835992932319641, | |
| "learning_rate": 0.00014602167459369826, | |
| "loss": 0.4539, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.8274697438784127, | |
| "grad_norm": 0.7021106481552124, | |
| "learning_rate": 0.00014580346509154136, | |
| "loss": 0.2876, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8288770053475936, | |
| "grad_norm": 1.7289482355117798, | |
| "learning_rate": 0.00014558497924110038, | |
| "loss": 0.4377, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.8302842668167746, | |
| "grad_norm": 1.0549077987670898, | |
| "learning_rate": 0.00014536621836057665, | |
| "loss": 0.5667, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8316915282859555, | |
| "grad_norm": 0.5255772471427917, | |
| "learning_rate": 0.000145147183769831, | |
| "loss": 0.4976, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.8330987897551365, | |
| "grad_norm": 2.376354694366455, | |
| "learning_rate": 0.00014492787679037537, | |
| "loss": 0.8001, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8345060512243174, | |
| "grad_norm": 0.8916311264038086, | |
| "learning_rate": 0.0001447082987453654, | |
| "loss": 0.4217, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.8359133126934984, | |
| "grad_norm": 0.5236600637435913, | |
| "learning_rate": 0.00014448845095959192, | |
| "loss": 0.4531, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8373205741626795, | |
| "grad_norm": 1.5615344047546387, | |
| "learning_rate": 0.00014426833475947345, | |
| "loss": 0.3796, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.8387278356318604, | |
| "grad_norm": 0.6851219534873962, | |
| "learning_rate": 0.00014404795147304774, | |
| "loss": 0.3966, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.8401350971010414, | |
| "grad_norm": 1.6611498594284058, | |
| "learning_rate": 0.00014382730242996404, | |
| "loss": 0.6284, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.8415423585702223, | |
| "grad_norm": 2.139336109161377, | |
| "learning_rate": 0.00014360638896147501, | |
| "loss": 0.4697, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8429496200394033, | |
| "grad_norm": 1.0581591129302979, | |
| "learning_rate": 0.00014338521240042873, | |
| "loss": 0.5119, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.8443568815085843, | |
| "grad_norm": 0.885945200920105, | |
| "learning_rate": 0.00014316377408126046, | |
| "loss": 0.4225, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8457641429777653, | |
| "grad_norm": 2.1063387393951416, | |
| "learning_rate": 0.00014294207533998486, | |
| "loss": 0.4308, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.8471714044469463, | |
| "grad_norm": 0.6381533741950989, | |
| "learning_rate": 0.00014272011751418782, | |
| "loss": 0.4063, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8485786659161272, | |
| "grad_norm": 0.740987241268158, | |
| "learning_rate": 0.00014249790194301832, | |
| "loss": 0.2807, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.8499859273853082, | |
| "grad_norm": 0.8399060964584351, | |
| "learning_rate": 0.0001422754299671804, | |
| "loss": 0.3904, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8513931888544891, | |
| "grad_norm": 1.4542044401168823, | |
| "learning_rate": 0.00014205270292892512, | |
| "loss": 0.5098, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.8528004503236701, | |
| "grad_norm": 0.8759632706642151, | |
| "learning_rate": 0.00014182972217204238, | |
| "loss": 0.438, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8542077117928512, | |
| "grad_norm": 1.2544376850128174, | |
| "learning_rate": 0.00014160648904185295, | |
| "loss": 0.3654, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.8556149732620321, | |
| "grad_norm": 0.9191109538078308, | |
| "learning_rate": 0.00014138300488520007, | |
| "loss": 0.4855, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8570222347312131, | |
| "grad_norm": 1.2452969551086426, | |
| "learning_rate": 0.00014115927105044172, | |
| "loss": 0.1865, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.858429496200394, | |
| "grad_norm": 1.0692249536514282, | |
| "learning_rate": 0.00014093528888744212, | |
| "loss": 0.3869, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.859836757669575, | |
| "grad_norm": 0.9611905217170715, | |
| "learning_rate": 0.00014071105974756382, | |
| "loss": 0.4429, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.861244019138756, | |
| "grad_norm": 1.419103741645813, | |
| "learning_rate": 0.00014048658498365946, | |
| "loss": 0.3828, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8626512806079369, | |
| "grad_norm": 0.70958012342453, | |
| "learning_rate": 0.00014026186595006356, | |
| "loss": 0.4098, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.864058542077118, | |
| "grad_norm": 0.7273248434066772, | |
| "learning_rate": 0.0001400369040025845, | |
| "loss": 0.3795, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8654658035462989, | |
| "grad_norm": 1.2816479206085205, | |
| "learning_rate": 0.00013981170049849614, | |
| "loss": 0.3648, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.8668730650154799, | |
| "grad_norm": 1.0046167373657227, | |
| "learning_rate": 0.00013958625679652982, | |
| "loss": 0.3949, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8682803264846608, | |
| "grad_norm": 0.45679983496665955, | |
| "learning_rate": 0.000139360574256866, | |
| "loss": 0.3828, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.8696875879538418, | |
| "grad_norm": 0.7042393684387207, | |
| "learning_rate": 0.00013913465424112627, | |
| "loss": 0.3163, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8710948494230228, | |
| "grad_norm": 0.7769744992256165, | |
| "learning_rate": 0.00013890849811236478, | |
| "loss": 0.275, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.8725021108922038, | |
| "grad_norm": 0.5500330328941345, | |
| "learning_rate": 0.0001386821072350604, | |
| "loss": 0.36, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8739093723613848, | |
| "grad_norm": 1.508569359779358, | |
| "learning_rate": 0.00013845548297510834, | |
| "loss": 0.3744, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.8753166338305657, | |
| "grad_norm": 1.6323150396347046, | |
| "learning_rate": 0.0001382286266998117, | |
| "loss": 0.5385, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8767238952997467, | |
| "grad_norm": 1.0691790580749512, | |
| "learning_rate": 0.00013800153977787364, | |
| "loss": 0.4918, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.8781311567689276, | |
| "grad_norm": 0.8545736074447632, | |
| "learning_rate": 0.0001377742235793887, | |
| "loss": 0.327, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.8795384182381086, | |
| "grad_norm": 1.2977032661437988, | |
| "learning_rate": 0.00013754667947583486, | |
| "loss": 0.3627, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.8809456797072897, | |
| "grad_norm": 0.8414074778556824, | |
| "learning_rate": 0.00013731890884006507, | |
| "loss": 0.4126, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 1.2440998554229736, | |
| "learning_rate": 0.00013709091304629903, | |
| "loss": 0.5402, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.8837602026456516, | |
| "grad_norm": 1.1474038362503052, | |
| "learning_rate": 0.00013686269347011487, | |
| "loss": 0.4402, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.8851674641148325, | |
| "grad_norm": 1.9769107103347778, | |
| "learning_rate": 0.00013663425148844097, | |
| "loss": 0.5528, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.8865747255840135, | |
| "grad_norm": 1.071049451828003, | |
| "learning_rate": 0.00013640558847954746, | |
| "loss": 0.3496, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8879819870531945, | |
| "grad_norm": 1.002313494682312, | |
| "learning_rate": 0.00013617670582303804, | |
| "loss": 0.4351, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.8893892485223754, | |
| "grad_norm": 0.8908954858779907, | |
| "learning_rate": 0.00013594760489984167, | |
| "loss": 0.3371, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.8907965099915565, | |
| "grad_norm": 0.9060853123664856, | |
| "learning_rate": 0.00013571828709220413, | |
| "loss": 0.2489, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.8922037714607374, | |
| "grad_norm": 0.7479000687599182, | |
| "learning_rate": 0.00013548875378367972, | |
| "loss": 0.2874, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.8936110329299184, | |
| "grad_norm": 0.9289246201515198, | |
| "learning_rate": 0.00013525900635912299, | |
| "loss": 0.466, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.8950182943990993, | |
| "grad_norm": 1.428377628326416, | |
| "learning_rate": 0.0001350290462046803, | |
| "loss": 0.5203, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.8964255558682803, | |
| "grad_norm": 0.7524283528327942, | |
| "learning_rate": 0.00013479887470778149, | |
| "loss": 0.365, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.8978328173374613, | |
| "grad_norm": 1.021815299987793, | |
| "learning_rate": 0.0001345684932571315, | |
| "loss": 0.5084, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.8992400788066423, | |
| "grad_norm": 0.7522305846214294, | |
| "learning_rate": 0.00013433790324270199, | |
| "loss": 0.2659, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 0.9006473402758233, | |
| "grad_norm": 1.3865163326263428, | |
| "learning_rate": 0.00013410710605572294, | |
| "loss": 0.2533, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9020546017450042, | |
| "grad_norm": 1.8485382795333862, | |
| "learning_rate": 0.00013387610308867437, | |
| "loss": 0.3675, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 0.9034618632141852, | |
| "grad_norm": 1.203482985496521, | |
| "learning_rate": 0.0001336448957352777, | |
| "loss": 0.3284, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9048691246833662, | |
| "grad_norm": 0.9714936017990112, | |
| "learning_rate": 0.00013341348539048752, | |
| "loss": 0.2657, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 0.9062763861525471, | |
| "grad_norm": 1.062326192855835, | |
| "learning_rate": 0.00013318187345048328, | |
| "loss": 0.3837, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9076836476217282, | |
| "grad_norm": 1.3822613954544067, | |
| "learning_rate": 0.00013295006131266055, | |
| "loss": 0.3584, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 1.2804548740386963, | |
| "learning_rate": 0.0001327180503756228, | |
| "loss": 0.4558, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9104981705600901, | |
| "grad_norm": 0.6253718137741089, | |
| "learning_rate": 0.00013248584203917298, | |
| "loss": 0.2871, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 0.911905432029271, | |
| "grad_norm": 0.8237050175666809, | |
| "learning_rate": 0.00013225343770430502, | |
| "loss": 0.4014, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.913312693498452, | |
| "grad_norm": 0.9199953675270081, | |
| "learning_rate": 0.00013202083877319538, | |
| "loss": 0.597, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 0.914719954967633, | |
| "grad_norm": 1.0530214309692383, | |
| "learning_rate": 0.00013178804664919444, | |
| "loss": 0.5745, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9161272164368139, | |
| "grad_norm": 1.0369855165481567, | |
| "learning_rate": 0.00013155506273681837, | |
| "loss": 0.2493, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.917534477905995, | |
| "grad_norm": 0.37017834186553955, | |
| "learning_rate": 0.00013132188844174042, | |
| "loss": 0.5125, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.9189417393751759, | |
| "grad_norm": 0.5272582769393921, | |
| "learning_rate": 0.0001310885251707824, | |
| "loss": 0.2099, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 0.9203490008443569, | |
| "grad_norm": 1.3228068351745605, | |
| "learning_rate": 0.00013085497433190635, | |
| "loss": 0.3625, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9217562623135379, | |
| "grad_norm": 1.2980788946151733, | |
| "learning_rate": 0.000130621237334206, | |
| "loss": 0.3258, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.9231635237827188, | |
| "grad_norm": 0.7955147624015808, | |
| "learning_rate": 0.00013038731558789816, | |
| "loss": 0.331, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.9245707852518998, | |
| "grad_norm": 0.33198082447052, | |
| "learning_rate": 0.00013015321050431435, | |
| "loss": 0.2828, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 0.9259780467210807, | |
| "grad_norm": 1.193824052810669, | |
| "learning_rate": 0.0001299189234958922, | |
| "loss": 0.5299, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9273853081902618, | |
| "grad_norm": 0.6841180324554443, | |
| "learning_rate": 0.00012968445597616695, | |
| "loss": 0.2236, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 0.9287925696594427, | |
| "grad_norm": 1.009793758392334, | |
| "learning_rate": 0.00012944980935976295, | |
| "loss": 0.4583, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9301998311286237, | |
| "grad_norm": 1.1918591260910034, | |
| "learning_rate": 0.00012921498506238512, | |
| "loss": 0.4523, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 0.9316070925978047, | |
| "grad_norm": 0.7123336791992188, | |
| "learning_rate": 0.00012897998450081037, | |
| "loss": 0.3185, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.9330143540669856, | |
| "grad_norm": 0.6820237040519714, | |
| "learning_rate": 0.00012874480909287904, | |
| "loss": 0.4963, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 0.9344216155361666, | |
| "grad_norm": 0.6030889749526978, | |
| "learning_rate": 0.00012850946025748643, | |
| "loss": 0.3238, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.9358288770053476, | |
| "grad_norm": 0.3159545958042145, | |
| "learning_rate": 0.00012827393941457416, | |
| "loss": 0.1804, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.9372361384745286, | |
| "grad_norm": 0.500643789768219, | |
| "learning_rate": 0.00012803824798512166, | |
| "loss": 0.4421, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.9386433999437095, | |
| "grad_norm": 1.0271189212799072, | |
| "learning_rate": 0.00012780238739113755, | |
| "loss": 0.4825, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 0.9400506614128905, | |
| "grad_norm": 1.3835067749023438, | |
| "learning_rate": 0.000127566359055651, | |
| "loss": 0.5109, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.9414579228820715, | |
| "grad_norm": 0.6945546269416809, | |
| "learning_rate": 0.00012733016440270344, | |
| "loss": 0.3438, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 0.9428651843512524, | |
| "grad_norm": 0.5347813367843628, | |
| "learning_rate": 0.0001270938048573395, | |
| "loss": 0.2245, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9442724458204335, | |
| "grad_norm": 0.5110495090484619, | |
| "learning_rate": 0.00012685728184559878, | |
| "loss": 0.3236, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 0.9456797072896144, | |
| "grad_norm": 1.1028776168823242, | |
| "learning_rate": 0.00012662059679450715, | |
| "loss": 0.3656, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.9470869687587954, | |
| "grad_norm": 1.0305935144424438, | |
| "learning_rate": 0.0001263837511320681, | |
| "loss": 0.2271, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 0.9484942302279764, | |
| "grad_norm": 1.1044567823410034, | |
| "learning_rate": 0.0001261467462872541, | |
| "loss": 0.3901, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.9499014916971573, | |
| "grad_norm": 1.0489617586135864, | |
| "learning_rate": 0.00012590958368999817, | |
| "loss": 0.3906, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.9513087531663383, | |
| "grad_norm": 0.9781221747398376, | |
| "learning_rate": 0.0001256722647711849, | |
| "loss": 0.3616, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.9527160146355192, | |
| "grad_norm": 1.1387841701507568, | |
| "learning_rate": 0.0001254347909626421, | |
| "loss": 0.2382, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 0.9541232761047003, | |
| "grad_norm": 1.3473316431045532, | |
| "learning_rate": 0.00012519716369713214, | |
| "loss": 0.446, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.9555305375738812, | |
| "grad_norm": 1.1464128494262695, | |
| "learning_rate": 0.00012495938440834327, | |
| "loss": 0.341, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.9569377990430622, | |
| "grad_norm": 0.9990252256393433, | |
| "learning_rate": 0.0001247214545308808, | |
| "loss": 0.4666, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9583450605122432, | |
| "grad_norm": 1.9256302118301392, | |
| "learning_rate": 0.0001244833755002587, | |
| "loss": 0.4555, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 0.9597523219814241, | |
| "grad_norm": 0.8169670104980469, | |
| "learning_rate": 0.00012424514875289088, | |
| "loss": 0.6558, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.9611595834506051, | |
| "grad_norm": 1.60161554813385, | |
| "learning_rate": 0.0001240067757260824, | |
| "loss": 0.4544, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 0.9625668449197861, | |
| "grad_norm": 0.7437291741371155, | |
| "learning_rate": 0.0001237682578580208, | |
| "loss": 0.3022, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.9639741063889671, | |
| "grad_norm": 0.9030975699424744, | |
| "learning_rate": 0.00012352959658776767, | |
| "loss": 0.4267, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.9653813678581481, | |
| "grad_norm": 1.0298916101455688, | |
| "learning_rate": 0.00012329079335524973, | |
| "loss": 0.5084, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.966788629327329, | |
| "grad_norm": 1.4346392154693604, | |
| "learning_rate": 0.0001230518496012502, | |
| "loss": 0.5032, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 0.96819589079651, | |
| "grad_norm": 1.988788366317749, | |
| "learning_rate": 0.00012281276676739996, | |
| "loss": 0.5206, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.9696031522656909, | |
| "grad_norm": 0.627189040184021, | |
| "learning_rate": 0.00012257354629616933, | |
| "loss": 0.3927, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 0.971010413734872, | |
| "grad_norm": 1.1982104778289795, | |
| "learning_rate": 0.0001223341896308588, | |
| "loss": 0.4134, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.9724176752040529, | |
| "grad_norm": 1.1405185461044312, | |
| "learning_rate": 0.00012209469821559062, | |
| "loss": 0.314, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 0.9738249366732339, | |
| "grad_norm": 1.0637789964675903, | |
| "learning_rate": 0.00012185507349530006, | |
| "loss": 0.4855, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.9752321981424149, | |
| "grad_norm": 1.1884607076644897, | |
| "learning_rate": 0.00012161531691572665, | |
| "loss": 0.4043, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.9766394596115958, | |
| "grad_norm": 0.7082695960998535, | |
| "learning_rate": 0.00012137542992340552, | |
| "loss": 0.3864, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.9780467210807768, | |
| "grad_norm": 1.400940179824829, | |
| "learning_rate": 0.0001211354139656585, | |
| "loss": 0.3179, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.9794539825499577, | |
| "grad_norm": 1.0918678045272827, | |
| "learning_rate": 0.00012089527049058566, | |
| "loss": 0.3724, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.9808612440191388, | |
| "grad_norm": 0.8317002654075623, | |
| "learning_rate": 0.00012065500094705635, | |
| "loss": 0.4669, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 0.9822685054883197, | |
| "grad_norm": 2.4732000827789307, | |
| "learning_rate": 0.00012041460678470057, | |
| "loss": 0.536, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.9836757669575007, | |
| "grad_norm": 0.4239155650138855, | |
| "learning_rate": 0.00012017408945390009, | |
| "loss": 0.4178, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 0.9850830284266817, | |
| "grad_norm": 1.0096583366394043, | |
| "learning_rate": 0.00011993345040577995, | |
| "loss": 0.5533, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9864902898958626, | |
| "grad_norm": 1.6637718677520752, | |
| "learning_rate": 0.00011969269109219945, | |
| "loss": 0.1999, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 0.9878975513650436, | |
| "grad_norm": 1.4339228868484497, | |
| "learning_rate": 0.0001194518129657435, | |
| "loss": 0.2913, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.9893048128342246, | |
| "grad_norm": 0.9473050236701965, | |
| "learning_rate": 0.00011921081747971392, | |
| "loss": 0.4202, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 0.9907120743034056, | |
| "grad_norm": 1.5468287467956543, | |
| "learning_rate": 0.00011896970608812053, | |
| "loss": 0.2755, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.9921193357725866, | |
| "grad_norm": 1.0197608470916748, | |
| "learning_rate": 0.00011872848024567245, | |
| "loss": 0.399, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.9935265972417675, | |
| "grad_norm": 1.9030907154083252, | |
| "learning_rate": 0.00011848714140776936, | |
| "loss": 0.3538, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.9949338587109485, | |
| "grad_norm": 1.1370608806610107, | |
| "learning_rate": 0.00011824569103049264, | |
| "loss": 0.6243, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.9963411201801294, | |
| "grad_norm": 0.7336493134498596, | |
| "learning_rate": 0.0001180041305705967, | |
| "loss": 0.287, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.9977483816493105, | |
| "grad_norm": 0.8091352581977844, | |
| "learning_rate": 0.0001177624614855, | |
| "loss": 0.4314, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 0.9991556431184914, | |
| "grad_norm": 0.8396396636962891, | |
| "learning_rate": 0.0001175206852332765, | |
| "loss": 0.243, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.0005629045876725, | |
| "grad_norm": 0.4893011152744293, | |
| "learning_rate": 0.00011727880327264667, | |
| "loss": 0.4008, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.0019701660568534, | |
| "grad_norm": 0.5934264659881592, | |
| "learning_rate": 0.00011703681706296871, | |
| "loss": 0.197, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.0033774275260343, | |
| "grad_norm": 0.9697572588920593, | |
| "learning_rate": 0.00011679472806422991, | |
| "loss": 0.2565, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.0047846889952152, | |
| "grad_norm": 0.6383791565895081, | |
| "learning_rate": 0.00011655253773703763, | |
| "loss": 0.1732, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.0061919504643964, | |
| "grad_norm": 2.7294044494628906, | |
| "learning_rate": 0.00011631024754261057, | |
| "loss": 0.344, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.0075992119335773, | |
| "grad_norm": 0.7987744212150574, | |
| "learning_rate": 0.00011606785894277002, | |
| "loss": 0.2462, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.0090064734027582, | |
| "grad_norm": 1.0963287353515625, | |
| "learning_rate": 0.00011582537339993102, | |
| "loss": 0.2017, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.0104137348719393, | |
| "grad_norm": 0.2937074303627014, | |
| "learning_rate": 0.00011558279237709337, | |
| "loss": 0.2587, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.0118209963411202, | |
| "grad_norm": 1.1680563688278198, | |
| "learning_rate": 0.00011534011733783303, | |
| "loss": 0.3315, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.0132282578103011, | |
| "grad_norm": 0.8227936029434204, | |
| "learning_rate": 0.00011509734974629316, | |
| "loss": 0.1936, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.014635519279482, | |
| "grad_norm": 1.266236424446106, | |
| "learning_rate": 0.0001148544910671754, | |
| "loss": 0.283, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.0160427807486632, | |
| "grad_norm": 0.4134606122970581, | |
| "learning_rate": 0.0001146115427657308, | |
| "loss": 0.1711, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.017450042217844, | |
| "grad_norm": 0.5949440598487854, | |
| "learning_rate": 0.00011436850630775127, | |
| "loss": 0.2659, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.018857303687025, | |
| "grad_norm": 1.2255134582519531, | |
| "learning_rate": 0.00011412538315956051, | |
| "loss": 0.331, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.0202645651562061, | |
| "grad_norm": 0.7793748378753662, | |
| "learning_rate": 0.00011388217478800536, | |
| "loss": 0.3107, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.021671826625387, | |
| "grad_norm": 1.5764113664627075, | |
| "learning_rate": 0.00011363888266044668, | |
| "loss": 0.2801, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.023079088094568, | |
| "grad_norm": 0.7818349599838257, | |
| "learning_rate": 0.0001133955082447508, | |
| "loss": 0.4592, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.0244863495637488, | |
| "grad_norm": 0.8325141072273254, | |
| "learning_rate": 0.00011315205300928047, | |
| "loss": 0.2221, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.02589361103293, | |
| "grad_norm": 0.8759342432022095, | |
| "learning_rate": 0.0001129085184228861, | |
| "loss": 0.2282, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.0273008725021109, | |
| "grad_norm": 0.8269652724266052, | |
| "learning_rate": 0.00011266490595489672, | |
| "loss": 0.288, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.0287081339712918, | |
| "grad_norm": 0.9182637929916382, | |
| "learning_rate": 0.0001124212170751114, | |
| "loss": 0.2124, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.030115395440473, | |
| "grad_norm": 0.7247250080108643, | |
| "learning_rate": 0.00011217745325379017, | |
| "loss": 0.2818, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.0315226569096538, | |
| "grad_norm": 1.1736894845962524, | |
| "learning_rate": 0.00011193361596164517, | |
| "loss": 0.2349, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.0329299183788347, | |
| "grad_norm": 0.3809513747692108, | |
| "learning_rate": 0.00011168970666983184, | |
| "loss": 0.158, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.0343371798480159, | |
| "grad_norm": 1.4163240194320679, | |
| "learning_rate": 0.0001114457268499401, | |
| "loss": 0.3035, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.0357444413171968, | |
| "grad_norm": 1.8142826557159424, | |
| "learning_rate": 0.00011120167797398527, | |
| "loss": 0.3572, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.0371517027863777, | |
| "grad_norm": 0.9238508343696594, | |
| "learning_rate": 0.00011095756151439934, | |
| "loss": 0.2104, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.0385589642555586, | |
| "grad_norm": 1.3922544717788696, | |
| "learning_rate": 0.0001107133789440221, | |
| "loss": 0.3846, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.0399662257247397, | |
| "grad_norm": 0.5761235952377319, | |
| "learning_rate": 0.00011046913173609217, | |
| "loss": 0.1728, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.0413734871939206, | |
| "grad_norm": 1.3399313688278198, | |
| "learning_rate": 0.0001102248213642382, | |
| "loss": 0.2158, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.0427807486631016, | |
| "grad_norm": 0.5189816355705261, | |
| "learning_rate": 0.00010998044930246985, | |
| "loss": 0.2724, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.0441880101322827, | |
| "grad_norm": 1.0454604625701904, | |
| "learning_rate": 0.00010973601702516903, | |
| "loss": 0.3016, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.0455952716014636, | |
| "grad_norm": 0.9476893544197083, | |
| "learning_rate": 0.00010949152600708096, | |
| "loss": 0.161, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.0470025330706445, | |
| "grad_norm": 1.1760029792785645, | |
| "learning_rate": 0.00010924697772330525, | |
| "loss": 0.3402, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.0484097945398254, | |
| "grad_norm": 0.7986089587211609, | |
| "learning_rate": 0.000109002373649287, | |
| "loss": 0.3381, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.0498170560090065, | |
| "grad_norm": 0.46115541458129883, | |
| "learning_rate": 0.00010875771526080791, | |
| "loss": 0.2121, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.0512243174781875, | |
| "grad_norm": 0.8159217238426208, | |
| "learning_rate": 0.00010851300403397741, | |
| "loss": 0.1618, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 0.9532806277275085, | |
| "learning_rate": 0.00010826824144522369, | |
| "loss": 0.2001, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.0540388404165495, | |
| "grad_norm": 0.987647294998169, | |
| "learning_rate": 0.00010802342897128484, | |
| "loss": 0.1255, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 1.0554461018857304, | |
| "grad_norm": 0.5456539988517761, | |
| "learning_rate": 0.00010777856808919993, | |
| "loss": 0.1738, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.0568533633549113, | |
| "grad_norm": 1.2354178428649902, | |
| "learning_rate": 0.00010753366027630005, | |
| "loss": 0.1968, | |
| "step": 3755 | |
| }, | |
| { | |
| "epoch": 1.0582606248240922, | |
| "grad_norm": 1.5054504871368408, | |
| "learning_rate": 0.00010728870701019952, | |
| "loss": 0.3881, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.0596678862932734, | |
| "grad_norm": 0.33300110697746277, | |
| "learning_rate": 0.00010704370976878683, | |
| "loss": 0.3455, | |
| "step": 3765 | |
| }, | |
| { | |
| "epoch": 1.0610751477624543, | |
| "grad_norm": 0.28057172894477844, | |
| "learning_rate": 0.00010679867003021582, | |
| "loss": 0.3676, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.0624824092316352, | |
| "grad_norm": 0.78326416015625, | |
| "learning_rate": 0.0001065535892728967, | |
| "loss": 0.2051, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.0638896707008163, | |
| "grad_norm": 0.30371785163879395, | |
| "learning_rate": 0.00010630846897548719, | |
| "loss": 0.2172, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.0652969321699972, | |
| "grad_norm": 0.951871931552887, | |
| "learning_rate": 0.00010606331061688352, | |
| "loss": 0.2731, | |
| "step": 3785 | |
| }, | |
| { | |
| "epoch": 1.0667041936391781, | |
| "grad_norm": 0.9194802641868591, | |
| "learning_rate": 0.00010581811567621165, | |
| "loss": 0.437, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.068111455108359, | |
| "grad_norm": 1.3185656070709229, | |
| "learning_rate": 0.00010557288563281819, | |
| "loss": 0.1762, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 1.0695187165775402, | |
| "grad_norm": 0.6637858152389526, | |
| "learning_rate": 0.00010532762196626151, | |
| "loss": 0.3499, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.070925978046721, | |
| "grad_norm": 0.5646357536315918, | |
| "learning_rate": 0.00010508232615630291, | |
| "loss": 0.1794, | |
| "step": 3805 | |
| }, | |
| { | |
| "epoch": 1.072333239515902, | |
| "grad_norm": 0.7347474694252014, | |
| "learning_rate": 0.00010483699968289754, | |
| "loss": 0.2088, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.0737405009850831, | |
| "grad_norm": 0.7603871822357178, | |
| "learning_rate": 0.00010459164402618567, | |
| "loss": 0.2723, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 1.075147762454264, | |
| "grad_norm": 1.574090838432312, | |
| "learning_rate": 0.0001043462606664835, | |
| "loss": 0.3175, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.076555023923445, | |
| "grad_norm": 1.8480275869369507, | |
| "learning_rate": 0.00010410085108427448, | |
| "loss": 0.3903, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.0779622853926258, | |
| "grad_norm": 3.3462395668029785, | |
| "learning_rate": 0.00010385541676020026, | |
| "loss": 0.2867, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.079369546861807, | |
| "grad_norm": 1.0282424688339233, | |
| "learning_rate": 0.00010360995917505167, | |
| "loss": 0.3542, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 1.0807768083309879, | |
| "grad_norm": 1.081586241722107, | |
| "learning_rate": 0.00010336447980976, | |
| "loss": 0.1933, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.0821840698001688, | |
| "grad_norm": 0.7061908841133118, | |
| "learning_rate": 0.00010311898014538788, | |
| "loss": 0.3673, | |
| "step": 3845 | |
| }, | |
| { | |
| "epoch": 1.08359133126935, | |
| "grad_norm": 1.0589807033538818, | |
| "learning_rate": 0.00010287346166312048, | |
| "loss": 0.2017, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.0849985927385308, | |
| "grad_norm": 0.7850357890129089, | |
| "learning_rate": 0.0001026279258442564, | |
| "loss": 0.3781, | |
| "step": 3855 | |
| }, | |
| { | |
| "epoch": 1.0864058542077117, | |
| "grad_norm": 0.8800612688064575, | |
| "learning_rate": 0.00010238237417019889, | |
| "loss": 0.2454, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.0878131156768927, | |
| "grad_norm": 0.8004993796348572, | |
| "learning_rate": 0.00010213680812244693, | |
| "loss": 0.3253, | |
| "step": 3865 | |
| }, | |
| { | |
| "epoch": 1.0892203771460738, | |
| "grad_norm": 1.0395301580429077, | |
| "learning_rate": 0.00010189122918258611, | |
| "loss": 0.3023, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.0906276386152547, | |
| "grad_norm": 0.7087461352348328, | |
| "learning_rate": 0.00010164563883227982, | |
| "loss": 0.258, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.0920349000844356, | |
| "grad_norm": 1.0742789506912231, | |
| "learning_rate": 0.00010140003855326034, | |
| "loss": 0.1768, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.0934421615536167, | |
| "grad_norm": 1.7721843719482422, | |
| "learning_rate": 0.00010115442982731988, | |
| "loss": 0.2673, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 1.0948494230227976, | |
| "grad_norm": 0.5749943256378174, | |
| "learning_rate": 0.00010090881413630154, | |
| "loss": 0.2943, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.0962566844919786, | |
| "grad_norm": 1.210871696472168, | |
| "learning_rate": 0.00010066319296209043, | |
| "loss": 0.2569, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 1.0976639459611597, | |
| "grad_norm": 0.7546014189720154, | |
| "learning_rate": 0.00010041756778660483, | |
| "loss": 0.1277, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.0990712074303406, | |
| "grad_norm": 0.45546409487724304, | |
| "learning_rate": 0.0001001719400917871, | |
| "loss": 0.2447, | |
| "step": 3905 | |
| }, | |
| { | |
| "epoch": 1.1004784688995215, | |
| "grad_norm": 0.9810652136802673, | |
| "learning_rate": 9.992631135959484e-05, | |
| "loss": 0.1891, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.1018857303687024, | |
| "grad_norm": 0.26853448152542114, | |
| "learning_rate": 9.96806830719918e-05, | |
| "loss": 0.2793, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 1.1032929918378835, | |
| "grad_norm": 0.815556526184082, | |
| "learning_rate": 9.943505671093923e-05, | |
| "loss": 0.1589, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.1047002533070645, | |
| "grad_norm": 1.1649208068847656, | |
| "learning_rate": 9.918943375838658e-05, | |
| "loss": 0.1692, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.1061075147762454, | |
| "grad_norm": 1.3160449266433716, | |
| "learning_rate": 9.894381569626286e-05, | |
| "loss": 0.1748, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.1075147762454265, | |
| "grad_norm": 0.7906925082206726, | |
| "learning_rate": 9.869820400646752e-05, | |
| "loss": 0.2706, | |
| "step": 3935 | |
| }, | |
| { | |
| "epoch": 1.1089220377146074, | |
| "grad_norm": 1.7690831422805786, | |
| "learning_rate": 9.845260017086152e-05, | |
| "loss": 0.4101, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.1103292991837883, | |
| "grad_norm": 0.7361578941345215, | |
| "learning_rate": 9.820700567125855e-05, | |
| "loss": 0.2352, | |
| "step": 3945 | |
| }, | |
| { | |
| "epoch": 1.1117365606529692, | |
| "grad_norm": 0.7984316945075989, | |
| "learning_rate": 9.79614219894159e-05, | |
| "loss": 0.2466, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.1131438221221504, | |
| "grad_norm": 1.6478660106658936, | |
| "learning_rate": 9.771585060702551e-05, | |
| "loss": 0.2434, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 1.1145510835913313, | |
| "grad_norm": 0.8288646936416626, | |
| "learning_rate": 9.747029300570528e-05, | |
| "loss": 0.1954, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.1159583450605122, | |
| "grad_norm": 1.0649809837341309, | |
| "learning_rate": 9.722475066698992e-05, | |
| "loss": 0.1995, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 1.1173656065296933, | |
| "grad_norm": 1.0399101972579956, | |
| "learning_rate": 9.697922507232194e-05, | |
| "loss": 0.2972, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.1187728679988742, | |
| "grad_norm": 0.9969576001167297, | |
| "learning_rate": 9.673371770304291e-05, | |
| "loss": 0.2133, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.1201801294680551, | |
| "grad_norm": 0.7914555072784424, | |
| "learning_rate": 9.648823004038452e-05, | |
| "loss": 0.2006, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.1215873909372363, | |
| "grad_norm": 0.8462080359458923, | |
| "learning_rate": 9.62427635654594e-05, | |
| "loss": 0.1759, | |
| "step": 3985 | |
| }, | |
| { | |
| "epoch": 1.1229946524064172, | |
| "grad_norm": 1.5257298946380615, | |
| "learning_rate": 9.599731975925248e-05, | |
| "loss": 0.2961, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.124401913875598, | |
| "grad_norm": 0.918910562992096, | |
| "learning_rate": 9.575190010261179e-05, | |
| "loss": 0.2468, | |
| "step": 3995 | |
| }, | |
| { | |
| "epoch": 1.125809175344779, | |
| "grad_norm": 0.9318897128105164, | |
| "learning_rate": 9.550650607623982e-05, | |
| "loss": 0.2609, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.12721643681396, | |
| "grad_norm": 0.49596425890922546, | |
| "learning_rate": 9.526113916068431e-05, | |
| "loss": 0.2369, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 1.128623698283141, | |
| "grad_norm": 0.6530629396438599, | |
| "learning_rate": 9.501580083632946e-05, | |
| "loss": 0.1354, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.130030959752322, | |
| "grad_norm": 0.39932572841644287, | |
| "learning_rate": 9.477049258338694e-05, | |
| "loss": 0.2277, | |
| "step": 4015 | |
| }, | |
| { | |
| "epoch": 1.131438221221503, | |
| "grad_norm": 0.8406773805618286, | |
| "learning_rate": 9.452521588188711e-05, | |
| "loss": 0.1472, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.132845482690684, | |
| "grad_norm": 0.7629873752593994, | |
| "learning_rate": 9.427997221166978e-05, | |
| "loss": 0.2421, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.1342527441598649, | |
| "grad_norm": 1.1697338819503784, | |
| "learning_rate": 9.40347630523756e-05, | |
| "loss": 0.2181, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.1356600056290458, | |
| "grad_norm": 0.924167811870575, | |
| "learning_rate": 9.378958988343702e-05, | |
| "loss": 0.3934, | |
| "step": 4035 | |
| }, | |
| { | |
| "epoch": 1.137067267098227, | |
| "grad_norm": 0.8078356385231018, | |
| "learning_rate": 9.354445418406924e-05, | |
| "loss": 0.1403, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.1384745285674078, | |
| "grad_norm": 0.520318329334259, | |
| "learning_rate": 9.329935743326144e-05, | |
| "loss": 0.2916, | |
| "step": 4045 | |
| }, | |
| { | |
| "epoch": 1.1398817900365887, | |
| "grad_norm": 0.45882686972618103, | |
| "learning_rate": 9.305430110976793e-05, | |
| "loss": 0.1297, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.1412890515057699, | |
| "grad_norm": 0.5139206051826477, | |
| "learning_rate": 9.280928669209887e-05, | |
| "loss": 0.2342, | |
| "step": 4055 | |
| }, | |
| { | |
| "epoch": 1.1426963129749508, | |
| "grad_norm": 0.9370526671409607, | |
| "learning_rate": 9.256431565851181e-05, | |
| "loss": 0.1581, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.1441035744441317, | |
| "grad_norm": 1.525415301322937, | |
| "learning_rate": 9.23193894870024e-05, | |
| "loss": 0.255, | |
| "step": 4065 | |
| }, | |
| { | |
| "epoch": 1.1455108359133126, | |
| "grad_norm": 1.745328426361084, | |
| "learning_rate": 9.207450965529571e-05, | |
| "loss": 0.1585, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.1469180973824937, | |
| "grad_norm": 0.5603808760643005, | |
| "learning_rate": 9.18296776408372e-05, | |
| "loss": 0.2085, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.1483253588516746, | |
| "grad_norm": 0.24650625884532928, | |
| "learning_rate": 9.158489492078381e-05, | |
| "loss": 0.2441, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.1497326203208555, | |
| "grad_norm": 1.2769076824188232, | |
| "learning_rate": 9.134016297199506e-05, | |
| "loss": 0.1923, | |
| "step": 4085 | |
| }, | |
| { | |
| "epoch": 1.1511398817900367, | |
| "grad_norm": 0.6759532690048218, | |
| "learning_rate": 9.109548327102424e-05, | |
| "loss": 0.1818, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.1525471432592176, | |
| "grad_norm": 1.7534480094909668, | |
| "learning_rate": 9.085085729410928e-05, | |
| "loss": 0.2677, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 1.1539544047283985, | |
| "grad_norm": 1.578730583190918, | |
| "learning_rate": 9.060628651716409e-05, | |
| "loss": 0.3868, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.1553616661975794, | |
| "grad_norm": 1.5693743228912354, | |
| "learning_rate": 9.036177241576949e-05, | |
| "loss": 0.4238, | |
| "step": 4105 | |
| }, | |
| { | |
| "epoch": 1.1567689276667605, | |
| "grad_norm": 0.7190649509429932, | |
| "learning_rate": 9.011731646516429e-05, | |
| "loss": 0.2943, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.1581761891359414, | |
| "grad_norm": 1.3021358251571655, | |
| "learning_rate": 8.987292014023658e-05, | |
| "loss": 0.282, | |
| "step": 4115 | |
| }, | |
| { | |
| "epoch": 1.1595834506051224, | |
| "grad_norm": 0.7299554944038391, | |
| "learning_rate": 8.962858491551467e-05, | |
| "loss": 0.2086, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.1609907120743035, | |
| "grad_norm": 0.8138667345046997, | |
| "learning_rate": 8.938431226515813e-05, | |
| "loss": 0.3847, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.1623979735434844, | |
| "grad_norm": 1.6948626041412354, | |
| "learning_rate": 8.914010366294917e-05, | |
| "loss": 0.2519, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.1638052350126653, | |
| "grad_norm": 0.4518921971321106, | |
| "learning_rate": 8.889596058228339e-05, | |
| "loss": 0.1481, | |
| "step": 4135 | |
| }, | |
| { | |
| "epoch": 1.1652124964818462, | |
| "grad_norm": 0.9538673162460327, | |
| "learning_rate": 8.865188449616124e-05, | |
| "loss": 0.2342, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.1666197579510273, | |
| "grad_norm": 1.5478556156158447, | |
| "learning_rate": 8.84078768771789e-05, | |
| "loss": 0.2741, | |
| "step": 4145 | |
| }, | |
| { | |
| "epoch": 1.1680270194202083, | |
| "grad_norm": 0.8891351222991943, | |
| "learning_rate": 8.816393919751937e-05, | |
| "loss": 0.2279, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.1694342808893892, | |
| "grad_norm": 1.0661555528640747, | |
| "learning_rate": 8.792007292894387e-05, | |
| "loss": 0.2588, | |
| "step": 4155 | |
| }, | |
| { | |
| "epoch": 1.1708415423585703, | |
| "grad_norm": 1.0529447793960571, | |
| "learning_rate": 8.767627954278267e-05, | |
| "loss": 0.3593, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.1722488038277512, | |
| "grad_norm": 1.0678569078445435, | |
| "learning_rate": 8.743256050992623e-05, | |
| "loss": 0.1596, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 1.1736560652969321, | |
| "grad_norm": 0.7005488276481628, | |
| "learning_rate": 8.71889173008166e-05, | |
| "loss": 0.2517, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.175063326766113, | |
| "grad_norm": 0.4683868885040283, | |
| "learning_rate": 8.69453513854382e-05, | |
| "loss": 0.1622, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 0.8689951300621033, | |
| "learning_rate": 8.67018642333092e-05, | |
| "loss": 0.1776, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.177877849704475, | |
| "grad_norm": 0.7526000738143921, | |
| "learning_rate": 8.645845731347248e-05, | |
| "loss": 0.1588, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 1.179285111173656, | |
| "grad_norm": 1.2025400400161743, | |
| "learning_rate": 8.621513209448701e-05, | |
| "loss": 0.197, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.180692372642837, | |
| "grad_norm": 1.2456661462783813, | |
| "learning_rate": 8.597189004441863e-05, | |
| "loss": 0.2185, | |
| "step": 4195 | |
| }, | |
| { | |
| "epoch": 1.182099634112018, | |
| "grad_norm": 0.26599639654159546, | |
| "learning_rate": 8.572873263083152e-05, | |
| "loss": 0.1736, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.183506895581199, | |
| "grad_norm": 0.6946321725845337, | |
| "learning_rate": 8.548566132077916e-05, | |
| "loss": 0.2439, | |
| "step": 4205 | |
| }, | |
| { | |
| "epoch": 1.18491415705038, | |
| "grad_norm": 0.8973987102508545, | |
| "learning_rate": 8.524267758079557e-05, | |
| "loss": 0.2171, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.186321418519561, | |
| "grad_norm": 0.653135359287262, | |
| "learning_rate": 8.499978287688648e-05, | |
| "loss": 0.1822, | |
| "step": 4215 | |
| }, | |
| { | |
| "epoch": 1.1877286799887419, | |
| "grad_norm": 1.1294854879379272, | |
| "learning_rate": 8.475697867452028e-05, | |
| "loss": 0.3998, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.189135941457923, | |
| "grad_norm": 0.7260348200798035, | |
| "learning_rate": 8.451426643861946e-05, | |
| "loss": 0.3177, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.190543202927104, | |
| "grad_norm": 0.9421544075012207, | |
| "learning_rate": 8.427164763355169e-05, | |
| "loss": 0.3644, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.1919504643962848, | |
| "grad_norm": 1.8454887866973877, | |
| "learning_rate": 8.402912372312076e-05, | |
| "loss": 0.2601, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 1.1933577258654657, | |
| "grad_norm": 0.7556844353675842, | |
| "learning_rate": 8.378669617055806e-05, | |
| "loss": 0.1539, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.1947649873346469, | |
| "grad_norm": 1.1138182878494263, | |
| "learning_rate": 8.354436643851365e-05, | |
| "loss": 0.2221, | |
| "step": 4245 | |
| }, | |
| { | |
| "epoch": 1.1961722488038278, | |
| "grad_norm": 1.7039527893066406, | |
| "learning_rate": 8.330213598904726e-05, | |
| "loss": 0.3543, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.1975795102730087, | |
| "grad_norm": 1.6566787958145142, | |
| "learning_rate": 8.306000628361972e-05, | |
| "loss": 0.1975, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 1.1989867717421898, | |
| "grad_norm": 1.0765029191970825, | |
| "learning_rate": 8.281797878308406e-05, | |
| "loss": 0.1358, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.2003940332113707, | |
| "grad_norm": 0.7748456001281738, | |
| "learning_rate": 8.257605494767654e-05, | |
| "loss": 0.1821, | |
| "step": 4265 | |
| }, | |
| { | |
| "epoch": 1.2018012946805516, | |
| "grad_norm": 0.32174113392829895, | |
| "learning_rate": 8.233423623700816e-05, | |
| "loss": 0.1391, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.2032085561497325, | |
| "grad_norm": 0.5359024405479431, | |
| "learning_rate": 8.209252411005548e-05, | |
| "loss": 0.1476, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.2046158176189137, | |
| "grad_norm": 0.9815373420715332, | |
| "learning_rate": 8.185092002515209e-05, | |
| "loss": 0.3173, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.2060230790880946, | |
| "grad_norm": 0.6186626553535461, | |
| "learning_rate": 8.16094254399798e-05, | |
| "loss": 0.3268, | |
| "step": 4285 | |
| }, | |
| { | |
| "epoch": 1.2074303405572755, | |
| "grad_norm": 1.598221778869629, | |
| "learning_rate": 8.136804181155961e-05, | |
| "loss": 0.2788, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.2088376020264566, | |
| "grad_norm": 0.409020334482193, | |
| "learning_rate": 8.112677059624316e-05, | |
| "loss": 0.2455, | |
| "step": 4295 | |
| }, | |
| { | |
| "epoch": 1.2102448634956375, | |
| "grad_norm": 1.0623451471328735, | |
| "learning_rate": 8.088561324970396e-05, | |
| "loss": 0.2883, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.2116521249648184, | |
| "grad_norm": 0.9107158780097961, | |
| "learning_rate": 8.064457122692828e-05, | |
| "loss": 0.191, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 1.2130593864339994, | |
| "grad_norm": 1.021278738975525, | |
| "learning_rate": 8.040364598220682e-05, | |
| "loss": 0.2287, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.2144666479031805, | |
| "grad_norm": 1.0348402261734009, | |
| "learning_rate": 8.016283896912563e-05, | |
| "loss": 0.1455, | |
| "step": 4315 | |
| }, | |
| { | |
| "epoch": 1.2158739093723614, | |
| "grad_norm": 1.06684410572052, | |
| "learning_rate": 7.992215164055737e-05, | |
| "loss": 0.1786, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.2172811708415423, | |
| "grad_norm": 0.45586028695106506, | |
| "learning_rate": 7.968158544865272e-05, | |
| "loss": 0.2625, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.2186884323107234, | |
| "grad_norm": 1.0333331823349, | |
| "learning_rate": 7.944114184483144e-05, | |
| "loss": 0.1766, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.2200956937799043, | |
| "grad_norm": 1.477582335472107, | |
| "learning_rate": 7.920082227977361e-05, | |
| "loss": 0.2547, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 1.2215029552490853, | |
| "grad_norm": 0.732683539390564, | |
| "learning_rate": 7.89606282034111e-05, | |
| "loss": 0.1894, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.2229102167182662, | |
| "grad_norm": 1.199336290359497, | |
| "learning_rate": 7.872056106491846e-05, | |
| "loss": 0.3359, | |
| "step": 4345 | |
| }, | |
| { | |
| "epoch": 1.2243174781874473, | |
| "grad_norm": 2.6119384765625, | |
| "learning_rate": 7.848062231270458e-05, | |
| "loss": 0.3301, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.2257247396566282, | |
| "grad_norm": 1.0260940790176392, | |
| "learning_rate": 7.824081339440364e-05, | |
| "loss": 0.1735, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 1.2271320011258091, | |
| "grad_norm": 0.7368533611297607, | |
| "learning_rate": 7.800113575686643e-05, | |
| "loss": 0.1741, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.2285392625949902, | |
| "grad_norm": 0.8837445378303528, | |
| "learning_rate": 7.776159084615183e-05, | |
| "loss": 0.2789, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 1.2299465240641712, | |
| "grad_norm": 1.0234431028366089, | |
| "learning_rate": 7.752218010751786e-05, | |
| "loss": 0.1811, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.231353785533352, | |
| "grad_norm": 1.1849218606948853, | |
| "learning_rate": 7.728290498541297e-05, | |
| "loss": 0.2951, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.232761047002533, | |
| "grad_norm": 1.1420046091079712, | |
| "learning_rate": 7.704376692346748e-05, | |
| "loss": 0.2964, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.234168308471714, | |
| "grad_norm": 0.44826436042785645, | |
| "learning_rate": 7.680476736448477e-05, | |
| "loss": 0.165, | |
| "step": 4385 | |
| }, | |
| { | |
| "epoch": 1.235575569940895, | |
| "grad_norm": 0.6397153735160828, | |
| "learning_rate": 7.656590775043249e-05, | |
| "loss": 0.138, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.236982831410076, | |
| "grad_norm": 1.1096476316452026, | |
| "learning_rate": 7.632718952243404e-05, | |
| "loss": 0.2673, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 1.238390092879257, | |
| "grad_norm": 0.7769279479980469, | |
| "learning_rate": 7.608861412075987e-05, | |
| "loss": 0.1631, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.239797354348438, | |
| "grad_norm": 0.8061667084693909, | |
| "learning_rate": 7.585018298481849e-05, | |
| "loss": 0.1851, | |
| "step": 4405 | |
| }, | |
| { | |
| "epoch": 1.2412046158176189, | |
| "grad_norm": 1.618454098701477, | |
| "learning_rate": 7.561189755314817e-05, | |
| "loss": 0.2377, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.2426118772867998, | |
| "grad_norm": 1.1752551794052124, | |
| "learning_rate": 7.537375926340802e-05, | |
| "loss": 0.1806, | |
| "step": 4415 | |
| }, | |
| { | |
| "epoch": 1.244019138755981, | |
| "grad_norm": 0.29463231563568115, | |
| "learning_rate": 7.513576955236944e-05, | |
| "loss": 0.1611, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.2454264002251618, | |
| "grad_norm": 0.7407804131507874, | |
| "learning_rate": 7.489792985590743e-05, | |
| "loss": 0.3176, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.2468336616943427, | |
| "grad_norm": 0.8456223011016846, | |
| "learning_rate": 7.466024160899173e-05, | |
| "loss": 0.2742, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.2482409231635239, | |
| "grad_norm": 1.3502225875854492, | |
| "learning_rate": 7.442270624567856e-05, | |
| "loss": 0.2477, | |
| "step": 4435 | |
| }, | |
| { | |
| "epoch": 1.2496481846327048, | |
| "grad_norm": 1.0241039991378784, | |
| "learning_rate": 7.418532519910162e-05, | |
| "loss": 0.2415, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.2510554461018857, | |
| "grad_norm": 0.570637047290802, | |
| "learning_rate": 7.394809990146356e-05, | |
| "loss": 0.2094, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 1.2524627075710666, | |
| "grad_norm": 0.4012211859226227, | |
| "learning_rate": 7.371103178402731e-05, | |
| "loss": 0.2591, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.2538699690402477, | |
| "grad_norm": 1.1546359062194824, | |
| "learning_rate": 7.347412227710766e-05, | |
| "loss": 0.2837, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 1.2552772305094286, | |
| "grad_norm": 0.8672778606414795, | |
| "learning_rate": 7.32373728100622e-05, | |
| "loss": 0.298, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.2566844919786098, | |
| "grad_norm": 0.4911658465862274, | |
| "learning_rate": 7.300078481128306e-05, | |
| "loss": 0.1921, | |
| "step": 4465 | |
| }, | |
| { | |
| "epoch": 1.2580917534477907, | |
| "grad_norm": 1.1717147827148438, | |
| "learning_rate": 7.276435970818824e-05, | |
| "loss": 0.1687, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.2594990149169716, | |
| "grad_norm": 0.5286734104156494, | |
| "learning_rate": 7.252809892721282e-05, | |
| "loss": 0.2104, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.2609062763861525, | |
| "grad_norm": 2.43472957611084, | |
| "learning_rate": 7.229200389380056e-05, | |
| "loss": 0.2763, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.2623135378553334, | |
| "grad_norm": 0.9692918062210083, | |
| "learning_rate": 7.205607603239508e-05, | |
| "loss": 0.1913, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 1.2637207993245145, | |
| "grad_norm": 0.8969650268554688, | |
| "learning_rate": 7.182031676643153e-05, | |
| "loss": 0.4249, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.2651280607936954, | |
| "grad_norm": 0.7135694026947021, | |
| "learning_rate": 7.158472751832783e-05, | |
| "loss": 0.1957, | |
| "step": 4495 | |
| }, | |
| { | |
| "epoch": 1.2665353222628766, | |
| "grad_norm": 2.911539077758789, | |
| "learning_rate": 7.134930970947607e-05, | |
| "loss": 0.3644, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.2679425837320575, | |
| "grad_norm": 1.8338284492492676, | |
| "learning_rate": 7.111406476023398e-05, | |
| "loss": 0.2941, | |
| "step": 4505 | |
| }, | |
| { | |
| "epoch": 1.2693498452012384, | |
| "grad_norm": 0.736365020275116, | |
| "learning_rate": 7.087899408991651e-05, | |
| "loss": 0.2541, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.2707571066704193, | |
| "grad_norm": 1.269327163696289, | |
| "learning_rate": 7.06440991167869e-05, | |
| "loss": 0.2847, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 1.2721643681396002, | |
| "grad_norm": 0.6774185299873352, | |
| "learning_rate": 7.040938125804858e-05, | |
| "loss": 0.2047, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.2735716296087813, | |
| "grad_norm": 1.0028345584869385, | |
| "learning_rate": 7.017484192983623e-05, | |
| "loss": 0.2327, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.2749788910779623, | |
| "grad_norm": 0.9345621466636658, | |
| "learning_rate": 6.99404825472074e-05, | |
| "loss": 0.2574, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.2763861525471434, | |
| "grad_norm": 1.2837140560150146, | |
| "learning_rate": 6.970630452413407e-05, | |
| "loss": 0.298, | |
| "step": 4535 | |
| }, | |
| { | |
| "epoch": 1.2777934140163243, | |
| "grad_norm": 0.5337740182876587, | |
| "learning_rate": 6.947230927349396e-05, | |
| "loss": 0.1538, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.2792006754855052, | |
| "grad_norm": 0.5805062651634216, | |
| "learning_rate": 6.923849820706194e-05, | |
| "loss": 0.1483, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 1.280607936954686, | |
| "grad_norm": 0.8201838135719299, | |
| "learning_rate": 6.900487273550187e-05, | |
| "loss": 0.163, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.282015198423867, | |
| "grad_norm": 0.5184070467948914, | |
| "learning_rate": 6.877143426835764e-05, | |
| "loss": 0.2611, | |
| "step": 4555 | |
| }, | |
| { | |
| "epoch": 1.2834224598930482, | |
| "grad_norm": 1.0877232551574707, | |
| "learning_rate": 6.853818421404496e-05, | |
| "loss": 0.3085, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.284829721362229, | |
| "grad_norm": 1.616977572441101, | |
| "learning_rate": 6.830512397984288e-05, | |
| "loss": 0.3108, | |
| "step": 4565 | |
| }, | |
| { | |
| "epoch": 1.2862369828314102, | |
| "grad_norm": 0.6340872049331665, | |
| "learning_rate": 6.807225497188496e-05, | |
| "loss": 0.177, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.287644244300591, | |
| "grad_norm": 0.8518214821815491, | |
| "learning_rate": 6.783957859515127e-05, | |
| "loss": 0.1805, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.289051505769772, | |
| "grad_norm": 1.280093789100647, | |
| "learning_rate": 6.760709625345953e-05, | |
| "loss": 0.2854, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.290458767238953, | |
| "grad_norm": 0.7486845850944519, | |
| "learning_rate": 6.737480934945677e-05, | |
| "loss": 0.1399, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 1.291866028708134, | |
| "grad_norm": 1.3590744733810425, | |
| "learning_rate": 6.714271928461097e-05, | |
| "loss": 0.1735, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.293273290177315, | |
| "grad_norm": 0.6231881380081177, | |
| "learning_rate": 6.691082745920247e-05, | |
| "loss": 0.2083, | |
| "step": 4595 | |
| }, | |
| { | |
| "epoch": 1.2946805516464959, | |
| "grad_norm": 1.0750889778137207, | |
| "learning_rate": 6.667913527231549e-05, | |
| "loss": 0.2304, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.296087813115677, | |
| "grad_norm": 1.3983303308486938, | |
| "learning_rate": 6.644764412182986e-05, | |
| "loss": 0.3285, | |
| "step": 4605 | |
| }, | |
| { | |
| "epoch": 1.297495074584858, | |
| "grad_norm": 0.5835619568824768, | |
| "learning_rate": 6.621635540441249e-05, | |
| "loss": 0.2651, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.2989023360540388, | |
| "grad_norm": 0.7869633436203003, | |
| "learning_rate": 6.598527051550882e-05, | |
| "loss": 0.2144, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 1.3003095975232197, | |
| "grad_norm": 0.4034360945224762, | |
| "learning_rate": 6.575439084933468e-05, | |
| "loss": 0.1919, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.3017168589924009, | |
| "grad_norm": 1.0225868225097656, | |
| "learning_rate": 6.552371779886756e-05, | |
| "loss": 0.2942, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.3031241204615818, | |
| "grad_norm": 1.8515701293945312, | |
| "learning_rate": 6.52932527558385e-05, | |
| "loss": 0.2579, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.3045313819307627, | |
| "grad_norm": 1.13215172290802, | |
| "learning_rate": 6.506299711072353e-05, | |
| "loss": 0.189, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 1.3059386433999438, | |
| "grad_norm": 1.1587252616882324, | |
| "learning_rate": 6.483295225273521e-05, | |
| "loss": 0.2055, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.3073459048691247, | |
| "grad_norm": 1.6920759677886963, | |
| "learning_rate": 6.460311956981444e-05, | |
| "loss": 0.3108, | |
| "step": 4645 | |
| }, | |
| { | |
| "epoch": 1.3087531663383056, | |
| "grad_norm": 0.5736072659492493, | |
| "learning_rate": 6.437350044862207e-05, | |
| "loss": 0.2675, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.3101604278074865, | |
| "grad_norm": 0.9719104170799255, | |
| "learning_rate": 6.414409627453025e-05, | |
| "loss": 0.1933, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 1.3115676892766677, | |
| "grad_norm": 0.8271322250366211, | |
| "learning_rate": 6.391490843161442e-05, | |
| "loss": 0.0908, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.3129749507458486, | |
| "grad_norm": 1.2622920274734497, | |
| "learning_rate": 6.368593830264485e-05, | |
| "loss": 0.1837, | |
| "step": 4665 | |
| }, | |
| { | |
| "epoch": 1.3143822122150295, | |
| "grad_norm": 1.0141448974609375, | |
| "learning_rate": 6.345718726907815e-05, | |
| "loss": 0.1396, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 0.5923504829406738, | |
| "learning_rate": 6.322865671104909e-05, | |
| "loss": 0.1631, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 1.3171967351533915, | |
| "grad_norm": 1.8866256475448608, | |
| "learning_rate": 6.300034800736233e-05, | |
| "loss": 0.1407, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.3186039966225724, | |
| "grad_norm": 0.8495520353317261, | |
| "learning_rate": 6.277226253548385e-05, | |
| "loss": 0.2345, | |
| "step": 4685 | |
| }, | |
| { | |
| "epoch": 1.3200112580917533, | |
| "grad_norm": 0.8851481080055237, | |
| "learning_rate": 6.254440167153295e-05, | |
| "loss": 0.2431, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.3214185195609345, | |
| "grad_norm": 0.5228270292282104, | |
| "learning_rate": 6.231676679027364e-05, | |
| "loss": 0.1606, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 1.3228257810301154, | |
| "grad_norm": 1.2752258777618408, | |
| "learning_rate": 6.208935926510659e-05, | |
| "loss": 0.2588, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.3242330424992963, | |
| "grad_norm": 1.6664029359817505, | |
| "learning_rate": 6.186218046806078e-05, | |
| "loss": 0.2418, | |
| "step": 4705 | |
| }, | |
| { | |
| "epoch": 1.3256403039684774, | |
| "grad_norm": 0.7116133570671082, | |
| "learning_rate": 6.16352317697851e-05, | |
| "loss": 0.1839, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.3270475654376583, | |
| "grad_norm": 1.6506725549697876, | |
| "learning_rate": 6.140851453954021e-05, | |
| "loss": 0.2076, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 1.3284548269068392, | |
| "grad_norm": 1.0681225061416626, | |
| "learning_rate": 6.118203014519034e-05, | |
| "loss": 0.2491, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.3298620883760202, | |
| "grad_norm": 0.969599723815918, | |
| "learning_rate": 6.095577995319476e-05, | |
| "loss": 0.273, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.3312693498452013, | |
| "grad_norm": 1.4593223333358765, | |
| "learning_rate": 6.072976532859982e-05, | |
| "loss": 0.358, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.3326766113143822, | |
| "grad_norm": 0.29552891850471497, | |
| "learning_rate": 6.0503987635030656e-05, | |
| "loss": 0.2655, | |
| "step": 4735 | |
| }, | |
| { | |
| "epoch": 1.334083872783563, | |
| "grad_norm": 2.189373731613159, | |
| "learning_rate": 6.0278448234682784e-05, | |
| "loss": 0.2624, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.3354911342527442, | |
| "grad_norm": 0.28230440616607666, | |
| "learning_rate": 6.005314848831415e-05, | |
| "loss": 0.1886, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 1.3368983957219251, | |
| "grad_norm": 0.5569413304328918, | |
| "learning_rate": 5.9828089755236714e-05, | |
| "loss": 0.231, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.338305657191106, | |
| "grad_norm": 0.8192738890647888, | |
| "learning_rate": 5.960327339330828e-05, | |
| "loss": 0.23, | |
| "step": 4755 | |
| }, | |
| { | |
| "epoch": 1.339712918660287, | |
| "grad_norm": 1.0859158039093018, | |
| "learning_rate": 5.9378700758924466e-05, | |
| "loss": 0.3275, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.341120180129468, | |
| "grad_norm": 0.8077869415283203, | |
| "learning_rate": 5.915437320701025e-05, | |
| "loss": 0.0847, | |
| "step": 4765 | |
| }, | |
| { | |
| "epoch": 1.342527441598649, | |
| "grad_norm": 1.8826837539672852, | |
| "learning_rate": 5.8930292091012015e-05, | |
| "loss": 0.2158, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.3439347030678301, | |
| "grad_norm": 0.6470653414726257, | |
| "learning_rate": 5.870645876288938e-05, | |
| "loss": 0.3325, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 1.345341964537011, | |
| "grad_norm": 0.7090429067611694, | |
| "learning_rate": 5.848287457310681e-05, | |
| "loss": 0.2083, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.346749226006192, | |
| "grad_norm": 0.1886598914861679, | |
| "learning_rate": 5.825954087062579e-05, | |
| "loss": 0.2118, | |
| "step": 4785 | |
| }, | |
| { | |
| "epoch": 1.3481564874753729, | |
| "grad_norm": 0.5092473030090332, | |
| "learning_rate": 5.8036459002896473e-05, | |
| "loss": 0.253, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.3495637489445538, | |
| "grad_norm": 0.9652419686317444, | |
| "learning_rate": 5.78136303158495e-05, | |
| "loss": 0.1499, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 1.350971010413735, | |
| "grad_norm": 0.6111290454864502, | |
| "learning_rate": 5.759105615388814e-05, | |
| "loss": 0.1805, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.3523782718829158, | |
| "grad_norm": 2.2469632625579834, | |
| "learning_rate": 5.736873785987997e-05, | |
| "loss": 0.3536, | |
| "step": 4805 | |
| }, | |
| { | |
| "epoch": 1.353785533352097, | |
| "grad_norm": 0.9734948873519897, | |
| "learning_rate": 5.714667677514882e-05, | |
| "loss": 0.2784, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.3551927948212779, | |
| "grad_norm": 1.076882243156433, | |
| "learning_rate": 5.692487423946662e-05, | |
| "loss": 0.1953, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 1.3566000562904588, | |
| "grad_norm": 0.7746699452400208, | |
| "learning_rate": 5.6703331591045524e-05, | |
| "loss": 0.2175, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.3580073177596397, | |
| "grad_norm": 0.7650654315948486, | |
| "learning_rate": 5.6482050166529546e-05, | |
| "loss": 0.1676, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 1.3594145792288206, | |
| "grad_norm": 0.6610764861106873, | |
| "learning_rate": 5.62610313009868e-05, | |
| "loss": 0.1721, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.3608218406980017, | |
| "grad_norm": 0.8137916326522827, | |
| "learning_rate": 5.604027632790112e-05, | |
| "loss": 0.1374, | |
| "step": 4835 | |
| }, | |
| { | |
| "epoch": 1.3622291021671826, | |
| "grad_norm": 0.6320801377296448, | |
| "learning_rate": 5.581978657916431e-05, | |
| "loss": 0.209, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 1.4471935033798218, | |
| "learning_rate": 5.5599563385067996e-05, | |
| "loss": 0.1163, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 1.3650436251055447, | |
| "grad_norm": 0.9794873595237732, | |
| "learning_rate": 5.537960807429547e-05, | |
| "loss": 0.2077, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.3664508865747256, | |
| "grad_norm": 1.3119271993637085, | |
| "learning_rate": 5.5159921973913866e-05, | |
| "loss": 0.2667, | |
| "step": 4855 | |
| }, | |
| { | |
| "epoch": 1.3678581480439065, | |
| "grad_norm": 1.156152367591858, | |
| "learning_rate": 5.49405064093661e-05, | |
| "loss": 0.1734, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.3692654095130874, | |
| "grad_norm": 0.06259223818778992, | |
| "learning_rate": 5.472136270446275e-05, | |
| "loss": 0.2067, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 1.3706726709822685, | |
| "grad_norm": 0.6296875476837158, | |
| "learning_rate": 5.4502492181374284e-05, | |
| "loss": 0.229, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.3720799324514494, | |
| "grad_norm": 1.3139517307281494, | |
| "learning_rate": 5.428389616062298e-05, | |
| "loss": 0.286, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.3734871939206306, | |
| "grad_norm": 0.5777654051780701, | |
| "learning_rate": 5.40655759610748e-05, | |
| "loss": 0.2024, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.3748944553898115, | |
| "grad_norm": 0.5422516465187073, | |
| "learning_rate": 5.384753289993173e-05, | |
| "loss": 0.2453, | |
| "step": 4885 | |
| }, | |
| { | |
| "epoch": 1.3763017168589924, | |
| "grad_norm": 1.2088871002197266, | |
| "learning_rate": 5.3629768292723614e-05, | |
| "loss": 0.1644, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.3777089783281733, | |
| "grad_norm": 0.6206454634666443, | |
| "learning_rate": 5.341228345330025e-05, | |
| "loss": 0.3293, | |
| "step": 4895 | |
| }, | |
| { | |
| "epoch": 1.3791162397973544, | |
| "grad_norm": 1.0353143215179443, | |
| "learning_rate": 5.3195079693823624e-05, | |
| "loss": 0.2197, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.3805235012665353, | |
| "grad_norm": 1.076452612876892, | |
| "learning_rate": 5.297815832475971e-05, | |
| "loss": 0.1435, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 1.3819307627357162, | |
| "grad_norm": 0.7797285914421082, | |
| "learning_rate": 5.2761520654870846e-05, | |
| "loss": 0.1499, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.3833380242048974, | |
| "grad_norm": 3.2293171882629395, | |
| "learning_rate": 5.25451679912077e-05, | |
| "loss": 0.4037, | |
| "step": 4915 | |
| }, | |
| { | |
| "epoch": 1.3847452856740783, | |
| "grad_norm": 0.7513951659202576, | |
| "learning_rate": 5.232910163910132e-05, | |
| "loss": 0.136, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.3861525471432592, | |
| "grad_norm": 0.43260759115219116, | |
| "learning_rate": 5.211332290215543e-05, | |
| "loss": 0.2419, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 1.38755980861244, | |
| "grad_norm": 0.7441173791885376, | |
| "learning_rate": 5.189783308223841e-05, | |
| "loss": 0.1678, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.3889670700816212, | |
| "grad_norm": 0.4429182708263397, | |
| "learning_rate": 5.1682633479475484e-05, | |
| "loss": 0.1767, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 1.3903743315508021, | |
| "grad_norm": 1.6440355777740479, | |
| "learning_rate": 5.146772539224094e-05, | |
| "loss": 0.2831, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.391781593019983, | |
| "grad_norm": 1.1421854496002197, | |
| "learning_rate": 5.1253110117150314e-05, | |
| "loss": 0.157, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 1.3931888544891642, | |
| "grad_norm": 1.013460397720337, | |
| "learning_rate": 5.1038788949052344e-05, | |
| "loss": 0.3537, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.394596115958345, | |
| "grad_norm": 1.2984402179718018, | |
| "learning_rate": 5.082476318102144e-05, | |
| "loss": 0.2869, | |
| "step": 4955 | |
| }, | |
| { | |
| "epoch": 1.396003377427526, | |
| "grad_norm": 0.8296849727630615, | |
| "learning_rate": 5.061103410434978e-05, | |
| "loss": 0.2029, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.397410638896707, | |
| "grad_norm": 1.1972373723983765, | |
| "learning_rate": 5.0397603008539374e-05, | |
| "loss": 0.182, | |
| "step": 4965 | |
| }, | |
| { | |
| "epoch": 1.398817900365888, | |
| "grad_norm": 1.5300724506378174, | |
| "learning_rate": 5.0184471181294515e-05, | |
| "loss": 0.1537, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.400225161835069, | |
| "grad_norm": 0.9540086984634399, | |
| "learning_rate": 4.997163990851381e-05, | |
| "loss": 0.1679, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 1.4016324233042499, | |
| "grad_norm": 0.15063901245594025, | |
| "learning_rate": 4.975911047428263e-05, | |
| "loss": 0.1512, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.403039684773431, | |
| "grad_norm": 1.925596833229065, | |
| "learning_rate": 4.954688416086524e-05, | |
| "loss": 0.2077, | |
| "step": 4985 | |
| }, | |
| { | |
| "epoch": 1.404446946242612, | |
| "grad_norm": 1.4239457845687866, | |
| "learning_rate": 4.9334962248696934e-05, | |
| "loss": 0.2464, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.4058542077117928, | |
| "grad_norm": 0.3618084490299225, | |
| "learning_rate": 4.912334601637658e-05, | |
| "loss": 0.1579, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 1.4072614691809737, | |
| "grad_norm": 0.8101370334625244, | |
| "learning_rate": 4.8912036740658776e-05, | |
| "loss": 0.2682, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.4086687306501549, | |
| "grad_norm": 0.7149579524993896, | |
| "learning_rate": 4.8701035696446064e-05, | |
| "loss": 0.3497, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 1.4100759921193358, | |
| "grad_norm": 1.0598907470703125, | |
| "learning_rate": 4.849034415678131e-05, | |
| "loss": 0.2342, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.4114832535885167, | |
| "grad_norm": 1.2105034589767456, | |
| "learning_rate": 4.8279963392840156e-05, | |
| "loss": 0.2693, | |
| "step": 5015 | |
| }, | |
| { | |
| "epoch": 1.4128905150576978, | |
| "grad_norm": 0.6534488201141357, | |
| "learning_rate": 4.8069894673923064e-05, | |
| "loss": 0.2475, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.4142977765268787, | |
| "grad_norm": 1.4907587766647339, | |
| "learning_rate": 4.7860139267447956e-05, | |
| "loss": 0.2958, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 1.4157050379960596, | |
| "grad_norm": 1.1340523958206177, | |
| "learning_rate": 4.765069843894239e-05, | |
| "loss": 0.1087, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.4171122994652405, | |
| "grad_norm": 0.6139047145843506, | |
| "learning_rate": 4.744157345203588e-05, | |
| "loss": 0.1827, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 1.4185195609344217, | |
| "grad_norm": 1.5109590291976929, | |
| "learning_rate": 4.723276556845252e-05, | |
| "loss": 0.1851, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.4199268224036026, | |
| "grad_norm": 0.593103289604187, | |
| "learning_rate": 4.702427604800307e-05, | |
| "loss": 0.2019, | |
| "step": 5045 | |
| }, | |
| { | |
| "epoch": 1.4213340838727835, | |
| "grad_norm": 1.3064155578613281, | |
| "learning_rate": 4.681610614857749e-05, | |
| "loss": 0.1086, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.4227413453419646, | |
| "grad_norm": 1.4465229511260986, | |
| "learning_rate": 4.66082571261375e-05, | |
| "loss": 0.099, | |
| "step": 5055 | |
| }, | |
| { | |
| "epoch": 1.4241486068111455, | |
| "grad_norm": 1.0164941549301147, | |
| "learning_rate": 4.6400730234708676e-05, | |
| "loss": 0.2006, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.4255558682803264, | |
| "grad_norm": 1.600894808769226, | |
| "learning_rate": 4.61935267263732e-05, | |
| "loss": 0.2938, | |
| "step": 5065 | |
| }, | |
| { | |
| "epoch": 1.4269631297495073, | |
| "grad_norm": 0.8022120594978333, | |
| "learning_rate": 4.598664785126217e-05, | |
| "loss": 0.2981, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.4283703912186885, | |
| "grad_norm": 0.6564612984657288, | |
| "learning_rate": 4.578009485754791e-05, | |
| "loss": 0.1266, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 1.4297776526878694, | |
| "grad_norm": 0.7073236107826233, | |
| "learning_rate": 4.557386899143678e-05, | |
| "loss": 0.2229, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.4311849141570505, | |
| "grad_norm": 0.9632103443145752, | |
| "learning_rate": 4.536797149716133e-05, | |
| "loss": 0.1511, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 1.4325921756262314, | |
| "grad_norm": 1.1304622888565063, | |
| "learning_rate": 4.5162403616972945e-05, | |
| "loss": 0.2341, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.4339994370954123, | |
| "grad_norm": 1.135055422782898, | |
| "learning_rate": 4.4957166591134405e-05, | |
| "loss": 0.3898, | |
| "step": 5095 | |
| }, | |
| { | |
| "epoch": 1.4354066985645932, | |
| "grad_norm": 0.6786003112792969, | |
| "learning_rate": 4.475226165791231e-05, | |
| "loss": 0.2129, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.4368139600337742, | |
| "grad_norm": 1.3296654224395752, | |
| "learning_rate": 4.454769005356955e-05, | |
| "loss": 0.3128, | |
| "step": 5105 | |
| }, | |
| { | |
| "epoch": 1.4382212215029553, | |
| "grad_norm": 0.7507737278938293, | |
| "learning_rate": 4.434345301235802e-05, | |
| "loss": 0.1069, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.4396284829721362, | |
| "grad_norm": 1.4222168922424316, | |
| "learning_rate": 4.4139551766511e-05, | |
| "loss": 0.1529, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 1.4410357444413173, | |
| "grad_norm": 0.21092858910560608, | |
| "learning_rate": 4.39359875462359e-05, | |
| "loss": 0.2159, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.4424430059104982, | |
| "grad_norm": 1.0862993001937866, | |
| "learning_rate": 4.373276157970665e-05, | |
| "loss": 0.1262, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 1.4438502673796791, | |
| "grad_norm": 1.6479579210281372, | |
| "learning_rate": 4.352987509305635e-05, | |
| "loss": 0.2165, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.44525752884886, | |
| "grad_norm": 0.11600520461797714, | |
| "learning_rate": 4.3327329310370016e-05, | |
| "loss": 0.1696, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 1.446664790318041, | |
| "grad_norm": 0.9424710869789124, | |
| "learning_rate": 4.312512545367702e-05, | |
| "loss": 0.3328, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.448072051787222, | |
| "grad_norm": 0.6428975462913513, | |
| "learning_rate": 4.292326474294372e-05, | |
| "loss": 0.1069, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 1.449479313256403, | |
| "grad_norm": 0.8455730676651001, | |
| "learning_rate": 4.272174839606628e-05, | |
| "loss": 0.3006, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.4508865747255841, | |
| "grad_norm": 0.6467002034187317, | |
| "learning_rate": 4.252057762886305e-05, | |
| "loss": 0.1345, | |
| "step": 5155 | |
| }, | |
| { | |
| "epoch": 1.452293836194765, | |
| "grad_norm": 0.7402626276016235, | |
| "learning_rate": 4.2319753655067505e-05, | |
| "loss": 0.1928, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.453701097663946, | |
| "grad_norm": 1.142514705657959, | |
| "learning_rate": 4.211927768632068e-05, | |
| "loss": 0.3225, | |
| "step": 5165 | |
| }, | |
| { | |
| "epoch": 1.4551083591331269, | |
| "grad_norm": 0.9843090772628784, | |
| "learning_rate": 4.191915093216411e-05, | |
| "loss": 0.1223, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.4565156206023078, | |
| "grad_norm": 0.9305518865585327, | |
| "learning_rate": 4.171937460003223e-05, | |
| "loss": 0.1518, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 1.457922882071489, | |
| "grad_norm": 0.9245863556861877, | |
| "learning_rate": 4.1519949895245435e-05, | |
| "loss": 0.161, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.4593301435406698, | |
| "grad_norm": 0.5494176149368286, | |
| "learning_rate": 4.1320878021002466e-05, | |
| "loss": 0.1645, | |
| "step": 5185 | |
| }, | |
| { | |
| "epoch": 1.460737405009851, | |
| "grad_norm": 0.454455703496933, | |
| "learning_rate": 4.112216017837346e-05, | |
| "loss": 0.1784, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.4621446664790319, | |
| "grad_norm": 0.8797675967216492, | |
| "learning_rate": 4.092379756629244e-05, | |
| "loss": 0.1915, | |
| "step": 5195 | |
| }, | |
| { | |
| "epoch": 1.4635519279482128, | |
| "grad_norm": 0.5059092044830322, | |
| "learning_rate": 4.072579138155024e-05, | |
| "loss": 0.1533, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.4649591894173937, | |
| "grad_norm": 1.5164445638656616, | |
| "learning_rate": 4.052814281878725e-05, | |
| "loss": 0.3054, | |
| "step": 5205 | |
| }, | |
| { | |
| "epoch": 1.4663664508865748, | |
| "grad_norm": 0.8489431738853455, | |
| "learning_rate": 4.033085307048626e-05, | |
| "loss": 0.1573, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.4677737123557557, | |
| "grad_norm": 0.8418503999710083, | |
| "learning_rate": 4.0133923326965073e-05, | |
| "loss": 0.2269, | |
| "step": 5215 | |
| }, | |
| { | |
| "epoch": 1.4691809738249366, | |
| "grad_norm": 0.4309021830558777, | |
| "learning_rate": 3.9937354776369565e-05, | |
| "loss": 0.1621, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 1.8004333972930908, | |
| "learning_rate": 3.974114860466641e-05, | |
| "loss": 0.1821, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 1.4719954967632987, | |
| "grad_norm": 0.5034974217414856, | |
| "learning_rate": 3.954530599563586e-05, | |
| "loss": 0.1586, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.4734027582324796, | |
| "grad_norm": 1.8636256456375122, | |
| "learning_rate": 3.934982813086466e-05, | |
| "loss": 0.1778, | |
| "step": 5235 | |
| }, | |
| { | |
| "epoch": 1.4748100197016605, | |
| "grad_norm": 0.7782198190689087, | |
| "learning_rate": 3.915471618973905e-05, | |
| "loss": 0.2362, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.4762172811708416, | |
| "grad_norm": 0.5170087218284607, | |
| "learning_rate": 3.895997134943735e-05, | |
| "loss": 0.1389, | |
| "step": 5245 | |
| }, | |
| { | |
| "epoch": 1.4776245426400225, | |
| "grad_norm": 0.6563436388969421, | |
| "learning_rate": 3.876559478492319e-05, | |
| "loss": 0.1972, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.4790318041092034, | |
| "grad_norm": 0.6524726748466492, | |
| "learning_rate": 3.857158766893814e-05, | |
| "loss": 0.2123, | |
| "step": 5255 | |
| }, | |
| { | |
| "epoch": 1.4804390655783846, | |
| "grad_norm": 0.8341132402420044, | |
| "learning_rate": 3.837795117199483e-05, | |
| "loss": 0.2374, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.4818463270475655, | |
| "grad_norm": 0.37632039189338684, | |
| "learning_rate": 3.818468646236984e-05, | |
| "loss": 0.114, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 1.4832535885167464, | |
| "grad_norm": 2.116046190261841, | |
| "learning_rate": 3.799179470609656e-05, | |
| "loss": 0.3048, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.4846608499859273, | |
| "grad_norm": 2.3138134479522705, | |
| "learning_rate": 3.7799277066958205e-05, | |
| "loss": 0.1414, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 1.4860681114551084, | |
| "grad_norm": 1.4033293724060059, | |
| "learning_rate": 3.760713470648093e-05, | |
| "loss": 0.1972, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.4874753729242893, | |
| "grad_norm": 0.9336678981781006, | |
| "learning_rate": 3.741536878392654e-05, | |
| "loss": 0.1519, | |
| "step": 5285 | |
| }, | |
| { | |
| "epoch": 1.4888826343934702, | |
| "grad_norm": 1.4050379991531372, | |
| "learning_rate": 3.7223980456285813e-05, | |
| "loss": 0.1493, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.4902898958626514, | |
| "grad_norm": 0.4991312623023987, | |
| "learning_rate": 3.70329708782713e-05, | |
| "loss": 0.157, | |
| "step": 5295 | |
| }, | |
| { | |
| "epoch": 1.4916971573318323, | |
| "grad_norm": 1.6823819875717163, | |
| "learning_rate": 3.6842341202310374e-05, | |
| "loss": 0.2532, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.4931044188010132, | |
| "grad_norm": 0.81031733751297, | |
| "learning_rate": 3.665209257853843e-05, | |
| "loss": 0.3201, | |
| "step": 5305 | |
| }, | |
| { | |
| "epoch": 1.494511680270194, | |
| "grad_norm": 1.287041425704956, | |
| "learning_rate": 3.646222615479177e-05, | |
| "loss": 0.1398, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.4959189417393752, | |
| "grad_norm": 0.4528125822544098, | |
| "learning_rate": 3.62727430766007e-05, | |
| "loss": 0.2131, | |
| "step": 5315 | |
| }, | |
| { | |
| "epoch": 1.4973262032085561, | |
| "grad_norm": 1.0578283071517944, | |
| "learning_rate": 3.608364448718283e-05, | |
| "loss": 0.1415, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.498733464677737, | |
| "grad_norm": 0.4122551679611206, | |
| "learning_rate": 3.589493152743585e-05, | |
| "loss": 0.0914, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 1.5001407261469182, | |
| "grad_norm": 0.6634222269058228, | |
| "learning_rate": 3.570660533593091e-05, | |
| "loss": 0.1269, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.501547987616099, | |
| "grad_norm": 0.27888017892837524, | |
| "learning_rate": 3.551866704890564e-05, | |
| "loss": 0.1288, | |
| "step": 5335 | |
| }, | |
| { | |
| "epoch": 1.50295524908528, | |
| "grad_norm": 1.0966591835021973, | |
| "learning_rate": 3.533111780025725e-05, | |
| "loss": 0.1822, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.504362510554461, | |
| "grad_norm": 1.1912025213241577, | |
| "learning_rate": 3.514395872153584e-05, | |
| "loss": 0.2205, | |
| "step": 5345 | |
| }, | |
| { | |
| "epoch": 1.505769772023642, | |
| "grad_norm": 0.34254777431488037, | |
| "learning_rate": 3.49571909419374e-05, | |
| "loss": 0.1333, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.507177033492823, | |
| "grad_norm": 0.7154930233955383, | |
| "learning_rate": 3.4770815588297054e-05, | |
| "loss": 0.1758, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 1.508584294962004, | |
| "grad_norm": 0.7776800394058228, | |
| "learning_rate": 3.4584833785082385e-05, | |
| "loss": 0.1721, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.509991556431185, | |
| "grad_norm": 1.0347821712493896, | |
| "learning_rate": 3.43992466543865e-05, | |
| "loss": 0.1735, | |
| "step": 5365 | |
| }, | |
| { | |
| "epoch": 1.511398817900366, | |
| "grad_norm": 0.773311972618103, | |
| "learning_rate": 3.4214055315921245e-05, | |
| "loss": 0.1798, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.5128060793695468, | |
| "grad_norm": 0.15166114270687103, | |
| "learning_rate": 3.402926088701062e-05, | |
| "loss": 0.2025, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 1.5142133408387277, | |
| "grad_norm": 0.4494927227497101, | |
| "learning_rate": 3.38448644825839e-05, | |
| "loss": 0.1211, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.5156206023079088, | |
| "grad_norm": 1.2481530904769897, | |
| "learning_rate": 3.36608672151689e-05, | |
| "loss": 0.1325, | |
| "step": 5385 | |
| }, | |
| { | |
| "epoch": 1.5170278637770898, | |
| "grad_norm": 0.7955223321914673, | |
| "learning_rate": 3.347727019488531e-05, | |
| "loss": 0.1334, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.518435125246271, | |
| "grad_norm": 1.1012686491012573, | |
| "learning_rate": 3.329407452943799e-05, | |
| "loss": 0.1978, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 1.5198423867154518, | |
| "grad_norm": 2.147088050842285, | |
| "learning_rate": 3.311128132411031e-05, | |
| "loss": 0.1742, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.5212496481846327, | |
| "grad_norm": 1.0812978744506836, | |
| "learning_rate": 3.292889168175751e-05, | |
| "loss": 0.1237, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 1.5226569096538136, | |
| "grad_norm": 0.8602486848831177, | |
| "learning_rate": 3.274690670279984e-05, | |
| "loss": 0.1628, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.5240641711229945, | |
| "grad_norm": 0.4767683446407318, | |
| "learning_rate": 3.25653274852162e-05, | |
| "loss": 0.0893, | |
| "step": 5415 | |
| }, | |
| { | |
| "epoch": 1.5254714325921757, | |
| "grad_norm": 1.434166431427002, | |
| "learning_rate": 3.238415512453741e-05, | |
| "loss": 0.3905, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.5268786940613566, | |
| "grad_norm": 3.7128000259399414, | |
| "learning_rate": 3.220339071383948e-05, | |
| "loss": 0.336, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 1.5282859555305377, | |
| "grad_norm": 0.9743013381958008, | |
| "learning_rate": 3.202303534373712e-05, | |
| "loss": 0.17, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.5296932169997186, | |
| "grad_norm": 0.4060254991054535, | |
| "learning_rate": 3.184309010237728e-05, | |
| "loss": 0.1817, | |
| "step": 5435 | |
| }, | |
| { | |
| "epoch": 1.5311004784688995, | |
| "grad_norm": 1.3302080631256104, | |
| "learning_rate": 3.16635560754323e-05, | |
| "loss": 0.2442, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.5325077399380804, | |
| "grad_norm": 1.5643320083618164, | |
| "learning_rate": 3.148443434609367e-05, | |
| "loss": 0.3225, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 1.5339150014072613, | |
| "grad_norm": 1.2559304237365723, | |
| "learning_rate": 3.1305725995065205e-05, | |
| "loss": 0.1861, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.5353222628764425, | |
| "grad_norm": 1.1454960107803345, | |
| "learning_rate": 3.112743210055677e-05, | |
| "loss": 0.1262, | |
| "step": 5455 | |
| }, | |
| { | |
| "epoch": 1.5367295243456234, | |
| "grad_norm": 0.46115657687187195, | |
| "learning_rate": 3.0949553738277634e-05, | |
| "loss": 0.1827, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.5381367858148045, | |
| "grad_norm": 1.2840021848678589, | |
| "learning_rate": 3.077209198143002e-05, | |
| "loss": 0.1399, | |
| "step": 5465 | |
| }, | |
| { | |
| "epoch": 1.5395440472839854, | |
| "grad_norm": 1.189970850944519, | |
| "learning_rate": 3.0595047900702564e-05, | |
| "loss": 0.2078, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.5409513087531663, | |
| "grad_norm": 0.5335509181022644, | |
| "learning_rate": 3.041842256426404e-05, | |
| "loss": 0.1423, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.5423585702223472, | |
| "grad_norm": 0.8606838583946228, | |
| "learning_rate": 3.024221703775665e-05, | |
| "loss": 0.1468, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.5437658316915281, | |
| "grad_norm": 1.3679966926574707, | |
| "learning_rate": 3.0066432384289844e-05, | |
| "loss": 0.1247, | |
| "step": 5485 | |
| }, | |
| { | |
| "epoch": 1.5451730931607093, | |
| "grad_norm": 1.2723866701126099, | |
| "learning_rate": 2.989106966443379e-05, | |
| "loss": 0.1482, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.5465803546298902, | |
| "grad_norm": 0.8712704181671143, | |
| "learning_rate": 2.97161299362129e-05, | |
| "loss": 0.2848, | |
| "step": 5495 | |
| }, | |
| { | |
| "epoch": 1.5479876160990713, | |
| "grad_norm": 0.6967242360115051, | |
| "learning_rate": 2.9541614255099625e-05, | |
| "loss": 0.1604, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.5493948775682522, | |
| "grad_norm": 1.0415253639221191, | |
| "learning_rate": 2.9367523674007947e-05, | |
| "loss": 0.1876, | |
| "step": 5505 | |
| }, | |
| { | |
| "epoch": 1.5508021390374331, | |
| "grad_norm": 0.5861086845397949, | |
| "learning_rate": 2.9193859243287036e-05, | |
| "loss": 0.1835, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.552209400506614, | |
| "grad_norm": 1.444682002067566, | |
| "learning_rate": 2.902062201071505e-05, | |
| "loss": 0.1588, | |
| "step": 5515 | |
| }, | |
| { | |
| "epoch": 1.553616661975795, | |
| "grad_norm": 1.0231586694717407, | |
| "learning_rate": 2.8847813021492574e-05, | |
| "loss": 0.3833, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.555023923444976, | |
| "grad_norm": 1.2998064756393433, | |
| "learning_rate": 2.8675433318236567e-05, | |
| "loss": 0.1849, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 1.556431184914157, | |
| "grad_norm": 0.8349362015724182, | |
| "learning_rate": 2.8503483940973952e-05, | |
| "loss": 0.1391, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.5578384463833381, | |
| "grad_norm": 0.9555754661560059, | |
| "learning_rate": 2.8331965927135274e-05, | |
| "loss": 0.2073, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 1.559245707852519, | |
| "grad_norm": 1.703472375869751, | |
| "learning_rate": 2.8160880311548522e-05, | |
| "loss": 0.2548, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.5606529693217, | |
| "grad_norm": 0.39019107818603516, | |
| "learning_rate": 2.799022812643295e-05, | |
| "loss": 0.1277, | |
| "step": 5545 | |
| }, | |
| { | |
| "epoch": 1.5620602307908809, | |
| "grad_norm": 1.0451160669326782, | |
| "learning_rate": 2.782001040139267e-05, | |
| "loss": 0.3046, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.5634674922600618, | |
| "grad_norm": 0.8136467337608337, | |
| "learning_rate": 2.765022816341063e-05, | |
| "loss": 0.197, | |
| "step": 5555 | |
| }, | |
| { | |
| "epoch": 1.564874753729243, | |
| "grad_norm": 0.6249985098838806, | |
| "learning_rate": 2.7480882436842335e-05, | |
| "loss": 0.1592, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.566282015198424, | |
| "grad_norm": 0.5969499945640564, | |
| "learning_rate": 2.7311974243409565e-05, | |
| "loss": 0.2353, | |
| "step": 5565 | |
| }, | |
| { | |
| "epoch": 1.567689276667605, | |
| "grad_norm": 0.5542153716087341, | |
| "learning_rate": 2.7143504602194448e-05, | |
| "loss": 0.1407, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.5690965381367858, | |
| "grad_norm": 0.40066176652908325, | |
| "learning_rate": 2.697547452963307e-05, | |
| "loss": 0.1318, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 1.5705037996059668, | |
| "grad_norm": 0.4262009859085083, | |
| "learning_rate": 2.680788503950944e-05, | |
| "loss": 0.171, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.5719110610751477, | |
| "grad_norm": 0.7851074934005737, | |
| "learning_rate": 2.664073714294948e-05, | |
| "loss": 0.2443, | |
| "step": 5585 | |
| }, | |
| { | |
| "epoch": 1.5733183225443286, | |
| "grad_norm": 0.39711621403694153, | |
| "learning_rate": 2.6474031848414704e-05, | |
| "loss": 0.2419, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.5747255840135097, | |
| "grad_norm": 0.4387623369693756, | |
| "learning_rate": 2.6307770161696354e-05, | |
| "loss": 0.0821, | |
| "step": 5595 | |
| }, | |
| { | |
| "epoch": 1.5761328454826908, | |
| "grad_norm": 0.9057246446609497, | |
| "learning_rate": 2.6141953085909198e-05, | |
| "loss": 0.2652, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.5775401069518717, | |
| "grad_norm": 0.7787453532218933, | |
| "learning_rate": 2.597658162148544e-05, | |
| "loss": 0.2335, | |
| "step": 5605 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 1.116365909576416, | |
| "learning_rate": 2.5811656766168902e-05, | |
| "loss": 0.2092, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.5803546298902336, | |
| "grad_norm": 0.741118848323822, | |
| "learning_rate": 2.5647179515008724e-05, | |
| "loss": 0.18, | |
| "step": 5615 | |
| }, | |
| { | |
| "epoch": 1.5817618913594145, | |
| "grad_norm": 0.9240850806236267, | |
| "learning_rate": 2.548315086035351e-05, | |
| "loss": 0.2047, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.5831691528285956, | |
| "grad_norm": 1.0324885845184326, | |
| "learning_rate": 2.5319571791845408e-05, | |
| "loss": 0.1117, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 1.5845764142977765, | |
| "grad_norm": 1.108396053314209, | |
| "learning_rate": 2.5156443296414013e-05, | |
| "loss": 0.1582, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.5859836757669576, | |
| "grad_norm": 1.0466639995574951, | |
| "learning_rate": 2.4993766358270388e-05, | |
| "loss": 0.2145, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 1.5873909372361386, | |
| "grad_norm": 1.1003303527832031, | |
| "learning_rate": 2.4831541958901293e-05, | |
| "loss": 0.1401, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.5887981987053195, | |
| "grad_norm": 0.7945972084999084, | |
| "learning_rate": 2.4669771077063152e-05, | |
| "loss": 0.101, | |
| "step": 5645 | |
| }, | |
| { | |
| "epoch": 1.5902054601745004, | |
| "grad_norm": 1.6851614713668823, | |
| "learning_rate": 2.4508454688776105e-05, | |
| "loss": 0.2356, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.5916127216436813, | |
| "grad_norm": 0.708411693572998, | |
| "learning_rate": 2.434759376731819e-05, | |
| "loss": 0.2346, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 1.5930199831128624, | |
| "grad_norm": 0.9913239479064941, | |
| "learning_rate": 2.4187189283219446e-05, | |
| "loss": 0.1195, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.5944272445820433, | |
| "grad_norm": 1.0097897052764893, | |
| "learning_rate": 2.4027242204256108e-05, | |
| "loss": 0.1723, | |
| "step": 5665 | |
| }, | |
| { | |
| "epoch": 1.5958345060512245, | |
| "grad_norm": 0.8258925080299377, | |
| "learning_rate": 2.3867753495444723e-05, | |
| "loss": 0.1539, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.5972417675204054, | |
| "grad_norm": 0.5283498764038086, | |
| "learning_rate": 2.3708724119036262e-05, | |
| "loss": 0.1165, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 1.5986490289895863, | |
| "grad_norm": 1.170369267463684, | |
| "learning_rate": 2.355015503451048e-05, | |
| "loss": 0.1951, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.6000562904587672, | |
| "grad_norm": 0.8622944355010986, | |
| "learning_rate": 2.339204719856998e-05, | |
| "loss": 0.153, | |
| "step": 5685 | |
| }, | |
| { | |
| "epoch": 1.601463551927948, | |
| "grad_norm": 0.6249514818191528, | |
| "learning_rate": 2.323440156513448e-05, | |
| "loss": 0.0686, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.6028708133971292, | |
| "grad_norm": 0.2732272446155548, | |
| "learning_rate": 2.3077219085335054e-05, | |
| "loss": 0.1054, | |
| "step": 5695 | |
| }, | |
| { | |
| "epoch": 1.6042780748663101, | |
| "grad_norm": 1.5117753744125366, | |
| "learning_rate": 2.2920500707508496e-05, | |
| "loss": 0.1682, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.6056853363354913, | |
| "grad_norm": 1.9940603971481323, | |
| "learning_rate": 2.2764247377191405e-05, | |
| "loss": 0.2375, | |
| "step": 5705 | |
| }, | |
| { | |
| "epoch": 1.6070925978046722, | |
| "grad_norm": 1.0817060470581055, | |
| "learning_rate": 2.2608460037114642e-05, | |
| "loss": 0.2294, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.608499859273853, | |
| "grad_norm": 0.4378751814365387, | |
| "learning_rate": 2.2453139627197618e-05, | |
| "loss": 0.1674, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 1.609907120743034, | |
| "grad_norm": 0.5405195951461792, | |
| "learning_rate": 2.22982870845425e-05, | |
| "loss": 0.3422, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.611314382212215, | |
| "grad_norm": 1.4159220457077026, | |
| "learning_rate": 2.214390334342875e-05, | |
| "loss": 0.2116, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 1.612721643681396, | |
| "grad_norm": 1.1930686235427856, | |
| "learning_rate": 2.1989989335307304e-05, | |
| "loss": 0.0965, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.614128905150577, | |
| "grad_norm": 1.2334959506988525, | |
| "learning_rate": 2.1836545988795054e-05, | |
| "loss": 0.1547, | |
| "step": 5735 | |
| }, | |
| { | |
| "epoch": 1.615536166619758, | |
| "grad_norm": 0.7615369558334351, | |
| "learning_rate": 2.168357422966928e-05, | |
| "loss": 0.2468, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.616943428088939, | |
| "grad_norm": 0.7710257172584534, | |
| "learning_rate": 2.153107498086193e-05, | |
| "loss": 0.1674, | |
| "step": 5745 | |
| }, | |
| { | |
| "epoch": 1.61835068955812, | |
| "grad_norm": 0.464054673910141, | |
| "learning_rate": 2.137904916245419e-05, | |
| "loss": 0.2004, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.6197579510273008, | |
| "grad_norm": 0.3523075580596924, | |
| "learning_rate": 2.1227497691670894e-05, | |
| "loss": 0.2314, | |
| "step": 5755 | |
| }, | |
| { | |
| "epoch": 1.6211652124964817, | |
| "grad_norm": 0.8045745491981506, | |
| "learning_rate": 2.1076421482874877e-05, | |
| "loss": 0.1431, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.6225724739656628, | |
| "grad_norm": 0.7054654955863953, | |
| "learning_rate": 2.0925821447561665e-05, | |
| "loss": 0.1056, | |
| "step": 5765 | |
| }, | |
| { | |
| "epoch": 1.6239797354348438, | |
| "grad_norm": 1.5930366516113281, | |
| "learning_rate": 2.077569849435379e-05, | |
| "loss": 0.2394, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.6253869969040249, | |
| "grad_norm": 0.678402304649353, | |
| "learning_rate": 2.062605352899537e-05, | |
| "loss": 0.1482, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 1.6267942583732058, | |
| "grad_norm": 1.009436845779419, | |
| "learning_rate": 2.0476887454346716e-05, | |
| "loss": 0.2381, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.6282015198423867, | |
| "grad_norm": 0.5717734098434448, | |
| "learning_rate": 2.0328201170378813e-05, | |
| "loss": 0.1877, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 1.6296087813115676, | |
| "grad_norm": 1.0021076202392578, | |
| "learning_rate": 2.0179995574167842e-05, | |
| "loss": 0.1836, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.6310160427807485, | |
| "grad_norm": 0.5409684777259827, | |
| "learning_rate": 2.0032271559889915e-05, | |
| "loss": 0.21, | |
| "step": 5795 | |
| }, | |
| { | |
| "epoch": 1.6324233042499297, | |
| "grad_norm": 1.6268481016159058, | |
| "learning_rate": 1.9885030018815487e-05, | |
| "loss": 0.1786, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.6338305657191106, | |
| "grad_norm": 1.0220392942428589, | |
| "learning_rate": 1.9738271839304213e-05, | |
| "loss": 0.2016, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 1.6352378271882917, | |
| "grad_norm": 0.8178629875183105, | |
| "learning_rate": 1.959199790679934e-05, | |
| "loss": 0.1491, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.6366450886574726, | |
| "grad_norm": 2.1935439109802246, | |
| "learning_rate": 1.944620910382252e-05, | |
| "loss": 0.1966, | |
| "step": 5815 | |
| }, | |
| { | |
| "epoch": 1.6380523501266535, | |
| "grad_norm": 1.1369730234146118, | |
| "learning_rate": 1.930090630996849e-05, | |
| "loss": 0.2084, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.6394596115958344, | |
| "grad_norm": 0.8570969104766846, | |
| "learning_rate": 1.915609040189972e-05, | |
| "loss": 0.1779, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 1.6408668730650153, | |
| "grad_norm": 0.8881973624229431, | |
| "learning_rate": 1.901176225334105e-05, | |
| "loss": 0.2334, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.6422741345341965, | |
| "grad_norm": 1.057015299797058, | |
| "learning_rate": 1.886792273507457e-05, | |
| "loss": 0.2208, | |
| "step": 5835 | |
| }, | |
| { | |
| "epoch": 1.6436813960033776, | |
| "grad_norm": 0.40783455967903137, | |
| "learning_rate": 1.8724572714934307e-05, | |
| "loss": 0.0648, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.6450886574725585, | |
| "grad_norm": 0.8724305629730225, | |
| "learning_rate": 1.8581713057800933e-05, | |
| "loss": 0.2695, | |
| "step": 5845 | |
| }, | |
| { | |
| "epoch": 1.6464959189417394, | |
| "grad_norm": 1.3229783773422241, | |
| "learning_rate": 1.8439344625596534e-05, | |
| "loss": 0.1555, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.6479031804109203, | |
| "grad_norm": 0.7381983399391174, | |
| "learning_rate": 1.8297468277279618e-05, | |
| "loss": 0.177, | |
| "step": 5855 | |
| }, | |
| { | |
| "epoch": 1.6493104418801012, | |
| "grad_norm": 0.4356767535209656, | |
| "learning_rate": 1.8156084868839617e-05, | |
| "loss": 0.094, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.6507177033492821, | |
| "grad_norm": 2.0452256202697754, | |
| "learning_rate": 1.8015195253292016e-05, | |
| "loss": 0.3872, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 1.6521249648184633, | |
| "grad_norm": 0.7345725297927856, | |
| "learning_rate": 1.7874800280672953e-05, | |
| "loss": 0.3794, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.6535322262876444, | |
| "grad_norm": 0.5564286112785339, | |
| "learning_rate": 1.773490079803436e-05, | |
| "loss": 0.194, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 1.6549394877568253, | |
| "grad_norm": 1.4534375667572021, | |
| "learning_rate": 1.7595497649438565e-05, | |
| "loss": 0.2468, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.6563467492260062, | |
| "grad_norm": 1.159037709236145, | |
| "learning_rate": 1.745659167595337e-05, | |
| "loss": 0.2072, | |
| "step": 5885 | |
| }, | |
| { | |
| "epoch": 1.6577540106951871, | |
| "grad_norm": 0.9856454133987427, | |
| "learning_rate": 1.7318183715647017e-05, | |
| "loss": 0.2057, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.659161272164368, | |
| "grad_norm": 0.9816296696662903, | |
| "learning_rate": 1.7180274603583035e-05, | |
| "loss": 0.0591, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 1.660568533633549, | |
| "grad_norm": 0.6953201293945312, | |
| "learning_rate": 1.7042865171815158e-05, | |
| "loss": 0.1549, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.66197579510273, | |
| "grad_norm": 0.9859986901283264, | |
| "learning_rate": 1.6905956249382448e-05, | |
| "loss": 0.1446, | |
| "step": 5905 | |
| }, | |
| { | |
| "epoch": 1.6633830565719112, | |
| "grad_norm": 2.2135300636291504, | |
| "learning_rate": 1.6769548662304224e-05, | |
| "loss": 0.2074, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.6647903180410921, | |
| "grad_norm": 0.7724807858467102, | |
| "learning_rate": 1.6633643233575014e-05, | |
| "loss": 0.1867, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 1.666197579510273, | |
| "grad_norm": 0.6000497341156006, | |
| "learning_rate": 1.6498240783159656e-05, | |
| "loss": 0.3259, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.667604840979454, | |
| "grad_norm": 1.0605989694595337, | |
| "learning_rate": 1.6363342127988435e-05, | |
| "loss": 0.2042, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 1.6690121024486348, | |
| "grad_norm": 0.4106568396091461, | |
| "learning_rate": 1.6228948081951943e-05, | |
| "loss": 0.1073, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.670419363917816, | |
| "grad_norm": 0.9518342614173889, | |
| "learning_rate": 1.6095059455896387e-05, | |
| "loss": 0.1523, | |
| "step": 5935 | |
| }, | |
| { | |
| "epoch": 1.671826625386997, | |
| "grad_norm": 0.7186952829360962, | |
| "learning_rate": 1.596167705761852e-05, | |
| "loss": 0.1052, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.673233886856178, | |
| "grad_norm": 0.5331084728240967, | |
| "learning_rate": 1.5828801691860895e-05, | |
| "loss": 0.1007, | |
| "step": 5945 | |
| }, | |
| { | |
| "epoch": 1.674641148325359, | |
| "grad_norm": 0.530546247959137, | |
| "learning_rate": 1.5696434160306983e-05, | |
| "loss": 0.0948, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.6760484097945398, | |
| "grad_norm": 0.9805326461791992, | |
| "learning_rate": 1.5564575261576254e-05, | |
| "loss": 0.2097, | |
| "step": 5955 | |
| }, | |
| { | |
| "epoch": 1.6774556712637207, | |
| "grad_norm": 0.8919891715049744, | |
| "learning_rate": 1.5433225791219407e-05, | |
| "loss": 0.1409, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.6788629327329017, | |
| "grad_norm": 0.8015194535255432, | |
| "learning_rate": 1.5302386541713687e-05, | |
| "loss": 0.126, | |
| "step": 5965 | |
| }, | |
| { | |
| "epoch": 1.6802701942020828, | |
| "grad_norm": 0.47212257981300354, | |
| "learning_rate": 1.5172058302457881e-05, | |
| "loss": 0.1573, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.6816774556712637, | |
| "grad_norm": 0.6983383297920227, | |
| "learning_rate": 1.5042241859767735e-05, | |
| "loss": 0.1209, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 1.6830847171404448, | |
| "grad_norm": 1.2159236669540405, | |
| "learning_rate": 1.4912937996871168e-05, | |
| "loss": 0.1802, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.6844919786096257, | |
| "grad_norm": 0.764870822429657, | |
| "learning_rate": 1.4784147493903455e-05, | |
| "loss": 0.2714, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 1.6858992400788066, | |
| "grad_norm": 0.9790758490562439, | |
| "learning_rate": 1.4655871127902655e-05, | |
| "loss": 0.2561, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.6873065015479876, | |
| "grad_norm": 2.1390011310577393, | |
| "learning_rate": 1.4528109672804835e-05, | |
| "loss": 0.23, | |
| "step": 5995 | |
| }, | |
| { | |
| "epoch": 1.6887137630171685, | |
| "grad_norm": 0.39941343665122986, | |
| "learning_rate": 1.4400863899439387e-05, | |
| "loss": 0.2019, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.6901210244863496, | |
| "grad_norm": 0.6225385069847107, | |
| "learning_rate": 1.42741345755245e-05, | |
| "loss": 0.1884, | |
| "step": 6005 | |
| }, | |
| { | |
| "epoch": 1.6915282859555305, | |
| "grad_norm": 0.7307006120681763, | |
| "learning_rate": 1.4147922465662367e-05, | |
| "loss": 0.1126, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.6929355474247116, | |
| "grad_norm": 1.095548152923584, | |
| "learning_rate": 1.4022228331334675e-05, | |
| "loss": 0.1279, | |
| "step": 6015 | |
| }, | |
| { | |
| "epoch": 1.6943428088938925, | |
| "grad_norm": 0.45030713081359863, | |
| "learning_rate": 1.3897052930898035e-05, | |
| "loss": 0.1378, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.6957500703630735, | |
| "grad_norm": 1.7270435094833374, | |
| "learning_rate": 1.3772397019579242e-05, | |
| "loss": 0.2399, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 1.6971573318322544, | |
| "grad_norm": 1.0650115013122559, | |
| "learning_rate": 1.3648261349470948e-05, | |
| "loss": 0.1895, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.6985645933014353, | |
| "grad_norm": 1.0545300245285034, | |
| "learning_rate": 1.352464666952694e-05, | |
| "loss": 0.1122, | |
| "step": 6035 | |
| }, | |
| { | |
| "epoch": 1.6999718547706164, | |
| "grad_norm": 1.0150022506713867, | |
| "learning_rate": 1.3401553725557681e-05, | |
| "loss": 0.1585, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.7013791162397973, | |
| "grad_norm": 0.5082919001579285, | |
| "learning_rate": 1.3278983260225875e-05, | |
| "loss": 0.2291, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 1.7027863777089784, | |
| "grad_norm": 0.9131124019622803, | |
| "learning_rate": 1.3156936013041898e-05, | |
| "loss": 0.1303, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.7041936391781594, | |
| "grad_norm": 0.6868187189102173, | |
| "learning_rate": 1.3035412720359353e-05, | |
| "loss": 0.1357, | |
| "step": 6055 | |
| }, | |
| { | |
| "epoch": 1.7056009006473403, | |
| "grad_norm": 0.8841606378555298, | |
| "learning_rate": 1.2914414115370666e-05, | |
| "loss": 0.1271, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.7070081621165212, | |
| "grad_norm": 0.7348530888557434, | |
| "learning_rate": 1.2793940928102654e-05, | |
| "loss": 0.1773, | |
| "step": 6065 | |
| }, | |
| { | |
| "epoch": 1.708415423585702, | |
| "grad_norm": 0.7667552828788757, | |
| "learning_rate": 1.2673993885412073e-05, | |
| "loss": 0.2278, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.7098226850548832, | |
| "grad_norm": 1.5741273164749146, | |
| "learning_rate": 1.2554573710981276e-05, | |
| "loss": 0.1607, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.7112299465240641, | |
| "grad_norm": 1.1054571866989136, | |
| "learning_rate": 1.2435681125313803e-05, | |
| "loss": 0.1732, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.7126372079932453, | |
| "grad_norm": 1.193298101425171, | |
| "learning_rate": 1.2317316845730131e-05, | |
| "loss": 0.2668, | |
| "step": 6085 | |
| }, | |
| { | |
| "epoch": 1.7140444694624262, | |
| "grad_norm": 0.5256794691085815, | |
| "learning_rate": 1.2199481586363281e-05, | |
| "loss": 0.1741, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.715451730931607, | |
| "grad_norm": 1.2280601263046265, | |
| "learning_rate": 1.2082176058154426e-05, | |
| "loss": 0.1479, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 1.716858992400788, | |
| "grad_norm": 1.0573979616165161, | |
| "learning_rate": 1.196540096884876e-05, | |
| "loss": 0.1264, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.718266253869969, | |
| "grad_norm": 1.5370665788650513, | |
| "learning_rate": 1.1849157022991163e-05, | |
| "loss": 0.2142, | |
| "step": 6105 | |
| }, | |
| { | |
| "epoch": 1.71967351533915, | |
| "grad_norm": 0.7827951312065125, | |
| "learning_rate": 1.1733444921921899e-05, | |
| "loss": 0.2057, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.721080776808331, | |
| "grad_norm": 1.3667113780975342, | |
| "learning_rate": 1.1618265363772407e-05, | |
| "loss": 0.2746, | |
| "step": 6115 | |
| }, | |
| { | |
| "epoch": 1.722488038277512, | |
| "grad_norm": 1.506797432899475, | |
| "learning_rate": 1.15036190434612e-05, | |
| "loss": 0.1855, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.723895299746693, | |
| "grad_norm": 0.9613803029060364, | |
| "learning_rate": 1.1389506652689474e-05, | |
| "loss": 0.1031, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 1.7253025612158739, | |
| "grad_norm": 1.2002402544021606, | |
| "learning_rate": 1.1275928879937114e-05, | |
| "loss": 0.1781, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.7267098226850548, | |
| "grad_norm": 0.5957798361778259, | |
| "learning_rate": 1.1162886410458462e-05, | |
| "loss": 0.1176, | |
| "step": 6135 | |
| }, | |
| { | |
| "epoch": 1.7281170841542357, | |
| "grad_norm": 0.9620370268821716, | |
| "learning_rate": 1.1050379926278132e-05, | |
| "loss": 0.1515, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.7295243456234168, | |
| "grad_norm": 0.9195571541786194, | |
| "learning_rate": 1.0938410106187046e-05, | |
| "loss": 0.1121, | |
| "step": 6145 | |
| }, | |
| { | |
| "epoch": 1.730931607092598, | |
| "grad_norm": 0.4538973867893219, | |
| "learning_rate": 1.0826977625738155e-05, | |
| "loss": 0.1129, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.7323388685617789, | |
| "grad_norm": 1.3514046669006348, | |
| "learning_rate": 1.0716083157242484e-05, | |
| "loss": 0.1743, | |
| "step": 6155 | |
| }, | |
| { | |
| "epoch": 1.7337461300309598, | |
| "grad_norm": 0.8769412636756897, | |
| "learning_rate": 1.0605727369765072e-05, | |
| "loss": 0.1615, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.7351533915001407, | |
| "grad_norm": 1.3082162141799927, | |
| "learning_rate": 1.0495910929120866e-05, | |
| "loss": 0.1344, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 1.7365606529693216, | |
| "grad_norm": 0.8667125105857849, | |
| "learning_rate": 1.0386634497870751e-05, | |
| "loss": 0.2135, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.7379679144385025, | |
| "grad_norm": 0.7873309850692749, | |
| "learning_rate": 1.0277898735317614e-05, | |
| "loss": 0.1445, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 1.7393751759076836, | |
| "grad_norm": 1.0749235153198242, | |
| "learning_rate": 1.016970429750218e-05, | |
| "loss": 0.1792, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.7407824373768648, | |
| "grad_norm": 0.7576783299446106, | |
| "learning_rate": 1.0062051837199282e-05, | |
| "loss": 0.1597, | |
| "step": 6185 | |
| }, | |
| { | |
| "epoch": 1.7421896988460457, | |
| "grad_norm": 0.7447710037231445, | |
| "learning_rate": 9.954942003913758e-06, | |
| "loss": 0.1363, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.7435969603152266, | |
| "grad_norm": 0.756251335144043, | |
| "learning_rate": 9.848375443876578e-06, | |
| "loss": 0.1474, | |
| "step": 6195 | |
| }, | |
| { | |
| "epoch": 1.7450042217844075, | |
| "grad_norm": 0.45274704694747925, | |
| "learning_rate": 9.742352800040988e-06, | |
| "loss": 0.065, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.7464114832535884, | |
| "grad_norm": 1.0789294242858887, | |
| "learning_rate": 9.636874712078603e-06, | |
| "loss": 0.2623, | |
| "step": 6205 | |
| }, | |
| { | |
| "epoch": 1.7478187447227693, | |
| "grad_norm": 1.4076869487762451, | |
| "learning_rate": 9.531941816375501e-06, | |
| "loss": 0.2516, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.7492260061919505, | |
| "grad_norm": 2.701754331588745, | |
| "learning_rate": 9.427554746028478e-06, | |
| "loss": 0.2951, | |
| "step": 6215 | |
| }, | |
| { | |
| "epoch": 1.7506332676611316, | |
| "grad_norm": 0.36146071553230286, | |
| "learning_rate": 9.3237141308411e-06, | |
| "loss": 0.0842, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.7520405291303125, | |
| "grad_norm": 1.120956540107727, | |
| "learning_rate": 9.22042059732008e-06, | |
| "loss": 0.2894, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.7534477905994934, | |
| "grad_norm": 0.5138603448867798, | |
| "learning_rate": 9.117674768671313e-06, | |
| "loss": 0.0713, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.7548550520686743, | |
| "grad_norm": 0.8469157814979553, | |
| "learning_rate": 9.015477264796202e-06, | |
| "loss": 0.2038, | |
| "step": 6235 | |
| }, | |
| { | |
| "epoch": 1.7562623135378552, | |
| "grad_norm": 1.5071958303451538, | |
| "learning_rate": 8.913828702287974e-06, | |
| "loss": 0.3285, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.7576695750070364, | |
| "grad_norm": 1.6233199834823608, | |
| "learning_rate": 8.812729694427879e-06, | |
| "loss": 0.1192, | |
| "step": 6245 | |
| }, | |
| { | |
| "epoch": 1.7590768364762173, | |
| "grad_norm": 0.884638786315918, | |
| "learning_rate": 8.712180851181462e-06, | |
| "loss": 0.1612, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.7604840979453984, | |
| "grad_norm": 1.5049396753311157, | |
| "learning_rate": 8.612182779195021e-06, | |
| "loss": 0.1233, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 1.7618913594145793, | |
| "grad_norm": 1.0843751430511475, | |
| "learning_rate": 8.512736081791772e-06, | |
| "loss": 0.2496, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.7632986208837602, | |
| "grad_norm": 0.9301806688308716, | |
| "learning_rate": 8.413841358968332e-06, | |
| "loss": 0.2379, | |
| "step": 6265 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 1.611035943031311, | |
| "learning_rate": 8.315499207391075e-06, | |
| "loss": 0.1856, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.766113143822122, | |
| "grad_norm": 1.3043655157089233, | |
| "learning_rate": 8.217710220392526e-06, | |
| "loss": 0.1456, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 1.7675204052913032, | |
| "grad_norm": 1.800098180770874, | |
| "learning_rate": 8.12047498796773e-06, | |
| "loss": 0.2416, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.768927666760484, | |
| "grad_norm": 0.7097885608673096, | |
| "learning_rate": 8.023794096770808e-06, | |
| "loss": 0.141, | |
| "step": 6285 | |
| }, | |
| { | |
| "epoch": 1.7703349282296652, | |
| "grad_norm": 1.1929750442504883, | |
| "learning_rate": 7.927668130111243e-06, | |
| "loss": 0.3433, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.7717421896988461, | |
| "grad_norm": 1.647980809211731, | |
| "learning_rate": 7.832097667950588e-06, | |
| "loss": 0.2052, | |
| "step": 6295 | |
| }, | |
| { | |
| "epoch": 1.773149451168027, | |
| "grad_norm": 0.43591317534446716, | |
| "learning_rate": 7.737083286898749e-06, | |
| "loss": 0.2104, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.774556712637208, | |
| "grad_norm": 1.241782546043396, | |
| "learning_rate": 7.642625560210637e-06, | |
| "loss": 0.1109, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 1.7759639741063888, | |
| "grad_norm": 0.9579405784606934, | |
| "learning_rate": 7.548725057782658e-06, | |
| "loss": 0.1786, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.77737123557557, | |
| "grad_norm": 0.7312494516372681, | |
| "learning_rate": 7.455382346149342e-06, | |
| "loss": 0.1228, | |
| "step": 6315 | |
| }, | |
| { | |
| "epoch": 1.7787784970447509, | |
| "grad_norm": 0.7087497711181641, | |
| "learning_rate": 7.36259798847978e-06, | |
| "loss": 0.1424, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.780185758513932, | |
| "grad_norm": 1.6807194948196411, | |
| "learning_rate": 7.2703725445744105e-06, | |
| "loss": 0.1199, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 1.781593019983113, | |
| "grad_norm": 1.101808786392212, | |
| "learning_rate": 7.178706570861515e-06, | |
| "loss": 0.0979, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.7830002814522938, | |
| "grad_norm": 1.7121551036834717, | |
| "learning_rate": 7.087600620393864e-06, | |
| "loss": 0.101, | |
| "step": 6335 | |
| }, | |
| { | |
| "epoch": 1.7844075429214747, | |
| "grad_norm": 0.6395900845527649, | |
| "learning_rate": 6.997055242845441e-06, | |
| "loss": 0.2197, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.7858148043906557, | |
| "grad_norm": 0.9732767343521118, | |
| "learning_rate": 6.907070984508124e-06, | |
| "loss": 0.1321, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 1.7872220658598368, | |
| "grad_norm": 1.2426737546920776, | |
| "learning_rate": 6.8176483882883e-06, | |
| "loss": 0.2246, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.7886293273290177, | |
| "grad_norm": 1.6869935989379883, | |
| "learning_rate": 6.728787993703733e-06, | |
| "loss": 0.2733, | |
| "step": 6355 | |
| }, | |
| { | |
| "epoch": 1.7900365887981988, | |
| "grad_norm": 0.49518850445747375, | |
| "learning_rate": 6.640490336880134e-06, | |
| "loss": 0.1142, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.7914438502673797, | |
| "grad_norm": 0.7494794726371765, | |
| "learning_rate": 6.552755950548095e-06, | |
| "loss": 0.2115, | |
| "step": 6365 | |
| }, | |
| { | |
| "epoch": 1.7928511117365606, | |
| "grad_norm": 0.7595309019088745, | |
| "learning_rate": 6.465585364039795e-06, | |
| "loss": 0.1135, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.7942583732057416, | |
| "grad_norm": 0.7823693752288818, | |
| "learning_rate": 6.378979103285765e-06, | |
| "loss": 0.1422, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.7956656346749225, | |
| "grad_norm": 1.9872539043426514, | |
| "learning_rate": 6.292937690811795e-06, | |
| "loss": 0.22, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.7970728961441036, | |
| "grad_norm": 0.46582117676734924, | |
| "learning_rate": 6.207461645735746e-06, | |
| "loss": 0.1519, | |
| "step": 6385 | |
| }, | |
| { | |
| "epoch": 1.7984801576132845, | |
| "grad_norm": 0.40433597564697266, | |
| "learning_rate": 6.122551483764416e-06, | |
| "loss": 0.2422, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.7998874190824656, | |
| "grad_norm": 1.4909939765930176, | |
| "learning_rate": 6.038207717190436e-06, | |
| "loss": 0.1638, | |
| "step": 6395 | |
| }, | |
| { | |
| "epoch": 1.8012946805516465, | |
| "grad_norm": 0.7252668738365173, | |
| "learning_rate": 5.954430854889182e-06, | |
| "loss": 0.1053, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.8027019420208275, | |
| "grad_norm": 1.4477570056915283, | |
| "learning_rate": 5.871221402315674e-06, | |
| "loss": 0.1934, | |
| "step": 6405 | |
| }, | |
| { | |
| "epoch": 1.8041092034900084, | |
| "grad_norm": 0.43066859245300293, | |
| "learning_rate": 5.788579861501597e-06, | |
| "loss": 0.114, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.8055164649591893, | |
| "grad_norm": 1.1360474824905396, | |
| "learning_rate": 5.706506731052175e-06, | |
| "loss": 0.1447, | |
| "step": 6415 | |
| }, | |
| { | |
| "epoch": 1.8069237264283704, | |
| "grad_norm": 0.6951930522918701, | |
| "learning_rate": 5.625002506143218e-06, | |
| "loss": 0.1401, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.8083309878975513, | |
| "grad_norm": 1.213666319847107, | |
| "learning_rate": 5.544067678518194e-06, | |
| "loss": 0.1737, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 1.8097382493667324, | |
| "grad_norm": 0.9512806534767151, | |
| "learning_rate": 5.46370273648511e-06, | |
| "loss": 0.1517, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.8111455108359134, | |
| "grad_norm": 1.4045182466506958, | |
| "learning_rate": 5.3839081649137205e-06, | |
| "loss": 0.1899, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 1.8125527723050943, | |
| "grad_norm": 0.579311192035675, | |
| "learning_rate": 5.304684445232522e-06, | |
| "loss": 0.1442, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.8139600337742752, | |
| "grad_norm": 1.6119418144226074, | |
| "learning_rate": 5.2260320554258225e-06, | |
| "loss": 0.1473, | |
| "step": 6445 | |
| }, | |
| { | |
| "epoch": 1.815367295243456, | |
| "grad_norm": 1.2963722944259644, | |
| "learning_rate": 5.147951470030976e-06, | |
| "loss": 0.227, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.8167745567126372, | |
| "grad_norm": 1.3112095594406128, | |
| "learning_rate": 5.070443160135352e-06, | |
| "loss": 0.116, | |
| "step": 6455 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 0.49451136589050293, | |
| "learning_rate": 4.993507593373625e-06, | |
| "loss": 0.2077, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.8195890796509993, | |
| "grad_norm": 1.0468064546585083, | |
| "learning_rate": 4.917145233924924e-06, | |
| "loss": 0.246, | |
| "step": 6465 | |
| }, | |
| { | |
| "epoch": 1.8209963411201802, | |
| "grad_norm": 0.5947392582893372, | |
| "learning_rate": 4.841356542510022e-06, | |
| "loss": 0.1534, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.822403602589361, | |
| "grad_norm": 0.3909468352794647, | |
| "learning_rate": 4.766141976388494e-06, | |
| "loss": 0.1792, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 1.823810864058542, | |
| "grad_norm": 0.911483645439148, | |
| "learning_rate": 4.691501989356084e-06, | |
| "loss": 0.2147, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.825218125527723, | |
| "grad_norm": 0.5338053703308105, | |
| "learning_rate": 4.617437031741867e-06, | |
| "loss": 0.0811, | |
| "step": 6485 | |
| }, | |
| { | |
| "epoch": 1.826625386996904, | |
| "grad_norm": 0.5877882242202759, | |
| "learning_rate": 4.54394755040558e-06, | |
| "loss": 0.1473, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.8280326484660852, | |
| "grad_norm": 0.21510696411132812, | |
| "learning_rate": 4.471033988734885e-06, | |
| "loss": 0.2545, | |
| "step": 6495 | |
| }, | |
| { | |
| "epoch": 1.829439909935266, | |
| "grad_norm": 1.325976014137268, | |
| "learning_rate": 4.398696786642731e-06, | |
| "loss": 0.1934, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.830847171404447, | |
| "grad_norm": 0.5961741805076599, | |
| "learning_rate": 4.326936380564705e-06, | |
| "loss": 0.1732, | |
| "step": 6505 | |
| }, | |
| { | |
| "epoch": 1.8322544328736279, | |
| "grad_norm": 1.4790091514587402, | |
| "learning_rate": 4.255753203456392e-06, | |
| "loss": 0.1699, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.8336616943428088, | |
| "grad_norm": 0.5095391869544983, | |
| "learning_rate": 4.185147684790691e-06, | |
| "loss": 0.1335, | |
| "step": 6515 | |
| }, | |
| { | |
| "epoch": 1.83506895581199, | |
| "grad_norm": 0.5565084218978882, | |
| "learning_rate": 4.115120250555349e-06, | |
| "loss": 0.1748, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.8364762172811708, | |
| "grad_norm": 1.2198169231414795, | |
| "learning_rate": 4.045671323250333e-06, | |
| "loss": 0.2197, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.837883478750352, | |
| "grad_norm": 0.4299394488334656, | |
| "learning_rate": 3.976801321885215e-06, | |
| "loss": 0.1229, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.8392907402195329, | |
| "grad_norm": 0.8082312345504761, | |
| "learning_rate": 3.908510661976739e-06, | |
| "loss": 0.2784, | |
| "step": 6535 | |
| }, | |
| { | |
| "epoch": 1.8406980016887138, | |
| "grad_norm": 0.7714455723762512, | |
| "learning_rate": 3.840799755546298e-06, | |
| "loss": 0.1128, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.8421052631578947, | |
| "grad_norm": 1.8380225896835327, | |
| "learning_rate": 3.773669011117398e-06, | |
| "loss": 0.2196, | |
| "step": 6545 | |
| }, | |
| { | |
| "epoch": 1.8435125246270756, | |
| "grad_norm": 1.4072784185409546, | |
| "learning_rate": 3.707118833713241e-06, | |
| "loss": 0.1164, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.8449197860962567, | |
| "grad_norm": 2.7376558780670166, | |
| "learning_rate": 3.6411496248542897e-06, | |
| "loss": 0.1715, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 1.8463270475654376, | |
| "grad_norm": 1.3996756076812744, | |
| "learning_rate": 3.5757617825557533e-06, | |
| "loss": 0.1792, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.8477343090346188, | |
| "grad_norm": 1.6355584859848022, | |
| "learning_rate": 3.5109557013253357e-06, | |
| "loss": 0.1213, | |
| "step": 6565 | |
| }, | |
| { | |
| "epoch": 1.8491415705037997, | |
| "grad_norm": 0.6846399903297424, | |
| "learning_rate": 3.446731772160716e-06, | |
| "loss": 0.1431, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.8505488319729806, | |
| "grad_norm": 1.0300202369689941, | |
| "learning_rate": 3.3830903825472493e-06, | |
| "loss": 0.1996, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 1.8519560934421615, | |
| "grad_norm": 0.8449344038963318, | |
| "learning_rate": 3.3200319164556683e-06, | |
| "loss": 0.1457, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.8533633549113424, | |
| "grad_norm": 0.8704646825790405, | |
| "learning_rate": 3.2575567543396746e-06, | |
| "loss": 0.1493, | |
| "step": 6585 | |
| }, | |
| { | |
| "epoch": 1.8547706163805235, | |
| "grad_norm": 1.0447726249694824, | |
| "learning_rate": 3.195665273133719e-06, | |
| "loss": 0.2999, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.8561778778497044, | |
| "grad_norm": 0.6128522157669067, | |
| "learning_rate": 3.134357846250735e-06, | |
| "loss": 0.0989, | |
| "step": 6595 | |
| }, | |
| { | |
| "epoch": 1.8575851393188856, | |
| "grad_norm": 0.7889478802680969, | |
| "learning_rate": 3.073634843579776e-06, | |
| "loss": 0.1107, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.8589924007880665, | |
| "grad_norm": 1.114986777305603, | |
| "learning_rate": 3.0134966314839144e-06, | |
| "loss": 0.0739, | |
| "step": 6605 | |
| }, | |
| { | |
| "epoch": 1.8603996622572474, | |
| "grad_norm": 0.4977349638938904, | |
| "learning_rate": 2.953943572797968e-06, | |
| "loss": 0.0591, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.8618069237264283, | |
| "grad_norm": 0.6706826686859131, | |
| "learning_rate": 2.8949760268263017e-06, | |
| "loss": 0.1383, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 1.8632141851956092, | |
| "grad_norm": 0.6721628308296204, | |
| "learning_rate": 2.8365943493406934e-06, | |
| "loss": 0.1539, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.8646214466647903, | |
| "grad_norm": 0.6661956310272217, | |
| "learning_rate": 2.7787988925782048e-06, | |
| "loss": 0.1833, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 1.8660287081339713, | |
| "grad_norm": 1.3089790344238281, | |
| "learning_rate": 2.7215900052389497e-06, | |
| "loss": 0.1368, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.8674359696031524, | |
| "grad_norm": 1.6742780208587646, | |
| "learning_rate": 2.6649680324841166e-06, | |
| "loss": 0.2486, | |
| "step": 6635 | |
| }, | |
| { | |
| "epoch": 1.8688432310723333, | |
| "grad_norm": 0.8076462745666504, | |
| "learning_rate": 2.608933315933837e-06, | |
| "loss": 0.115, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.8702504925415142, | |
| "grad_norm": 1.4497947692871094, | |
| "learning_rate": 2.5534861936650665e-06, | |
| "loss": 0.1807, | |
| "step": 6645 | |
| }, | |
| { | |
| "epoch": 1.8716577540106951, | |
| "grad_norm": 0.8782854676246643, | |
| "learning_rate": 2.4986270002096747e-06, | |
| "loss": 0.1052, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.873065015479876, | |
| "grad_norm": 0.6687735915184021, | |
| "learning_rate": 2.4443560665523e-06, | |
| "loss": 0.2023, | |
| "step": 6655 | |
| }, | |
| { | |
| "epoch": 1.8744722769490572, | |
| "grad_norm": 0.698962390422821, | |
| "learning_rate": 2.3906737201284002e-06, | |
| "loss": 0.1023, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.875879538418238, | |
| "grad_norm": 1.2811174392700195, | |
| "learning_rate": 2.3375802848223385e-06, | |
| "loss": 0.1374, | |
| "step": 6665 | |
| }, | |
| { | |
| "epoch": 1.8772867998874192, | |
| "grad_norm": 0.8447235226631165, | |
| "learning_rate": 2.285076080965287e-06, | |
| "loss": 0.1569, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.8786940613566, | |
| "grad_norm": 0.6996911764144897, | |
| "learning_rate": 2.233161425333474e-06, | |
| "loss": 0.1395, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.880101322825781, | |
| "grad_norm": 1.388584852218628, | |
| "learning_rate": 2.1818366311460946e-06, | |
| "loss": 0.1692, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.881508584294962, | |
| "grad_norm": 0.5281504988670349, | |
| "learning_rate": 2.1311020080635346e-06, | |
| "loss": 0.1218, | |
| "step": 6685 | |
| }, | |
| { | |
| "epoch": 1.8829158457641428, | |
| "grad_norm": 0.8310534954071045, | |
| "learning_rate": 2.080957862185484e-06, | |
| "loss": 0.2253, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.884323107233324, | |
| "grad_norm": 0.5924013257026672, | |
| "learning_rate": 2.031404496049072e-06, | |
| "loss": 0.0862, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 1.8857303687025049, | |
| "grad_norm": 0.445305198431015, | |
| "learning_rate": 1.982442208627033e-06, | |
| "loss": 0.2208, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.887137630171686, | |
| "grad_norm": 0.66776442527771, | |
| "learning_rate": 1.9340712953259565e-06, | |
| "loss": 0.159, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 1.888544891640867, | |
| "grad_norm": 0.8003804683685303, | |
| "learning_rate": 1.886292047984395e-06, | |
| "loss": 0.1276, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.8899521531100478, | |
| "grad_norm": 1.1968119144439697, | |
| "learning_rate": 1.839104754871257e-06, | |
| "loss": 0.1147, | |
| "step": 6715 | |
| }, | |
| { | |
| "epoch": 1.8913594145792287, | |
| "grad_norm": 2.06772518157959, | |
| "learning_rate": 1.7925097006839198e-06, | |
| "loss": 0.1263, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.8927666760484096, | |
| "grad_norm": 0.8591898083686829, | |
| "learning_rate": 1.746507166546596e-06, | |
| "loss": 0.1612, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 1.8941739375175908, | |
| "grad_norm": 1.3790104389190674, | |
| "learning_rate": 1.7010974300086358e-06, | |
| "loss": 0.1714, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.8955811989867717, | |
| "grad_norm": 0.6857600808143616, | |
| "learning_rate": 1.656280765042828e-06, | |
| "loss": 0.1331, | |
| "step": 6735 | |
| }, | |
| { | |
| "epoch": 1.8969884604559528, | |
| "grad_norm": 0.9561905860900879, | |
| "learning_rate": 1.612057442043724e-06, | |
| "loss": 0.13, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.8983957219251337, | |
| "grad_norm": 1.3840196132659912, | |
| "learning_rate": 1.5684277278260718e-06, | |
| "loss": 0.2562, | |
| "step": 6745 | |
| }, | |
| { | |
| "epoch": 1.8998029833943146, | |
| "grad_norm": 0.6963467001914978, | |
| "learning_rate": 1.525391885623173e-06, | |
| "loss": 0.1882, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.9012102448634955, | |
| "grad_norm": 0.9500248432159424, | |
| "learning_rate": 1.4829501750852626e-06, | |
| "loss": 0.131, | |
| "step": 6755 | |
| }, | |
| { | |
| "epoch": 1.9026175063326765, | |
| "grad_norm": 0.8108523488044739, | |
| "learning_rate": 1.4411028522779757e-06, | |
| "loss": 0.1891, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.9040247678018576, | |
| "grad_norm": 0.6868911981582642, | |
| "learning_rate": 1.3998501696808274e-06, | |
| "loss": 0.1761, | |
| "step": 6765 | |
| }, | |
| { | |
| "epoch": 1.9054320292710387, | |
| "grad_norm": 1.8471946716308594, | |
| "learning_rate": 1.3591923761856363e-06, | |
| "loss": 0.2683, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.9068392907402196, | |
| "grad_norm": 0.5496200919151306, | |
| "learning_rate": 1.3191297170950578e-06, | |
| "loss": 0.1627, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 1.9082465522094005, | |
| "grad_norm": 0.7432734370231628, | |
| "learning_rate": 1.2796624341210873e-06, | |
| "loss": 0.1406, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.9096538136785814, | |
| "grad_norm": 0.773916482925415, | |
| "learning_rate": 1.2407907653836038e-06, | |
| "loss": 0.1308, | |
| "step": 6785 | |
| }, | |
| { | |
| "epoch": 1.9110610751477624, | |
| "grad_norm": 1.0941839218139648, | |
| "learning_rate": 1.2025149454089723e-06, | |
| "loss": 0.1269, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.9124683366169433, | |
| "grad_norm": 0.5930225253105164, | |
| "learning_rate": 1.1648352051285448e-06, | |
| "loss": 0.1393, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 1.9138755980861244, | |
| "grad_norm": 0.38355159759521484, | |
| "learning_rate": 1.127751771877339e-06, | |
| "loss": 0.128, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.9152828595553055, | |
| "grad_norm": 0.8687125444412231, | |
| "learning_rate": 1.0912648693926497e-06, | |
| "loss": 0.128, | |
| "step": 6805 | |
| }, | |
| { | |
| "epoch": 1.9166901210244864, | |
| "grad_norm": 0.9181435704231262, | |
| "learning_rate": 1.055374717812696e-06, | |
| "loss": 0.2078, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.9180973824936673, | |
| "grad_norm": 1.5709048509597778, | |
| "learning_rate": 1.0200815336752657e-06, | |
| "loss": 0.1745, | |
| "step": 6815 | |
| }, | |
| { | |
| "epoch": 1.9195046439628483, | |
| "grad_norm": 0.8740848302841187, | |
| "learning_rate": 9.853855299164717e-07, | |
| "loss": 0.1209, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.9209119054320292, | |
| "grad_norm": 0.46822214126586914, | |
| "learning_rate": 9.512869158693982e-07, | |
| "loss": 0.1031, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.9223191669012103, | |
| "grad_norm": 0.6493380665779114, | |
| "learning_rate": 9.177858972628794e-07, | |
| "loss": 0.1665, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.9237264283703912, | |
| "grad_norm": 0.628223180770874, | |
| "learning_rate": 8.848826762202556e-07, | |
| "loss": 0.1375, | |
| "step": 6835 | |
| }, | |
| { | |
| "epoch": 1.9251336898395723, | |
| "grad_norm": 0.8677277565002441, | |
| "learning_rate": 8.525774512581297e-07, | |
| "loss": 0.1193, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.9265409513087532, | |
| "grad_norm": 0.34191542863845825, | |
| "learning_rate": 8.208704172851911e-07, | |
| "loss": 0.1605, | |
| "step": 6845 | |
| }, | |
| { | |
| "epoch": 1.9279482127779342, | |
| "grad_norm": 0.3965689539909363, | |
| "learning_rate": 7.897617656010381e-07, | |
| "loss": 0.2008, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.929355474247115, | |
| "grad_norm": 1.651140809059143, | |
| "learning_rate": 7.592516838950348e-07, | |
| "loss": 0.259, | |
| "step": 6855 | |
| }, | |
| { | |
| "epoch": 1.930762735716296, | |
| "grad_norm": 1.2457526922225952, | |
| "learning_rate": 7.293403562451229e-07, | |
| "loss": 0.1243, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.932169997185477, | |
| "grad_norm": 0.42919033765792847, | |
| "learning_rate": 7.000279631168005e-07, | |
| "loss": 0.0686, | |
| "step": 6865 | |
| }, | |
| { | |
| "epoch": 1.933577258654658, | |
| "grad_norm": 1.004384160041809, | |
| "learning_rate": 6.713146813619564e-07, | |
| "loss": 0.1132, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.9349845201238391, | |
| "grad_norm": 0.7319831252098083, | |
| "learning_rate": 6.432006842178262e-07, | |
| "loss": 0.0594, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 1.93639178159302, | |
| "grad_norm": 0.9444944262504578, | |
| "learning_rate": 6.156861413059601e-07, | |
| "loss": 0.1181, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.937799043062201, | |
| "grad_norm": 1.6310319900512695, | |
| "learning_rate": 5.887712186312011e-07, | |
| "loss": 0.2333, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 1.9392063045313819, | |
| "grad_norm": 0.7760756015777588, | |
| "learning_rate": 5.624560785806754e-07, | |
| "loss": 0.1101, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.9406135660005628, | |
| "grad_norm": 1.4316829442977905, | |
| "learning_rate": 5.367408799227925e-07, | |
| "loss": 0.1512, | |
| "step": 6895 | |
| }, | |
| { | |
| "epoch": 1.942020827469744, | |
| "grad_norm": 0.6632144451141357, | |
| "learning_rate": 5.116257778063238e-07, | |
| "loss": 0.176, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.9434280889389248, | |
| "grad_norm": 0.4353666603565216, | |
| "learning_rate": 4.871109237594373e-07, | |
| "loss": 0.1293, | |
| "step": 6905 | |
| }, | |
| { | |
| "epoch": 1.944835350408106, | |
| "grad_norm": 2.0593976974487305, | |
| "learning_rate": 4.631964656888088e-07, | |
| "loss": 0.4206, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.9462426118772869, | |
| "grad_norm": 0.8553899526596069, | |
| "learning_rate": 4.3988254787868945e-07, | |
| "loss": 0.2033, | |
| "step": 6915 | |
| }, | |
| { | |
| "epoch": 1.9476498733464678, | |
| "grad_norm": 2.4069225788116455, | |
| "learning_rate": 4.171693109900954e-07, | |
| "loss": 0.1747, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.9490571348156487, | |
| "grad_norm": 1.0317012071609497, | |
| "learning_rate": 3.950568920598974e-07, | |
| "loss": 0.1857, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 1.9504643962848296, | |
| "grad_norm": 0.16559715569019318, | |
| "learning_rate": 3.735454245000436e-07, | |
| "loss": 0.1506, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.9518716577540107, | |
| "grad_norm": 1.008353590965271, | |
| "learning_rate": 3.526350380967047e-07, | |
| "loss": 0.1661, | |
| "step": 6935 | |
| }, | |
| { | |
| "epoch": 1.9532789192231916, | |
| "grad_norm": 0.8605316877365112, | |
| "learning_rate": 3.323258590095635e-07, | |
| "loss": 0.1547, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.9546861806923728, | |
| "grad_norm": 0.8140857815742493, | |
| "learning_rate": 3.126180097709597e-07, | |
| "loss": 0.204, | |
| "step": 6945 | |
| }, | |
| { | |
| "epoch": 1.9560934421615537, | |
| "grad_norm": 0.250213086605072, | |
| "learning_rate": 2.9351160928522416e-07, | |
| "loss": 0.1531, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.9575007036307346, | |
| "grad_norm": 2.0146706104278564, | |
| "learning_rate": 2.7500677282795704e-07, | |
| "loss": 0.135, | |
| "step": 6955 | |
| }, | |
| { | |
| "epoch": 1.9589079650999155, | |
| "grad_norm": 0.43031638860702515, | |
| "learning_rate": 2.57103612045273e-07, | |
| "loss": 0.1118, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.9603152265690964, | |
| "grad_norm": 1.1351455450057983, | |
| "learning_rate": 2.3980223495319034e-07, | |
| "loss": 0.1474, | |
| "step": 6965 | |
| }, | |
| { | |
| "epoch": 1.9617224880382775, | |
| "grad_norm": 0.6760854721069336, | |
| "learning_rate": 2.231027459369539e-07, | |
| "loss": 0.1577, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.9631297495074584, | |
| "grad_norm": 0.6344230771064758, | |
| "learning_rate": 2.0700524575041347e-07, | |
| "loss": 0.0911, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.9645370109766396, | |
| "grad_norm": 0.8816024661064148, | |
| "learning_rate": 1.915098315153907e-07, | |
| "loss": 0.1711, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.9659442724458205, | |
| "grad_norm": 1.2508419752120972, | |
| "learning_rate": 1.766165967211464e-07, | |
| "loss": 0.2165, | |
| "step": 6985 | |
| }, | |
| { | |
| "epoch": 1.9673515339150014, | |
| "grad_norm": 0.9682034254074097, | |
| "learning_rate": 1.6232563122373645e-07, | |
| "loss": 0.1176, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.9687587953841823, | |
| "grad_norm": 0.5194812417030334, | |
| "learning_rate": 1.4863702124554567e-07, | |
| "loss": 0.1792, | |
| "step": 6995 | |
| }, | |
| { | |
| "epoch": 1.9701660568533632, | |
| "grad_norm": 0.7501698136329651, | |
| "learning_rate": 1.3555084937475483e-07, | |
| "loss": 0.1375, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.9715733183225443, | |
| "grad_norm": 0.8848897218704224, | |
| "learning_rate": 1.2306719456478544e-07, | |
| "loss": 0.1218, | |
| "step": 7005 | |
| }, | |
| { | |
| "epoch": 1.9729805797917253, | |
| "grad_norm": 0.5296036601066589, | |
| "learning_rate": 1.1118613213388918e-07, | |
| "loss": 0.0949, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.9743878412609064, | |
| "grad_norm": 0.5823400616645813, | |
| "learning_rate": 9.990773376464812e-08, | |
| "loss": 0.1266, | |
| "step": 7015 | |
| }, | |
| { | |
| "epoch": 1.9757951027300873, | |
| "grad_norm": 1.2051528692245483, | |
| "learning_rate": 8.923206750359736e-08, | |
| "loss": 0.1841, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.9772023641992682, | |
| "grad_norm": 2.1660141944885254, | |
| "learning_rate": 7.915919776073644e-08, | |
| "loss": 0.1758, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 1.9786096256684491, | |
| "grad_norm": 0.9142996072769165, | |
| "learning_rate": 6.968918530920742e-08, | |
| "loss": 0.2226, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.98001688713763, | |
| "grad_norm": 2.0500295162200928, | |
| "learning_rate": 6.082208728490635e-08, | |
| "loss": 0.1638, | |
| "step": 7035 | |
| }, | |
| { | |
| "epoch": 1.9814241486068112, | |
| "grad_norm": 0.7084165811538696, | |
| "learning_rate": 5.255795718611678e-08, | |
| "loss": 0.1535, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.9828314100759923, | |
| "grad_norm": 0.5557725429534912, | |
| "learning_rate": 4.489684487322121e-08, | |
| "loss": 0.1053, | |
| "step": 7045 | |
| }, | |
| { | |
| "epoch": 1.9842386715451732, | |
| "grad_norm": 0.3313843905925751, | |
| "learning_rate": 3.783879656840128e-08, | |
| "loss": 0.1593, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.985645933014354, | |
| "grad_norm": 2.084636688232422, | |
| "learning_rate": 3.1383854855304705e-08, | |
| "loss": 0.1938, | |
| "step": 7055 | |
| }, | |
| { | |
| "epoch": 1.987053194483535, | |
| "grad_norm": 0.47041577100753784, | |
| "learning_rate": 2.553205867884545e-08, | |
| "loss": 0.0875, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.988460455952716, | |
| "grad_norm": 0.6036000847816467, | |
| "learning_rate": 2.0283443344959464e-08, | |
| "loss": 0.064, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 1.9898677174218968, | |
| "grad_norm": 0.40105298161506653, | |
| "learning_rate": 1.5638040520382646e-08, | |
| "loss": 0.1467, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.991274978891078, | |
| "grad_norm": 0.8283329606056213, | |
| "learning_rate": 1.1595878232428803e-08, | |
| "loss": 0.1675, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 1.992682240360259, | |
| "grad_norm": 0.612358570098877, | |
| "learning_rate": 8.15698086888972e-09, | |
| "loss": 0.1813, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.99408950182944, | |
| "grad_norm": 0.3482489287853241, | |
| "learning_rate": 5.321369177835323e-09, | |
| "loss": 0.1543, | |
| "step": 7085 | |
| }, | |
| { | |
| "epoch": 1.995496763298621, | |
| "grad_norm": 0.9294025301933289, | |
| "learning_rate": 3.089060267480459e-09, | |
| "loss": 0.1197, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.9969040247678018, | |
| "grad_norm": 1.7287979125976562, | |
| "learning_rate": 1.4600676061404805e-09, | |
| "loss": 0.1638, | |
| "step": 7095 | |
| }, | |
| { | |
| "epoch": 1.9983112862369827, | |
| "grad_norm": 0.451955109834671, | |
| "learning_rate": 4.344010220980188e-10, | |
| "loss": 0.2378, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.9997185477061636, | |
| "grad_norm": 0.541246771812439, | |
| "learning_rate": 1.20667035474753e-11, | |
| "loss": 0.1537, | |
| "step": 7105 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 7106, | |
| "total_flos": 1.54790643235396e+18, | |
| "train_loss": 0.3593486731773929, | |
| "train_runtime": 16225.5696, | |
| "train_samples_per_second": 3.503, | |
| "train_steps_per_second": 0.438 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 7106, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.54790643235396e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |