| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 0, | |
| "global_step": 633, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001579778830963665, | |
| "grad_norm": 3.3021833896636963, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8142, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00315955766192733, | |
| "grad_norm": 0.5667713284492493, | |
| "learning_rate": 9.984202211690363e-06, | |
| "loss": 0.4081, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004739336492890996, | |
| "grad_norm": 7.904314994812012, | |
| "learning_rate": 9.968404423380728e-06, | |
| "loss": 1.1876, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00631911532385466, | |
| "grad_norm": 10.157713890075684, | |
| "learning_rate": 9.95260663507109e-06, | |
| "loss": 1.4092, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007898894154818325, | |
| "grad_norm": 4.723056316375732, | |
| "learning_rate": 9.936808846761454e-06, | |
| "loss": 0.7578, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009478672985781991, | |
| "grad_norm": 7.033465385437012, | |
| "learning_rate": 9.921011058451816e-06, | |
| "loss": 0.5175, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.011058451816745656, | |
| "grad_norm": 0.800440788269043, | |
| "learning_rate": 9.905213270142182e-06, | |
| "loss": 0.4077, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.01263823064770932, | |
| "grad_norm": 0.6944026350975037, | |
| "learning_rate": 9.889415481832544e-06, | |
| "loss": 0.4686, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.014218009478672985, | |
| "grad_norm": 0.5700448751449585, | |
| "learning_rate": 9.873617693522908e-06, | |
| "loss": 0.3623, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01579778830963665, | |
| "grad_norm": 0.7115408778190613, | |
| "learning_rate": 9.85781990521327e-06, | |
| "loss": 0.4727, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.017377567140600316, | |
| "grad_norm": 0.5764197707176208, | |
| "learning_rate": 9.842022116903635e-06, | |
| "loss": 0.4054, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.018957345971563982, | |
| "grad_norm": 0.615205705165863, | |
| "learning_rate": 9.826224328593997e-06, | |
| "loss": 0.3798, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.020537124802527645, | |
| "grad_norm": 0.6402739882469177, | |
| "learning_rate": 9.810426540284361e-06, | |
| "loss": 0.3966, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.022116903633491312, | |
| "grad_norm": 0.6007937788963318, | |
| "learning_rate": 9.794628751974725e-06, | |
| "loss": 0.4158, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.023696682464454975, | |
| "grad_norm": 0.5462563037872314, | |
| "learning_rate": 9.778830963665089e-06, | |
| "loss": 0.4795, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.02527646129541864, | |
| "grad_norm": 0.6038461923599243, | |
| "learning_rate": 9.76303317535545e-06, | |
| "loss": 0.4142, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.026856240126382307, | |
| "grad_norm": 0.514258861541748, | |
| "learning_rate": 9.747235387045815e-06, | |
| "loss": 0.4139, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.02843601895734597, | |
| "grad_norm": 0.728235125541687, | |
| "learning_rate": 9.731437598736178e-06, | |
| "loss": 0.3129, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.030015797788309637, | |
| "grad_norm": 0.7013534307479858, | |
| "learning_rate": 9.715639810426542e-06, | |
| "loss": 0.4275, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0315955766192733, | |
| "grad_norm": 0.6062476634979248, | |
| "learning_rate": 9.699842022116904e-06, | |
| "loss": 0.3961, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03317535545023697, | |
| "grad_norm": 0.6089779138565063, | |
| "learning_rate": 9.684044233807268e-06, | |
| "loss": 0.4972, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03475513428120063, | |
| "grad_norm": 0.6651365756988525, | |
| "learning_rate": 9.668246445497632e-06, | |
| "loss": 0.4714, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.036334913112164295, | |
| "grad_norm": 0.6064260601997375, | |
| "learning_rate": 9.652448657187995e-06, | |
| "loss": 0.4358, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.037914691943127965, | |
| "grad_norm": 0.5868542790412903, | |
| "learning_rate": 9.636650868878358e-06, | |
| "loss": 0.5178, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03949447077409163, | |
| "grad_norm": 0.6516690850257874, | |
| "learning_rate": 9.620853080568721e-06, | |
| "loss": 0.4281, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04107424960505529, | |
| "grad_norm": 0.7721027731895447, | |
| "learning_rate": 9.605055292259085e-06, | |
| "loss": 0.4979, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.04265402843601896, | |
| "grad_norm": 0.6200973987579346, | |
| "learning_rate": 9.589257503949447e-06, | |
| "loss": 0.347, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.044233807266982623, | |
| "grad_norm": 0.6557235717773438, | |
| "learning_rate": 9.573459715639811e-06, | |
| "loss": 0.3422, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.045813586097946286, | |
| "grad_norm": 1.0422502756118774, | |
| "learning_rate": 9.557661927330175e-06, | |
| "loss": 0.4955, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.04739336492890995, | |
| "grad_norm": 0.8272190093994141, | |
| "learning_rate": 9.541864139020539e-06, | |
| "loss": 0.434, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04897314375987362, | |
| "grad_norm": 0.5929948091506958, | |
| "learning_rate": 9.5260663507109e-06, | |
| "loss": 0.5042, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.05055292259083728, | |
| "grad_norm": 0.7872880101203918, | |
| "learning_rate": 9.510268562401264e-06, | |
| "loss": 0.5175, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.052132701421800945, | |
| "grad_norm": 0.6884463429450989, | |
| "learning_rate": 9.494470774091628e-06, | |
| "loss": 0.5104, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.053712480252764615, | |
| "grad_norm": 1.215976357460022, | |
| "learning_rate": 9.478672985781992e-06, | |
| "loss": 0.4742, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05529225908372828, | |
| "grad_norm": 0.7471550107002258, | |
| "learning_rate": 9.462875197472354e-06, | |
| "loss": 0.4374, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05687203791469194, | |
| "grad_norm": 0.6779741048812866, | |
| "learning_rate": 9.447077409162718e-06, | |
| "loss": 0.4337, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05845181674565561, | |
| "grad_norm": 0.5205997824668884, | |
| "learning_rate": 9.431279620853082e-06, | |
| "loss": 0.4296, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.06003159557661927, | |
| "grad_norm": 0.381757527589798, | |
| "learning_rate": 9.415481832543445e-06, | |
| "loss": 0.2223, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.061611374407582936, | |
| "grad_norm": 0.650593101978302, | |
| "learning_rate": 9.399684044233807e-06, | |
| "loss": 0.5066, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.0631911532385466, | |
| "grad_norm": 0.5445153117179871, | |
| "learning_rate": 9.383886255924171e-06, | |
| "loss": 0.4998, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06477093206951026, | |
| "grad_norm": 0.5024020671844482, | |
| "learning_rate": 9.368088467614535e-06, | |
| "loss": 0.4121, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.06635071090047394, | |
| "grad_norm": 0.6259915232658386, | |
| "learning_rate": 9.352290679304899e-06, | |
| "loss": 0.4969, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0679304897314376, | |
| "grad_norm": 0.49405789375305176, | |
| "learning_rate": 9.336492890995261e-06, | |
| "loss": 0.4121, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06951026856240126, | |
| "grad_norm": 0.7586628198623657, | |
| "learning_rate": 9.320695102685625e-06, | |
| "loss": 0.4782, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.07109004739336493, | |
| "grad_norm": 0.6203773021697998, | |
| "learning_rate": 9.304897314375988e-06, | |
| "loss": 0.3579, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.07266982622432859, | |
| "grad_norm": 0.6982845067977905, | |
| "learning_rate": 9.289099526066352e-06, | |
| "loss": 0.3876, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.07424960505529225, | |
| "grad_norm": 0.5712842345237732, | |
| "learning_rate": 9.273301737756714e-06, | |
| "loss": 0.4288, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.07582938388625593, | |
| "grad_norm": 0.6829891204833984, | |
| "learning_rate": 9.257503949447078e-06, | |
| "loss": 0.4939, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07740916271721959, | |
| "grad_norm": 0.5508958101272583, | |
| "learning_rate": 9.241706161137442e-06, | |
| "loss": 0.372, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07898894154818326, | |
| "grad_norm": 0.9345032572746277, | |
| "learning_rate": 9.225908372827806e-06, | |
| "loss": 0.4896, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08056872037914692, | |
| "grad_norm": 0.6280492544174194, | |
| "learning_rate": 9.210110584518168e-06, | |
| "loss": 0.4375, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.08214849921011058, | |
| "grad_norm": 0.6853601336479187, | |
| "learning_rate": 9.194312796208532e-06, | |
| "loss": 0.4294, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08372827804107424, | |
| "grad_norm": 0.6665984392166138, | |
| "learning_rate": 9.178515007898895e-06, | |
| "loss": 0.5894, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.08530805687203792, | |
| "grad_norm": 0.5088407397270203, | |
| "learning_rate": 9.162717219589257e-06, | |
| "loss": 0.3853, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.08688783570300158, | |
| "grad_norm": 0.5319867730140686, | |
| "learning_rate": 9.146919431279621e-06, | |
| "loss": 0.4791, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08846761453396525, | |
| "grad_norm": 0.6452597975730896, | |
| "learning_rate": 9.131121642969985e-06, | |
| "loss": 0.4056, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.09004739336492891, | |
| "grad_norm": 0.6769601106643677, | |
| "learning_rate": 9.115323854660349e-06, | |
| "loss": 0.4253, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.09162717219589257, | |
| "grad_norm": 0.5170547962188721, | |
| "learning_rate": 9.09952606635071e-06, | |
| "loss": 0.4211, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.09320695102685624, | |
| "grad_norm": 0.5035193562507629, | |
| "learning_rate": 9.083728278041075e-06, | |
| "loss": 0.3144, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.0947867298578199, | |
| "grad_norm": 0.5919070243835449, | |
| "learning_rate": 9.067930489731438e-06, | |
| "loss": 0.4533, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09636650868878358, | |
| "grad_norm": 0.6510637998580933, | |
| "learning_rate": 9.052132701421802e-06, | |
| "loss": 0.4701, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.09794628751974724, | |
| "grad_norm": 0.5784177780151367, | |
| "learning_rate": 9.036334913112164e-06, | |
| "loss": 0.3896, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0995260663507109, | |
| "grad_norm": 0.7009139060974121, | |
| "learning_rate": 9.020537124802528e-06, | |
| "loss": 0.5018, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.10110584518167456, | |
| "grad_norm": 0.5086057186126709, | |
| "learning_rate": 9.004739336492892e-06, | |
| "loss": 0.4305, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.10268562401263823, | |
| "grad_norm": 0.5124595761299133, | |
| "learning_rate": 8.988941548183256e-06, | |
| "loss": 0.4473, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.10426540284360189, | |
| "grad_norm": 0.6409702897071838, | |
| "learning_rate": 8.973143759873618e-06, | |
| "loss": 0.429, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.10584518167456557, | |
| "grad_norm": 0.5651409029960632, | |
| "learning_rate": 8.957345971563981e-06, | |
| "loss": 0.4036, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.10742496050552923, | |
| "grad_norm": 0.6658238172531128, | |
| "learning_rate": 8.941548183254345e-06, | |
| "loss": 0.4726, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10900473933649289, | |
| "grad_norm": 0.444815993309021, | |
| "learning_rate": 8.925750394944709e-06, | |
| "loss": 0.4016, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.11058451816745656, | |
| "grad_norm": 0.5855506658554077, | |
| "learning_rate": 8.909952606635071e-06, | |
| "loss": 0.4531, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11216429699842022, | |
| "grad_norm": 0.693794310092926, | |
| "learning_rate": 8.894154818325435e-06, | |
| "loss": 0.4382, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.11374407582938388, | |
| "grad_norm": 0.6658089756965637, | |
| "learning_rate": 8.878357030015799e-06, | |
| "loss": 0.4571, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11532385466034756, | |
| "grad_norm": 1.0504828691482544, | |
| "learning_rate": 8.862559241706162e-06, | |
| "loss": 0.4311, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.11690363349131122, | |
| "grad_norm": 0.5297814607620239, | |
| "learning_rate": 8.846761453396524e-06, | |
| "loss": 0.4391, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11848341232227488, | |
| "grad_norm": 0.6601409316062927, | |
| "learning_rate": 8.830963665086888e-06, | |
| "loss": 0.5125, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.12006319115323855, | |
| "grad_norm": 0.6345618963241577, | |
| "learning_rate": 8.815165876777252e-06, | |
| "loss": 0.4471, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.12164296998420221, | |
| "grad_norm": 0.5008222460746765, | |
| "learning_rate": 8.799368088467614e-06, | |
| "loss": 0.3845, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.12322274881516587, | |
| "grad_norm": 0.5394203066825867, | |
| "learning_rate": 8.783570300157978e-06, | |
| "loss": 0.4117, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.12480252764612954, | |
| "grad_norm": 0.6255345940589905, | |
| "learning_rate": 8.767772511848342e-06, | |
| "loss": 0.512, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.1263823064770932, | |
| "grad_norm": 0.6215748190879822, | |
| "learning_rate": 8.751974723538705e-06, | |
| "loss": 0.509, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12796208530805686, | |
| "grad_norm": 0.611587405204773, | |
| "learning_rate": 8.736176935229068e-06, | |
| "loss": 0.4036, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.12954186413902052, | |
| "grad_norm": 0.5373330116271973, | |
| "learning_rate": 8.720379146919431e-06, | |
| "loss": 0.393, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.13112164296998421, | |
| "grad_norm": 0.5936598181724548, | |
| "learning_rate": 8.704581358609795e-06, | |
| "loss": 0.4092, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.13270142180094788, | |
| "grad_norm": 0.576614260673523, | |
| "learning_rate": 8.688783570300159e-06, | |
| "loss": 0.5513, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.13428120063191154, | |
| "grad_norm": 0.5715078711509705, | |
| "learning_rate": 8.672985781990521e-06, | |
| "loss": 0.4403, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1358609794628752, | |
| "grad_norm": 0.6212042570114136, | |
| "learning_rate": 8.657187993680885e-06, | |
| "loss": 0.391, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.13744075829383887, | |
| "grad_norm": 0.5439122319221497, | |
| "learning_rate": 8.641390205371249e-06, | |
| "loss": 0.4764, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.13902053712480253, | |
| "grad_norm": 0.6808428168296814, | |
| "learning_rate": 8.625592417061612e-06, | |
| "loss": 0.512, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1406003159557662, | |
| "grad_norm": 0.7429847717285156, | |
| "learning_rate": 8.609794628751974e-06, | |
| "loss": 0.3834, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.14218009478672985, | |
| "grad_norm": 0.6030511260032654, | |
| "learning_rate": 8.59399684044234e-06, | |
| "loss": 0.4631, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.14375987361769352, | |
| "grad_norm": 0.6499682068824768, | |
| "learning_rate": 8.578199052132702e-06, | |
| "loss": 0.4484, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.14533965244865718, | |
| "grad_norm": 0.6490275859832764, | |
| "learning_rate": 8.562401263823066e-06, | |
| "loss": 0.414, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.14691943127962084, | |
| "grad_norm": 0.6859791874885559, | |
| "learning_rate": 8.546603475513428e-06, | |
| "loss": 0.386, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.1484992101105845, | |
| "grad_norm": 0.5281291007995605, | |
| "learning_rate": 8.530805687203793e-06, | |
| "loss": 0.4036, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.1500789889415482, | |
| "grad_norm": 0.5261964797973633, | |
| "learning_rate": 8.515007898894155e-06, | |
| "loss": 0.33, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.15165876777251186, | |
| "grad_norm": 0.4350665211677551, | |
| "learning_rate": 8.499210110584519e-06, | |
| "loss": 0.3347, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.15323854660347552, | |
| "grad_norm": 0.8448456525802612, | |
| "learning_rate": 8.483412322274883e-06, | |
| "loss": 0.4253, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.15481832543443919, | |
| "grad_norm": 0.6256837248802185, | |
| "learning_rate": 8.467614533965247e-06, | |
| "loss": 0.4464, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.15639810426540285, | |
| "grad_norm": 0.7007749676704407, | |
| "learning_rate": 8.451816745655609e-06, | |
| "loss": 0.4641, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1579778830963665, | |
| "grad_norm": 0.6551494002342224, | |
| "learning_rate": 8.436018957345973e-06, | |
| "loss": 0.5097, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15955766192733017, | |
| "grad_norm": 0.5944113731384277, | |
| "learning_rate": 8.420221169036336e-06, | |
| "loss": 0.4554, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.16113744075829384, | |
| "grad_norm": 0.5755615234375, | |
| "learning_rate": 8.4044233807267e-06, | |
| "loss": 0.443, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1627172195892575, | |
| "grad_norm": 0.5263962745666504, | |
| "learning_rate": 8.388625592417062e-06, | |
| "loss": 0.4355, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.16429699842022116, | |
| "grad_norm": 0.6115814447402954, | |
| "learning_rate": 8.372827804107424e-06, | |
| "loss": 0.4863, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.16587677725118483, | |
| "grad_norm": 0.5544970631599426, | |
| "learning_rate": 8.35703001579779e-06, | |
| "loss": 0.3979, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1674565560821485, | |
| "grad_norm": 0.5588533878326416, | |
| "learning_rate": 8.341232227488152e-06, | |
| "loss": 0.4073, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.16903633491311215, | |
| "grad_norm": 0.578982949256897, | |
| "learning_rate": 8.325434439178516e-06, | |
| "loss": 0.3745, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.17061611374407584, | |
| "grad_norm": 0.4955246150493622, | |
| "learning_rate": 8.30963665086888e-06, | |
| "loss": 0.438, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1721958925750395, | |
| "grad_norm": 0.593362033367157, | |
| "learning_rate": 8.293838862559243e-06, | |
| "loss": 0.4161, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.17377567140600317, | |
| "grad_norm": 0.5000883340835571, | |
| "learning_rate": 8.278041074249605e-06, | |
| "loss": 0.432, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.17535545023696683, | |
| "grad_norm": 0.5794082880020142, | |
| "learning_rate": 8.262243285939969e-06, | |
| "loss": 0.4431, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.1769352290679305, | |
| "grad_norm": 0.6179563999176025, | |
| "learning_rate": 8.246445497630333e-06, | |
| "loss": 0.3871, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.17851500789889416, | |
| "grad_norm": 0.6540956497192383, | |
| "learning_rate": 8.230647709320697e-06, | |
| "loss": 0.3706, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.18009478672985782, | |
| "grad_norm": 0.7029737234115601, | |
| "learning_rate": 8.214849921011059e-06, | |
| "loss": 0.5077, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.18167456556082148, | |
| "grad_norm": 0.5466600656509399, | |
| "learning_rate": 8.199052132701422e-06, | |
| "loss": 0.4634, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.18325434439178515, | |
| "grad_norm": 0.5513831973075867, | |
| "learning_rate": 8.183254344391786e-06, | |
| "loss": 0.4457, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1848341232227488, | |
| "grad_norm": 0.7652455568313599, | |
| "learning_rate": 8.16745655608215e-06, | |
| "loss": 0.4376, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.18641390205371247, | |
| "grad_norm": 0.6213077902793884, | |
| "learning_rate": 8.151658767772512e-06, | |
| "loss": 0.3988, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.18799368088467613, | |
| "grad_norm": 0.50051349401474, | |
| "learning_rate": 8.135860979462876e-06, | |
| "loss": 0.4142, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1895734597156398, | |
| "grad_norm": 0.8015328049659729, | |
| "learning_rate": 8.12006319115324e-06, | |
| "loss": 0.4474, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1911532385466035, | |
| "grad_norm": 0.6595532298088074, | |
| "learning_rate": 8.104265402843603e-06, | |
| "loss": 0.5173, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.19273301737756715, | |
| "grad_norm": 0.7859697937965393, | |
| "learning_rate": 8.088467614533966e-06, | |
| "loss": 0.4465, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.1943127962085308, | |
| "grad_norm": 0.6508023738861084, | |
| "learning_rate": 8.07266982622433e-06, | |
| "loss": 0.4448, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.19589257503949448, | |
| "grad_norm": 0.49232304096221924, | |
| "learning_rate": 8.056872037914693e-06, | |
| "loss": 0.4005, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.19747235387045814, | |
| "grad_norm": 0.6464349031448364, | |
| "learning_rate": 8.041074249605057e-06, | |
| "loss": 0.47, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1990521327014218, | |
| "grad_norm": 0.5296919345855713, | |
| "learning_rate": 8.025276461295419e-06, | |
| "loss": 0.4247, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.20063191153238547, | |
| "grad_norm": 0.6270297765731812, | |
| "learning_rate": 8.009478672985783e-06, | |
| "loss": 0.5397, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.20221169036334913, | |
| "grad_norm": 0.6148909330368042, | |
| "learning_rate": 7.993680884676147e-06, | |
| "loss": 0.4133, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.2037914691943128, | |
| "grad_norm": 0.7778130173683167, | |
| "learning_rate": 7.977883096366509e-06, | |
| "loss": 0.5119, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.20537124802527645, | |
| "grad_norm": 0.47952044010162354, | |
| "learning_rate": 7.962085308056872e-06, | |
| "loss": 0.386, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.20695102685624012, | |
| "grad_norm": 0.5951160788536072, | |
| "learning_rate": 7.946287519747236e-06, | |
| "loss": 0.5101, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.20853080568720378, | |
| "grad_norm": 0.6209789514541626, | |
| "learning_rate": 7.9304897314376e-06, | |
| "loss": 0.4988, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.21011058451816747, | |
| "grad_norm": 0.5093654990196228, | |
| "learning_rate": 7.914691943127962e-06, | |
| "loss": 0.374, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.21169036334913113, | |
| "grad_norm": 0.5125884413719177, | |
| "learning_rate": 7.898894154818326e-06, | |
| "loss": 0.4097, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2132701421800948, | |
| "grad_norm": 0.5116066932678223, | |
| "learning_rate": 7.88309636650869e-06, | |
| "loss": 0.4643, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.21484992101105846, | |
| "grad_norm": 0.5778034329414368, | |
| "learning_rate": 7.867298578199053e-06, | |
| "loss": 0.4645, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.21642969984202212, | |
| "grad_norm": 0.6490422487258911, | |
| "learning_rate": 7.851500789889415e-06, | |
| "loss": 0.4825, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.21800947867298578, | |
| "grad_norm": 0.644008219242096, | |
| "learning_rate": 7.83570300157978e-06, | |
| "loss": 0.3954, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.21958925750394945, | |
| "grad_norm": 0.8628047704696655, | |
| "learning_rate": 7.819905213270143e-06, | |
| "loss": 0.5322, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.2211690363349131, | |
| "grad_norm": 0.6286507844924927, | |
| "learning_rate": 7.804107424960507e-06, | |
| "loss": 0.3741, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.22274881516587677, | |
| "grad_norm": 0.6210809350013733, | |
| "learning_rate": 7.788309636650869e-06, | |
| "loss": 0.4572, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.22432859399684044, | |
| "grad_norm": 0.5337722897529602, | |
| "learning_rate": 7.772511848341233e-06, | |
| "loss": 0.3788, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2259083728278041, | |
| "grad_norm": 0.5743194818496704, | |
| "learning_rate": 7.756714060031596e-06, | |
| "loss": 0.3963, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.22748815165876776, | |
| "grad_norm": 0.4972652792930603, | |
| "learning_rate": 7.74091627172196e-06, | |
| "loss": 0.2906, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.22906793048973143, | |
| "grad_norm": 0.5239664316177368, | |
| "learning_rate": 7.725118483412322e-06, | |
| "loss": 0.4009, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.23064770932069512, | |
| "grad_norm": 0.5151936411857605, | |
| "learning_rate": 7.709320695102686e-06, | |
| "loss": 0.4208, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.23222748815165878, | |
| "grad_norm": 0.6128547191619873, | |
| "learning_rate": 7.69352290679305e-06, | |
| "loss": 0.4779, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.23380726698262244, | |
| "grad_norm": 0.5268502235412598, | |
| "learning_rate": 7.677725118483414e-06, | |
| "loss": 0.4219, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.2353870458135861, | |
| "grad_norm": 0.5439866185188293, | |
| "learning_rate": 7.661927330173776e-06, | |
| "loss": 0.4436, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.23696682464454977, | |
| "grad_norm": 0.5291867852210999, | |
| "learning_rate": 7.64612954186414e-06, | |
| "loss": 0.407, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23854660347551343, | |
| "grad_norm": 0.6638155579566956, | |
| "learning_rate": 7.630331753554503e-06, | |
| "loss": 0.403, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.2401263823064771, | |
| "grad_norm": 0.5501230955123901, | |
| "learning_rate": 7.614533965244867e-06, | |
| "loss": 0.5004, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.24170616113744076, | |
| "grad_norm": 0.5949499011039734, | |
| "learning_rate": 7.59873617693523e-06, | |
| "loss": 0.4708, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.24328593996840442, | |
| "grad_norm": 0.5841517448425293, | |
| "learning_rate": 7.582938388625593e-06, | |
| "loss": 0.4836, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.24486571879936808, | |
| "grad_norm": 0.6298154592514038, | |
| "learning_rate": 7.567140600315957e-06, | |
| "loss": 0.4728, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.24644549763033174, | |
| "grad_norm": 0.6107637882232666, | |
| "learning_rate": 7.55134281200632e-06, | |
| "loss": 0.4243, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2480252764612954, | |
| "grad_norm": 0.5174968838691711, | |
| "learning_rate": 7.535545023696683e-06, | |
| "loss": 0.4657, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.24960505529225907, | |
| "grad_norm": 0.5588591694831848, | |
| "learning_rate": 7.519747235387046e-06, | |
| "loss": 0.4567, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.25118483412322273, | |
| "grad_norm": 0.8415222764015198, | |
| "learning_rate": 7.50394944707741e-06, | |
| "loss": 0.4625, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.2527646129541864, | |
| "grad_norm": 0.6054974794387817, | |
| "learning_rate": 7.488151658767773e-06, | |
| "loss": 0.3843, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.25434439178515006, | |
| "grad_norm": 0.5117557644844055, | |
| "learning_rate": 7.472353870458137e-06, | |
| "loss": 0.3887, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.2559241706161137, | |
| "grad_norm": 0.5849332213401794, | |
| "learning_rate": 7.4565560821485e-06, | |
| "loss": 0.4528, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2575039494470774, | |
| "grad_norm": 0.5625325441360474, | |
| "learning_rate": 7.4407582938388635e-06, | |
| "loss": 0.4542, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.25908372827804105, | |
| "grad_norm": 0.5406492352485657, | |
| "learning_rate": 7.4249605055292264e-06, | |
| "loss": 0.4592, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.26066350710900477, | |
| "grad_norm": 0.6318654417991638, | |
| "learning_rate": 7.40916271721959e-06, | |
| "loss": 0.4361, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.26224328593996843, | |
| "grad_norm": 0.5719902515411377, | |
| "learning_rate": 7.393364928909953e-06, | |
| "loss": 0.4799, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.2638230647709321, | |
| "grad_norm": 0.5211177468299866, | |
| "learning_rate": 7.377567140600317e-06, | |
| "loss": 0.33, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.26540284360189575, | |
| "grad_norm": 0.6400920152664185, | |
| "learning_rate": 7.36176935229068e-06, | |
| "loss": 0.4235, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2669826224328594, | |
| "grad_norm": 0.5302186608314514, | |
| "learning_rate": 7.345971563981044e-06, | |
| "loss": 0.4342, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.2685624012638231, | |
| "grad_norm": 0.5393325686454773, | |
| "learning_rate": 7.3301737756714066e-06, | |
| "loss": 0.3632, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.27014218009478674, | |
| "grad_norm": 0.5409063696861267, | |
| "learning_rate": 7.31437598736177e-06, | |
| "loss": 0.4076, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.2717219589257504, | |
| "grad_norm": 0.5056774616241455, | |
| "learning_rate": 7.298578199052133e-06, | |
| "loss": 0.4821, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.27330173775671407, | |
| "grad_norm": 0.6061700582504272, | |
| "learning_rate": 7.282780410742497e-06, | |
| "loss": 0.5137, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.27488151658767773, | |
| "grad_norm": 0.5524815917015076, | |
| "learning_rate": 7.26698262243286e-06, | |
| "loss": 0.4116, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2764612954186414, | |
| "grad_norm": 0.5045567750930786, | |
| "learning_rate": 7.251184834123224e-06, | |
| "loss": 0.3969, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.27804107424960506, | |
| "grad_norm": 0.604505717754364, | |
| "learning_rate": 7.235387045813587e-06, | |
| "loss": 0.5176, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.2796208530805687, | |
| "grad_norm": 0.6067575812339783, | |
| "learning_rate": 7.2195892575039505e-06, | |
| "loss": 0.4438, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.2812006319115324, | |
| "grad_norm": 0.6412494778633118, | |
| "learning_rate": 7.203791469194313e-06, | |
| "loss": 0.4758, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.28278041074249605, | |
| "grad_norm": 0.5432886481285095, | |
| "learning_rate": 7.187993680884676e-06, | |
| "loss": 0.4387, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.2843601895734597, | |
| "grad_norm": 0.4622472822666168, | |
| "learning_rate": 7.17219589257504e-06, | |
| "loss": 0.4775, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2859399684044234, | |
| "grad_norm": 0.643259584903717, | |
| "learning_rate": 7.156398104265403e-06, | |
| "loss": 0.4479, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.28751974723538704, | |
| "grad_norm": 0.48998138308525085, | |
| "learning_rate": 7.140600315955767e-06, | |
| "loss": 0.399, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.2890995260663507, | |
| "grad_norm": 0.5146614909172058, | |
| "learning_rate": 7.12480252764613e-06, | |
| "loss": 0.4475, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.29067930489731436, | |
| "grad_norm": 0.5386670231819153, | |
| "learning_rate": 7.1090047393364935e-06, | |
| "loss": 0.3892, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.292259083728278, | |
| "grad_norm": 0.5147759318351746, | |
| "learning_rate": 7.0932069510268565e-06, | |
| "loss": 0.3755, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2938388625592417, | |
| "grad_norm": 0.5141321420669556, | |
| "learning_rate": 7.07740916271722e-06, | |
| "loss": 0.355, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.29541864139020535, | |
| "grad_norm": 0.9518134593963623, | |
| "learning_rate": 7.061611374407583e-06, | |
| "loss": 0.4021, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.296998420221169, | |
| "grad_norm": 0.5844981670379639, | |
| "learning_rate": 7.045813586097947e-06, | |
| "loss": 0.4233, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2985781990521327, | |
| "grad_norm": 0.6381546854972839, | |
| "learning_rate": 7.03001579778831e-06, | |
| "loss": 0.4862, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.3001579778830964, | |
| "grad_norm": 0.7311195135116577, | |
| "learning_rate": 7.014218009478674e-06, | |
| "loss": 0.4822, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.30173775671406006, | |
| "grad_norm": 0.5827596783638, | |
| "learning_rate": 6.998420221169037e-06, | |
| "loss": 0.4027, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.3033175355450237, | |
| "grad_norm": 0.6907688975334167, | |
| "learning_rate": 6.9826224328594e-06, | |
| "loss": 0.4374, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.3048973143759874, | |
| "grad_norm": 0.5060120820999146, | |
| "learning_rate": 6.966824644549763e-06, | |
| "loss": 0.4226, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.30647709320695105, | |
| "grad_norm": 0.41480544209480286, | |
| "learning_rate": 6.951026856240127e-06, | |
| "loss": 0.3766, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.3080568720379147, | |
| "grad_norm": 0.5637404322624207, | |
| "learning_rate": 6.93522906793049e-06, | |
| "loss": 0.4365, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.30963665086887837, | |
| "grad_norm": 0.6389409899711609, | |
| "learning_rate": 6.919431279620854e-06, | |
| "loss": 0.4186, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.31121642969984203, | |
| "grad_norm": 0.48588162660598755, | |
| "learning_rate": 6.903633491311217e-06, | |
| "loss": 0.4023, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.3127962085308057, | |
| "grad_norm": 0.6066514253616333, | |
| "learning_rate": 6.8878357030015805e-06, | |
| "loss": 0.4652, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.31437598736176936, | |
| "grad_norm": 0.6308689117431641, | |
| "learning_rate": 6.8720379146919435e-06, | |
| "loss": 0.3885, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.315955766192733, | |
| "grad_norm": 0.4883437752723694, | |
| "learning_rate": 6.856240126382307e-06, | |
| "loss": 0.4128, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3175355450236967, | |
| "grad_norm": 0.720086932182312, | |
| "learning_rate": 6.84044233807267e-06, | |
| "loss": 0.4333, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.31911532385466035, | |
| "grad_norm": 0.6698761582374573, | |
| "learning_rate": 6.824644549763034e-06, | |
| "loss": 0.3967, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.320695102685624, | |
| "grad_norm": 0.5240082740783691, | |
| "learning_rate": 6.808846761453397e-06, | |
| "loss": 0.4055, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.3222748815165877, | |
| "grad_norm": 0.6142946481704712, | |
| "learning_rate": 6.79304897314376e-06, | |
| "loss": 0.3645, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.32385466034755134, | |
| "grad_norm": 0.6439379453659058, | |
| "learning_rate": 6.777251184834124e-06, | |
| "loss": 0.3207, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.325434439178515, | |
| "grad_norm": 0.6862720847129822, | |
| "learning_rate": 6.7614533965244865e-06, | |
| "loss": 0.4944, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.32701421800947866, | |
| "grad_norm": 0.6720433235168457, | |
| "learning_rate": 6.74565560821485e-06, | |
| "loss": 0.4335, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.3285939968404423, | |
| "grad_norm": 0.531577467918396, | |
| "learning_rate": 6.729857819905213e-06, | |
| "loss": 0.5327, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.330173775671406, | |
| "grad_norm": 0.5542590022087097, | |
| "learning_rate": 6.714060031595577e-06, | |
| "loss": 0.3629, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.33175355450236965, | |
| "grad_norm": 0.5614448189735413, | |
| "learning_rate": 6.69826224328594e-06, | |
| "loss": 0.4097, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.7383466362953186, | |
| "learning_rate": 6.682464454976304e-06, | |
| "loss": 0.5031, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.334913112164297, | |
| "grad_norm": 0.6345497965812683, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.5029, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.33649289099526064, | |
| "grad_norm": 0.579641580581665, | |
| "learning_rate": 6.6508688783570304e-06, | |
| "loss": 0.4949, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.3380726698262243, | |
| "grad_norm": 0.5040780305862427, | |
| "learning_rate": 6.635071090047393e-06, | |
| "loss": 0.4537, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.33965244865718797, | |
| "grad_norm": 0.5917491316795349, | |
| "learning_rate": 6.619273301737757e-06, | |
| "loss": 0.3883, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3412322274881517, | |
| "grad_norm": 0.7031399011611938, | |
| "learning_rate": 6.60347551342812e-06, | |
| "loss": 0.4554, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.34281200631911535, | |
| "grad_norm": 0.5503798127174377, | |
| "learning_rate": 6.587677725118484e-06, | |
| "loss": 0.352, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.344391785150079, | |
| "grad_norm": 0.5412716269493103, | |
| "learning_rate": 6.571879936808847e-06, | |
| "loss": 0.4191, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3459715639810427, | |
| "grad_norm": 0.6272369623184204, | |
| "learning_rate": 6.556082148499211e-06, | |
| "loss": 0.4595, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.34755134281200634, | |
| "grad_norm": 0.5309504270553589, | |
| "learning_rate": 6.5402843601895735e-06, | |
| "loss": 0.4095, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.34913112164297, | |
| "grad_norm": 0.5687200427055359, | |
| "learning_rate": 6.524486571879938e-06, | |
| "loss": 0.435, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.35071090047393366, | |
| "grad_norm": 0.5819438099861145, | |
| "learning_rate": 6.5086887835703e-06, | |
| "loss": 0.4695, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.3522906793048973, | |
| "grad_norm": 0.6310110092163086, | |
| "learning_rate": 6.492890995260665e-06, | |
| "loss": 0.4346, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.353870458135861, | |
| "grad_norm": 0.5838906168937683, | |
| "learning_rate": 6.477093206951027e-06, | |
| "loss": 0.47, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.35545023696682465, | |
| "grad_norm": 0.6752678155899048, | |
| "learning_rate": 6.4612954186413915e-06, | |
| "loss": 0.3842, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3570300157977883, | |
| "grad_norm": 0.7029111981391907, | |
| "learning_rate": 6.445497630331754e-06, | |
| "loss": 0.4442, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.358609794628752, | |
| "grad_norm": 0.511812686920166, | |
| "learning_rate": 6.429699842022118e-06, | |
| "loss": 0.5171, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.36018957345971564, | |
| "grad_norm": 0.49457868933677673, | |
| "learning_rate": 6.413902053712481e-06, | |
| "loss": 0.3695, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3617693522906793, | |
| "grad_norm": 0.4521022439002991, | |
| "learning_rate": 6.398104265402843e-06, | |
| "loss": 0.3909, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.36334913112164297, | |
| "grad_norm": 0.45229026675224304, | |
| "learning_rate": 6.382306477093208e-06, | |
| "loss": 0.3417, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.36492890995260663, | |
| "grad_norm": 0.5070056915283203, | |
| "learning_rate": 6.36650868878357e-06, | |
| "loss": 0.3518, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.3665086887835703, | |
| "grad_norm": 0.9325531721115112, | |
| "learning_rate": 6.350710900473935e-06, | |
| "loss": 0.5172, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.36808846761453395, | |
| "grad_norm": 0.6027977466583252, | |
| "learning_rate": 6.334913112164297e-06, | |
| "loss": 0.4052, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.3696682464454976, | |
| "grad_norm": 0.7251097559928894, | |
| "learning_rate": 6.319115323854661e-06, | |
| "loss": 0.4739, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.3712480252764613, | |
| "grad_norm": 0.6470052003860474, | |
| "learning_rate": 6.303317535545023e-06, | |
| "loss": 0.4745, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.37282780410742494, | |
| "grad_norm": 0.7177411317825317, | |
| "learning_rate": 6.287519747235388e-06, | |
| "loss": 0.364, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3744075829383886, | |
| "grad_norm": 0.7681677341461182, | |
| "learning_rate": 6.271721958925751e-06, | |
| "loss": 0.4559, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.37598736176935227, | |
| "grad_norm": 0.6160128116607666, | |
| "learning_rate": 6.255924170616115e-06, | |
| "loss": 0.421, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.37756714060031593, | |
| "grad_norm": 0.658981442451477, | |
| "learning_rate": 6.240126382306478e-06, | |
| "loss": 0.3979, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.3791469194312796, | |
| "grad_norm": 0.9422373175621033, | |
| "learning_rate": 6.2243285939968414e-06, | |
| "loss": 0.3586, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3807266982622433, | |
| "grad_norm": 0.5452501773834229, | |
| "learning_rate": 6.208530805687204e-06, | |
| "loss": 0.4209, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.382306477093207, | |
| "grad_norm": 0.4912925660610199, | |
| "learning_rate": 6.192733017377568e-06, | |
| "loss": 0.4784, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.38388625592417064, | |
| "grad_norm": 0.6575455665588379, | |
| "learning_rate": 6.176935229067931e-06, | |
| "loss": 0.4062, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3854660347551343, | |
| "grad_norm": 0.8840091824531555, | |
| "learning_rate": 6.161137440758295e-06, | |
| "loss": 0.4177, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.38704581358609796, | |
| "grad_norm": 0.5949338674545288, | |
| "learning_rate": 6.145339652448658e-06, | |
| "loss": 0.4477, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.3886255924170616, | |
| "grad_norm": 0.5938326120376587, | |
| "learning_rate": 6.1295418641390216e-06, | |
| "loss": 0.4155, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3902053712480253, | |
| "grad_norm": 0.5401394367218018, | |
| "learning_rate": 6.1137440758293845e-06, | |
| "loss": 0.3873, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.39178515007898895, | |
| "grad_norm": 0.5220497846603394, | |
| "learning_rate": 6.097946287519748e-06, | |
| "loss": 0.3803, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3933649289099526, | |
| "grad_norm": 0.5426644086837769, | |
| "learning_rate": 6.082148499210111e-06, | |
| "loss": 0.3239, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.3949447077409163, | |
| "grad_norm": 0.5215898156166077, | |
| "learning_rate": 6.066350710900475e-06, | |
| "loss": 0.4373, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.39652448657187994, | |
| "grad_norm": 0.5694135427474976, | |
| "learning_rate": 6.050552922590838e-06, | |
| "loss": 0.4948, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.3981042654028436, | |
| "grad_norm": 0.5505183339118958, | |
| "learning_rate": 6.034755134281202e-06, | |
| "loss": 0.4108, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.39968404423380727, | |
| "grad_norm": 0.593190610408783, | |
| "learning_rate": 6.018957345971565e-06, | |
| "loss": 0.429, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.40126382306477093, | |
| "grad_norm": 0.5409046411514282, | |
| "learning_rate": 6.003159557661928e-06, | |
| "loss": 0.4443, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.4028436018957346, | |
| "grad_norm": 0.5520291328430176, | |
| "learning_rate": 5.987361769352291e-06, | |
| "loss": 0.4485, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.40442338072669826, | |
| "grad_norm": 0.5622429847717285, | |
| "learning_rate": 5.971563981042654e-06, | |
| "loss": 0.4181, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.4060031595576619, | |
| "grad_norm": 0.5267983078956604, | |
| "learning_rate": 5.955766192733018e-06, | |
| "loss": 0.4235, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.4075829383886256, | |
| "grad_norm": 0.5384082198143005, | |
| "learning_rate": 5.939968404423381e-06, | |
| "loss": 0.4055, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.40916271721958924, | |
| "grad_norm": 0.5427289605140686, | |
| "learning_rate": 5.924170616113745e-06, | |
| "loss": 0.3427, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.4107424960505529, | |
| "grad_norm": 0.4936423599720001, | |
| "learning_rate": 5.908372827804108e-06, | |
| "loss": 0.4133, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.41232227488151657, | |
| "grad_norm": 0.5825520753860474, | |
| "learning_rate": 5.8925750394944715e-06, | |
| "loss": 0.377, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.41390205371248023, | |
| "grad_norm": 0.6343340277671814, | |
| "learning_rate": 5.876777251184834e-06, | |
| "loss": 0.441, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.4154818325434439, | |
| "grad_norm": 0.5479387044906616, | |
| "learning_rate": 5.860979462875198e-06, | |
| "loss": 0.4353, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.41706161137440756, | |
| "grad_norm": 0.5873805284500122, | |
| "learning_rate": 5.845181674565561e-06, | |
| "loss": 0.4293, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.4186413902053712, | |
| "grad_norm": 0.6624792218208313, | |
| "learning_rate": 5.829383886255925e-06, | |
| "loss": 0.5162, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.42022116903633494, | |
| "grad_norm": 0.5797149538993835, | |
| "learning_rate": 5.813586097946288e-06, | |
| "loss": 0.3651, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.4218009478672986, | |
| "grad_norm": 0.5814763903617859, | |
| "learning_rate": 5.797788309636652e-06, | |
| "loss": 0.3817, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.42338072669826227, | |
| "grad_norm": 0.5556735992431641, | |
| "learning_rate": 5.7819905213270145e-06, | |
| "loss": 0.4186, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.42496050552922593, | |
| "grad_norm": 0.5842727422714233, | |
| "learning_rate": 5.766192733017378e-06, | |
| "loss": 0.4343, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.4265402843601896, | |
| "grad_norm": 0.5401722192764282, | |
| "learning_rate": 5.750394944707741e-06, | |
| "loss": 0.4418, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.42812006319115326, | |
| "grad_norm": 0.5917039513587952, | |
| "learning_rate": 5.734597156398105e-06, | |
| "loss": 0.5371, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.4296998420221169, | |
| "grad_norm": 0.5991331338882446, | |
| "learning_rate": 5.718799368088468e-06, | |
| "loss": 0.4969, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4312796208530806, | |
| "grad_norm": 0.4709448218345642, | |
| "learning_rate": 5.703001579778832e-06, | |
| "loss": 0.4139, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.43285939968404424, | |
| "grad_norm": 0.5746496319770813, | |
| "learning_rate": 5.687203791469195e-06, | |
| "loss": 0.4683, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.4344391785150079, | |
| "grad_norm": 0.523835301399231, | |
| "learning_rate": 5.6714060031595584e-06, | |
| "loss": 0.4346, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.43601895734597157, | |
| "grad_norm": 0.5292810797691345, | |
| "learning_rate": 5.655608214849921e-06, | |
| "loss": 0.463, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.43759873617693523, | |
| "grad_norm": 0.6543466448783875, | |
| "learning_rate": 5.639810426540285e-06, | |
| "loss": 0.427, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.4391785150078989, | |
| "grad_norm": 0.5543989539146423, | |
| "learning_rate": 5.624012638230648e-06, | |
| "loss": 0.3902, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.44075829383886256, | |
| "grad_norm": 0.5905360579490662, | |
| "learning_rate": 5.608214849921012e-06, | |
| "loss": 0.4266, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4423380726698262, | |
| "grad_norm": 0.5785796046257019, | |
| "learning_rate": 5.592417061611375e-06, | |
| "loss": 0.4521, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4439178515007899, | |
| "grad_norm": 0.5580607056617737, | |
| "learning_rate": 5.576619273301738e-06, | |
| "loss": 0.378, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.44549763033175355, | |
| "grad_norm": 0.5100966691970825, | |
| "learning_rate": 5.5608214849921015e-06, | |
| "loss": 0.3876, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4470774091627172, | |
| "grad_norm": 0.5704023241996765, | |
| "learning_rate": 5.5450236966824644e-06, | |
| "loss": 0.4694, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.4486571879936809, | |
| "grad_norm": 0.5954383611679077, | |
| "learning_rate": 5.529225908372828e-06, | |
| "loss": 0.5049, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.45023696682464454, | |
| "grad_norm": 0.5239635705947876, | |
| "learning_rate": 5.513428120063191e-06, | |
| "loss": 0.4182, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.4518167456556082, | |
| "grad_norm": 0.6643552780151367, | |
| "learning_rate": 5.497630331753555e-06, | |
| "loss": 0.4434, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.45339652448657186, | |
| "grad_norm": 0.6675540804862976, | |
| "learning_rate": 5.481832543443918e-06, | |
| "loss": 0.3745, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.4549763033175355, | |
| "grad_norm": 0.5871401429176331, | |
| "learning_rate": 5.466034755134282e-06, | |
| "loss": 0.5527, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4565560821484992, | |
| "grad_norm": 0.5936838984489441, | |
| "learning_rate": 5.4502369668246446e-06, | |
| "loss": 0.4857, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.45813586097946285, | |
| "grad_norm": 0.5998191833496094, | |
| "learning_rate": 5.434439178515008e-06, | |
| "loss": 0.4395, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4597156398104265, | |
| "grad_norm": 0.5102293491363525, | |
| "learning_rate": 5.418641390205371e-06, | |
| "loss": 0.4496, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.46129541864139023, | |
| "grad_norm": 0.6297216415405273, | |
| "learning_rate": 5.402843601895735e-06, | |
| "loss": 0.3555, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4628751974723539, | |
| "grad_norm": 0.6780267953872681, | |
| "learning_rate": 5.387045813586098e-06, | |
| "loss": 0.3295, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.46445497630331756, | |
| "grad_norm": 0.5788872838020325, | |
| "learning_rate": 5.371248025276462e-06, | |
| "loss": 0.4293, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4660347551342812, | |
| "grad_norm": 0.5679113268852234, | |
| "learning_rate": 5.355450236966825e-06, | |
| "loss": 0.4274, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4676145339652449, | |
| "grad_norm": 0.5739018321037292, | |
| "learning_rate": 5.3396524486571885e-06, | |
| "loss": 0.3292, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.46919431279620855, | |
| "grad_norm": 0.5387299060821533, | |
| "learning_rate": 5.323854660347551e-06, | |
| "loss": 0.36, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.4707740916271722, | |
| "grad_norm": 0.4877624213695526, | |
| "learning_rate": 5.308056872037915e-06, | |
| "loss": 0.403, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.47235387045813587, | |
| "grad_norm": 0.5668107271194458, | |
| "learning_rate": 5.292259083728278e-06, | |
| "loss": 0.4087, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "grad_norm": 0.5592719316482544, | |
| "learning_rate": 5.276461295418642e-06, | |
| "loss": 0.405, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4755134281200632, | |
| "grad_norm": 0.48879534006118774, | |
| "learning_rate": 5.260663507109005e-06, | |
| "loss": 0.3562, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.47709320695102686, | |
| "grad_norm": 0.5968641042709351, | |
| "learning_rate": 5.244865718799369e-06, | |
| "loss": 0.4216, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4786729857819905, | |
| "grad_norm": 0.7803828120231628, | |
| "learning_rate": 5.2290679304897315e-06, | |
| "loss": 0.4014, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.4802527646129542, | |
| "grad_norm": 0.592827558517456, | |
| "learning_rate": 5.213270142180096e-06, | |
| "loss": 0.2895, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.48183254344391785, | |
| "grad_norm": 0.8070396184921265, | |
| "learning_rate": 5.197472353870458e-06, | |
| "loss": 0.3972, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4834123222748815, | |
| "grad_norm": 0.5256397724151611, | |
| "learning_rate": 5.181674565560821e-06, | |
| "loss": 0.4384, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4849921011058452, | |
| "grad_norm": 0.5307562947273254, | |
| "learning_rate": 5.165876777251185e-06, | |
| "loss": 0.3788, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.48657187993680884, | |
| "grad_norm": 0.4588807225227356, | |
| "learning_rate": 5.150078988941548e-06, | |
| "loss": 0.3491, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4881516587677725, | |
| "grad_norm": 0.524919331073761, | |
| "learning_rate": 5.134281200631912e-06, | |
| "loss": 0.4375, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.48973143759873616, | |
| "grad_norm": 0.6611966490745544, | |
| "learning_rate": 5.118483412322275e-06, | |
| "loss": 0.4399, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4913112164296998, | |
| "grad_norm": 0.5597748160362244, | |
| "learning_rate": 5.102685624012638e-06, | |
| "loss": 0.5073, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.4928909952606635, | |
| "grad_norm": 0.8958181738853455, | |
| "learning_rate": 5.086887835703001e-06, | |
| "loss": 0.4756, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.49447077409162715, | |
| "grad_norm": 0.4875742197036743, | |
| "learning_rate": 5.071090047393366e-06, | |
| "loss": 0.4424, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.4960505529225908, | |
| "grad_norm": 0.6110445261001587, | |
| "learning_rate": 5.055292259083728e-06, | |
| "loss": 0.4686, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4976303317535545, | |
| "grad_norm": 0.5900540351867676, | |
| "learning_rate": 5.039494470774093e-06, | |
| "loss": 0.4, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.49921011058451814, | |
| "grad_norm": 0.624906599521637, | |
| "learning_rate": 5.023696682464455e-06, | |
| "loss": 0.3967, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.5007898894154819, | |
| "grad_norm": 0.6435191631317139, | |
| "learning_rate": 5.007898894154819e-06, | |
| "loss": 0.5104, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.5023696682464455, | |
| "grad_norm": 0.7464382648468018, | |
| "learning_rate": 4.9921011058451815e-06, | |
| "loss": 0.4621, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.5039494470774092, | |
| "grad_norm": 0.7912509441375732, | |
| "learning_rate": 4.976303317535545e-06, | |
| "loss": 0.4186, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.5055292259083728, | |
| "grad_norm": 0.6150445938110352, | |
| "learning_rate": 4.960505529225908e-06, | |
| "loss": 0.469, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5071090047393365, | |
| "grad_norm": 0.5445781946182251, | |
| "learning_rate": 4.944707740916272e-06, | |
| "loss": 0.4111, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.5086887835703001, | |
| "grad_norm": 0.5628255605697632, | |
| "learning_rate": 4.928909952606635e-06, | |
| "loss": 0.4884, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.5102685624012638, | |
| "grad_norm": 0.5007054805755615, | |
| "learning_rate": 4.913112164296999e-06, | |
| "loss": 0.4315, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.5118483412322274, | |
| "grad_norm": 0.6346699595451355, | |
| "learning_rate": 4.8973143759873624e-06, | |
| "loss": 0.4033, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.5134281200631912, | |
| "grad_norm": 0.639045774936676, | |
| "learning_rate": 4.881516587677725e-06, | |
| "loss": 0.3748, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5150078988941548, | |
| "grad_norm": 0.5578002333641052, | |
| "learning_rate": 4.865718799368089e-06, | |
| "loss": 0.5055, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.5165876777251185, | |
| "grad_norm": 0.5281325578689575, | |
| "learning_rate": 4.849921011058452e-06, | |
| "loss": 0.4307, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.5181674565560821, | |
| "grad_norm": 0.6557057499885559, | |
| "learning_rate": 4.834123222748816e-06, | |
| "loss": 0.4085, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.5197472353870458, | |
| "grad_norm": 0.5667731761932373, | |
| "learning_rate": 4.818325434439179e-06, | |
| "loss": 0.4774, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.5213270142180095, | |
| "grad_norm": 0.5362856984138489, | |
| "learning_rate": 4.8025276461295426e-06, | |
| "loss": 0.4316, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5229067930489731, | |
| "grad_norm": 0.5326763391494751, | |
| "learning_rate": 4.7867298578199055e-06, | |
| "loss": 0.389, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.5244865718799369, | |
| "grad_norm": 0.4922950565814972, | |
| "learning_rate": 4.770932069510269e-06, | |
| "loss": 0.3756, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5260663507109005, | |
| "grad_norm": 0.4961477518081665, | |
| "learning_rate": 4.755134281200632e-06, | |
| "loss": 0.4336, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.5276461295418642, | |
| "grad_norm": 0.5258511304855347, | |
| "learning_rate": 4.739336492890996e-06, | |
| "loss": 0.404, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5292259083728278, | |
| "grad_norm": 0.5479301810264587, | |
| "learning_rate": 4.723538704581359e-06, | |
| "loss": 0.3578, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5308056872037915, | |
| "grad_norm": 0.49883902072906494, | |
| "learning_rate": 4.707740916271723e-06, | |
| "loss": 0.3809, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.5323854660347551, | |
| "grad_norm": 0.5133053660392761, | |
| "learning_rate": 4.691943127962086e-06, | |
| "loss": 0.4091, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.5339652448657188, | |
| "grad_norm": 0.6334301829338074, | |
| "learning_rate": 4.676145339652449e-06, | |
| "loss": 0.4432, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.5355450236966824, | |
| "grad_norm": 0.5124396085739136, | |
| "learning_rate": 4.660347551342812e-06, | |
| "loss": 0.3557, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.5371248025276462, | |
| "grad_norm": 0.5863746404647827, | |
| "learning_rate": 4.644549763033176e-06, | |
| "loss": 0.4288, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5387045813586098, | |
| "grad_norm": 0.6599943041801453, | |
| "learning_rate": 4.628751974723539e-06, | |
| "loss": 0.398, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.5402843601895735, | |
| "grad_norm": 0.480027437210083, | |
| "learning_rate": 4.612954186413903e-06, | |
| "loss": 0.4706, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5418641390205371, | |
| "grad_norm": 0.6601845026016235, | |
| "learning_rate": 4.597156398104266e-06, | |
| "loss": 0.4092, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.5434439178515008, | |
| "grad_norm": 0.5557224154472351, | |
| "learning_rate": 4.581358609794629e-06, | |
| "loss": 0.389, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5450236966824644, | |
| "grad_norm": 0.49160709977149963, | |
| "learning_rate": 4.5655608214849925e-06, | |
| "loss": 0.4338, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5466034755134281, | |
| "grad_norm": 0.5284649133682251, | |
| "learning_rate": 4.549763033175355e-06, | |
| "loss": 0.403, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5481832543443917, | |
| "grad_norm": 0.5501908659934998, | |
| "learning_rate": 4.533965244865719e-06, | |
| "loss": 0.4983, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5497630331753555, | |
| "grad_norm": 0.5585077404975891, | |
| "learning_rate": 4.518167456556082e-06, | |
| "loss": 0.4219, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5513428120063191, | |
| "grad_norm": 0.4565962255001068, | |
| "learning_rate": 4.502369668246446e-06, | |
| "loss": 0.3591, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5529225908372828, | |
| "grad_norm": 0.5507949590682983, | |
| "learning_rate": 4.486571879936809e-06, | |
| "loss": 0.4752, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5545023696682464, | |
| "grad_norm": 0.5490357875823975, | |
| "learning_rate": 4.470774091627173e-06, | |
| "loss": 0.4291, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5560821484992101, | |
| "grad_norm": 0.5804268717765808, | |
| "learning_rate": 4.4549763033175355e-06, | |
| "loss": 0.3113, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5576619273301737, | |
| "grad_norm": 0.4745613634586334, | |
| "learning_rate": 4.439178515007899e-06, | |
| "loss": 0.4196, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5592417061611374, | |
| "grad_norm": 0.6223664283752441, | |
| "learning_rate": 4.423380726698262e-06, | |
| "loss": 0.4592, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5608214849921012, | |
| "grad_norm": 0.8797832727432251, | |
| "learning_rate": 4.407582938388626e-06, | |
| "loss": 0.4448, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5624012638230648, | |
| "grad_norm": 0.5569826364517212, | |
| "learning_rate": 4.391785150078989e-06, | |
| "loss": 0.3873, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5639810426540285, | |
| "grad_norm": 0.4294510781764984, | |
| "learning_rate": 4.375987361769353e-06, | |
| "loss": 0.3407, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.5655608214849921, | |
| "grad_norm": 0.5657434463500977, | |
| "learning_rate": 4.360189573459716e-06, | |
| "loss": 0.3345, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5671406003159558, | |
| "grad_norm": 0.5589077472686768, | |
| "learning_rate": 4.3443917851500794e-06, | |
| "loss": 0.5237, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5687203791469194, | |
| "grad_norm": 0.6107128858566284, | |
| "learning_rate": 4.328593996840442e-06, | |
| "loss": 0.4354, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5703001579778831, | |
| "grad_norm": 0.5671380758285522, | |
| "learning_rate": 4.312796208530806e-06, | |
| "loss": 0.3712, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.5718799368088467, | |
| "grad_norm": 0.508173406124115, | |
| "learning_rate": 4.29699842022117e-06, | |
| "loss": 0.4097, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5734597156398105, | |
| "grad_norm": 0.6139382719993591, | |
| "learning_rate": 4.281200631911533e-06, | |
| "loss": 0.2646, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.5750394944707741, | |
| "grad_norm": 0.5677220821380615, | |
| "learning_rate": 4.265402843601897e-06, | |
| "loss": 0.3748, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5766192733017378, | |
| "grad_norm": 0.530708372592926, | |
| "learning_rate": 4.2496050552922596e-06, | |
| "loss": 0.3857, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5781990521327014, | |
| "grad_norm": 1.176272988319397, | |
| "learning_rate": 4.233807266982623e-06, | |
| "loss": 0.436, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5797788309636651, | |
| "grad_norm": 0.6165753602981567, | |
| "learning_rate": 4.218009478672986e-06, | |
| "loss": 0.3898, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5813586097946287, | |
| "grad_norm": 0.47574201226234436, | |
| "learning_rate": 4.20221169036335e-06, | |
| "loss": 0.3685, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5829383886255924, | |
| "grad_norm": 0.5995083451271057, | |
| "learning_rate": 4.186413902053712e-06, | |
| "loss": 0.4686, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.584518167456556, | |
| "grad_norm": 0.5809090733528137, | |
| "learning_rate": 4.170616113744076e-06, | |
| "loss": 0.4514, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5860979462875198, | |
| "grad_norm": 0.6154018044471741, | |
| "learning_rate": 4.15481832543444e-06, | |
| "loss": 0.3737, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5876777251184834, | |
| "grad_norm": 0.5799654126167297, | |
| "learning_rate": 4.139020537124803e-06, | |
| "loss": 0.4285, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5892575039494471, | |
| "grad_norm": 0.4476354420185089, | |
| "learning_rate": 4.123222748815166e-06, | |
| "loss": 0.4362, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5908372827804107, | |
| "grad_norm": 0.6266714334487915, | |
| "learning_rate": 4.107424960505529e-06, | |
| "loss": 0.4943, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5924170616113744, | |
| "grad_norm": 0.5103732347488403, | |
| "learning_rate": 4.091627172195893e-06, | |
| "loss": 0.4585, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.593996840442338, | |
| "grad_norm": 0.49011877179145813, | |
| "learning_rate": 4.075829383886256e-06, | |
| "loss": 0.4489, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5955766192733017, | |
| "grad_norm": 0.5286844372749329, | |
| "learning_rate": 4.06003159557662e-06, | |
| "loss": 0.4114, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5971563981042654, | |
| "grad_norm": 0.494807630777359, | |
| "learning_rate": 4.044233807266983e-06, | |
| "loss": 0.3514, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5987361769352291, | |
| "grad_norm": 0.46120524406433105, | |
| "learning_rate": 4.0284360189573465e-06, | |
| "loss": 0.4452, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.6003159557661928, | |
| "grad_norm": 0.6024404764175415, | |
| "learning_rate": 4.0126382306477095e-06, | |
| "loss": 0.4368, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6018957345971564, | |
| "grad_norm": 0.8292664885520935, | |
| "learning_rate": 3.996840442338073e-06, | |
| "loss": 0.4495, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.6034755134281201, | |
| "grad_norm": 0.5312369465827942, | |
| "learning_rate": 3.981042654028436e-06, | |
| "loss": 0.3642, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.6050552922590837, | |
| "grad_norm": 0.6373758316040039, | |
| "learning_rate": 3.9652448657188e-06, | |
| "loss": 0.3884, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.6066350710900474, | |
| "grad_norm": 0.5623313188552856, | |
| "learning_rate": 3.949447077409163e-06, | |
| "loss": 0.3489, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.608214849921011, | |
| "grad_norm": 0.5703821778297424, | |
| "learning_rate": 3.933649289099527e-06, | |
| "loss": 0.5309, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6097946287519748, | |
| "grad_norm": 0.5930938720703125, | |
| "learning_rate": 3.91785150078989e-06, | |
| "loss": 0.4072, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.6113744075829384, | |
| "grad_norm": 0.5636332631111145, | |
| "learning_rate": 3.902053712480253e-06, | |
| "loss": 0.3938, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.6129541864139021, | |
| "grad_norm": 0.45709583163261414, | |
| "learning_rate": 3.886255924170616e-06, | |
| "loss": 0.4436, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.6145339652448657, | |
| "grad_norm": 0.5924400687217712, | |
| "learning_rate": 3.87045813586098e-06, | |
| "loss": 0.2939, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.6161137440758294, | |
| "grad_norm": 0.6232696175575256, | |
| "learning_rate": 3.854660347551343e-06, | |
| "loss": 0.4183, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.617693522906793, | |
| "grad_norm": 0.5407995581626892, | |
| "learning_rate": 3.838862559241707e-06, | |
| "loss": 0.3925, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.6192733017377567, | |
| "grad_norm": 0.524691104888916, | |
| "learning_rate": 3.82306477093207e-06, | |
| "loss": 0.4327, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.6208530805687204, | |
| "grad_norm": 0.5206206440925598, | |
| "learning_rate": 3.8072669826224335e-06, | |
| "loss": 0.4203, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.6224328593996841, | |
| "grad_norm": 0.6244251132011414, | |
| "learning_rate": 3.7914691943127964e-06, | |
| "loss": 0.4546, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.6240126382306477, | |
| "grad_norm": 0.707058846950531, | |
| "learning_rate": 3.77567140600316e-06, | |
| "loss": 0.4015, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6255924170616114, | |
| "grad_norm": 0.5457757115364075, | |
| "learning_rate": 3.759873617693523e-06, | |
| "loss": 0.3962, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.627172195892575, | |
| "grad_norm": 0.5757611989974976, | |
| "learning_rate": 3.7440758293838865e-06, | |
| "loss": 0.4299, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.6287519747235387, | |
| "grad_norm": 0.5844476819038391, | |
| "learning_rate": 3.72827804107425e-06, | |
| "loss": 0.4674, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6303317535545023, | |
| "grad_norm": 0.6859634518623352, | |
| "learning_rate": 3.7124802527646132e-06, | |
| "loss": 0.4253, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.631911532385466, | |
| "grad_norm": 0.5247636437416077, | |
| "learning_rate": 3.6966824644549766e-06, | |
| "loss": 0.4318, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6334913112164297, | |
| "grad_norm": 0.6206024885177612, | |
| "learning_rate": 3.68088467614534e-06, | |
| "loss": 0.3759, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.6350710900473934, | |
| "grad_norm": 0.6237459182739258, | |
| "learning_rate": 3.6650868878357033e-06, | |
| "loss": 0.3642, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.636650868878357, | |
| "grad_norm": 0.8048799633979797, | |
| "learning_rate": 3.6492890995260666e-06, | |
| "loss": 0.514, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.6382306477093207, | |
| "grad_norm": 0.4662720561027527, | |
| "learning_rate": 3.63349131121643e-06, | |
| "loss": 0.3654, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6398104265402843, | |
| "grad_norm": 0.5561702251434326, | |
| "learning_rate": 3.6176935229067934e-06, | |
| "loss": 0.3823, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.641390205371248, | |
| "grad_norm": 0.6143206357955933, | |
| "learning_rate": 3.6018957345971567e-06, | |
| "loss": 0.3938, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.6429699842022117, | |
| "grad_norm": 0.6854034662246704, | |
| "learning_rate": 3.58609794628752e-06, | |
| "loss": 0.4625, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.6445497630331753, | |
| "grad_norm": 0.5590549111366272, | |
| "learning_rate": 3.5703001579778834e-06, | |
| "loss": 0.4199, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6461295418641391, | |
| "grad_norm": 0.642573356628418, | |
| "learning_rate": 3.5545023696682468e-06, | |
| "loss": 0.4366, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.6477093206951027, | |
| "grad_norm": 0.5898130536079407, | |
| "learning_rate": 3.53870458135861e-06, | |
| "loss": 0.4691, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6492890995260664, | |
| "grad_norm": 0.5370688438415527, | |
| "learning_rate": 3.5229067930489735e-06, | |
| "loss": 0.45, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.65086887835703, | |
| "grad_norm": 0.6769170165061951, | |
| "learning_rate": 3.507109004739337e-06, | |
| "loss": 0.3962, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6524486571879937, | |
| "grad_norm": 0.5891703367233276, | |
| "learning_rate": 3.4913112164297e-06, | |
| "loss": 0.4542, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.6540284360189573, | |
| "grad_norm": 0.42204615473747253, | |
| "learning_rate": 3.4755134281200636e-06, | |
| "loss": 0.3368, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.655608214849921, | |
| "grad_norm": 0.46033787727355957, | |
| "learning_rate": 3.459715639810427e-06, | |
| "loss": 0.4357, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6571879936808847, | |
| "grad_norm": 0.5509577393531799, | |
| "learning_rate": 3.4439178515007903e-06, | |
| "loss": 0.3939, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6587677725118484, | |
| "grad_norm": 0.5802867412567139, | |
| "learning_rate": 3.4281200631911536e-06, | |
| "loss": 0.4073, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.660347551342812, | |
| "grad_norm": 0.6130402684211731, | |
| "learning_rate": 3.412322274881517e-06, | |
| "loss": 0.3452, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6619273301737757, | |
| "grad_norm": 0.6854075789451599, | |
| "learning_rate": 3.39652448657188e-06, | |
| "loss": 0.3551, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6635071090047393, | |
| "grad_norm": 0.5365926027297974, | |
| "learning_rate": 3.3807266982622433e-06, | |
| "loss": 0.4011, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.665086887835703, | |
| "grad_norm": 1.0338938236236572, | |
| "learning_rate": 3.3649289099526066e-06, | |
| "loss": 0.4623, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.5612855553627014, | |
| "learning_rate": 3.34913112164297e-06, | |
| "loss": 0.3738, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6682464454976303, | |
| "grad_norm": 0.5113286375999451, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.3865, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.669826224328594, | |
| "grad_norm": 0.5509905815124512, | |
| "learning_rate": 3.3175355450236967e-06, | |
| "loss": 0.4093, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6714060031595577, | |
| "grad_norm": 0.5425525903701782, | |
| "learning_rate": 3.30173775671406e-06, | |
| "loss": 0.383, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6729857819905213, | |
| "grad_norm": 0.5866172909736633, | |
| "learning_rate": 3.2859399684044234e-06, | |
| "loss": 0.4843, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.674565560821485, | |
| "grad_norm": 1.0777703523635864, | |
| "learning_rate": 3.2701421800947867e-06, | |
| "loss": 0.3748, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6761453396524486, | |
| "grad_norm": 0.49126845598220825, | |
| "learning_rate": 3.25434439178515e-06, | |
| "loss": 0.3505, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6777251184834123, | |
| "grad_norm": 0.5471718311309814, | |
| "learning_rate": 3.2385466034755135e-06, | |
| "loss": 0.4755, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6793048973143759, | |
| "grad_norm": 0.5689931511878967, | |
| "learning_rate": 3.222748815165877e-06, | |
| "loss": 0.3956, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6808846761453397, | |
| "grad_norm": 0.6496183276176453, | |
| "learning_rate": 3.2069510268562406e-06, | |
| "loss": 0.4598, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6824644549763034, | |
| "grad_norm": 0.47042712569236755, | |
| "learning_rate": 3.191153238546604e-06, | |
| "loss": 0.3756, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.684044233807267, | |
| "grad_norm": 0.5819857120513916, | |
| "learning_rate": 3.1753554502369673e-06, | |
| "loss": 0.4803, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6856240126382307, | |
| "grad_norm": 0.5752127766609192, | |
| "learning_rate": 3.1595576619273307e-06, | |
| "loss": 0.3916, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6872037914691943, | |
| "grad_norm": 0.6483988761901855, | |
| "learning_rate": 3.143759873617694e-06, | |
| "loss": 0.4338, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.688783570300158, | |
| "grad_norm": 0.7817516326904297, | |
| "learning_rate": 3.1279620853080574e-06, | |
| "loss": 0.3645, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6903633491311216, | |
| "grad_norm": 0.4980696737766266, | |
| "learning_rate": 3.1121642969984207e-06, | |
| "loss": 0.3962, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6919431279620853, | |
| "grad_norm": 0.5592882037162781, | |
| "learning_rate": 3.096366508688784e-06, | |
| "loss": 0.3645, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.693522906793049, | |
| "grad_norm": 0.6228163242340088, | |
| "learning_rate": 3.0805687203791474e-06, | |
| "loss": 0.3696, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6951026856240127, | |
| "grad_norm": 0.6718009114265442, | |
| "learning_rate": 3.0647709320695108e-06, | |
| "loss": 0.4926, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6966824644549763, | |
| "grad_norm": 0.6085376143455505, | |
| "learning_rate": 3.048973143759874e-06, | |
| "loss": 0.418, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.69826224328594, | |
| "grad_norm": 0.7716324925422668, | |
| "learning_rate": 3.0331753554502375e-06, | |
| "loss": 0.4038, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6998420221169036, | |
| "grad_norm": 0.7239758968353271, | |
| "learning_rate": 3.017377567140601e-06, | |
| "loss": 0.4596, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.7014218009478673, | |
| "grad_norm": 0.6308011412620544, | |
| "learning_rate": 3.001579778830964e-06, | |
| "loss": 0.4082, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.7030015797788309, | |
| "grad_norm": 0.515626072883606, | |
| "learning_rate": 2.985781990521327e-06, | |
| "loss": 0.4688, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7045813586097947, | |
| "grad_norm": 0.5395441651344299, | |
| "learning_rate": 2.9699842022116905e-06, | |
| "loss": 0.3448, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.7061611374407583, | |
| "grad_norm": 0.5883680582046509, | |
| "learning_rate": 2.954186413902054e-06, | |
| "loss": 0.4546, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.707740916271722, | |
| "grad_norm": 0.7300311326980591, | |
| "learning_rate": 2.938388625592417e-06, | |
| "loss": 0.368, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.7093206951026856, | |
| "grad_norm": 0.5901307463645935, | |
| "learning_rate": 2.9225908372827806e-06, | |
| "loss": 0.3688, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.7109004739336493, | |
| "grad_norm": 0.6521854996681213, | |
| "learning_rate": 2.906793048973144e-06, | |
| "loss": 0.3876, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7124802527646129, | |
| "grad_norm": 0.688450038433075, | |
| "learning_rate": 2.8909952606635073e-06, | |
| "loss": 0.4298, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.7140600315955766, | |
| "grad_norm": 0.6533556580543518, | |
| "learning_rate": 2.8751974723538706e-06, | |
| "loss": 0.3589, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.7156398104265402, | |
| "grad_norm": 0.5261491537094116, | |
| "learning_rate": 2.859399684044234e-06, | |
| "loss": 0.3886, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.717219589257504, | |
| "grad_norm": 0.5488421320915222, | |
| "learning_rate": 2.8436018957345973e-06, | |
| "loss": 0.411, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.7187993680884676, | |
| "grad_norm": 0.6415657997131348, | |
| "learning_rate": 2.8278041074249607e-06, | |
| "loss": 0.4581, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.7203791469194313, | |
| "grad_norm": 0.5058445334434509, | |
| "learning_rate": 2.812006319115324e-06, | |
| "loss": 0.4325, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.721958925750395, | |
| "grad_norm": 0.6409322619438171, | |
| "learning_rate": 2.7962085308056874e-06, | |
| "loss": 0.3759, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.7235387045813586, | |
| "grad_norm": 0.5578014850616455, | |
| "learning_rate": 2.7804107424960508e-06, | |
| "loss": 0.3947, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.7251184834123223, | |
| "grad_norm": 0.6064183115959167, | |
| "learning_rate": 2.764612954186414e-06, | |
| "loss": 0.4766, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.7266982622432859, | |
| "grad_norm": 0.6067904233932495, | |
| "learning_rate": 2.7488151658767775e-06, | |
| "loss": 0.4698, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7282780410742496, | |
| "grad_norm": 0.526088297367096, | |
| "learning_rate": 2.733017377567141e-06, | |
| "loss": 0.3997, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.7298578199052133, | |
| "grad_norm": 0.6290006637573242, | |
| "learning_rate": 2.717219589257504e-06, | |
| "loss": 0.4393, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.731437598736177, | |
| "grad_norm": 0.5822445154190063, | |
| "learning_rate": 2.7014218009478675e-06, | |
| "loss": 0.4767, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.7330173775671406, | |
| "grad_norm": 0.5798205733299255, | |
| "learning_rate": 2.685624012638231e-06, | |
| "loss": 0.4163, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7345971563981043, | |
| "grad_norm": 0.6234124898910522, | |
| "learning_rate": 2.6698262243285942e-06, | |
| "loss": 0.387, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7361769352290679, | |
| "grad_norm": 0.5226984620094299, | |
| "learning_rate": 2.6540284360189576e-06, | |
| "loss": 0.4144, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7377567140600316, | |
| "grad_norm": 0.529303789138794, | |
| "learning_rate": 2.638230647709321e-06, | |
| "loss": 0.4689, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.7393364928909952, | |
| "grad_norm": 0.6620000004768372, | |
| "learning_rate": 2.6224328593996843e-06, | |
| "loss": 0.4358, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.740916271721959, | |
| "grad_norm": 0.8560294508934021, | |
| "learning_rate": 2.606635071090048e-06, | |
| "loss": 0.422, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.7424960505529226, | |
| "grad_norm": 0.47033989429473877, | |
| "learning_rate": 2.5908372827804106e-06, | |
| "loss": 0.4462, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7440758293838863, | |
| "grad_norm": 0.5476656556129456, | |
| "learning_rate": 2.575039494470774e-06, | |
| "loss": 0.3818, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.7456556082148499, | |
| "grad_norm": 0.5771902203559875, | |
| "learning_rate": 2.5592417061611373e-06, | |
| "loss": 0.3835, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7472353870458136, | |
| "grad_norm": 0.6452733278274536, | |
| "learning_rate": 2.5434439178515007e-06, | |
| "loss": 0.4224, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.7488151658767772, | |
| "grad_norm": 0.5318686962127686, | |
| "learning_rate": 2.527646129541864e-06, | |
| "loss": 0.4812, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.7503949447077409, | |
| "grad_norm": 0.6591460108757019, | |
| "learning_rate": 2.5118483412322274e-06, | |
| "loss": 0.4546, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7519747235387045, | |
| "grad_norm": 0.5857440829277039, | |
| "learning_rate": 2.4960505529225907e-06, | |
| "loss": 0.4008, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.7535545023696683, | |
| "grad_norm": 0.6430768370628357, | |
| "learning_rate": 2.480252764612954e-06, | |
| "loss": 0.3191, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.7551342812006319, | |
| "grad_norm": 0.7442892789840698, | |
| "learning_rate": 2.4644549763033174e-06, | |
| "loss": 0.4171, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7567140600315956, | |
| "grad_norm": 0.6390454173088074, | |
| "learning_rate": 2.4486571879936812e-06, | |
| "loss": 0.5381, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.7582938388625592, | |
| "grad_norm": 0.6277416348457336, | |
| "learning_rate": 2.4328593996840446e-06, | |
| "loss": 0.4824, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7598736176935229, | |
| "grad_norm": 0.6043097972869873, | |
| "learning_rate": 2.417061611374408e-06, | |
| "loss": 0.4266, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.7614533965244866, | |
| "grad_norm": 0.6095964312553406, | |
| "learning_rate": 2.4012638230647713e-06, | |
| "loss": 0.4258, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7630331753554502, | |
| "grad_norm": 0.5433639287948608, | |
| "learning_rate": 2.3854660347551346e-06, | |
| "loss": 0.4873, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.764612954186414, | |
| "grad_norm": 0.49287649989128113, | |
| "learning_rate": 2.369668246445498e-06, | |
| "loss": 0.4814, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7661927330173776, | |
| "grad_norm": 0.5905902981758118, | |
| "learning_rate": 2.3538704581358613e-06, | |
| "loss": 0.4519, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7677725118483413, | |
| "grad_norm": 0.6697285771369934, | |
| "learning_rate": 2.3380726698262247e-06, | |
| "loss": 0.4686, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7693522906793049, | |
| "grad_norm": 0.5338664650917053, | |
| "learning_rate": 2.322274881516588e-06, | |
| "loss": 0.401, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7709320695102686, | |
| "grad_norm": 0.5338428616523743, | |
| "learning_rate": 2.3064770932069514e-06, | |
| "loss": 0.4045, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7725118483412322, | |
| "grad_norm": 0.6102830171585083, | |
| "learning_rate": 2.2906793048973143e-06, | |
| "loss": 0.3785, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7740916271721959, | |
| "grad_norm": 0.5787335634231567, | |
| "learning_rate": 2.2748815165876777e-06, | |
| "loss": 0.42, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7756714060031595, | |
| "grad_norm": 0.7426438331604004, | |
| "learning_rate": 2.259083728278041e-06, | |
| "loss": 0.4676, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7772511848341233, | |
| "grad_norm": 0.5988475680351257, | |
| "learning_rate": 2.2432859399684044e-06, | |
| "loss": 0.5404, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7788309636650869, | |
| "grad_norm": 0.6289830803871155, | |
| "learning_rate": 2.2274881516587678e-06, | |
| "loss": 0.396, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7804107424960506, | |
| "grad_norm": 0.6077900528907776, | |
| "learning_rate": 2.211690363349131e-06, | |
| "loss": 0.4016, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7819905213270142, | |
| "grad_norm": 0.8171889781951904, | |
| "learning_rate": 2.1958925750394945e-06, | |
| "loss": 0.3638, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.7835703001579779, | |
| "grad_norm": 0.6225026845932007, | |
| "learning_rate": 2.180094786729858e-06, | |
| "loss": 0.4088, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7851500789889415, | |
| "grad_norm": 0.6262929439544678, | |
| "learning_rate": 2.164296998420221e-06, | |
| "loss": 0.3311, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7867298578199052, | |
| "grad_norm": 0.662129282951355, | |
| "learning_rate": 2.148499210110585e-06, | |
| "loss": 0.4434, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7883096366508688, | |
| "grad_norm": 0.5046777725219727, | |
| "learning_rate": 2.1327014218009483e-06, | |
| "loss": 0.5042, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7898894154818326, | |
| "grad_norm": 0.6273382306098938, | |
| "learning_rate": 2.1169036334913117e-06, | |
| "loss": 0.345, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7914691943127962, | |
| "grad_norm": 0.5484871864318848, | |
| "learning_rate": 2.101105845181675e-06, | |
| "loss": 0.3476, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.7930489731437599, | |
| "grad_norm": 0.6779518723487854, | |
| "learning_rate": 2.085308056872038e-06, | |
| "loss": 0.4062, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7946287519747235, | |
| "grad_norm": 0.4969736635684967, | |
| "learning_rate": 2.0695102685624013e-06, | |
| "loss": 0.3615, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7962085308056872, | |
| "grad_norm": 0.5542388558387756, | |
| "learning_rate": 2.0537124802527647e-06, | |
| "loss": 0.39, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7977883096366508, | |
| "grad_norm": 0.8587651252746582, | |
| "learning_rate": 2.037914691943128e-06, | |
| "loss": 0.423, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7993680884676145, | |
| "grad_norm": 0.6399357318878174, | |
| "learning_rate": 2.0221169036334914e-06, | |
| "loss": 0.4645, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.8009478672985783, | |
| "grad_norm": 0.5677849650382996, | |
| "learning_rate": 2.0063191153238547e-06, | |
| "loss": 0.3749, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.8025276461295419, | |
| "grad_norm": 0.5609621405601501, | |
| "learning_rate": 1.990521327014218e-06, | |
| "loss": 0.4727, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.8041074249605056, | |
| "grad_norm": 0.615185558795929, | |
| "learning_rate": 1.9747235387045814e-06, | |
| "loss": 0.4349, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.8056872037914692, | |
| "grad_norm": 0.5093739032745361, | |
| "learning_rate": 1.958925750394945e-06, | |
| "loss": 0.3502, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8072669826224329, | |
| "grad_norm": 0.8513323068618774, | |
| "learning_rate": 1.943127962085308e-06, | |
| "loss": 0.3902, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.8088467614533965, | |
| "grad_norm": 0.6797610521316528, | |
| "learning_rate": 1.9273301737756715e-06, | |
| "loss": 0.4987, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.8104265402843602, | |
| "grad_norm": 0.5715585947036743, | |
| "learning_rate": 1.911532385466035e-06, | |
| "loss": 0.3965, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.8120063191153238, | |
| "grad_norm": 0.5537532567977905, | |
| "learning_rate": 1.8957345971563982e-06, | |
| "loss": 0.3832, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.8135860979462876, | |
| "grad_norm": 0.5337470173835754, | |
| "learning_rate": 1.8799368088467616e-06, | |
| "loss": 0.4136, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.8151658767772512, | |
| "grad_norm": 0.5929555892944336, | |
| "learning_rate": 1.864139020537125e-06, | |
| "loss": 0.3901, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.8167456556082149, | |
| "grad_norm": 0.6738921403884888, | |
| "learning_rate": 1.8483412322274883e-06, | |
| "loss": 0.4128, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.8183254344391785, | |
| "grad_norm": 0.598659098148346, | |
| "learning_rate": 1.8325434439178516e-06, | |
| "loss": 0.3707, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.8199052132701422, | |
| "grad_norm": 0.5679790377616882, | |
| "learning_rate": 1.816745655608215e-06, | |
| "loss": 0.457, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.8214849921011058, | |
| "grad_norm": 0.5459115505218506, | |
| "learning_rate": 1.8009478672985784e-06, | |
| "loss": 0.3613, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8230647709320695, | |
| "grad_norm": 0.5752125978469849, | |
| "learning_rate": 1.7851500789889417e-06, | |
| "loss": 0.479, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.8246445497630331, | |
| "grad_norm": 0.5184637904167175, | |
| "learning_rate": 1.769352290679305e-06, | |
| "loss": 0.4126, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.8262243285939969, | |
| "grad_norm": 0.6329041123390198, | |
| "learning_rate": 1.7535545023696684e-06, | |
| "loss": 0.4221, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.8278041074249605, | |
| "grad_norm": 0.5233784317970276, | |
| "learning_rate": 1.7377567140600318e-06, | |
| "loss": 0.4375, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.8293838862559242, | |
| "grad_norm": 0.5424541234970093, | |
| "learning_rate": 1.7219589257503951e-06, | |
| "loss": 0.4447, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.8309636650868878, | |
| "grad_norm": 0.5534167885780334, | |
| "learning_rate": 1.7061611374407585e-06, | |
| "loss": 0.3672, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.8325434439178515, | |
| "grad_norm": 0.605102002620697, | |
| "learning_rate": 1.6903633491311216e-06, | |
| "loss": 0.4319, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.8341232227488151, | |
| "grad_norm": 0.5609396696090698, | |
| "learning_rate": 1.674565560821485e-06, | |
| "loss": 0.3984, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.8357030015797788, | |
| "grad_norm": 0.7964479923248291, | |
| "learning_rate": 1.6587677725118483e-06, | |
| "loss": 0.407, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.8372827804107424, | |
| "grad_norm": 0.4886048436164856, | |
| "learning_rate": 1.6429699842022117e-06, | |
| "loss": 0.4506, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8388625592417062, | |
| "grad_norm": 0.543812096118927, | |
| "learning_rate": 1.627172195892575e-06, | |
| "loss": 0.3141, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.8404423380726699, | |
| "grad_norm": 0.5370059609413147, | |
| "learning_rate": 1.6113744075829384e-06, | |
| "loss": 0.3712, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.8420221169036335, | |
| "grad_norm": 0.7402203679084778, | |
| "learning_rate": 1.595576619273302e-06, | |
| "loss": 0.4136, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.8436018957345972, | |
| "grad_norm": 0.6814244985580444, | |
| "learning_rate": 1.5797788309636653e-06, | |
| "loss": 0.4634, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.8451816745655608, | |
| "grad_norm": 0.5919080972671509, | |
| "learning_rate": 1.5639810426540287e-06, | |
| "loss": 0.4238, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8467614533965245, | |
| "grad_norm": 0.617522120475769, | |
| "learning_rate": 1.548183254344392e-06, | |
| "loss": 0.3431, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.8483412322274881, | |
| "grad_norm": 0.49482643604278564, | |
| "learning_rate": 1.5323854660347554e-06, | |
| "loss": 0.3882, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.8499210110584519, | |
| "grad_norm": 0.5525531768798828, | |
| "learning_rate": 1.5165876777251187e-06, | |
| "loss": 0.4053, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8515007898894155, | |
| "grad_norm": 0.6634103655815125, | |
| "learning_rate": 1.500789889415482e-06, | |
| "loss": 0.4624, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.8530805687203792, | |
| "grad_norm": 0.45309382677078247, | |
| "learning_rate": 1.4849921011058452e-06, | |
| "loss": 0.3486, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8546603475513428, | |
| "grad_norm": 0.778338611125946, | |
| "learning_rate": 1.4691943127962086e-06, | |
| "loss": 0.3984, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.8562401263823065, | |
| "grad_norm": 0.6093356609344482, | |
| "learning_rate": 1.453396524486572e-06, | |
| "loss": 0.333, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.8578199052132701, | |
| "grad_norm": 0.49551188945770264, | |
| "learning_rate": 1.4375987361769353e-06, | |
| "loss": 0.3915, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.8593996840442338, | |
| "grad_norm": 0.5423188209533691, | |
| "learning_rate": 1.4218009478672987e-06, | |
| "loss": 0.4192, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8609794628751974, | |
| "grad_norm": 0.8111097812652588, | |
| "learning_rate": 1.406003159557662e-06, | |
| "loss": 0.473, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.8625592417061612, | |
| "grad_norm": 0.6064862012863159, | |
| "learning_rate": 1.3902053712480254e-06, | |
| "loss": 0.4164, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8641390205371248, | |
| "grad_norm": 0.6180470585823059, | |
| "learning_rate": 1.3744075829383887e-06, | |
| "loss": 0.4351, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.8657187993680885, | |
| "grad_norm": 0.5101069808006287, | |
| "learning_rate": 1.358609794628752e-06, | |
| "loss": 0.3806, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8672985781990521, | |
| "grad_norm": 0.6269749402999878, | |
| "learning_rate": 1.3428120063191154e-06, | |
| "loss": 0.4028, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.8688783570300158, | |
| "grad_norm": 0.6344918608665466, | |
| "learning_rate": 1.3270142180094788e-06, | |
| "loss": 0.3206, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8704581358609794, | |
| "grad_norm": 0.7053835988044739, | |
| "learning_rate": 1.3112164296998422e-06, | |
| "loss": 0.4404, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.8720379146919431, | |
| "grad_norm": 0.4780917465686798, | |
| "learning_rate": 1.2954186413902053e-06, | |
| "loss": 0.4089, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8736176935229067, | |
| "grad_norm": 0.5235942006111145, | |
| "learning_rate": 1.2796208530805687e-06, | |
| "loss": 0.3992, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.8751974723538705, | |
| "grad_norm": 0.5037370324134827, | |
| "learning_rate": 1.263823064770932e-06, | |
| "loss": 0.3727, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8767772511848341, | |
| "grad_norm": 0.5422868132591248, | |
| "learning_rate": 1.2480252764612954e-06, | |
| "loss": 0.4524, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8783570300157978, | |
| "grad_norm": 0.5287191271781921, | |
| "learning_rate": 1.2322274881516587e-06, | |
| "loss": 0.3445, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8799368088467614, | |
| "grad_norm": 0.49679964780807495, | |
| "learning_rate": 1.2164296998420223e-06, | |
| "loss": 0.3357, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8815165876777251, | |
| "grad_norm": 0.5391539931297302, | |
| "learning_rate": 1.2006319115323856e-06, | |
| "loss": 0.4645, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8830963665086888, | |
| "grad_norm": 0.5474575757980347, | |
| "learning_rate": 1.184834123222749e-06, | |
| "loss": 0.4109, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8846761453396524, | |
| "grad_norm": 0.5920886993408203, | |
| "learning_rate": 1.1690363349131124e-06, | |
| "loss": 0.4034, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8862559241706162, | |
| "grad_norm": 0.5637263655662537, | |
| "learning_rate": 1.1532385466034757e-06, | |
| "loss": 0.392, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8878357030015798, | |
| "grad_norm": 0.6719076037406921, | |
| "learning_rate": 1.1374407582938388e-06, | |
| "loss": 0.3798, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8894154818325435, | |
| "grad_norm": 0.5554001927375793, | |
| "learning_rate": 1.1216429699842022e-06, | |
| "loss": 0.3901, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8909952606635071, | |
| "grad_norm": 0.6078475713729858, | |
| "learning_rate": 1.1058451816745656e-06, | |
| "loss": 0.3574, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8925750394944708, | |
| "grad_norm": 0.9478325843811035, | |
| "learning_rate": 1.090047393364929e-06, | |
| "loss": 0.3831, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8941548183254344, | |
| "grad_norm": 0.5259877443313599, | |
| "learning_rate": 1.0742496050552925e-06, | |
| "loss": 0.4003, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8957345971563981, | |
| "grad_norm": 0.5395880937576294, | |
| "learning_rate": 1.0584518167456558e-06, | |
| "loss": 0.3513, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8973143759873617, | |
| "grad_norm": 0.5458592772483826, | |
| "learning_rate": 1.042654028436019e-06, | |
| "loss": 0.49, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8988941548183255, | |
| "grad_norm": 0.5552616715431213, | |
| "learning_rate": 1.0268562401263823e-06, | |
| "loss": 0.3905, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.9004739336492891, | |
| "grad_norm": 0.551466166973114, | |
| "learning_rate": 1.0110584518167457e-06, | |
| "loss": 0.4241, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9020537124802528, | |
| "grad_norm": 0.7195900082588196, | |
| "learning_rate": 9.95260663507109e-07, | |
| "loss": 0.3912, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.9036334913112164, | |
| "grad_norm": 0.5951517820358276, | |
| "learning_rate": 9.794628751974724e-07, | |
| "loss": 0.4267, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.9052132701421801, | |
| "grad_norm": 0.7582541108131409, | |
| "learning_rate": 9.636650868878358e-07, | |
| "loss": 0.4024, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.9067930489731437, | |
| "grad_norm": 0.6346389651298523, | |
| "learning_rate": 9.478672985781991e-07, | |
| "loss": 0.4677, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.9083728278041074, | |
| "grad_norm": 0.7323048710823059, | |
| "learning_rate": 9.320695102685625e-07, | |
| "loss": 0.4332, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.909952606635071, | |
| "grad_norm": 0.5796726942062378, | |
| "learning_rate": 9.162717219589258e-07, | |
| "loss": 0.3514, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.9115323854660348, | |
| "grad_norm": 0.7424004673957825, | |
| "learning_rate": 9.004739336492892e-07, | |
| "loss": 0.4178, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.9131121642969984, | |
| "grad_norm": 0.525142252445221, | |
| "learning_rate": 8.846761453396525e-07, | |
| "loss": 0.4498, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.9146919431279621, | |
| "grad_norm": 0.5565955638885498, | |
| "learning_rate": 8.688783570300159e-07, | |
| "loss": 0.4532, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.9162717219589257, | |
| "grad_norm": 0.540267288684845, | |
| "learning_rate": 8.530805687203792e-07, | |
| "loss": 0.4828, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9178515007898894, | |
| "grad_norm": 0.5061677694320679, | |
| "learning_rate": 8.372827804107425e-07, | |
| "loss": 0.3505, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.919431279620853, | |
| "grad_norm": 0.5490908622741699, | |
| "learning_rate": 8.214849921011058e-07, | |
| "loss": 0.4402, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.9210110584518167, | |
| "grad_norm": 0.5788997411727905, | |
| "learning_rate": 8.056872037914692e-07, | |
| "loss": 0.3256, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.9225908372827805, | |
| "grad_norm": 0.5741492509841919, | |
| "learning_rate": 7.898894154818327e-07, | |
| "loss": 0.451, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.9241706161137441, | |
| "grad_norm": 0.5012090802192688, | |
| "learning_rate": 7.74091627172196e-07, | |
| "loss": 0.3513, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9257503949447078, | |
| "grad_norm": 0.5613192915916443, | |
| "learning_rate": 7.582938388625594e-07, | |
| "loss": 0.3499, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.9273301737756714, | |
| "grad_norm": 0.5941815376281738, | |
| "learning_rate": 7.424960505529226e-07, | |
| "loss": 0.4133, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.9289099526066351, | |
| "grad_norm": 0.7772453427314758, | |
| "learning_rate": 7.26698262243286e-07, | |
| "loss": 0.3818, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.9304897314375987, | |
| "grad_norm": 0.5977700352668762, | |
| "learning_rate": 7.109004739336493e-07, | |
| "loss": 0.4099, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.9320695102685624, | |
| "grad_norm": 0.7777069807052612, | |
| "learning_rate": 6.951026856240127e-07, | |
| "loss": 0.4341, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.933649289099526, | |
| "grad_norm": 0.5362728834152222, | |
| "learning_rate": 6.79304897314376e-07, | |
| "loss": 0.4431, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.9352290679304898, | |
| "grad_norm": 0.5126134157180786, | |
| "learning_rate": 6.635071090047394e-07, | |
| "loss": 0.3713, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.9368088467614534, | |
| "grad_norm": 0.5886785984039307, | |
| "learning_rate": 6.477093206951026e-07, | |
| "loss": 0.405, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.9383886255924171, | |
| "grad_norm": 0.5328089594841003, | |
| "learning_rate": 6.31911532385466e-07, | |
| "loss": 0.3952, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.9399684044233807, | |
| "grad_norm": 0.7170501351356506, | |
| "learning_rate": 6.161137440758294e-07, | |
| "loss": 0.3979, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.9415481832543444, | |
| "grad_norm": 0.6048548817634583, | |
| "learning_rate": 6.003159557661928e-07, | |
| "loss": 0.3425, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.943127962085308, | |
| "grad_norm": 0.5635291337966919, | |
| "learning_rate": 5.845181674565562e-07, | |
| "loss": 0.3008, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.9447077409162717, | |
| "grad_norm": 0.6890112161636353, | |
| "learning_rate": 5.687203791469194e-07, | |
| "loss": 0.4205, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9462875197472354, | |
| "grad_norm": 0.5197014212608337, | |
| "learning_rate": 5.529225908372828e-07, | |
| "loss": 0.4589, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "grad_norm": 0.5197718143463135, | |
| "learning_rate": 5.371248025276462e-07, | |
| "loss": 0.2678, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9494470774091627, | |
| "grad_norm": 0.44931474328041077, | |
| "learning_rate": 5.213270142180095e-07, | |
| "loss": 0.4351, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.9510268562401264, | |
| "grad_norm": 0.47795984148979187, | |
| "learning_rate": 5.055292259083728e-07, | |
| "loss": 0.4392, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.95260663507109, | |
| "grad_norm": 0.6027578115463257, | |
| "learning_rate": 4.897314375987362e-07, | |
| "loss": 0.4499, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.9541864139020537, | |
| "grad_norm": 0.6160722374916077, | |
| "learning_rate": 4.7393364928909956e-07, | |
| "loss": 0.434, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.9557661927330173, | |
| "grad_norm": 0.8371343612670898, | |
| "learning_rate": 4.581358609794629e-07, | |
| "loss": 0.3911, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.957345971563981, | |
| "grad_norm": 0.5282484292984009, | |
| "learning_rate": 4.4233807266982627e-07, | |
| "loss": 0.4445, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9589257503949447, | |
| "grad_norm": 0.5557743310928345, | |
| "learning_rate": 4.265402843601896e-07, | |
| "loss": 0.4103, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.9605055292259084, | |
| "grad_norm": 0.6362637281417847, | |
| "learning_rate": 4.107424960505529e-07, | |
| "loss": 0.3856, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.9620853080568721, | |
| "grad_norm": 0.745617151260376, | |
| "learning_rate": 3.9494470774091633e-07, | |
| "loss": 0.4179, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.9636650868878357, | |
| "grad_norm": 0.659038782119751, | |
| "learning_rate": 3.791469194312797e-07, | |
| "loss": 0.4027, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.9652448657187994, | |
| "grad_norm": 0.645199716091156, | |
| "learning_rate": 3.63349131121643e-07, | |
| "loss": 0.3501, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.966824644549763, | |
| "grad_norm": 0.4868941605091095, | |
| "learning_rate": 3.4755134281200634e-07, | |
| "loss": 0.3385, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9684044233807267, | |
| "grad_norm": 0.5993934273719788, | |
| "learning_rate": 3.317535545023697e-07, | |
| "loss": 0.369, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.9699842022116903, | |
| "grad_norm": 0.6094574928283691, | |
| "learning_rate": 3.15955766192733e-07, | |
| "loss": 0.4899, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9715639810426541, | |
| "grad_norm": 0.6989656686782837, | |
| "learning_rate": 3.001579778830964e-07, | |
| "loss": 0.4346, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.9731437598736177, | |
| "grad_norm": 0.5412940382957458, | |
| "learning_rate": 2.843601895734597e-07, | |
| "loss": 0.4515, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9747235387045814, | |
| "grad_norm": 0.507622241973877, | |
| "learning_rate": 2.685624012638231e-07, | |
| "loss": 0.4171, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.976303317535545, | |
| "grad_norm": 0.4564089775085449, | |
| "learning_rate": 2.527646129541864e-07, | |
| "loss": 0.3452, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9778830963665087, | |
| "grad_norm": 0.48170286417007446, | |
| "learning_rate": 2.3696682464454978e-07, | |
| "loss": 0.3866, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.9794628751974723, | |
| "grad_norm": 0.47774481773376465, | |
| "learning_rate": 2.2116903633491313e-07, | |
| "loss": 0.4425, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.981042654028436, | |
| "grad_norm": 0.4460739493370056, | |
| "learning_rate": 2.0537124802527646e-07, | |
| "loss": 0.3991, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.9826224328593997, | |
| "grad_norm": 0.536359965801239, | |
| "learning_rate": 1.8957345971563984e-07, | |
| "loss": 0.327, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.9842022116903634, | |
| "grad_norm": 0.5439571738243103, | |
| "learning_rate": 1.7377567140600317e-07, | |
| "loss": 0.408, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.985781990521327, | |
| "grad_norm": 0.8827345967292786, | |
| "learning_rate": 1.579778830963665e-07, | |
| "loss": 0.4924, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9873617693522907, | |
| "grad_norm": 0.4992835521697998, | |
| "learning_rate": 1.4218009478672986e-07, | |
| "loss": 0.3921, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9889415481832543, | |
| "grad_norm": 0.7306237816810608, | |
| "learning_rate": 1.263823064770932e-07, | |
| "loss": 0.5063, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.990521327014218, | |
| "grad_norm": 0.5200903415679932, | |
| "learning_rate": 1.1058451816745657e-07, | |
| "loss": 0.358, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9921011058451816, | |
| "grad_norm": 0.42708104848861694, | |
| "learning_rate": 9.478672985781992e-08, | |
| "loss": 0.3361, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9936808846761453, | |
| "grad_norm": 0.5993225574493408, | |
| "learning_rate": 7.898894154818325e-08, | |
| "loss": 0.3625, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.995260663507109, | |
| "grad_norm": 0.49995774030685425, | |
| "learning_rate": 6.31911532385466e-08, | |
| "loss": 0.3746, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9968404423380727, | |
| "grad_norm": 0.5806180238723755, | |
| "learning_rate": 4.739336492890996e-08, | |
| "loss": 0.3727, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9984202211690363, | |
| "grad_norm": 0.5514349341392517, | |
| "learning_rate": 3.15955766192733e-08, | |
| "loss": 0.4634, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.4094119668006897, | |
| "learning_rate": 1.579778830963665e-08, | |
| "loss": 0.2044, | |
| "step": 633 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 633, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.9805266972408545e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
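
The `log_history` entries above (one per step, per `logging_steps: 1.0`) can be inspected programmatically. A minimal sketch follows, assuming this state has been saved locally as `trainer_state.json` and that `matplotlib` is installed; the filename and the plotting choices are illustrative, not part of the original run.

```python
# Minimal sketch: load the dumped trainer state and plot the per-step
# training loss alongside the linearly decaying learning-rate schedule.
# Assumes the JSON above is saved as "trainer_state.json" in the working
# directory and that matplotlib is available.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries step, epoch, loss, learning_rate, grad_norm.
history = state["log_history"]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

Plotted this way, the learning rate should trace a straight line from 1e-05 down to ~0 over the 633 steps recorded above, while the loss curve shows the noisy per-step values (no smoothing is applied in this sketch).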