| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 23218, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004307003187182358, |
| "grad_norm": 0.6166621446609497, |
| "learning_rate": 4.220499569336779e-07, |
| "loss": 0.6745, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.008614006374364717, |
| "grad_norm": 1.4086202383041382, |
| "learning_rate": 8.527131782945737e-07, |
| "loss": 0.665, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.012921009561547075, |
| "grad_norm": 0.8809443712234497, |
| "learning_rate": 1.2833763996554696e-06, |
| "loss": 0.638, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.017228012748729434, |
| "grad_norm": 2.4636318683624268, |
| "learning_rate": 1.7140396210163654e-06, |
| "loss": 0.5199, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.021535015935911794, |
| "grad_norm": 4.417299270629883, |
| "learning_rate": 2.144702842377261e-06, |
| "loss": 0.295, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.02584201912309415, |
| "grad_norm": 0.9570991396903992, |
| "learning_rate": 2.575366063738157e-06, |
| "loss": 0.2265, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03014902231027651, |
| "grad_norm": 5.928269863128662, |
| "learning_rate": 3.006029285099053e-06, |
| "loss": 0.1726, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.03445602549745887, |
| "grad_norm": 3.2107982635498047, |
| "learning_rate": 3.436692506459949e-06, |
| "loss": 0.1693, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.038763028684641224, |
| "grad_norm": 20.335567474365234, |
| "learning_rate": 3.867355727820845e-06, |
| "loss": 0.1624, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.04307003187182359, |
| "grad_norm": 5.493223190307617, |
| "learning_rate": 4.2980189491817404e-06, |
| "loss": 0.1553, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.047377035059005944, |
| "grad_norm": 2.554831027984619, |
| "learning_rate": 4.728682170542636e-06, |
| "loss": 0.1287, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0516840382461883, |
| "grad_norm": 7.436851978302002, |
| "learning_rate": 5.159345391903532e-06, |
| "loss": 0.1536, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.055991041433370664, |
| "grad_norm": 3.6525373458862305, |
| "learning_rate": 5.590008613264428e-06, |
| "loss": 0.1418, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.06029804462055302, |
| "grad_norm": 2.3898279666900635, |
| "learning_rate": 6.020671834625324e-06, |
| "loss": 0.1236, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.06460504780773538, |
| "grad_norm": 5.284865856170654, |
| "learning_rate": 6.45133505598622e-06, |
| "loss": 0.1248, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.06891205099491773, |
| "grad_norm": 8.725616455078125, |
| "learning_rate": 6.881998277347115e-06, |
| "loss": 0.1328, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.07321905418210009, |
| "grad_norm": 5.838630199432373, |
| "learning_rate": 7.312661498708011e-06, |
| "loss": 0.1302, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.07752605736928245, |
| "grad_norm": 2.8252811431884766, |
| "learning_rate": 7.743324720068907e-06, |
| "loss": 0.1318, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.08183306055646482, |
| "grad_norm": 3.0458829402923584, |
| "learning_rate": 8.173987941429803e-06, |
| "loss": 0.1144, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.08614006374364717, |
| "grad_norm": 12.496984481811523, |
| "learning_rate": 8.604651162790698e-06, |
| "loss": 0.1128, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.09044706693082953, |
| "grad_norm": 1.416028380393982, |
| "learning_rate": 9.035314384151595e-06, |
| "loss": 0.1091, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.09475407011801189, |
| "grad_norm": 1.2219202518463135, |
| "learning_rate": 9.46597760551249e-06, |
| "loss": 0.1188, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.09906107330519424, |
| "grad_norm": 5.654454708099365, |
| "learning_rate": 9.896640826873386e-06, |
| "loss": 0.1271, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.1033680764923766, |
| "grad_norm": 2.2038986682891846, |
| "learning_rate": 1.032730404823428e-05, |
| "loss": 0.1082, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.10767507967955896, |
| "grad_norm": 2.594078779220581, |
| "learning_rate": 1.0757967269595177e-05, |
| "loss": 0.1135, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.11198208286674133, |
| "grad_norm": 2.2295544147491455, |
| "learning_rate": 1.1188630490956073e-05, |
| "loss": 0.1231, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.11628908605392368, |
| "grad_norm": 6.695245742797852, |
| "learning_rate": 1.1619293712316968e-05, |
| "loss": 0.1113, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.12059608924110604, |
| "grad_norm": 2.6315550804138184, |
| "learning_rate": 1.2049956933677865e-05, |
| "loss": 0.1088, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.1249030924282884, |
| "grad_norm": 8.70291805267334, |
| "learning_rate": 1.2480620155038761e-05, |
| "loss": 0.0987, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.12921009561547075, |
| "grad_norm": 3.3458549976348877, |
| "learning_rate": 1.2911283376399657e-05, |
| "loss": 0.12, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1335170988026531, |
| "grad_norm": 1.7857741117477417, |
| "learning_rate": 1.3341946597760554e-05, |
| "loss": 0.1224, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.13782410198983547, |
| "grad_norm": 2.5017483234405518, |
| "learning_rate": 1.3772609819121447e-05, |
| "loss": 0.1039, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.14213110517701782, |
| "grad_norm": 1.9605143070220947, |
| "learning_rate": 1.4203273040482343e-05, |
| "loss": 0.1018, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.14643810836420018, |
| "grad_norm": 1.5602117776870728, |
| "learning_rate": 1.463393626184324e-05, |
| "loss": 0.1084, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.15074511155138254, |
| "grad_norm": 1.4006476402282715, |
| "learning_rate": 1.5064599483204136e-05, |
| "loss": 0.1123, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.1550521147385649, |
| "grad_norm": 2.3718955516815186, |
| "learning_rate": 1.549526270456503e-05, |
| "loss": 0.1023, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.15935911792574725, |
| "grad_norm": 4.667544364929199, |
| "learning_rate": 1.5925925925925926e-05, |
| "loss": 0.1047, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.16366612111292964, |
| "grad_norm": 2.8184595108032227, |
| "learning_rate": 1.6356589147286824e-05, |
| "loss": 0.1057, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.167973124300112, |
| "grad_norm": 1.4751886129379272, |
| "learning_rate": 1.678725236864772e-05, |
| "loss": 0.0972, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.17228012748729435, |
| "grad_norm": 3.6895549297332764, |
| "learning_rate": 1.7217915590008613e-05, |
| "loss": 0.0996, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1765871306744767, |
| "grad_norm": 1.049835443496704, |
| "learning_rate": 1.764857881136951e-05, |
| "loss": 0.0963, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.18089413386165906, |
| "grad_norm": 2.069823980331421, |
| "learning_rate": 1.8079242032730406e-05, |
| "loss": 0.0964, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.18520113704884142, |
| "grad_norm": 1.360253095626831, |
| "learning_rate": 1.85099052540913e-05, |
| "loss": 0.0903, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.18950814023602378, |
| "grad_norm": 3.293531656265259, |
| "learning_rate": 1.89405684754522e-05, |
| "loss": 0.1102, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.19381514342320613, |
| "grad_norm": 1.5345733165740967, |
| "learning_rate": 1.9371231696813094e-05, |
| "loss": 0.1011, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.1981221466103885, |
| "grad_norm": 1.7733877897262573, |
| "learning_rate": 1.980189491817399e-05, |
| "loss": 0.0786, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.20242914979757085, |
| "grad_norm": 1.409213662147522, |
| "learning_rate": 1.997415773353752e-05, |
| "loss": 0.1018, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.2067361529847532, |
| "grad_norm": 1.9454050064086914, |
| "learning_rate": 1.9926301684532928e-05, |
| "loss": 0.1121, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.21104315617193556, |
| "grad_norm": 0.5753230452537537, |
| "learning_rate": 1.987844563552833e-05, |
| "loss": 0.0796, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.21535015935911792, |
| "grad_norm": 6.277276039123535, |
| "learning_rate": 1.983058958652374e-05, |
| "loss": 0.1025, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.21965716254630027, |
| "grad_norm": 1.9022142887115479, |
| "learning_rate": 1.9782733537519143e-05, |
| "loss": 0.0961, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.22396416573348266, |
| "grad_norm": 1.920341968536377, |
| "learning_rate": 1.973487748851455e-05, |
| "loss": 0.0965, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.228271168920665, |
| "grad_norm": 3.8428711891174316, |
| "learning_rate": 1.9687021439509954e-05, |
| "loss": 0.0786, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.23257817210784737, |
| "grad_norm": 2.7966816425323486, |
| "learning_rate": 1.963916539050536e-05, |
| "loss": 0.1064, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.23688517529502973, |
| "grad_norm": 0.9752281904220581, |
| "learning_rate": 1.9591309341500768e-05, |
| "loss": 0.0938, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.24119217848221208, |
| "grad_norm": 0.9420919418334961, |
| "learning_rate": 1.9543453292496172e-05, |
| "loss": 0.0991, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.24549918166939444, |
| "grad_norm": 1.6354459524154663, |
| "learning_rate": 1.949559724349158e-05, |
| "loss": 0.096, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.2498061848565768, |
| "grad_norm": 0.6382321715354919, |
| "learning_rate": 1.9447741194486983e-05, |
| "loss": 0.086, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.25411318804375915, |
| "grad_norm": 3.3475544452667236, |
| "learning_rate": 1.939988514548239e-05, |
| "loss": 0.0938, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.2584201912309415, |
| "grad_norm": 1.1161267757415771, |
| "learning_rate": 1.9352029096477794e-05, |
| "loss": 0.088, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.26272719441812387, |
| "grad_norm": 2.1411211490631104, |
| "learning_rate": 1.93041730474732e-05, |
| "loss": 0.1006, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.2670341976053062, |
| "grad_norm": 5.084458827972412, |
| "learning_rate": 1.9256316998468606e-05, |
| "loss": 0.09, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.2713412007924886, |
| "grad_norm": 1.219672441482544, |
| "learning_rate": 1.9208460949464013e-05, |
| "loss": 0.0826, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.27564820397967094, |
| "grad_norm": 2.7958974838256836, |
| "learning_rate": 1.916060490045942e-05, |
| "loss": 0.0879, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.2799552071668533, |
| "grad_norm": 0.9086557626724243, |
| "learning_rate": 1.9112748851454824e-05, |
| "loss": 0.0926, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.28426221035403565, |
| "grad_norm": 3.29379940032959, |
| "learning_rate": 1.906489280245023e-05, |
| "loss": 0.076, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.288569213541218, |
| "grad_norm": 2.127718925476074, |
| "learning_rate": 1.901703675344564e-05, |
| "loss": 0.0822, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.29287621672840036, |
| "grad_norm": 1.128344178199768, |
| "learning_rate": 1.8969180704441042e-05, |
| "loss": 0.0878, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.2971832199155827, |
| "grad_norm": 0.8325080275535583, |
| "learning_rate": 1.892132465543645e-05, |
| "loss": 0.1006, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.3014902231027651, |
| "grad_norm": 1.3302809000015259, |
| "learning_rate": 1.8873468606431853e-05, |
| "loss": 0.0838, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.30579722628994743, |
| "grad_norm": 0.9956411719322205, |
| "learning_rate": 1.882561255742726e-05, |
| "loss": 0.0796, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.3101042294771298, |
| "grad_norm": 2.7951183319091797, |
| "learning_rate": 1.8777756508422668e-05, |
| "loss": 0.0746, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.31441123266431215, |
| "grad_norm": 0.9167538285255432, |
| "learning_rate": 1.8729900459418072e-05, |
| "loss": 0.0756, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.3187182358514945, |
| "grad_norm": 3.4193942546844482, |
| "learning_rate": 1.868204441041348e-05, |
| "loss": 0.0772, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.3230252390386769, |
| "grad_norm": 1.3220958709716797, |
| "learning_rate": 1.8634188361408883e-05, |
| "loss": 0.0843, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.32733224222585927, |
| "grad_norm": 0.9294602870941162, |
| "learning_rate": 1.858633231240429e-05, |
| "loss": 0.0813, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.33163924541304163, |
| "grad_norm": 1.0229344367980957, |
| "learning_rate": 1.8538476263399698e-05, |
| "loss": 0.0855, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.335946248600224, |
| "grad_norm": 2.287496566772461, |
| "learning_rate": 1.84906202143951e-05, |
| "loss": 0.0901, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.34025325178740634, |
| "grad_norm": 1.0064690113067627, |
| "learning_rate": 1.844276416539051e-05, |
| "loss": 0.091, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.3445602549745887, |
| "grad_norm": 1.040418028831482, |
| "learning_rate": 1.8394908116385913e-05, |
| "loss": 0.1087, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.34886725816177105, |
| "grad_norm": 1.4364063739776611, |
| "learning_rate": 1.834705206738132e-05, |
| "loss": 0.0868, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.3531742613489534, |
| "grad_norm": 1.8169975280761719, |
| "learning_rate": 1.8299196018376724e-05, |
| "loss": 0.0855, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.35748126453613577, |
| "grad_norm": 5.961976528167725, |
| "learning_rate": 1.825133996937213e-05, |
| "loss": 0.0704, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.3617882677233181, |
| "grad_norm": 1.6580802202224731, |
| "learning_rate": 1.8203483920367535e-05, |
| "loss": 0.09, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.3660952709105005, |
| "grad_norm": 2.251880168914795, |
| "learning_rate": 1.8155627871362942e-05, |
| "loss": 0.0891, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.37040227409768284, |
| "grad_norm": 2.918473720550537, |
| "learning_rate": 1.8107771822358346e-05, |
| "loss": 0.0877, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.3747092772848652, |
| "grad_norm": 0.5215052366256714, |
| "learning_rate": 1.8059915773353753e-05, |
| "loss": 0.0828, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.37901628047204755, |
| "grad_norm": 1.037458896636963, |
| "learning_rate": 1.801205972434916e-05, |
| "loss": 0.0692, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.3833232836592299, |
| "grad_norm": 0.9075079560279846, |
| "learning_rate": 1.7964203675344564e-05, |
| "loss": 0.0745, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.38763028684641226, |
| "grad_norm": 1.210403561592102, |
| "learning_rate": 1.791634762633997e-05, |
| "loss": 0.0814, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.3919372900335946, |
| "grad_norm": 1.3449194431304932, |
| "learning_rate": 1.7868491577335375e-05, |
| "loss": 0.0887, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.396244293220777, |
| "grad_norm": 0.8855172395706177, |
| "learning_rate": 1.7820635528330783e-05, |
| "loss": 0.0848, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.40055129640795933, |
| "grad_norm": 0.9978507161140442, |
| "learning_rate": 1.7772779479326187e-05, |
| "loss": 0.0743, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.4048582995951417, |
| "grad_norm": 1.354919195175171, |
| "learning_rate": 1.7724923430321594e-05, |
| "loss": 0.0942, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.40916530278232405, |
| "grad_norm": 2.4924111366271973, |
| "learning_rate": 1.7677067381316998e-05, |
| "loss": 0.0806, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.4134723059695064, |
| "grad_norm": 1.5234886407852173, |
| "learning_rate": 1.7629211332312405e-05, |
| "loss": 0.0753, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.41777930915668876, |
| "grad_norm": 1.6104071140289307, |
| "learning_rate": 1.758135528330781e-05, |
| "loss": 0.0708, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.4220863123438711, |
| "grad_norm": 0.8267254829406738, |
| "learning_rate": 1.7533499234303216e-05, |
| "loss": 0.0793, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.4263933155310535, |
| "grad_norm": 1.4067633152008057, |
| "learning_rate": 1.7485643185298623e-05, |
| "loss": 0.0951, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.43070031871823583, |
| "grad_norm": 0.9799253344535828, |
| "learning_rate": 1.7437787136294027e-05, |
| "loss": 0.0861, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.4350073219054182, |
| "grad_norm": 1.111114740371704, |
| "learning_rate": 1.7389931087289434e-05, |
| "loss": 0.0809, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.43931432509260054, |
| "grad_norm": 0.965411901473999, |
| "learning_rate": 1.7342075038284842e-05, |
| "loss": 0.0755, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.4436213282797829, |
| "grad_norm": 3.719944953918457, |
| "learning_rate": 1.7294218989280246e-05, |
| "loss": 0.0823, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.4479283314669653, |
| "grad_norm": 0.42969638109207153, |
| "learning_rate": 1.7246362940275653e-05, |
| "loss": 0.07, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.45223533465414767, |
| "grad_norm": 2.1813602447509766, |
| "learning_rate": 1.7198506891271057e-05, |
| "loss": 0.0706, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.45654233784133, |
| "grad_norm": 0.8767011761665344, |
| "learning_rate": 1.7150650842266464e-05, |
| "loss": 0.0798, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.4608493410285124, |
| "grad_norm": 0.997157096862793, |
| "learning_rate": 1.710279479326187e-05, |
| "loss": 0.0801, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.46515634421569474, |
| "grad_norm": 0.7616205215454102, |
| "learning_rate": 1.7054938744257275e-05, |
| "loss": 0.083, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.4694633474028771, |
| "grad_norm": 2.203051805496216, |
| "learning_rate": 1.7007082695252682e-05, |
| "loss": 0.0755, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.47377035059005945, |
| "grad_norm": 0.6811165809631348, |
| "learning_rate": 1.695922664624809e-05, |
| "loss": 0.0694, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.4780773537772418, |
| "grad_norm": 1.0467352867126465, |
| "learning_rate": 1.6911370597243494e-05, |
| "loss": 0.0773, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.48238435696442417, |
| "grad_norm": 1.0311206579208374, |
| "learning_rate": 1.68635145482389e-05, |
| "loss": 0.0826, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.4866913601516065, |
| "grad_norm": 4.649372577667236, |
| "learning_rate": 1.6815658499234305e-05, |
| "loss": 0.0909, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.4909983633387889, |
| "grad_norm": 3.3261115550994873, |
| "learning_rate": 1.6767802450229712e-05, |
| "loss": 0.0904, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.49530536652597124, |
| "grad_norm": 0.9026235342025757, |
| "learning_rate": 1.6719946401225116e-05, |
| "loss": 0.0781, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.4996123697131536, |
| "grad_norm": 1.0204756259918213, |
| "learning_rate": 1.6672090352220523e-05, |
| "loss": 0.0762, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.503919372900336, |
| "grad_norm": 1.9559473991394043, |
| "learning_rate": 1.6624234303215927e-05, |
| "loss": 0.0768, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.5082263760875183, |
| "grad_norm": 2.3736560344696045, |
| "learning_rate": 1.6576378254211334e-05, |
| "loss": 0.0613, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.5125333792747007, |
| "grad_norm": 0.923481285572052, |
| "learning_rate": 1.6528522205206738e-05, |
| "loss": 0.0796, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.516840382461883, |
| "grad_norm": 0.42188379168510437, |
| "learning_rate": 1.6480666156202145e-05, |
| "loss": 0.0733, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.5211473856490654, |
| "grad_norm": 1.1153289079666138, |
| "learning_rate": 1.6432810107197553e-05, |
| "loss": 0.0828, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.5254543888362477, |
| "grad_norm": 0.7091718912124634, |
| "learning_rate": 1.6384954058192956e-05, |
| "loss": 0.0612, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.5297613920234301, |
| "grad_norm": 0.7706901431083679, |
| "learning_rate": 1.6337098009188364e-05, |
| "loss": 0.0772, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.5340683952106124, |
| "grad_norm": 0.29516345262527466, |
| "learning_rate": 1.6289241960183768e-05, |
| "loss": 0.0665, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.5383753983977948, |
| "grad_norm": 1.5661741495132446, |
| "learning_rate": 1.6241385911179175e-05, |
| "loss": 0.0734, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.5426824015849772, |
| "grad_norm": 1.2785195112228394, |
| "learning_rate": 1.619352986217458e-05, |
| "loss": 0.0768, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.5469894047721595, |
| "grad_norm": 1.2388705015182495, |
| "learning_rate": 1.6145673813169986e-05, |
| "loss": 0.0782, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.5512964079593419, |
| "grad_norm": 0.7553074359893799, |
| "learning_rate": 1.609781776416539e-05, |
| "loss": 0.0764, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.5556034111465242, |
| "grad_norm": 1.6529933214187622, |
| "learning_rate": 1.6049961715160797e-05, |
| "loss": 0.0769, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.5599104143337066, |
| "grad_norm": 2.259467124938965, |
| "learning_rate": 1.60021056661562e-05, |
| "loss": 0.0805, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.5642174175208889, |
| "grad_norm": 0.834506630897522, |
| "learning_rate": 1.5954249617151608e-05, |
| "loss": 0.0701, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.5685244207080713, |
| "grad_norm": 0.7409648299217224, |
| "learning_rate": 1.5906393568147016e-05, |
| "loss": 0.0691, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.5728314238952537, |
| "grad_norm": 2.443349838256836, |
| "learning_rate": 1.585853751914242e-05, |
| "loss": 0.0789, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.577138427082436, |
| "grad_norm": 1.4207009077072144, |
| "learning_rate": 1.5810681470137827e-05, |
| "loss": 0.0701, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.5814454302696184, |
| "grad_norm": 1.454414963722229, |
| "learning_rate": 1.576282542113323e-05, |
| "loss": 0.0745, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.5857524334568007, |
| "grad_norm": 0.995476484298706, |
| "learning_rate": 1.5714969372128638e-05, |
| "loss": 0.0766, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.5900594366439831, |
| "grad_norm": 0.39929381012916565, |
| "learning_rate": 1.5667113323124045e-05, |
| "loss": 0.0731, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.5943664398311654, |
| "grad_norm": 4.782962799072266, |
| "learning_rate": 1.561925727411945e-05, |
| "loss": 0.078, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.5986734430183478, |
| "grad_norm": 0.7349683046340942, |
| "learning_rate": 1.5571401225114856e-05, |
| "loss": 0.0822, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.6029804462055302, |
| "grad_norm": 2.0340960025787354, |
| "learning_rate": 1.5523545176110263e-05, |
| "loss": 0.0634, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.6072874493927125, |
| "grad_norm": 0.9049922823905945, |
| "learning_rate": 1.5475689127105667e-05, |
| "loss": 0.0867, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.6115944525798949, |
| "grad_norm": 0.9008879065513611, |
| "learning_rate": 1.5427833078101075e-05, |
| "loss": 0.0693, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.6159014557670772, |
| "grad_norm": 0.8665018081665039, |
| "learning_rate": 1.537997702909648e-05, |
| "loss": 0.0639, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.6202084589542596, |
| "grad_norm": 0.8610183000564575, |
| "learning_rate": 1.5332120980091886e-05, |
| "loss": 0.0622, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.6245154621414419, |
| "grad_norm": 1.0662976503372192, |
| "learning_rate": 1.5284264931087293e-05, |
| "loss": 0.0761, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.6288224653286243, |
| "grad_norm": 1.417581558227539, |
| "learning_rate": 1.5236408882082697e-05, |
| "loss": 0.0666, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.6331294685158066, |
| "grad_norm": 1.198586344718933, |
| "learning_rate": 1.5188552833078102e-05, |
| "loss": 0.0722, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.637436471702989, |
| "grad_norm": 0.3623594045639038, |
| "learning_rate": 1.5140696784073508e-05, |
| "loss": 0.0701, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.6417434748901715, |
| "grad_norm": 1.3689919710159302, |
| "learning_rate": 1.5092840735068914e-05, |
| "loss": 0.0867, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.6460504780773538, |
| "grad_norm": 1.0699403285980225, |
| "learning_rate": 1.5044984686064319e-05, |
| "loss": 0.0646, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.6503574812645362, |
| "grad_norm": 0.4037761092185974, |
| "learning_rate": 1.4997128637059726e-05, |
| "loss": 0.059, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.6546644844517185, |
| "grad_norm": 1.1219407320022583, |
| "learning_rate": 1.494927258805513e-05, |
| "loss": 0.0776, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.6589714876389009, |
| "grad_norm": 0.5265269875526428, |
| "learning_rate": 1.4901416539050538e-05, |
| "loss": 0.0691, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.6632784908260833, |
| "grad_norm": 0.6277808547019958, |
| "learning_rate": 1.4853560490045945e-05, |
| "loss": 0.067, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.6675854940132656, |
| "grad_norm": 1.1883065700531006, |
| "learning_rate": 1.4805704441041349e-05, |
| "loss": 0.0735, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.671892497200448, |
| "grad_norm": 1.3201600313186646, |
| "learning_rate": 1.4757848392036756e-05, |
| "loss": 0.071, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.6761995003876303, |
| "grad_norm": 0.35750851035118103, |
| "learning_rate": 1.470999234303216e-05, |
| "loss": 0.0705, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.6805065035748127, |
| "grad_norm": 1.2730865478515625, |
| "learning_rate": 1.4662136294027567e-05, |
| "loss": 0.0753, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.684813506761995, |
| "grad_norm": 2.2789065837860107, |
| "learning_rate": 1.4614280245022971e-05, |
| "loss": 0.0703, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.6891205099491774, |
| "grad_norm": 1.8654379844665527, |
| "learning_rate": 1.4566424196018378e-05, |
| "loss": 0.0752, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.6934275131363598, |
| "grad_norm": 0.888477623462677, |
| "learning_rate": 1.4518568147013784e-05, |
| "loss": 0.0727, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.6977345163235421, |
| "grad_norm": 1.23393714427948, |
| "learning_rate": 1.447071209800919e-05, |
| "loss": 0.0633, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.7020415195107245, |
| "grad_norm": 0.9582380652427673, |
| "learning_rate": 1.4422856049004595e-05, |
| "loss": 0.075, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.7063485226979068, |
| "grad_norm": 0.9180455207824707, |
| "learning_rate": 1.4375e-05, |
| "loss": 0.067, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.7106555258850892, |
| "grad_norm": 1.2393083572387695, |
| "learning_rate": 1.4327143950995408e-05, |
| "loss": 0.0634, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.7149625290722715, |
| "grad_norm": 1.2908138036727905, |
| "learning_rate": 1.4279287901990813e-05, |
| "loss": 0.0787, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.7192695322594539, |
| "grad_norm": 2.0125656127929688, |
| "learning_rate": 1.4231431852986219e-05, |
| "loss": 0.0806, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.7235765354466362, |
| "grad_norm": 1.1681885719299316, |
| "learning_rate": 1.4183575803981624e-05, |
| "loss": 0.0657, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.7278835386338186, |
| "grad_norm": 1.7651474475860596, |
| "learning_rate": 1.4135719754977032e-05, |
| "loss": 0.0678, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.732190541821001, |
| "grad_norm": 1.4047213792800903, |
| "learning_rate": 1.4087863705972436e-05, |
| "loss": 0.0561, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.7364975450081833, |
| "grad_norm": 1.0144027471542358, |
| "learning_rate": 1.4040007656967843e-05, |
| "loss": 0.0704, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.7408045481953657, |
| "grad_norm": 1.1934571266174316, |
| "learning_rate": 1.3992151607963247e-05, |
| "loss": 0.0612, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.745111551382548, |
| "grad_norm": 1.2061142921447754, |
| "learning_rate": 1.3944295558958654e-05, |
| "loss": 0.0681, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.7494185545697304, |
| "grad_norm": 1.0598750114440918, |
| "learning_rate": 1.3896439509954058e-05, |
| "loss": 0.0719, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.7537255577569127, |
| "grad_norm": 0.9914436340332031, |
| "learning_rate": 1.3848583460949465e-05, |
| "loss": 0.0731, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.7580325609440951, |
| "grad_norm": 2.260218620300293, |
| "learning_rate": 1.3800727411944872e-05, |
| "loss": 0.0648, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.7623395641312775, |
| "grad_norm": 0.8335168957710266, |
| "learning_rate": 1.3752871362940276e-05, |
| "loss": 0.0627, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.7666465673184598, |
| "grad_norm": 1.749588131904602, |
| "learning_rate": 1.3705015313935683e-05, |
| "loss": 0.0698, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.7709535705056422, |
| "grad_norm": 1.2013710737228394, |
| "learning_rate": 1.3657159264931087e-05, |
| "loss": 0.0734, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.7752605736928245, |
| "grad_norm": 1.2394059896469116, |
| "learning_rate": 1.3609303215926495e-05, |
| "loss": 0.0644, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.7795675768800069, |
| "grad_norm": 2.5147886276245117, |
| "learning_rate": 1.35614471669219e-05, |
| "loss": 0.0752, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.7838745800671892, |
| "grad_norm": 0.8637904524803162, |
| "learning_rate": 1.3513591117917306e-05, |
| "loss": 0.0627, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.7881815832543716, |
| "grad_norm": 1.1269769668579102, |
| "learning_rate": 1.3465735068912711e-05, |
| "loss": 0.0685, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.792488586441554, |
| "grad_norm": 0.3503759801387787, |
| "learning_rate": 1.3417879019908119e-05, |
| "loss": 0.0674, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.7967955896287363, |
| "grad_norm": 1.6583272218704224, |
| "learning_rate": 1.3370022970903522e-05, |
| "loss": 0.0617, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.8011025928159187, |
| "grad_norm": 0.49781036376953125, |
| "learning_rate": 1.332216692189893e-05, |
| "loss": 0.0483, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.805409596003101, |
| "grad_norm": 0.423948734998703, |
| "learning_rate": 1.3274310872894335e-05, |
| "loss": 0.0651, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.8097165991902834, |
| "grad_norm": 3.2630441188812256, |
| "learning_rate": 1.322645482388974e-05, |
| "loss": 0.0751, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.8140236023774657, |
| "grad_norm": 1.1352860927581787, |
| "learning_rate": 1.3178598774885148e-05, |
| "loss": 0.0587, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.8183306055646481, |
| "grad_norm": 0.735758364200592, |
| "learning_rate": 1.3130742725880552e-05, |
| "loss": 0.0594, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.8226376087518305, |
| "grad_norm": 1.7869194746017456, |
| "learning_rate": 1.308288667687596e-05, |
| "loss": 0.0738, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.8269446119390128, |
| "grad_norm": 4.783473491668701, |
| "learning_rate": 1.3035030627871363e-05, |
| "loss": 0.0689, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.8312516151261952, |
| "grad_norm": 0.6986812949180603, |
| "learning_rate": 1.298717457886677e-05, |
| "loss": 0.0683, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.8355586183133775, |
| "grad_norm": 0.807700514793396, |
| "learning_rate": 1.2939318529862174e-05, |
| "loss": 0.0724, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.8398656215005599, |
| "grad_norm": 1.0882675647735596, |
| "learning_rate": 1.2891462480857581e-05, |
| "loss": 0.0706, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.8441726246877422, |
| "grad_norm": 1.652010440826416, |
| "learning_rate": 1.2843606431852987e-05, |
| "loss": 0.0592, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.8484796278749246, |
| "grad_norm": 1.6742961406707764, |
| "learning_rate": 1.2795750382848393e-05, |
| "loss": 0.0754, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.852786631062107, |
| "grad_norm": 0.7621288895606995, |
| "learning_rate": 1.27478943338438e-05, |
| "loss": 0.0575, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.8570936342492893, |
| "grad_norm": 1.4449315071105957, |
| "learning_rate": 1.2700038284839204e-05, |
| "loss": 0.0696, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.8614006374364717, |
| "grad_norm": 0.8159062266349792, |
| "learning_rate": 1.2652182235834611e-05, |
| "loss": 0.0662, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.865707640623654, |
| "grad_norm": 1.413529634475708, |
| "learning_rate": 1.2604326186830017e-05, |
| "loss": 0.0701, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.8700146438108364, |
| "grad_norm": 1.043516755104065, |
| "learning_rate": 1.2556470137825422e-05, |
| "loss": 0.0666, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.8743216469980187, |
| "grad_norm": 1.4546676874160767, |
| "learning_rate": 1.2508614088820828e-05, |
| "loss": 0.0565, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.8786286501852011, |
| "grad_norm": 1.4287497997283936, |
| "learning_rate": 1.2460758039816235e-05, |
| "loss": 0.0775, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.8829356533723834, |
| "grad_norm": 0.24882686138153076, |
| "learning_rate": 1.2412901990811639e-05, |
| "loss": 0.0545, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.8872426565595658, |
| "grad_norm": 0.7413391470909119, |
| "learning_rate": 1.2365045941807046e-05, |
| "loss": 0.067, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.8915496597467482, |
| "grad_norm": 0.6108767986297607, |
| "learning_rate": 1.231718989280245e-05, |
| "loss": 0.059, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.8958566629339306, |
| "grad_norm": 1.301120400428772, |
| "learning_rate": 1.2269333843797857e-05, |
| "loss": 0.061, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.900163666121113, |
| "grad_norm": 0.744163990020752, |
| "learning_rate": 1.2221477794793264e-05, |
| "loss": 0.0572, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.9044706693082953, |
| "grad_norm": 0.7539933323860168, |
| "learning_rate": 1.2173621745788668e-05, |
| "loss": 0.0856, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.9087776724954777, |
| "grad_norm": 0.5975971817970276, |
| "learning_rate": 1.2125765696784076e-05, |
| "loss": 0.0623, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.91308467568266, |
| "grad_norm": 1.0527843236923218, |
| "learning_rate": 1.207790964777948e-05, |
| "loss": 0.0564, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.9173916788698424, |
| "grad_norm": 1.154876947402954, |
| "learning_rate": 1.2030053598774887e-05, |
| "loss": 0.0741, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.9216986820570248, |
| "grad_norm": 0.895031213760376, |
| "learning_rate": 1.198219754977029e-05, |
| "loss": 0.0734, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.9260056852442071, |
| "grad_norm": 1.088548183441162, |
| "learning_rate": 1.1934341500765698e-05, |
| "loss": 0.0563, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.9303126884313895, |
| "grad_norm": 0.8421652913093567, |
| "learning_rate": 1.1886485451761103e-05, |
| "loss": 0.0691, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.9346196916185718, |
| "grad_norm": 2.2791976928710938, |
| "learning_rate": 1.1838629402756509e-05, |
| "loss": 0.0814, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.9389266948057542, |
| "grad_norm": 0.9922728538513184, |
| "learning_rate": 1.1790773353751915e-05, |
| "loss": 0.0672, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.9432336979929365, |
| "grad_norm": 0.9205858111381531, |
| "learning_rate": 1.1742917304747322e-05, |
| "loss": 0.0666, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.9475407011801189, |
| "grad_norm": 1.29545259475708, |
| "learning_rate": 1.1695061255742727e-05, |
| "loss": 0.0593, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.9518477043673013, |
| "grad_norm": 2.6823294162750244, |
| "learning_rate": 1.1647205206738133e-05, |
| "loss": 0.0583, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.9561547075544836, |
| "grad_norm": 1.1763588190078735, |
| "learning_rate": 1.1599349157733539e-05, |
| "loss": 0.0658, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.960461710741666, |
| "grad_norm": 0.281956285238266, |
| "learning_rate": 1.1551493108728944e-05, |
| "loss": 0.0564, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.9647687139288483, |
| "grad_norm": 1.5565913915634155, |
| "learning_rate": 1.1503637059724351e-05, |
| "loss": 0.0742, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.9690757171160307, |
| "grad_norm": 0.7583732604980469, |
| "learning_rate": 1.1455781010719755e-05, |
| "loss": 0.0641, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.973382720303213, |
| "grad_norm": 1.4207026958465576, |
| "learning_rate": 1.1407924961715163e-05, |
| "loss": 0.0622, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.9776897234903954, |
| "grad_norm": 2.625925302505493, |
| "learning_rate": 1.1360068912710566e-05, |
| "loss": 0.0672, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.9819967266775778, |
| "grad_norm": 0.8742978572845459, |
| "learning_rate": 1.1312212863705974e-05, |
| "loss": 0.0709, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.9863037298647601, |
| "grad_norm": 0.6801431179046631, |
| "learning_rate": 1.1264356814701378e-05, |
| "loss": 0.0624, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.9906107330519425, |
| "grad_norm": 0.4742066562175751, |
| "learning_rate": 1.1216500765696785e-05, |
| "loss": 0.0764, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.9949177362391248, |
| "grad_norm": 1.6330316066741943, |
| "learning_rate": 1.1168644716692192e-05, |
| "loss": 0.0731, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.9992247394263072, |
| "grad_norm": 0.3583783209323883, |
| "learning_rate": 1.1120788667687596e-05, |
| "loss": 0.055, |
| "step": 11600 |
| }, |
| { |
| "epoch": 1.0035317426134895, |
| "grad_norm": 0.871157169342041, |
| "learning_rate": 1.1072932618683003e-05, |
| "loss": 0.0559, |
| "step": 11650 |
| }, |
| { |
| "epoch": 1.007838745800672, |
| "grad_norm": 1.6235480308532715, |
| "learning_rate": 1.1025076569678409e-05, |
| "loss": 0.0504, |
| "step": 11700 |
| }, |
| { |
| "epoch": 1.0121457489878543, |
| "grad_norm": 1.0415027141571045, |
| "learning_rate": 1.0977220520673814e-05, |
| "loss": 0.0621, |
| "step": 11750 |
| }, |
| { |
| "epoch": 1.0164527521750366, |
| "grad_norm": 1.9939746856689453, |
| "learning_rate": 1.092936447166922e-05, |
| "loss": 0.0543, |
| "step": 11800 |
| }, |
| { |
| "epoch": 1.020759755362219, |
| "grad_norm": 1.6792505979537964, |
| "learning_rate": 1.0881508422664625e-05, |
| "loss": 0.05, |
| "step": 11850 |
| }, |
| { |
| "epoch": 1.0250667585494013, |
| "grad_norm": 1.1362611055374146, |
| "learning_rate": 1.0833652373660031e-05, |
| "loss": 0.0582, |
| "step": 11900 |
| }, |
| { |
| "epoch": 1.0293737617365837, |
| "grad_norm": 0.8512780070304871, |
| "learning_rate": 1.0785796324655438e-05, |
| "loss": 0.0506, |
| "step": 11950 |
| }, |
| { |
| "epoch": 1.033680764923766, |
| "grad_norm": 1.2007173299789429, |
| "learning_rate": 1.0737940275650842e-05, |
| "loss": 0.057, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.0379877681109484, |
| "grad_norm": 0.45064929127693176, |
| "learning_rate": 1.069008422664625e-05, |
| "loss": 0.0516, |
| "step": 12050 |
| }, |
| { |
| "epoch": 1.0422947712981308, |
| "grad_norm": 1.5519843101501465, |
| "learning_rate": 1.0642228177641657e-05, |
| "loss": 0.0498, |
| "step": 12100 |
| }, |
| { |
| "epoch": 1.046601774485313, |
| "grad_norm": 2.6113767623901367, |
| "learning_rate": 1.059437212863706e-05, |
| "loss": 0.0475, |
| "step": 12150 |
| }, |
| { |
| "epoch": 1.0509087776724955, |
| "grad_norm": 1.725409984588623, |
| "learning_rate": 1.0546516079632468e-05, |
| "loss": 0.0549, |
| "step": 12200 |
| }, |
| { |
| "epoch": 1.0552157808596778, |
| "grad_norm": 1.6692222356796265, |
| "learning_rate": 1.0498660030627872e-05, |
| "loss": 0.0541, |
| "step": 12250 |
| }, |
| { |
| "epoch": 1.0595227840468602, |
| "grad_norm": 0.3569232225418091, |
| "learning_rate": 1.0450803981623279e-05, |
| "loss": 0.0446, |
| "step": 12300 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 1.5374314785003662, |
| "learning_rate": 1.0402947932618683e-05, |
| "loss": 0.0591, |
| "step": 12350 |
| }, |
| { |
| "epoch": 1.068136790421225, |
| "grad_norm": 0.7895877957344055, |
| "learning_rate": 1.035509188361409e-05, |
| "loss": 0.0502, |
| "step": 12400 |
| }, |
| { |
| "epoch": 1.0724437936084072, |
| "grad_norm": 2.4290881156921387, |
| "learning_rate": 1.0307235834609494e-05, |
| "loss": 0.0449, |
| "step": 12450 |
| }, |
| { |
| "epoch": 1.0767507967955896, |
| "grad_norm": 0.4035605192184448, |
| "learning_rate": 1.0259379785604901e-05, |
| "loss": 0.0561, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.081057799982772, |
| "grad_norm": 3.102343797683716, |
| "learning_rate": 1.0211523736600307e-05, |
| "loss": 0.055, |
| "step": 12550 |
| }, |
| { |
| "epoch": 1.0853648031699543, |
| "grad_norm": 2.905097246170044, |
| "learning_rate": 1.0163667687595712e-05, |
| "loss": 0.0486, |
| "step": 12600 |
| }, |
| { |
| "epoch": 1.0896718063571367, |
| "grad_norm": 1.0258862972259521, |
| "learning_rate": 1.011581163859112e-05, |
| "loss": 0.0562, |
| "step": 12650 |
| }, |
| { |
| "epoch": 1.093978809544319, |
| "grad_norm": 1.214162826538086, |
| "learning_rate": 1.0067955589586525e-05, |
| "loss": 0.0556, |
| "step": 12700 |
| }, |
| { |
| "epoch": 1.0982858127315014, |
| "grad_norm": 2.0131213665008545, |
| "learning_rate": 1.002009954058193e-05, |
| "loss": 0.0616, |
| "step": 12750 |
| }, |
| { |
| "epoch": 1.1025928159186837, |
| "grad_norm": 1.3988288640975952, |
| "learning_rate": 9.972243491577336e-06, |
| "loss": 0.0558, |
| "step": 12800 |
| }, |
| { |
| "epoch": 1.106899819105866, |
| "grad_norm": 1.7865221500396729, |
| "learning_rate": 9.924387442572742e-06, |
| "loss": 0.056, |
| "step": 12850 |
| }, |
| { |
| "epoch": 1.1112068222930485, |
| "grad_norm": 1.5038090944290161, |
| "learning_rate": 9.876531393568147e-06, |
| "loss": 0.0517, |
| "step": 12900 |
| }, |
| { |
| "epoch": 1.1155138254802308, |
| "grad_norm": 0.6820291876792908, |
| "learning_rate": 9.828675344563555e-06, |
| "loss": 0.0637, |
| "step": 12950 |
| }, |
| { |
| "epoch": 1.1198208286674132, |
| "grad_norm": 3.0986287593841553, |
| "learning_rate": 9.78081929555896e-06, |
| "loss": 0.0521, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.1241278318545955, |
| "grad_norm": 2.395266056060791, |
| "learning_rate": 9.732963246554366e-06, |
| "loss": 0.0463, |
| "step": 13050 |
| }, |
| { |
| "epoch": 1.1284348350417779, |
| "grad_norm": 2.018718719482422, |
| "learning_rate": 9.685107197549771e-06, |
| "loss": 0.065, |
| "step": 13100 |
| }, |
| { |
| "epoch": 1.1327418382289602, |
| "grad_norm": 2.0893940925598145, |
| "learning_rate": 9.637251148545177e-06, |
| "loss": 0.0471, |
| "step": 13150 |
| }, |
| { |
| "epoch": 1.1370488414161426, |
| "grad_norm": 0.0586327388882637, |
| "learning_rate": 9.589395099540583e-06, |
| "loss": 0.051, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.141355844603325, |
| "grad_norm": 2.719393014907837, |
| "learning_rate": 9.541539050535988e-06, |
| "loss": 0.054, |
| "step": 13250 |
| }, |
| { |
| "epoch": 1.1456628477905073, |
| "grad_norm": 3.360701084136963, |
| "learning_rate": 9.493683001531394e-06, |
| "loss": 0.0475, |
| "step": 13300 |
| }, |
| { |
| "epoch": 1.1499698509776897, |
| "grad_norm": 2.0324788093566895, |
| "learning_rate": 9.4458269525268e-06, |
| "loss": 0.0572, |
| "step": 13350 |
| }, |
| { |
| "epoch": 1.154276854164872, |
| "grad_norm": 3.841675281524658, |
| "learning_rate": 9.397970903522205e-06, |
| "loss": 0.0455, |
| "step": 13400 |
| }, |
| { |
| "epoch": 1.1585838573520544, |
| "grad_norm": 1.2644069194793701, |
| "learning_rate": 9.350114854517612e-06, |
| "loss": 0.0551, |
| "step": 13450 |
| }, |
| { |
| "epoch": 1.1628908605392367, |
| "grad_norm": 1.7926990985870361, |
| "learning_rate": 9.302258805513018e-06, |
| "loss": 0.0561, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.167197863726419, |
| "grad_norm": 1.203897476196289, |
| "learning_rate": 9.254402756508423e-06, |
| "loss": 0.0595, |
| "step": 13550 |
| }, |
| { |
| "epoch": 1.1715048669136015, |
| "grad_norm": 2.352569341659546, |
| "learning_rate": 9.206546707503829e-06, |
| "loss": 0.0526, |
| "step": 13600 |
| }, |
| { |
| "epoch": 1.1758118701007838, |
| "grad_norm": 1.1437283754348755, |
| "learning_rate": 9.158690658499236e-06, |
| "loss": 0.0564, |
| "step": 13650 |
| }, |
| { |
| "epoch": 1.1801188732879662, |
| "grad_norm": 0.766175389289856, |
| "learning_rate": 9.110834609494642e-06, |
| "loss": 0.0471, |
| "step": 13700 |
| }, |
| { |
| "epoch": 1.1844258764751485, |
| "grad_norm": 1.0126721858978271, |
| "learning_rate": 9.062978560490047e-06, |
| "loss": 0.0597, |
| "step": 13750 |
| }, |
| { |
| "epoch": 1.1887328796623309, |
| "grad_norm": 0.4978892505168915, |
| "learning_rate": 9.015122511485453e-06, |
| "loss": 0.0528, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.1930398828495132, |
| "grad_norm": 2.2827069759368896, |
| "learning_rate": 8.967266462480858e-06, |
| "loss": 0.0529, |
| "step": 13850 |
| }, |
| { |
| "epoch": 1.1973468860366956, |
| "grad_norm": 1.0703896284103394, |
| "learning_rate": 8.919410413476264e-06, |
| "loss": 0.0499, |
| "step": 13900 |
| }, |
| { |
| "epoch": 1.201653889223878, |
| "grad_norm": 1.4585344791412354, |
| "learning_rate": 8.87155436447167e-06, |
| "loss": 0.0577, |
| "step": 13950 |
| }, |
| { |
| "epoch": 1.2059608924110603, |
| "grad_norm": 1.5076733827590942, |
| "learning_rate": 8.823698315467075e-06, |
| "loss": 0.0523, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.2102678955982427, |
| "grad_norm": 0.6510587334632874, |
| "learning_rate": 8.775842266462482e-06, |
| "loss": 0.0485, |
| "step": 14050 |
| }, |
| { |
| "epoch": 1.214574898785425, |
| "grad_norm": 1.1064165830612183, |
| "learning_rate": 8.727986217457888e-06, |
| "loss": 0.0476, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.2188819019726074, |
| "grad_norm": 0.22263744473457336, |
| "learning_rate": 8.680130168453293e-06, |
| "loss": 0.0518, |
| "step": 14150 |
| }, |
| { |
| "epoch": 1.2231889051597897, |
| "grad_norm": 0.7431623935699463, |
| "learning_rate": 8.632274119448699e-06, |
| "loss": 0.0531, |
| "step": 14200 |
| }, |
| { |
| "epoch": 1.227495908346972, |
| "grad_norm": 3.2457995414733887, |
| "learning_rate": 8.584418070444104e-06, |
| "loss": 0.0417, |
| "step": 14250 |
| }, |
| { |
| "epoch": 1.2318029115341544, |
| "grad_norm": 2.163710117340088, |
| "learning_rate": 8.53656202143951e-06, |
| "loss": 0.0519, |
| "step": 14300 |
| }, |
| { |
| "epoch": 1.2361099147213368, |
| "grad_norm": 1.5874712467193604, |
| "learning_rate": 8.488705972434916e-06, |
| "loss": 0.0603, |
| "step": 14350 |
| }, |
| { |
| "epoch": 1.2404169179085192, |
| "grad_norm": 1.1613589525222778, |
| "learning_rate": 8.440849923430323e-06, |
| "loss": 0.0507, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.2447239210957015, |
| "grad_norm": 1.43361234664917, |
| "learning_rate": 8.392993874425728e-06, |
| "loss": 0.0519, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.2490309242828839, |
| "grad_norm": 1.9301427602767944, |
| "learning_rate": 8.345137825421134e-06, |
| "loss": 0.0648, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.2533379274700662, |
| "grad_norm": 1.192642331123352, |
| "learning_rate": 8.29728177641654e-06, |
| "loss": 0.0605, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.2576449306572486, |
| "grad_norm": 1.3436222076416016, |
| "learning_rate": 8.249425727411947e-06, |
| "loss": 0.0516, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.261951933844431, |
| "grad_norm": 0.6770356297492981, |
| "learning_rate": 8.201569678407352e-06, |
| "loss": 0.0519, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.2662589370316133, |
| "grad_norm": 0.7577407360076904, |
| "learning_rate": 8.153713629402758e-06, |
| "loss": 0.0491, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.2705659402187957, |
| "grad_norm": 2.2546918392181396, |
| "learning_rate": 8.105857580398164e-06, |
| "loss": 0.0449, |
| "step": 14750 |
| }, |
| { |
| "epoch": 1.274872943405978, |
| "grad_norm": 1.6185483932495117, |
| "learning_rate": 8.058001531393569e-06, |
| "loss": 0.0578, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.2791799465931604, |
| "grad_norm": 0.1822790950536728, |
| "learning_rate": 8.010145482388975e-06, |
| "loss": 0.0364, |
| "step": 14850 |
| }, |
| { |
| "epoch": 1.2834869497803427, |
| "grad_norm": 1.012245774269104, |
| "learning_rate": 7.96228943338438e-06, |
| "loss": 0.0497, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.287793952967525, |
| "grad_norm": 1.4130667448043823, |
| "learning_rate": 7.914433384379786e-06, |
| "loss": 0.0471, |
| "step": 14950 |
| }, |
| { |
| "epoch": 1.2921009561547074, |
| "grad_norm": 0.2369040846824646, |
| "learning_rate": 7.866577335375191e-06, |
| "loss": 0.0491, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.2964079593418898, |
| "grad_norm": 1.4658520221710205, |
| "learning_rate": 7.818721286370597e-06, |
| "loss": 0.0459, |
| "step": 15050 |
| }, |
| { |
| "epoch": 1.3007149625290721, |
| "grad_norm": 1.0052220821380615, |
| "learning_rate": 7.770865237366003e-06, |
| "loss": 0.0533, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.3050219657162545, |
| "grad_norm": 1.026128888130188, |
| "learning_rate": 7.72300918836141e-06, |
| "loss": 0.0501, |
| "step": 15150 |
| }, |
| { |
| "epoch": 1.3093289689034369, |
| "grad_norm": 1.5102099180221558, |
| "learning_rate": 7.675153139356815e-06, |
| "loss": 0.0569, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.3136359720906192, |
| "grad_norm": 0.44090044498443604, |
| "learning_rate": 7.627297090352222e-06, |
| "loss": 0.0589, |
| "step": 15250 |
| }, |
| { |
| "epoch": 1.3179429752778016, |
| "grad_norm": 2.6066102981567383, |
| "learning_rate": 7.579441041347627e-06, |
| "loss": 0.0506, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.322249978464984, |
| "grad_norm": 1.068942666053772, |
| "learning_rate": 7.531584992343033e-06, |
| "loss": 0.0498, |
| "step": 15350 |
| }, |
| { |
| "epoch": 1.3265569816521663, |
| "grad_norm": 1.4411200284957886, |
| "learning_rate": 7.4837289433384385e-06, |
| "loss": 0.0507, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.3308639848393486, |
| "grad_norm": 2.66965913772583, |
| "learning_rate": 7.435872894333844e-06, |
| "loss": 0.0554, |
| "step": 15450 |
| }, |
| { |
| "epoch": 1.335170988026531, |
| "grad_norm": 0.7807905673980713, |
| "learning_rate": 7.38801684532925e-06, |
| "loss": 0.0582, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.3394779912137136, |
| "grad_norm": 1.0940531492233276, |
| "learning_rate": 7.340160796324656e-06, |
| "loss": 0.0481, |
| "step": 15550 |
| }, |
| { |
| "epoch": 1.343784994400896, |
| "grad_norm": 1.802668809890747, |
| "learning_rate": 7.2923047473200616e-06, |
| "loss": 0.0468, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.3480919975880783, |
| "grad_norm": 0.45175236463546753, |
| "learning_rate": 7.244448698315467e-06, |
| "loss": 0.0459, |
| "step": 15650 |
| }, |
| { |
| "epoch": 1.3523990007752607, |
| "grad_norm": 0.5869175791740417, |
| "learning_rate": 7.196592649310874e-06, |
| "loss": 0.0494, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.356706003962443, |
| "grad_norm": 0.7470104694366455, |
| "learning_rate": 7.14873660030628e-06, |
| "loss": 0.0513, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.3610130071496254, |
| "grad_norm": 2.0422937870025635, |
| "learning_rate": 7.1008805513016855e-06, |
| "loss": 0.0643, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.3653200103368077, |
| "grad_norm": 1.8812757730484009, |
| "learning_rate": 7.053024502297091e-06, |
| "loss": 0.0577, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.36962701352399, |
| "grad_norm": 0.21352899074554443, |
| "learning_rate": 7.005168453292497e-06, |
| "loss": 0.0459, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.3739340167111724, |
| "grad_norm": 0.8854315876960754, |
| "learning_rate": 6.957312404287902e-06, |
| "loss": 0.0472, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.3782410198983548, |
| "grad_norm": 1.2343345880508423, |
| "learning_rate": 6.909456355283309e-06, |
| "loss": 0.0447, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.3825480230855371, |
| "grad_norm": 1.955546259880066, |
| "learning_rate": 6.861600306278714e-06, |
| "loss": 0.0465, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.3868550262727195, |
| "grad_norm": 0.9426198601722717, |
| "learning_rate": 6.81374425727412e-06, |
| "loss": 0.0623, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.3911620294599019, |
| "grad_norm": 0.7671981453895569, |
| "learning_rate": 6.765888208269525e-06, |
| "loss": 0.0539, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.3954690326470842, |
| "grad_norm": 1.8504656553268433, |
| "learning_rate": 6.718032159264931e-06, |
| "loss": 0.0455, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.3997760358342666, |
| "grad_norm": 2.851039409637451, |
| "learning_rate": 6.670176110260338e-06, |
| "loss": 0.0434, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.404083039021449, |
| "grad_norm": 2.0784494876861572, |
| "learning_rate": 6.622320061255744e-06, |
| "loss": 0.0469, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.4083900422086313, |
| "grad_norm": 1.1852946281433105, |
| "learning_rate": 6.574464012251149e-06, |
| "loss": 0.0508, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.4126970453958136, |
| "grad_norm": 0.8519759774208069, |
| "learning_rate": 6.526607963246555e-06, |
| "loss": 0.0622, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.417004048582996, |
| "grad_norm": 1.7714693546295166, |
| "learning_rate": 6.4787519142419604e-06, |
| "loss": 0.045, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.4213110517701784, |
| "grad_norm": 1.9129570722579956, |
| "learning_rate": 6.430895865237367e-06, |
| "loss": 0.0527, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.4256180549573607, |
| "grad_norm": 0.8473785519599915, |
| "learning_rate": 6.383039816232772e-06, |
| "loss": 0.0438, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.429925058144543, |
| "grad_norm": 1.0529272556304932, |
| "learning_rate": 6.335183767228178e-06, |
| "loss": 0.0462, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.4342320613317254, |
| "grad_norm": 1.336285948753357, |
| "learning_rate": 6.2873277182235836e-06, |
| "loss": 0.0458, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.4385390645189078, |
| "grad_norm": 0.36205947399139404, |
| "learning_rate": 6.239471669218989e-06, |
| "loss": 0.0373, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.4428460677060901, |
| "grad_norm": 0.3680015504360199, |
| "learning_rate": 6.191615620214395e-06, |
| "loss": 0.0507, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.4471530708932725, |
| "grad_norm": 0.24794526398181915, |
| "learning_rate": 6.143759571209802e-06, |
| "loss": 0.0465, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.4514600740804549, |
| "grad_norm": 1.3948922157287598, |
| "learning_rate": 6.0959035222052075e-06, |
| "loss": 0.0427, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.4557670772676372, |
| "grad_norm": 0.9279671311378479, |
| "learning_rate": 6.048047473200613e-06, |
| "loss": 0.0509, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.4600740804548196, |
| "grad_norm": 1.4752745628356934, |
| "learning_rate": 6.0001914241960195e-06, |
| "loss": 0.0545, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.464381083642002, |
| "grad_norm": 1.0804786682128906, |
| "learning_rate": 5.952335375191425e-06, |
| "loss": 0.0488, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.4686880868291843, |
| "grad_norm": 2.13808536529541, |
| "learning_rate": 5.904479326186831e-06, |
| "loss": 0.0504, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.4729950900163666, |
| "grad_norm": 0.6244465708732605, |
| "learning_rate": 5.856623277182236e-06, |
| "loss": 0.0513, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.477302093203549, |
| "grad_norm": 1.500331163406372, |
| "learning_rate": 5.808767228177642e-06, |
| "loss": 0.049, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.4816090963907314, |
| "grad_norm": 1.1396780014038086, |
| "learning_rate": 5.760911179173047e-06, |
| "loss": 0.0537, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.4859160995779137, |
| "grad_norm": 1.0686885118484497, |
| "learning_rate": 5.713055130168454e-06, |
| "loss": 0.0524, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.490223102765096, |
| "grad_norm": 0.20028911530971527, |
| "learning_rate": 5.665199081163859e-06, |
| "loss": 0.0471, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.4945301059522784, |
| "grad_norm": 1.0594055652618408, |
| "learning_rate": 5.617343032159266e-06, |
| "loss": 0.0459, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.4988371091394608, |
| "grad_norm": 3.5541036128997803, |
| "learning_rate": 5.569486983154671e-06, |
| "loss": 0.056, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.5031441123266431, |
| "grad_norm": 1.243882417678833, |
| "learning_rate": 5.521630934150078e-06, |
| "loss": 0.056, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.5074511155138255, |
| "grad_norm": 0.7564629316329956, |
| "learning_rate": 5.473774885145483e-06, |
| "loss": 0.0469, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.5117581187010078, |
| "grad_norm": 1.5014382600784302, |
| "learning_rate": 5.425918836140889e-06, |
| "loss": 0.0467, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.5160651218881902, |
| "grad_norm": 1.7758488655090332, |
| "learning_rate": 5.378062787136294e-06, |
| "loss": 0.0503, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.5203721250753726, |
| "grad_norm": 0.6620356440544128, |
| "learning_rate": 5.3302067381317e-06, |
| "loss": 0.051, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.524679128262555, |
| "grad_norm": 1.3163670301437378, |
| "learning_rate": 5.2823506891271055e-06, |
| "loss": 0.0402, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.5289861314497373, |
| "grad_norm": 1.738950490951538, |
| "learning_rate": 5.234494640122512e-06, |
| "loss": 0.0419, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.5332931346369196, |
| "grad_norm": 1.325990080833435, |
| "learning_rate": 5.1866385911179175e-06, |
| "loss": 0.0524, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.537600137824102, |
| "grad_norm": 1.0886728763580322, |
| "learning_rate": 5.138782542113323e-06, |
| "loss": 0.0523, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.5419071410112843, |
| "grad_norm": 1.1603420972824097, |
| "learning_rate": 5.0909264931087295e-06, |
| "loss": 0.0444, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.5462141441984667, |
| "grad_norm": 0.38339659571647644, |
| "learning_rate": 5.043070444104136e-06, |
| "loss": 0.0481, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.550521147385649, |
| "grad_norm": 1.1160061359405518, |
| "learning_rate": 4.995214395099541e-06, |
| "loss": 0.0523, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.5548281505728314, |
| "grad_norm": 1.545622706413269, |
| "learning_rate": 4.947358346094947e-06, |
| "loss": 0.0551, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.5591351537600138, |
| "grad_norm": 0.8636265397071838, |
| "learning_rate": 4.899502297090353e-06, |
| "loss": 0.058, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.5634421569471961, |
| "grad_norm": 1.2045224905014038, |
| "learning_rate": 4.851646248085758e-06, |
| "loss": 0.0492, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.5677491601343785, |
| "grad_norm": 1.5447427034378052, |
| "learning_rate": 4.803790199081165e-06, |
| "loss": 0.0532, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.5720561633215608, |
| "grad_norm": 0.7917791604995728, |
| "learning_rate": 4.75593415007657e-06, |
| "loss": 0.0481, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.5763631665087432, |
| "grad_norm": 1.0425752401351929, |
| "learning_rate": 4.7080781010719766e-06, |
| "loss": 0.0606, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.5806701696959256, |
| "grad_norm": 1.5828264951705933, |
| "learning_rate": 4.660222052067382e-06, |
| "loss": 0.0514, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.584977172883108, |
| "grad_norm": 0.5627719759941101, |
| "learning_rate": 4.612366003062788e-06, |
| "loss": 0.0489, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.5892841760702903, |
| "grad_norm": 1.3349500894546509, |
| "learning_rate": 4.564509954058193e-06, |
| "loss": 0.0412, |
| "step": 18450 |
| }, |
| { |
| "epoch": 1.5935911792574726, |
| "grad_norm": 1.2263731956481934, |
| "learning_rate": 4.516653905053599e-06, |
| "loss": 0.0389, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.597898182444655, |
| "grad_norm": 0.26084914803504944, |
| "learning_rate": 4.468797856049004e-06, |
| "loss": 0.0496, |
| "step": 18550 |
| }, |
| { |
| "epoch": 1.6022051856318373, |
| "grad_norm": 2.95097017288208, |
| "learning_rate": 4.420941807044411e-06, |
| "loss": 0.0486, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.6065121888190197, |
| "grad_norm": 1.1969259977340698, |
| "learning_rate": 4.373085758039816e-06, |
| "loss": 0.0453, |
| "step": 18650 |
| }, |
| { |
| "epoch": 1.610819192006202, |
| "grad_norm": 1.319840908050537, |
| "learning_rate": 4.325229709035223e-06, |
| "loss": 0.0657, |
| "step": 18700 |
| }, |
| { |
| "epoch": 1.6151261951933844, |
| "grad_norm": 0.9771299958229065, |
| "learning_rate": 4.277373660030628e-06, |
| "loss": 0.0445, |
| "step": 18750 |
| }, |
| { |
| "epoch": 1.6194331983805668, |
| "grad_norm": 0.4943866431713104, |
| "learning_rate": 4.229517611026034e-06, |
| "loss": 0.0568, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.6237402015677491, |
| "grad_norm": 3.353970527648926, |
| "learning_rate": 4.1816615620214395e-06, |
| "loss": 0.0494, |
| "step": 18850 |
| }, |
| { |
| "epoch": 1.6280472047549315, |
| "grad_norm": 0.42139098048210144, |
| "learning_rate": 4.133805513016846e-06, |
| "loss": 0.0476, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.6323542079421138, |
| "grad_norm": 0.5033485889434814, |
| "learning_rate": 4.0859494640122515e-06, |
| "loss": 0.0414, |
| "step": 18950 |
| }, |
| { |
| "epoch": 1.6366612111292962, |
| "grad_norm": 5.167110443115234, |
| "learning_rate": 4.038093415007657e-06, |
| "loss": 0.045, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.6409682143164785, |
| "grad_norm": 1.553246259689331, |
| "learning_rate": 3.990237366003063e-06, |
| "loss": 0.0433, |
| "step": 19050 |
| }, |
| { |
| "epoch": 1.645275217503661, |
| "grad_norm": 2.0117363929748535, |
| "learning_rate": 3.942381316998469e-06, |
| "loss": 0.0575, |
| "step": 19100 |
| }, |
| { |
| "epoch": 1.6495822206908433, |
| "grad_norm": 0.43269413709640503, |
| "learning_rate": 3.894525267993875e-06, |
| "loss": 0.0564, |
| "step": 19150 |
| }, |
| { |
| "epoch": 1.6538892238780256, |
| "grad_norm": 1.9576497077941895, |
| "learning_rate": 3.846669218989281e-06, |
| "loss": 0.0449, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.6581962270652082, |
| "grad_norm": 0.1240052804350853, |
| "learning_rate": 3.7988131699846866e-06, |
| "loss": 0.0506, |
| "step": 19250 |
| }, |
| { |
| "epoch": 1.6625032302523906, |
| "grad_norm": 0.18176259100437164, |
| "learning_rate": 3.750957120980092e-06, |
| "loss": 0.0526, |
| "step": 19300 |
| }, |
| { |
| "epoch": 1.666810233439573, |
| "grad_norm": 0.851411759853363, |
| "learning_rate": 3.7031010719754977e-06, |
| "loss": 0.048, |
| "step": 19350 |
| }, |
| { |
| "epoch": 1.6711172366267553, |
| "grad_norm": 1.481475591659546, |
| "learning_rate": 3.6552450229709037e-06, |
| "loss": 0.0367, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.6754242398139376, |
| "grad_norm": 1.092772126197815, |
| "learning_rate": 3.6073889739663097e-06, |
| "loss": 0.0524, |
| "step": 19450 |
| }, |
| { |
| "epoch": 1.67973124300112, |
| "grad_norm": 1.5269631147384644, |
| "learning_rate": 3.5595329249617157e-06, |
| "loss": 0.051, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.6840382461883023, |
| "grad_norm": 1.2700409889221191, |
| "learning_rate": 3.5116768759571212e-06, |
| "loss": 0.0405, |
| "step": 19550 |
| }, |
| { |
| "epoch": 1.6883452493754847, |
| "grad_norm": 1.794728398323059, |
| "learning_rate": 3.463820826952527e-06, |
| "loss": 0.0514, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.692652252562667, |
| "grad_norm": 1.415767788887024, |
| "learning_rate": 3.415964777947933e-06, |
| "loss": 0.0524, |
| "step": 19650 |
| }, |
| { |
| "epoch": 1.6969592557498494, |
| "grad_norm": 2.1614432334899902, |
| "learning_rate": 3.3681087289433388e-06, |
| "loss": 0.0606, |
| "step": 19700 |
| }, |
| { |
| "epoch": 1.7012662589370318, |
| "grad_norm": 0.6388081908226013, |
| "learning_rate": 3.3202526799387448e-06, |
| "loss": 0.0566, |
| "step": 19750 |
| }, |
| { |
| "epoch": 1.7055732621242141, |
| "grad_norm": 0.44299671053886414, |
| "learning_rate": 3.2723966309341503e-06, |
| "loss": 0.0457, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.7098802653113965, |
| "grad_norm": 2.9245407581329346, |
| "learning_rate": 3.224540581929556e-06, |
| "loss": 0.0426, |
| "step": 19850 |
| }, |
| { |
| "epoch": 1.7141872684985788, |
| "grad_norm": 1.0790481567382812, |
| "learning_rate": 3.176684532924962e-06, |
| "loss": 0.0431, |
| "step": 19900 |
| }, |
| { |
| "epoch": 1.7184942716857612, |
| "grad_norm": 0.9610195159912109, |
| "learning_rate": 3.1288284839203675e-06, |
| "loss": 0.0465, |
| "step": 19950 |
| }, |
| { |
| "epoch": 1.7228012748729435, |
| "grad_norm": 1.2030363082885742, |
| "learning_rate": 3.080972434915774e-06, |
| "loss": 0.046, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.727108278060126, |
| "grad_norm": 1.5671857595443726, |
| "learning_rate": 3.0331163859111794e-06, |
| "loss": 0.0462, |
| "step": 20050 |
| }, |
| { |
| "epoch": 1.7314152812473083, |
| "grad_norm": 0.1325124055147171, |
| "learning_rate": 2.9852603369065854e-06, |
| "loss": 0.043, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.7357222844344906, |
| "grad_norm": 0.3264756500720978, |
| "learning_rate": 2.937404287901991e-06, |
| "loss": 0.0504, |
| "step": 20150 |
| }, |
| { |
| "epoch": 1.740029287621673, |
| "grad_norm": 1.5625221729278564, |
| "learning_rate": 2.8895482388973966e-06, |
| "loss": 0.0552, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.7443362908088553, |
| "grad_norm": 1.4193893671035767, |
| "learning_rate": 2.841692189892803e-06, |
| "loss": 0.0464, |
| "step": 20250 |
| }, |
| { |
| "epoch": 1.7486432939960377, |
| "grad_norm": 1.1241008043289185, |
| "learning_rate": 2.7938361408882086e-06, |
| "loss": 0.0451, |
| "step": 20300 |
| }, |
| { |
| "epoch": 1.75295029718322, |
| "grad_norm": 1.5155802965164185, |
| "learning_rate": 2.7459800918836145e-06, |
| "loss": 0.0469, |
| "step": 20350 |
| }, |
| { |
| "epoch": 1.7572573003704024, |
| "grad_norm": 1.757646918296814, |
| "learning_rate": 2.69812404287902e-06, |
| "loss": 0.0498, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.7615643035575848, |
| "grad_norm": 0.23753665387630463, |
| "learning_rate": 2.6502679938744257e-06, |
| "loss": 0.0468, |
| "step": 20450 |
| }, |
| { |
| "epoch": 1.765871306744767, |
| "grad_norm": 1.951041579246521, |
| "learning_rate": 2.6024119448698317e-06, |
| "loss": 0.0493, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.7701783099319495, |
| "grad_norm": 0.08196475356817245, |
| "learning_rate": 2.5545558958652377e-06, |
| "loss": 0.0397, |
| "step": 20550 |
| }, |
| { |
| "epoch": 1.7744853131191318, |
| "grad_norm": 1.0395424365997314, |
| "learning_rate": 2.5066998468606436e-06, |
| "loss": 0.0472, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.7787923163063142, |
| "grad_norm": 2.305999517440796, |
| "learning_rate": 2.4588437978560492e-06, |
| "loss": 0.0393, |
| "step": 20650 |
| }, |
| { |
| "epoch": 1.7830993194934965, |
| "grad_norm": 3.8407812118530273, |
| "learning_rate": 2.4109877488514548e-06, |
| "loss": 0.0446, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.787406322680679, |
| "grad_norm": 1.6644717454910278, |
| "learning_rate": 2.3631316998468608e-06, |
| "loss": 0.066, |
| "step": 20750 |
| }, |
| { |
| "epoch": 1.7917133258678613, |
| "grad_norm": 1.8919422626495361, |
| "learning_rate": 2.3152756508422668e-06, |
| "loss": 0.0495, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.7960203290550436, |
| "grad_norm": 0.3518455922603607, |
| "learning_rate": 2.2674196018376723e-06, |
| "loss": 0.039, |
| "step": 20850 |
| }, |
| { |
| "epoch": 1.800327332242226, |
| "grad_norm": 0.12189528346061707, |
| "learning_rate": 2.2195635528330783e-06, |
| "loss": 0.0561, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.8046343354294083, |
| "grad_norm": 0.3493580222129822, |
| "learning_rate": 2.171707503828484e-06, |
| "loss": 0.0456, |
| "step": 20950 |
| }, |
| { |
| "epoch": 1.8089413386165907, |
| "grad_norm": 2.5483009815216064, |
| "learning_rate": 2.12385145482389e-06, |
| "loss": 0.0441, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.813248341803773, |
| "grad_norm": 1.2866871356964111, |
| "learning_rate": 2.075995405819296e-06, |
| "loss": 0.0438, |
| "step": 21050 |
| }, |
| { |
| "epoch": 1.8175553449909554, |
| "grad_norm": 0.9496790170669556, |
| "learning_rate": 2.0281393568147014e-06, |
| "loss": 0.0546, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.8218623481781377, |
| "grad_norm": 1.427046537399292, |
| "learning_rate": 1.9802833078101074e-06, |
| "loss": 0.0596, |
| "step": 21150 |
| }, |
| { |
| "epoch": 1.82616935136532, |
| "grad_norm": 0.9176798462867737, |
| "learning_rate": 1.932427258805513e-06, |
| "loss": 0.0421, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.8304763545525025, |
| "grad_norm": 0.26487183570861816, |
| "learning_rate": 1.884571209800919e-06, |
| "loss": 0.0675, |
| "step": 21250 |
| }, |
| { |
| "epoch": 1.8347833577396848, |
| "grad_norm": 0.9805583953857422, |
| "learning_rate": 1.836715160796325e-06, |
| "loss": 0.0462, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.8390903609268672, |
| "grad_norm": 2.21886944770813, |
| "learning_rate": 1.7888591117917305e-06, |
| "loss": 0.0453, |
| "step": 21350 |
| }, |
| { |
| "epoch": 1.8433973641140495, |
| "grad_norm": 0.9027713537216187, |
| "learning_rate": 1.7410030627871363e-06, |
| "loss": 0.0523, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.8477043673012319, |
| "grad_norm": 1.9989973306655884, |
| "learning_rate": 1.6931470137825423e-06, |
| "loss": 0.0398, |
| "step": 21450 |
| }, |
| { |
| "epoch": 1.8520113704884142, |
| "grad_norm": 1.208064079284668, |
| "learning_rate": 1.645290964777948e-06, |
| "loss": 0.0586, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.8563183736755966, |
| "grad_norm": 0.7394013404846191, |
| "learning_rate": 1.5974349157733536e-06, |
| "loss": 0.0525, |
| "step": 21550 |
| }, |
| { |
| "epoch": 1.860625376862779, |
| "grad_norm": 0.4758661985397339, |
| "learning_rate": 1.5495788667687596e-06, |
| "loss": 0.0524, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.8649323800499613, |
| "grad_norm": 1.2074617147445679, |
| "learning_rate": 1.5017228177641654e-06, |
| "loss": 0.0533, |
| "step": 21650 |
| }, |
| { |
| "epoch": 1.8692393832371437, |
| "grad_norm": 0.33246493339538574, |
| "learning_rate": 1.4538667687595714e-06, |
| "loss": 0.0524, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.873546386424326, |
| "grad_norm": 1.7372647523880005, |
| "learning_rate": 1.4060107197549772e-06, |
| "loss": 0.0487, |
| "step": 21750 |
| }, |
| { |
| "epoch": 1.8778533896115084, |
| "grad_norm": 0.39233338832855225, |
| "learning_rate": 1.358154670750383e-06, |
| "loss": 0.0421, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.8821603927986907, |
| "grad_norm": 0.8911519646644592, |
| "learning_rate": 1.310298621745789e-06, |
| "loss": 0.0453, |
| "step": 21850 |
| }, |
| { |
| "epoch": 1.886467395985873, |
| "grad_norm": 1.263708233833313, |
| "learning_rate": 1.2624425727411945e-06, |
| "loss": 0.046, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.8907743991730555, |
| "grad_norm": 0.7938653826713562, |
| "learning_rate": 1.2145865237366005e-06, |
| "loss": 0.041, |
| "step": 21950 |
| }, |
| { |
| "epoch": 1.8950814023602378, |
| "grad_norm": 3.179079294204712, |
| "learning_rate": 1.166730474732006e-06, |
| "loss": 0.0501, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.8993884055474202, |
| "grad_norm": 2.3698971271514893, |
| "learning_rate": 1.118874425727412e-06, |
| "loss": 0.0479, |
| "step": 22050 |
| }, |
| { |
| "epoch": 1.9036954087346025, |
| "grad_norm": 0.8952911496162415, |
| "learning_rate": 1.0710183767228178e-06, |
| "loss": 0.0412, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.9080024119217849, |
| "grad_norm": 1.1281505823135376, |
| "learning_rate": 1.0231623277182236e-06, |
| "loss": 0.0427, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.9123094151089672, |
| "grad_norm": 2.677870273590088, |
| "learning_rate": 9.753062787136294e-07, |
| "loss": 0.0469, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.9166164182961496, |
| "grad_norm": 0.686228334903717, |
| "learning_rate": 9.274502297090353e-07, |
| "loss": 0.0415, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.920923421483332, |
| "grad_norm": 1.754647135734558, |
| "learning_rate": 8.795941807044412e-07, |
| "loss": 0.044, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.9252304246705143, |
| "grad_norm": 0.9961816668510437, |
| "learning_rate": 8.31738131699847e-07, |
| "loss": 0.0541, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.9295374278576967, |
| "grad_norm": 2.1705892086029053, |
| "learning_rate": 7.838820826952527e-07, |
| "loss": 0.0435, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.933844431044879, |
| "grad_norm": 1.982060194015503, |
| "learning_rate": 7.360260336906586e-07, |
| "loss": 0.0434, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.9381514342320614, |
| "grad_norm": 1.5377906560897827, |
| "learning_rate": 6.881699846860644e-07, |
| "loss": 0.0416, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.9424584374192437, |
| "grad_norm": 1.7990597486495972, |
| "learning_rate": 6.403139356814702e-07, |
| "loss": 0.0429, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.946765440606426, |
| "grad_norm": 1.0789055824279785, |
| "learning_rate": 5.924578866768759e-07, |
| "loss": 0.0498, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.9510724437936084, |
| "grad_norm": 1.073246717453003, |
| "learning_rate": 5.446018376722818e-07, |
| "loss": 0.0526, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.9553794469807908, |
| "grad_norm": 0.3440791964530945, |
| "learning_rate": 4.967457886676876e-07, |
| "loss": 0.0447, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.9596864501679732, |
| "grad_norm": 1.1172747611999512, |
| "learning_rate": 4.488897396630935e-07, |
| "loss": 0.051, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.9639934533551555, |
| "grad_norm": 0.6673486232757568, |
| "learning_rate": 4.0103369065849927e-07, |
| "loss": 0.0435, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.9683004565423379, |
| "grad_norm": 0.4096444249153137, |
| "learning_rate": 3.531776416539051e-07, |
| "loss": 0.0358, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.9726074597295202, |
| "grad_norm": 0.7605034708976746, |
| "learning_rate": 3.053215926493109e-07, |
| "loss": 0.0484, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.9769144629167026, |
| "grad_norm": 0.8487497568130493, |
| "learning_rate": 2.574655436447167e-07, |
| "loss": 0.0498, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.981221466103885, |
| "grad_norm": 0.12532584369182587, |
| "learning_rate": 2.0960949464012254e-07, |
| "loss": 0.0501, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.9855284692910673, |
| "grad_norm": 0.635976254940033, |
| "learning_rate": 1.6175344563552835e-07, |
| "loss": 0.044, |
| "step": 23050 |
| }, |
| { |
| "epoch": 1.9898354724782497, |
| "grad_norm": 1.429021954536438, |
| "learning_rate": 1.1389739663093415e-07, |
| "loss": 0.0446, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.994142475665432, |
| "grad_norm": 1.3071976900100708, |
| "learning_rate": 6.604134762633997e-08, |
| "loss": 0.045, |
| "step": 23150 |
| }, |
| { |
| "epoch": 1.9984494788526144, |
| "grad_norm": 1.3366061449050903, |
| "learning_rate": 1.818529862174579e-08, |
| "loss": 0.0455, |
| "step": 23200 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 23218, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.954863372257956e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|