| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 80000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.25e-05, | |
| "grad_norm": 61.570499420166016, | |
| "learning_rate": 0.0, | |
| "loss": 10.836095809936523, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00125, | |
| "grad_norm": 127.05734252929688, | |
| "learning_rate": 1.98e-06, | |
| "loss": 19.720439255839647, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0025, | |
| "grad_norm": 240.12950134277344, | |
| "learning_rate": 3.980000000000001e-06, | |
| "loss": 19.435438232421873, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.00375, | |
| "grad_norm": 165.71978759765625, | |
| "learning_rate": 5.98e-06, | |
| "loss": 17.33456298828125, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.005, | |
| "grad_norm": 476.399169921875, | |
| "learning_rate": 7.980000000000002e-06, | |
| "loss": 14.20736328125, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.00625, | |
| "grad_norm": 73.51789855957031, | |
| "learning_rate": 9.980000000000001e-06, | |
| "loss": 10.920599365234375, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0075, | |
| "grad_norm": 50.991634368896484, | |
| "learning_rate": 9.987547169811321e-06, | |
| "loss": 9.143378295898437, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.00875, | |
| "grad_norm": 150.61373901367188, | |
| "learning_rate": 9.97496855345912e-06, | |
| "loss": 7.4310791015625, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 188.94094848632812, | |
| "learning_rate": 9.962389937106918e-06, | |
| "loss": 6.5367333984375, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01125, | |
| "grad_norm": 37.20081329345703, | |
| "learning_rate": 9.949811320754717e-06, | |
| "loss": 7.25988525390625, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0125, | |
| "grad_norm": 52.72426223754883, | |
| "learning_rate": 9.937232704402516e-06, | |
| "loss": 6.920318603515625, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01375, | |
| "grad_norm": 77.21712493896484, | |
| "learning_rate": 9.924654088050316e-06, | |
| "loss": 6.468406982421875, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.015, | |
| "grad_norm": 73.15080261230469, | |
| "learning_rate": 9.912075471698114e-06, | |
| "loss": 6.187958374023437, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01625, | |
| "grad_norm": 21.300718307495117, | |
| "learning_rate": 9.899496855345913e-06, | |
| "loss": 5.908817749023438, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0175, | |
| "grad_norm": 33.9874153137207, | |
| "learning_rate": 9.886918238993712e-06, | |
| "loss": 5.750390625, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01875, | |
| "grad_norm": 46.610294342041016, | |
| "learning_rate": 9.87433962264151e-06, | |
| "loss": 5.010645446777343, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 116.64041137695312, | |
| "learning_rate": 9.861761006289309e-06, | |
| "loss": 4.999881591796875, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.02125, | |
| "grad_norm": 16.1014347076416, | |
| "learning_rate": 9.849182389937107e-06, | |
| "loss": 5.607005004882812, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.0225, | |
| "grad_norm": 22.09562873840332, | |
| "learning_rate": 9.836603773584906e-06, | |
| "loss": 6.2442431640625, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02375, | |
| "grad_norm": 62.120033264160156, | |
| "learning_rate": 9.824025157232704e-06, | |
| "loss": 5.164766845703125, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 12.194104194641113, | |
| "learning_rate": 9.811446540880503e-06, | |
| "loss": 4.69855224609375, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02625, | |
| "grad_norm": 21.063518524169922, | |
| "learning_rate": 9.798867924528302e-06, | |
| "loss": 5.364295654296875, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.0275, | |
| "grad_norm": 28.930068969726562, | |
| "learning_rate": 9.786289308176102e-06, | |
| "loss": 5.119418334960938, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02875, | |
| "grad_norm": 46.796871185302734, | |
| "learning_rate": 9.7737106918239e-06, | |
| "loss": 5.237764892578125, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 17.024911880493164, | |
| "learning_rate": 9.761132075471699e-06, | |
| "loss": 5.059874877929688, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.03125, | |
| "grad_norm": 74.16903686523438, | |
| "learning_rate": 9.748553459119498e-06, | |
| "loss": 4.9656503295898435, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0325, | |
| "grad_norm": 22.257841110229492, | |
| "learning_rate": 9.735974842767296e-06, | |
| "loss": 5.18156982421875, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.03375, | |
| "grad_norm": 47.79827117919922, | |
| "learning_rate": 9.723396226415095e-06, | |
| "loss": 4.9468075561523435, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.035, | |
| "grad_norm": 33.0962028503418, | |
| "learning_rate": 9.710817610062893e-06, | |
| "loss": 4.722409362792969, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.03625, | |
| "grad_norm": 20.282230377197266, | |
| "learning_rate": 9.698238993710692e-06, | |
| "loss": 5.242295532226563, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.0375, | |
| "grad_norm": 25.866464614868164, | |
| "learning_rate": 9.68566037735849e-06, | |
| "loss": 4.773485717773437, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03875, | |
| "grad_norm": 26.524415969848633, | |
| "learning_rate": 9.67308176100629e-06, | |
| "loss": 5.017108154296875, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 18.342044830322266, | |
| "learning_rate": 9.66050314465409e-06, | |
| "loss": 4.6452767944335935, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.04125, | |
| "grad_norm": 20.58109474182129, | |
| "learning_rate": 9.647924528301888e-06, | |
| "loss": 5.00500732421875, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.0425, | |
| "grad_norm": 294.05224609375, | |
| "learning_rate": 9.635345911949687e-06, | |
| "loss": 4.595547790527344, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.04375, | |
| "grad_norm": 32.15380096435547, | |
| "learning_rate": 9.622767295597485e-06, | |
| "loss": 4.3764559936523435, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.045, | |
| "grad_norm": 28.0123233795166, | |
| "learning_rate": 9.610188679245284e-06, | |
| "loss": 4.383021850585937, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.04625, | |
| "grad_norm": 33.30596923828125, | |
| "learning_rate": 9.597610062893082e-06, | |
| "loss": 4.354810180664063, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.0475, | |
| "grad_norm": 26.94670295715332, | |
| "learning_rate": 9.585031446540881e-06, | |
| "loss": 3.8970941162109374, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.04875, | |
| "grad_norm": 28.224117279052734, | |
| "learning_rate": 9.57245283018868e-06, | |
| "loss": 3.8559701538085935, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 20.57750701904297, | |
| "learning_rate": 9.559874213836478e-06, | |
| "loss": 4.92269287109375, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.05125, | |
| "grad_norm": 18.131223678588867, | |
| "learning_rate": 9.547295597484277e-06, | |
| "loss": 4.633423461914062, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.0525, | |
| "grad_norm": 29.649024963378906, | |
| "learning_rate": 9.534716981132075e-06, | |
| "loss": 4.432715759277344, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.05375, | |
| "grad_norm": 34.53123474121094, | |
| "learning_rate": 9.522138364779876e-06, | |
| "loss": 4.242776489257812, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.055, | |
| "grad_norm": 19.823360443115234, | |
| "learning_rate": 9.509559748427674e-06, | |
| "loss": 4.448049011230469, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.05625, | |
| "grad_norm": 20.857120513916016, | |
| "learning_rate": 9.496981132075473e-06, | |
| "loss": 4.65389404296875, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.0575, | |
| "grad_norm": 20.397987365722656, | |
| "learning_rate": 9.484402515723271e-06, | |
| "loss": 4.350996704101562, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.05875, | |
| "grad_norm": 22.439617156982422, | |
| "learning_rate": 9.47182389937107e-06, | |
| "loss": 4.1993505859375, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.18347692489624, | |
| "learning_rate": 9.459245283018869e-06, | |
| "loss": 4.38233154296875, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.06125, | |
| "grad_norm": 20.546842575073242, | |
| "learning_rate": 9.446666666666667e-06, | |
| "loss": 4.136148681640625, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "grad_norm": 30.28652572631836, | |
| "learning_rate": 9.434088050314466e-06, | |
| "loss": 4.002590026855469, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.06375, | |
| "grad_norm": 12.638691902160645, | |
| "learning_rate": 9.421509433962264e-06, | |
| "loss": 3.565917663574219, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.065, | |
| "grad_norm": 22.384016036987305, | |
| "learning_rate": 9.408930817610063e-06, | |
| "loss": 3.9356478881835937, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.06625, | |
| "grad_norm": 18.600645065307617, | |
| "learning_rate": 9.396352201257861e-06, | |
| "loss": 4.357646484375, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.0675, | |
| "grad_norm": 19.07636260986328, | |
| "learning_rate": 9.383773584905662e-06, | |
| "loss": 4.550914306640625, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.06875, | |
| "grad_norm": 36.520381927490234, | |
| "learning_rate": 9.37119496855346e-06, | |
| "loss": 4.02431884765625, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 31.87358856201172, | |
| "learning_rate": 9.358616352201259e-06, | |
| "loss": 4.209980163574219, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.07125, | |
| "grad_norm": 25.108585357666016, | |
| "learning_rate": 9.346037735849058e-06, | |
| "loss": 4.5496435546875, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.0725, | |
| "grad_norm": 18.682506561279297, | |
| "learning_rate": 9.333459119496856e-06, | |
| "loss": 4.466670837402344, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.07375, | |
| "grad_norm": 18.071388244628906, | |
| "learning_rate": 9.320880503144655e-06, | |
| "loss": 3.757862243652344, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 54.04276657104492, | |
| "learning_rate": 9.308301886792453e-06, | |
| "loss": 4.119568786621094, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.07625, | |
| "grad_norm": 20.94058609008789, | |
| "learning_rate": 9.295723270440252e-06, | |
| "loss": 3.6099752807617187, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.0775, | |
| "grad_norm": 28.546890258789062, | |
| "learning_rate": 9.28314465408805e-06, | |
| "loss": 4.167696533203125, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.07875, | |
| "grad_norm": 22.076147079467773, | |
| "learning_rate": 9.270566037735849e-06, | |
| "loss": 3.8074371337890627, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 11.4718599319458, | |
| "learning_rate": 9.25798742138365e-06, | |
| "loss": 3.5398162841796874, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.08125, | |
| "grad_norm": 20.66476058959961, | |
| "learning_rate": 9.245408805031448e-06, | |
| "loss": 3.8792694091796873, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.0825, | |
| "grad_norm": 22.23533058166504, | |
| "learning_rate": 9.232830188679246e-06, | |
| "loss": 3.6433590698242186, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.08375, | |
| "grad_norm": 17.721628189086914, | |
| "learning_rate": 9.220251572327045e-06, | |
| "loss": 3.73576171875, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.085, | |
| "grad_norm": 53.24715805053711, | |
| "learning_rate": 9.207672955974844e-06, | |
| "loss": 3.7794915771484376, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.08625, | |
| "grad_norm": 20.755985260009766, | |
| "learning_rate": 9.195094339622642e-06, | |
| "loss": 3.84636962890625, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.0875, | |
| "grad_norm": 26.36724281311035, | |
| "learning_rate": 9.18251572327044e-06, | |
| "loss": 3.7252166748046873, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.08875, | |
| "grad_norm": 24.380794525146484, | |
| "learning_rate": 9.16993710691824e-06, | |
| "loss": 3.7519256591796877, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 30.122207641601562, | |
| "learning_rate": 9.157358490566038e-06, | |
| "loss": 3.6482272338867188, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.09125, | |
| "grad_norm": 21.8885555267334, | |
| "learning_rate": 9.144779874213837e-06, | |
| "loss": 3.8224606323242187, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.0925, | |
| "grad_norm": 81.36309814453125, | |
| "learning_rate": 9.132201257861635e-06, | |
| "loss": 3.9120883178710937, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.09375, | |
| "grad_norm": 33.38832473754883, | |
| "learning_rate": 9.119622641509435e-06, | |
| "loss": 3.3836550903320313, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.095, | |
| "grad_norm": 3.9297144412994385, | |
| "learning_rate": 9.107044025157234e-06, | |
| "loss": 3.252887878417969, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.09625, | |
| "grad_norm": 25.273887634277344, | |
| "learning_rate": 9.094465408805033e-06, | |
| "loss": 3.3780868530273436, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.0975, | |
| "grad_norm": 32.79280471801758, | |
| "learning_rate": 9.081886792452831e-06, | |
| "loss": 3.3074383544921875, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.09875, | |
| "grad_norm": 27.27655601501465, | |
| "learning_rate": 9.06930817610063e-06, | |
| "loss": 3.4328045654296875, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 18.67003631591797, | |
| "learning_rate": 9.056729559748428e-06, | |
| "loss": 3.4785955810546874, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.10125, | |
| "grad_norm": 16.710369110107422, | |
| "learning_rate": 9.044150943396227e-06, | |
| "loss": 3.604156188964844, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.1025, | |
| "grad_norm": 39.6090202331543, | |
| "learning_rate": 9.031572327044026e-06, | |
| "loss": 3.240309753417969, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.10375, | |
| "grad_norm": 50.57542419433594, | |
| "learning_rate": 9.018993710691824e-06, | |
| "loss": 3.2084616088867186, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.105, | |
| "grad_norm": 3.647831916809082, | |
| "learning_rate": 9.006415094339623e-06, | |
| "loss": 3.3365228271484373, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.10625, | |
| "grad_norm": 25.59325408935547, | |
| "learning_rate": 8.993836477987421e-06, | |
| "loss": 3.3707135009765623, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.1075, | |
| "grad_norm": 29.391414642333984, | |
| "learning_rate": 8.981257861635222e-06, | |
| "loss": 3.698001708984375, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.10875, | |
| "grad_norm": 20.907129287719727, | |
| "learning_rate": 8.96867924528302e-06, | |
| "loss": 3.4555450439453126, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.1572871208190918, | |
| "learning_rate": 8.956100628930819e-06, | |
| "loss": 3.2748760986328125, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.11125, | |
| "grad_norm": 31.631296157836914, | |
| "learning_rate": 8.943522012578617e-06, | |
| "loss": 3.3183383178710937, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.1125, | |
| "grad_norm": 15.066047668457031, | |
| "learning_rate": 8.930943396226416e-06, | |
| "loss": 3.3248934936523438, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.11375, | |
| "grad_norm": 40.130699157714844, | |
| "learning_rate": 8.918364779874215e-06, | |
| "loss": 3.0782876586914063, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.115, | |
| "grad_norm": 6.560884952545166, | |
| "learning_rate": 8.905786163522013e-06, | |
| "loss": 3.1166537475585936, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.11625, | |
| "grad_norm": 19.945913314819336, | |
| "learning_rate": 8.893207547169812e-06, | |
| "loss": 3.104024658203125, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.1175, | |
| "grad_norm": 26.818218231201172, | |
| "learning_rate": 8.88062893081761e-06, | |
| "loss": 3.5535888671875, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.11875, | |
| "grad_norm": 8.911575317382812, | |
| "learning_rate": 8.868050314465409e-06, | |
| "loss": 3.237396240234375, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 27.469289779663086, | |
| "learning_rate": 8.855471698113209e-06, | |
| "loss": 3.37412841796875, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.12125, | |
| "grad_norm": 21.267345428466797, | |
| "learning_rate": 8.842893081761008e-06, | |
| "loss": 3.5673455810546875, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.1225, | |
| "grad_norm": 18.69856834411621, | |
| "learning_rate": 8.830314465408806e-06, | |
| "loss": 3.2800216674804688, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.12375, | |
| "grad_norm": 30.507801055908203, | |
| "learning_rate": 8.817735849056605e-06, | |
| "loss": 3.042904052734375, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 4.167088508605957, | |
| "learning_rate": 8.805157232704403e-06, | |
| "loss": 3.038970642089844, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.12625, | |
| "grad_norm": 22.224905014038086, | |
| "learning_rate": 8.792578616352202e-06, | |
| "loss": 3.6609432983398436, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.1275, | |
| "grad_norm": 23.52140235900879, | |
| "learning_rate": 8.78e-06, | |
| "loss": 3.3873190307617187, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.12875, | |
| "grad_norm": 19.35887336730957, | |
| "learning_rate": 8.7674213836478e-06, | |
| "loss": 2.98621337890625, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 36.2899169921875, | |
| "learning_rate": 8.754842767295598e-06, | |
| "loss": 3.1030731201171875, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.13125, | |
| "grad_norm": 23.118587493896484, | |
| "learning_rate": 8.742264150943396e-06, | |
| "loss": 3.2017132568359377, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.1325, | |
| "grad_norm": 18.85630989074707, | |
| "learning_rate": 8.729685534591195e-06, | |
| "loss": 3.136984558105469, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.13375, | |
| "grad_norm": 10.303237915039062, | |
| "learning_rate": 8.717106918238995e-06, | |
| "loss": 3.201553955078125, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.135, | |
| "grad_norm": 23.776470184326172, | |
| "learning_rate": 8.704528301886794e-06, | |
| "loss": 2.64281494140625, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.13625, | |
| "grad_norm": 19.486194610595703, | |
| "learning_rate": 8.691949685534592e-06, | |
| "loss": 3.0275909423828127, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.1375, | |
| "grad_norm": 23.196096420288086, | |
| "learning_rate": 8.679371069182391e-06, | |
| "loss": 3.320228271484375, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.13875, | |
| "grad_norm": 18.966537475585938, | |
| "learning_rate": 8.66679245283019e-06, | |
| "loss": 3.0495703125, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 24.352205276489258, | |
| "learning_rate": 8.654213836477988e-06, | |
| "loss": 2.7282373046875, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.14125, | |
| "grad_norm": 21.45029067993164, | |
| "learning_rate": 8.641635220125787e-06, | |
| "loss": 2.840162353515625, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.1425, | |
| "grad_norm": 27.577434539794922, | |
| "learning_rate": 8.629056603773585e-06, | |
| "loss": 2.837103271484375, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.14375, | |
| "grad_norm": 26.380931854248047, | |
| "learning_rate": 8.616477987421384e-06, | |
| "loss": 2.9257269287109375, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.145, | |
| "grad_norm": 33.0853271484375, | |
| "learning_rate": 8.603899371069183e-06, | |
| "loss": 2.7715283203125, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.14625, | |
| "grad_norm": 26.68023109436035, | |
| "learning_rate": 8.591320754716981e-06, | |
| "loss": 2.801429443359375, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.1475, | |
| "grad_norm": 23.897310256958008, | |
| "learning_rate": 8.578742138364781e-06, | |
| "loss": 2.7405438232421875, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.14875, | |
| "grad_norm": 24.198909759521484, | |
| "learning_rate": 8.56616352201258e-06, | |
| "loss": 2.8841351318359374, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 12.656911849975586, | |
| "learning_rate": 8.553584905660379e-06, | |
| "loss": 2.8034808349609377, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.15125, | |
| "grad_norm": 21.627422332763672, | |
| "learning_rate": 8.541006289308177e-06, | |
| "loss": 3.3647225952148436, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.1525, | |
| "grad_norm": 20.174375534057617, | |
| "learning_rate": 8.528427672955976e-06, | |
| "loss": 3.007825927734375, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.15375, | |
| "grad_norm": 44.98274230957031, | |
| "learning_rate": 8.515849056603774e-06, | |
| "loss": 3.088919677734375, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 19.29104995727539, | |
| "learning_rate": 8.503270440251573e-06, | |
| "loss": 3.018197021484375, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.15625, | |
| "grad_norm": 24.123010635375977, | |
| "learning_rate": 8.490691823899371e-06, | |
| "loss": 2.9341339111328124, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.1575, | |
| "grad_norm": 24.07183837890625, | |
| "learning_rate": 8.47811320754717e-06, | |
| "loss": 3.0051565551757813, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.15875, | |
| "grad_norm": 5.301960468292236, | |
| "learning_rate": 8.465534591194969e-06, | |
| "loss": 2.80382568359375, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.05698124319314957, | |
| "learning_rate": 8.452955974842767e-06, | |
| "loss": 2.8222991943359377, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.16125, | |
| "grad_norm": 18.93758773803711, | |
| "learning_rate": 8.440377358490568e-06, | |
| "loss": 3.1378509521484377, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.1625, | |
| "grad_norm": 20.055757522583008, | |
| "learning_rate": 8.427798742138366e-06, | |
| "loss": 2.886328125, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.16375, | |
| "grad_norm": 12.077770233154297, | |
| "learning_rate": 8.415220125786165e-06, | |
| "loss": 2.8731988525390624, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 44.130096435546875, | |
| "learning_rate": 8.402641509433963e-06, | |
| "loss": 2.7432611083984373, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.16625, | |
| "grad_norm": 41.551048278808594, | |
| "learning_rate": 8.390062893081762e-06, | |
| "loss": 3.06159912109375, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.1675, | |
| "grad_norm": 18.465818405151367, | |
| "learning_rate": 8.37748427672956e-06, | |
| "loss": 2.8289349365234373, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.16875, | |
| "grad_norm": 17.856552124023438, | |
| "learning_rate": 8.364905660377359e-06, | |
| "loss": 2.7455133056640624, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 38.62467956542969, | |
| "learning_rate": 8.352327044025158e-06, | |
| "loss": 2.51541748046875, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.17125, | |
| "grad_norm": 19.783123016357422, | |
| "learning_rate": 8.339748427672956e-06, | |
| "loss": 3.161922607421875, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.1725, | |
| "grad_norm": 19.746681213378906, | |
| "learning_rate": 8.327169811320755e-06, | |
| "loss": 2.948106384277344, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.17375, | |
| "grad_norm": 11.665578842163086, | |
| "learning_rate": 8.314591194968555e-06, | |
| "loss": 2.707484130859375, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 32.41368865966797, | |
| "learning_rate": 8.302012578616354e-06, | |
| "loss": 2.551041259765625, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.17625, | |
| "grad_norm": 19.156505584716797, | |
| "learning_rate": 8.289433962264152e-06, | |
| "loss": 3.110626220703125, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.1775, | |
| "grad_norm": 24.206411361694336, | |
| "learning_rate": 8.27685534591195e-06, | |
| "loss": 2.96248291015625, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.17875, | |
| "grad_norm": 28.069095611572266, | |
| "learning_rate": 8.26427672955975e-06, | |
| "loss": 2.8258160400390624, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 29.488801956176758, | |
| "learning_rate": 8.251698113207548e-06, | |
| "loss": 2.697049560546875, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.18125, | |
| "grad_norm": 24.96384620666504, | |
| "learning_rate": 8.239119496855347e-06, | |
| "loss": 2.985064697265625, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.1825, | |
| "grad_norm": 23.861825942993164, | |
| "learning_rate": 8.226540880503145e-06, | |
| "loss": 3.27437255859375, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.18375, | |
| "grad_norm": 17.654409408569336, | |
| "learning_rate": 8.213962264150944e-06, | |
| "loss": 2.7916656494140626, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 13.089086532592773, | |
| "learning_rate": 8.201383647798742e-06, | |
| "loss": 2.95639404296875, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.18625, | |
| "grad_norm": 25.958925247192383, | |
| "learning_rate": 8.188805031446541e-06, | |
| "loss": 3.0765805053710937, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "grad_norm": 23.476421356201172, | |
| "learning_rate": 8.176226415094341e-06, | |
| "loss": 3.1241455078125, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.18875, | |
| "grad_norm": 6.848545551300049, | |
| "learning_rate": 8.16364779874214e-06, | |
| "loss": 3.0716900634765625, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 35.278076171875, | |
| "learning_rate": 8.151069182389938e-06, | |
| "loss": 2.738138427734375, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.19125, | |
| "grad_norm": 18.48978614807129, | |
| "learning_rate": 8.138490566037737e-06, | |
| "loss": 3.2392678833007813, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.1925, | |
| "grad_norm": 25.764511108398438, | |
| "learning_rate": 8.125911949685536e-06, | |
| "loss": 2.935572509765625, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.19375, | |
| "grad_norm": 17.244510650634766, | |
| "learning_rate": 8.113333333333334e-06, | |
| "loss": 2.686405029296875, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 58.790367126464844, | |
| "learning_rate": 8.100754716981133e-06, | |
| "loss": 2.930709228515625, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.19625, | |
| "grad_norm": 23.248153686523438, | |
| "learning_rate": 8.088176100628931e-06, | |
| "loss": 3.134276123046875, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.1975, | |
| "grad_norm": 22.906993865966797, | |
| "learning_rate": 8.07559748427673e-06, | |
| "loss": 3.294012451171875, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.19875, | |
| "grad_norm": 18.57663917541504, | |
| "learning_rate": 8.063018867924528e-06, | |
| "loss": 2.659776611328125, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 23.491619110107422, | |
| "learning_rate": 8.050440251572327e-06, | |
| "loss": 2.69767822265625, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.20125, | |
| "grad_norm": 34.74635696411133, | |
| "learning_rate": 8.037861635220127e-06, | |
| "loss": 2.9187213134765626, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.2025, | |
| "grad_norm": 17.31464195251465, | |
| "learning_rate": 8.025283018867926e-06, | |
| "loss": 2.6978961181640626, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.20375, | |
| "grad_norm": 15.085783958435059, | |
| "learning_rate": 8.012704402515725e-06, | |
| "loss": 2.227297668457031, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.205, | |
| "grad_norm": 40.41267013549805, | |
| "learning_rate": 8.000125786163523e-06, | |
| "loss": 2.5455712890625, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.20625, | |
| "grad_norm": 21.266006469726562, | |
| "learning_rate": 7.987547169811322e-06, | |
| "loss": 2.6894076538085936, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.2075, | |
| "grad_norm": 27.872455596923828, | |
| "learning_rate": 7.97496855345912e-06, | |
| "loss": 2.957840576171875, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.20875, | |
| "grad_norm": 11.036195755004883, | |
| "learning_rate": 7.962389937106919e-06, | |
| "loss": 2.9388262939453127, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.358200341463089, | |
| "learning_rate": 7.949811320754717e-06, | |
| "loss": 2.550313720703125, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.21125, | |
| "grad_norm": 17.692394256591797, | |
| "learning_rate": 7.937232704402516e-06, | |
| "loss": 2.9567965698242187, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.2125, | |
| "grad_norm": 20.59417724609375, | |
| "learning_rate": 7.924654088050315e-06, | |
| "loss": 2.8677490234375, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.21375, | |
| "grad_norm": 21.882822036743164, | |
| "learning_rate": 7.912075471698115e-06, | |
| "loss": 2.6563641357421877, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.215, | |
| "grad_norm": 41.62974548339844, | |
| "learning_rate": 7.899496855345913e-06, | |
| "loss": 2.938742370605469, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.21625, | |
| "grad_norm": 17.829666137695312, | |
| "learning_rate": 7.886918238993712e-06, | |
| "loss": 2.936795654296875, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.2175, | |
| "grad_norm": 23.082691192626953, | |
| "learning_rate": 7.87433962264151e-06, | |
| "loss": 3.0123855590820314, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.21875, | |
| "grad_norm": 30.395631790161133, | |
| "learning_rate": 7.86176100628931e-06, | |
| "loss": 2.8054989624023436, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.134773388504982, | |
| "learning_rate": 7.849182389937108e-06, | |
| "loss": 2.675752868652344, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.22125, | |
| "grad_norm": 20.807125091552734, | |
| "learning_rate": 7.836603773584906e-06, | |
| "loss": 2.7376922607421874, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.2225, | |
| "grad_norm": 43.09901809692383, | |
| "learning_rate": 7.824025157232705e-06, | |
| "loss": 2.9282843017578126, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.22375, | |
| "grad_norm": 25.02425193786621, | |
| "learning_rate": 7.811446540880504e-06, | |
| "loss": 2.571319580078125, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 15.398431777954102, | |
| "learning_rate": 7.798867924528302e-06, | |
| "loss": 2.492489929199219, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.22625, | |
| "grad_norm": 19.006174087524414, | |
| "learning_rate": 7.7862893081761e-06, | |
| "loss": 2.7379693603515625, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.2275, | |
| "grad_norm": 21.487060546875, | |
| "learning_rate": 7.773710691823901e-06, | |
| "loss": 3.009235534667969, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.22875, | |
| "grad_norm": 22.86688804626465, | |
| "learning_rate": 7.7611320754717e-06, | |
| "loss": 2.937470703125, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 4.950404167175293, | |
| "learning_rate": 7.748553459119498e-06, | |
| "loss": 2.7051239013671875, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.23125, | |
| "grad_norm": 22.454504013061523, | |
| "learning_rate": 7.735974842767297e-06, | |
| "loss": 2.8736590576171874, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.2325, | |
| "grad_norm": 29.73653221130371, | |
| "learning_rate": 7.723396226415095e-06, | |
| "loss": 2.908158874511719, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.23375, | |
| "grad_norm": 28.101703643798828, | |
| "learning_rate": 7.710817610062894e-06, | |
| "loss": 2.6522030639648437, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.235, | |
| "grad_norm": 4.781414985656738, | |
| "learning_rate": 7.698238993710693e-06, | |
| "loss": 2.4308651733398436, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.23625, | |
| "grad_norm": 17.258634567260742, | |
| "learning_rate": 7.685660377358491e-06, | |
| "loss": 3.040390625, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.2375, | |
| "grad_norm": 17.386960983276367, | |
| "learning_rate": 7.67308176100629e-06, | |
| "loss": 2.769420471191406, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.23875, | |
| "grad_norm": 25.329315185546875, | |
| "learning_rate": 7.660503144654088e-06, | |
| "loss": 2.7375177001953124, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 6.02203369140625, | |
| "learning_rate": 7.647924528301887e-06, | |
| "loss": 2.505074920654297, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.24125, | |
| "grad_norm": 20.155107498168945, | |
| "learning_rate": 7.635345911949687e-06, | |
| "loss": 2.837679138183594, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.2425, | |
| "grad_norm": 18.765806198120117, | |
| "learning_rate": 7.622767295597485e-06, | |
| "loss": 2.907704162597656, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.24375, | |
| "grad_norm": 17.111177444458008, | |
| "learning_rate": 7.610188679245284e-06, | |
| "loss": 2.3241337585449218, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.245, | |
| "grad_norm": 26.34480094909668, | |
| "learning_rate": 7.597610062893083e-06, | |
| "loss": 2.534189453125, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.24625, | |
| "grad_norm": 26.818429946899414, | |
| "learning_rate": 7.5850314465408815e-06, | |
| "loss": 2.6678060913085937, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.2475, | |
| "grad_norm": 22.52924919128418, | |
| "learning_rate": 7.57245283018868e-06, | |
| "loss": 2.7501620483398437, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.24875, | |
| "grad_norm": 2.6518571376800537, | |
| "learning_rate": 7.559874213836479e-06, | |
| "loss": 2.606297607421875, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 13.926076889038086, | |
| "learning_rate": 7.547295597484277e-06, | |
| "loss": 2.6911566162109377, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.25125, | |
| "grad_norm": 21.900789260864258, | |
| "learning_rate": 7.534716981132077e-06, | |
| "loss": 2.6743511962890625, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.2525, | |
| "grad_norm": 17.92312240600586, | |
| "learning_rate": 7.522138364779875e-06, | |
| "loss": 3.041307373046875, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.25375, | |
| "grad_norm": 16.685823440551758, | |
| "learning_rate": 7.509559748427674e-06, | |
| "loss": 2.7370196533203126, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.255, | |
| "grad_norm": 17.445106506347656, | |
| "learning_rate": 7.4969811320754725e-06, | |
| "loss": 2.769534912109375, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.25625, | |
| "grad_norm": 14.679756164550781, | |
| "learning_rate": 7.484402515723271e-06, | |
| "loss": 2.6487054443359375, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.2575, | |
| "grad_norm": 29.46009063720703, | |
| "learning_rate": 7.4718238993710705e-06, | |
| "loss": 2.8649749755859375, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.25875, | |
| "grad_norm": 25.532323837280273, | |
| "learning_rate": 7.459245283018869e-06, | |
| "loss": 2.5376904296875, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.2213641107082367, | |
| "learning_rate": 7.446666666666668e-06, | |
| "loss": 2.5324951171875, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.26125, | |
| "grad_norm": 26.14183807373047, | |
| "learning_rate": 7.434088050314466e-06, | |
| "loss": 2.9009820556640626, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.2625, | |
| "grad_norm": 20.510589599609375, | |
| "learning_rate": 7.421509433962265e-06, | |
| "loss": 2.8423321533203123, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.26375, | |
| "grad_norm": 19.538053512573242, | |
| "learning_rate": 7.408930817610064e-06, | |
| "loss": 2.8104873657226563, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.265, | |
| "grad_norm": 29.258617401123047, | |
| "learning_rate": 7.396352201257863e-06, | |
| "loss": 2.460407562255859, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.26625, | |
| "grad_norm": 24.23659324645996, | |
| "learning_rate": 7.3837735849056614e-06, | |
| "loss": 2.797374267578125, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.2675, | |
| "grad_norm": 19.262876510620117, | |
| "learning_rate": 7.37119496855346e-06, | |
| "loss": 2.8027474975585935, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.26875, | |
| "grad_norm": 18.131284713745117, | |
| "learning_rate": 7.358616352201259e-06, | |
| "loss": 2.899300842285156, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.3336597979068756, | |
| "learning_rate": 7.346037735849057e-06, | |
| "loss": 2.505666961669922, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.27125, | |
| "grad_norm": 16.87041473388672, | |
| "learning_rate": 7.333459119496857e-06, | |
| "loss": 2.7630340576171877, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.2725, | |
| "grad_norm": 21.505661010742188, | |
| "learning_rate": 7.320880503144655e-06, | |
| "loss": 2.52651611328125, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.27375, | |
| "grad_norm": 3.3604607582092285, | |
| "learning_rate": 7.308301886792454e-06, | |
| "loss": 2.6651840209960938, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 0.0993848517537117, | |
| "learning_rate": 7.295723270440252e-06, | |
| "loss": 2.635201416015625, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.27625, | |
| "grad_norm": 28.343442916870117, | |
| "learning_rate": 7.283144654088051e-06, | |
| "loss": 3.0521551513671876, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.2775, | |
| "grad_norm": 22.11837387084961, | |
| "learning_rate": 7.27056603773585e-06, | |
| "loss": 2.7989617919921876, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.27875, | |
| "grad_norm": 14.693900108337402, | |
| "learning_rate": 7.257987421383649e-06, | |
| "loss": 2.586000671386719, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 12.320152282714844, | |
| "learning_rate": 7.2454088050314476e-06, | |
| "loss": 2.6524945068359376, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.28125, | |
| "grad_norm": 16.291311264038086, | |
| "learning_rate": 7.232830188679246e-06, | |
| "loss": 2.5608685302734373, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.2825, | |
| "grad_norm": 19.380096435546875, | |
| "learning_rate": 7.220251572327045e-06, | |
| "loss": 2.84384521484375, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.28375, | |
| "grad_norm": 23.465559005737305, | |
| "learning_rate": 7.207672955974843e-06, | |
| "loss": 2.8349188232421874, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.285, | |
| "grad_norm": 18.676286697387695, | |
| "learning_rate": 7.195094339622643e-06, | |
| "loss": 2.5051495361328127, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.28625, | |
| "grad_norm": 20.133834838867188, | |
| "learning_rate": 7.182515723270441e-06, | |
| "loss": 2.7532232666015624, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.2875, | |
| "grad_norm": 19.53656005859375, | |
| "learning_rate": 7.16993710691824e-06, | |
| "loss": 2.9203271484375, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.28875, | |
| "grad_norm": 18.04520034790039, | |
| "learning_rate": 7.1573584905660385e-06, | |
| "loss": 2.6057052612304688, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.5314738154411316, | |
| "learning_rate": 7.144779874213837e-06, | |
| "loss": 2.6248223876953123, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.29125, | |
| "grad_norm": 16.670757293701172, | |
| "learning_rate": 7.1322012578616365e-06, | |
| "loss": 2.8461798095703124, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.2925, | |
| "grad_norm": 23.886486053466797, | |
| "learning_rate": 7.119622641509435e-06, | |
| "loss": 2.57169921875, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.29375, | |
| "grad_norm": 8.540549278259277, | |
| "learning_rate": 7.107044025157234e-06, | |
| "loss": 2.620181884765625, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.295, | |
| "grad_norm": 34.379066467285156, | |
| "learning_rate": 7.094465408805032e-06, | |
| "loss": 2.5019407653808594, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.29625, | |
| "grad_norm": 24.65458106994629, | |
| "learning_rate": 7.081886792452831e-06, | |
| "loss": 2.421243133544922, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.2975, | |
| "grad_norm": 24.197660446166992, | |
| "learning_rate": 7.06930817610063e-06, | |
| "loss": 2.658455810546875, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.29875, | |
| "grad_norm": 26.06961441040039, | |
| "learning_rate": 7.056729559748429e-06, | |
| "loss": 2.8529171752929687, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 17.869230270385742, | |
| "learning_rate": 7.0441509433962275e-06, | |
| "loss": 2.4935687255859373, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.30125, | |
| "grad_norm": 24.912609100341797, | |
| "learning_rate": 7.031572327044026e-06, | |
| "loss": 2.6119781494140626, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.3025, | |
| "grad_norm": 22.447463989257812, | |
| "learning_rate": 7.018993710691825e-06, | |
| "loss": 2.824261474609375, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.30375, | |
| "grad_norm": 4.013281345367432, | |
| "learning_rate": 7.006415094339623e-06, | |
| "loss": 2.564319763183594, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.305, | |
| "grad_norm": 20.67205810546875, | |
| "learning_rate": 6.993836477987423e-06, | |
| "loss": 2.513914794921875, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.30625, | |
| "grad_norm": 24.64101219177246, | |
| "learning_rate": 6.981257861635221e-06, | |
| "loss": 2.6228094482421875, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.3075, | |
| "grad_norm": 21.346033096313477, | |
| "learning_rate": 6.96867924528302e-06, | |
| "loss": 2.74439208984375, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.30875, | |
| "grad_norm": 10.391414642333984, | |
| "learning_rate": 6.956100628930818e-06, | |
| "loss": 2.2809228515625, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.595000982284546, | |
| "learning_rate": 6.943522012578617e-06, | |
| "loss": 2.4967855834960937, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.31125, | |
| "grad_norm": 20.543582916259766, | |
| "learning_rate": 6.9309433962264164e-06, | |
| "loss": 2.585915222167969, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 18.013103485107422, | |
| "learning_rate": 6.918364779874215e-06, | |
| "loss": 2.967261962890625, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.31375, | |
| "grad_norm": 2.930172920227051, | |
| "learning_rate": 6.905786163522014e-06, | |
| "loss": 2.45760986328125, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.315, | |
| "grad_norm": 0.313312292098999, | |
| "learning_rate": 6.893207547169812e-06, | |
| "loss": 2.421280822753906, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.31625, | |
| "grad_norm": 19.377904891967773, | |
| "learning_rate": 6.880628930817611e-06, | |
| "loss": 2.710804748535156, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.3175, | |
| "grad_norm": 29.901630401611328, | |
| "learning_rate": 6.86805031446541e-06, | |
| "loss": 2.6909103393554688, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.31875, | |
| "grad_norm": 0.17036622762680054, | |
| "learning_rate": 6.855471698113209e-06, | |
| "loss": 2.56456787109375, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.009640435688197613, | |
| "learning_rate": 6.842893081761007e-06, | |
| "loss": 2.603443603515625, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.32125, | |
| "grad_norm": 18.46479606628418, | |
| "learning_rate": 6.830314465408806e-06, | |
| "loss": 2.427742156982422, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.3225, | |
| "grad_norm": 20.460947036743164, | |
| "learning_rate": 6.8177358490566046e-06, | |
| "loss": 2.503583984375, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.32375, | |
| "grad_norm": 36.58029556274414, | |
| "learning_rate": 6.805157232704403e-06, | |
| "loss": 2.641945495605469, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.325, | |
| "grad_norm": 0.01783256232738495, | |
| "learning_rate": 6.7925786163522026e-06, | |
| "loss": 2.334880065917969, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.32625, | |
| "grad_norm": 16.67290496826172, | |
| "learning_rate": 6.780000000000001e-06, | |
| "loss": 2.31351806640625, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.3275, | |
| "grad_norm": 22.63792610168457, | |
| "learning_rate": 6.7674213836478e-06, | |
| "loss": 2.6967156982421874, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.32875, | |
| "grad_norm": 9.776408195495605, | |
| "learning_rate": 6.754842767295598e-06, | |
| "loss": 2.56522705078125, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 28.92633628845215, | |
| "learning_rate": 6.742264150943397e-06, | |
| "loss": 2.3877410888671875, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.33125, | |
| "grad_norm": 24.558900833129883, | |
| "learning_rate": 6.729685534591196e-06, | |
| "loss": 2.6090899658203126, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.3325, | |
| "grad_norm": 22.432323455810547, | |
| "learning_rate": 6.717106918238995e-06, | |
| "loss": 2.537680358886719, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.33375, | |
| "grad_norm": 16.474348068237305, | |
| "learning_rate": 6.7045283018867935e-06, | |
| "loss": 2.5907723999023435, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.335, | |
| "grad_norm": 0.12232944369316101, | |
| "learning_rate": 6.691949685534592e-06, | |
| "loss": 2.2261618041992186, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.33625, | |
| "grad_norm": 23.504776000976562, | |
| "learning_rate": 6.679371069182391e-06, | |
| "loss": 2.601636962890625, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.3375, | |
| "grad_norm": 25.80137062072754, | |
| "learning_rate": 6.66679245283019e-06, | |
| "loss": 2.415384521484375, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.33875, | |
| "grad_norm": 10.791342735290527, | |
| "learning_rate": 6.654213836477989e-06, | |
| "loss": 2.3595407104492185, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.23251843452453613, | |
| "learning_rate": 6.641635220125787e-06, | |
| "loss": 2.3029635620117186, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.34125, | |
| "grad_norm": 22.788803100585938, | |
| "learning_rate": 6.629056603773586e-06, | |
| "loss": 2.467623748779297, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.3425, | |
| "grad_norm": 26.63360023498535, | |
| "learning_rate": 6.6164779874213845e-06, | |
| "loss": 2.717623291015625, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.34375, | |
| "grad_norm": 16.987070083618164, | |
| "learning_rate": 6.603899371069183e-06, | |
| "loss": 2.4102777099609374, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.345, | |
| "grad_norm": 0.01087226066738367, | |
| "learning_rate": 6.5913207547169825e-06, | |
| "loss": 2.3370285034179688, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.34625, | |
| "grad_norm": 23.72052574157715, | |
| "learning_rate": 6.578742138364781e-06, | |
| "loss": 2.6314013671875, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.3475, | |
| "grad_norm": 23.36273765563965, | |
| "learning_rate": 6.56616352201258e-06, | |
| "loss": 2.7635400390625, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.34875, | |
| "grad_norm": 1.7312610149383545, | |
| "learning_rate": 6.553584905660378e-06, | |
| "loss": 2.5564617919921875, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 35.84255599975586, | |
| "learning_rate": 6.541006289308177e-06, | |
| "loss": 2.7504351806640623, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.35125, | |
| "grad_norm": 26.242603302001953, | |
| "learning_rate": 6.528427672955976e-06, | |
| "loss": 2.373230285644531, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.3525, | |
| "grad_norm": 18.80271339416504, | |
| "learning_rate": 6.515849056603775e-06, | |
| "loss": 2.472604064941406, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.35375, | |
| "grad_norm": 0.8052368760108948, | |
| "learning_rate": 6.5032704402515734e-06, | |
| "loss": 2.4384115600585936, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.355, | |
| "grad_norm": 9.235365867614746, | |
| "learning_rate": 6.490691823899372e-06, | |
| "loss": 2.026627502441406, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.35625, | |
| "grad_norm": 20.106481552124023, | |
| "learning_rate": 6.478113207547171e-06, | |
| "loss": 2.431183319091797, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.3575, | |
| "grad_norm": 26.855960845947266, | |
| "learning_rate": 6.46553459119497e-06, | |
| "loss": 2.78699462890625, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.35875, | |
| "grad_norm": 3.0411767959594727, | |
| "learning_rate": 6.452955974842769e-06, | |
| "loss": 2.862520751953125, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 17.747953414916992, | |
| "learning_rate": 6.440377358490567e-06, | |
| "loss": 2.4523715209960937, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.36125, | |
| "grad_norm": 17.899240493774414, | |
| "learning_rate": 6.427798742138366e-06, | |
| "loss": 2.606719970703125, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.3625, | |
| "grad_norm": 27.6655216217041, | |
| "learning_rate": 6.415220125786164e-06, | |
| "loss": 2.9831976318359374, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.36375, | |
| "grad_norm": 11.997615814208984, | |
| "learning_rate": 6.402641509433963e-06, | |
| "loss": 2.345184326171875, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.365, | |
| "grad_norm": 4.737981796264648, | |
| "learning_rate": 6.390062893081761e-06, | |
| "loss": 2.16708251953125, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.36625, | |
| "grad_norm": 26.705848693847656, | |
| "learning_rate": 6.37748427672956e-06, | |
| "loss": 2.63515869140625, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.3675, | |
| "grad_norm": 19.237810134887695, | |
| "learning_rate": 6.364905660377359e-06, | |
| "loss": 2.532059326171875, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.36875, | |
| "grad_norm": 17.01352882385254, | |
| "learning_rate": 6.352327044025157e-06, | |
| "loss": 2.55763671875, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 27.392210006713867, | |
| "learning_rate": 6.339748427672956e-06, | |
| "loss": 2.2140924072265626, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.37125, | |
| "grad_norm": 22.72922134399414, | |
| "learning_rate": 6.3271698113207545e-06, | |
| "loss": 2.749759521484375, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.3725, | |
| "grad_norm": 30.038837432861328, | |
| "learning_rate": 6.314591194968553e-06, | |
| "loss": 2.596155700683594, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.37375, | |
| "grad_norm": 11.441932678222656, | |
| "learning_rate": 6.3020125786163525e-06, | |
| "loss": 2.4867178344726564, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 1.1486650705337524, | |
| "learning_rate": 6.289433962264151e-06, | |
| "loss": 2.6022509765625, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.37625, | |
| "grad_norm": 19.94702911376953, | |
| "learning_rate": 6.27685534591195e-06, | |
| "loss": 2.4900088500976563, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.3775, | |
| "grad_norm": 23.304832458496094, | |
| "learning_rate": 6.264276729559748e-06, | |
| "loss": 2.6295010375976564, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.37875, | |
| "grad_norm": 0.9189664721488953, | |
| "learning_rate": 6.251698113207547e-06, | |
| "loss": 2.614851379394531, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 7.372312545776367, | |
| "learning_rate": 6.239119496855346e-06, | |
| "loss": 2.43028564453125, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.38125, | |
| "grad_norm": 24.777666091918945, | |
| "learning_rate": 6.226540880503145e-06, | |
| "loss": 2.790203857421875, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.3825, | |
| "grad_norm": 24.64716339111328, | |
| "learning_rate": 6.2139622641509434e-06, | |
| "loss": 2.7274844360351564, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.38375, | |
| "grad_norm": 10.4207763671875, | |
| "learning_rate": 6.201383647798742e-06, | |
| "loss": 2.1123760986328124, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.385, | |
| "grad_norm": 0.07715418934822083, | |
| "learning_rate": 6.188805031446541e-06, | |
| "loss": 2.5373687744140625, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.38625, | |
| "grad_norm": 23.558998107910156, | |
| "learning_rate": 6.17622641509434e-06, | |
| "loss": 2.632052001953125, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.3875, | |
| "grad_norm": 26.116592407226562, | |
| "learning_rate": 6.163647798742139e-06, | |
| "loss": 2.494936218261719, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.38875, | |
| "grad_norm": 8.466816902160645, | |
| "learning_rate": 6.151069182389937e-06, | |
| "loss": 2.3917417907714844, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 6.2974748611450195, | |
| "learning_rate": 6.138490566037736e-06, | |
| "loss": 2.315247039794922, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.39125, | |
| "grad_norm": 26.84358024597168, | |
| "learning_rate": 6.125911949685534e-06, | |
| "loss": 2.6386013793945313, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.3925, | |
| "grad_norm": 18.76458740234375, | |
| "learning_rate": 6.113333333333333e-06, | |
| "loss": 2.304689636230469, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.39375, | |
| "grad_norm": 12.886114120483398, | |
| "learning_rate": 6.100754716981132e-06, | |
| "loss": 2.476997528076172, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.395, | |
| "grad_norm": 14.977468490600586, | |
| "learning_rate": 6.088176100628931e-06, | |
| "loss": 2.3447396850585935, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.39625, | |
| "grad_norm": 29.1169490814209, | |
| "learning_rate": 6.0755974842767296e-06, | |
| "loss": 2.186744842529297, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.3975, | |
| "grad_norm": 27.776119232177734, | |
| "learning_rate": 6.063018867924528e-06, | |
| "loss": 2.382681884765625, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.39875, | |
| "grad_norm": 48.07829284667969, | |
| "learning_rate": 6.050440251572327e-06, | |
| "loss": 2.3385089111328123, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.7851316332817078, | |
| "learning_rate": 6.037861635220126e-06, | |
| "loss": 2.229570770263672, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.40125, | |
| "grad_norm": 20.3125057220459, | |
| "learning_rate": 6.025283018867925e-06, | |
| "loss": 2.5623980712890626, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.4025, | |
| "grad_norm": 21.488149642944336, | |
| "learning_rate": 6.012704402515723e-06, | |
| "loss": 2.912520751953125, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.40375, | |
| "grad_norm": 32.51865005493164, | |
| "learning_rate": 6.000125786163522e-06, | |
| "loss": 2.3843609619140627, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.405, | |
| "grad_norm": 1.5765758752822876, | |
| "learning_rate": 5.9875471698113205e-06, | |
| "loss": 2.381104736328125, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.40625, | |
| "grad_norm": 19.67698860168457, | |
| "learning_rate": 5.97496855345912e-06, | |
| "loss": 2.60897216796875, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.4075, | |
| "grad_norm": 25.434328079223633, | |
| "learning_rate": 5.9623899371069185e-06, | |
| "loss": 2.505929260253906, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.40875, | |
| "grad_norm": 1.3922165632247925, | |
| "learning_rate": 5.949811320754717e-06, | |
| "loss": 2.7238693237304688, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 34.48624038696289, | |
| "learning_rate": 5.937232704402516e-06, | |
| "loss": 2.2143258666992187, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.41125, | |
| "grad_norm": 23.105432510375977, | |
| "learning_rate": 5.924654088050314e-06, | |
| "loss": 2.609884033203125, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.4125, | |
| "grad_norm": 25.032276153564453, | |
| "learning_rate": 5.912075471698113e-06, | |
| "loss": 2.4755784606933595, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.41375, | |
| "grad_norm": 2.3799238204956055, | |
| "learning_rate": 5.899496855345912e-06, | |
| "loss": 2.575408630371094, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.415, | |
| "grad_norm": 0.27088427543640137, | |
| "learning_rate": 5.886918238993711e-06, | |
| "loss": 2.577947998046875, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.41625, | |
| "grad_norm": 21.328598022460938, | |
| "learning_rate": 5.8743396226415095e-06, | |
| "loss": 2.358548583984375, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.4175, | |
| "grad_norm": 22.012718200683594, | |
| "learning_rate": 5.861761006289308e-06, | |
| "loss": 2.5863958740234376, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.41875, | |
| "grad_norm": 11.000739097595215, | |
| "learning_rate": 5.849182389937107e-06, | |
| "loss": 2.410167236328125, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.4935649633407593, | |
| "learning_rate": 5.836603773584906e-06, | |
| "loss": 2.570559387207031, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.42125, | |
| "grad_norm": 16.273447036743164, | |
| "learning_rate": 5.824025157232705e-06, | |
| "loss": 2.599737548828125, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.4225, | |
| "grad_norm": 17.573291778564453, | |
| "learning_rate": 5.811446540880503e-06, | |
| "loss": 2.647215576171875, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.42375, | |
| "grad_norm": 11.290910720825195, | |
| "learning_rate": 5.798867924528302e-06, | |
| "loss": 2.6486666870117186, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.425, | |
| "grad_norm": 0.27160441875457764, | |
| "learning_rate": 5.7862893081761004e-06, | |
| "loss": 2.48810791015625, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.42625, | |
| "grad_norm": 33.099632263183594, | |
| "learning_rate": 5.773710691823899e-06, | |
| "loss": 2.4821075439453124, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.4275, | |
| "grad_norm": 18.253433227539062, | |
| "learning_rate": 5.7611320754716984e-06, | |
| "loss": 2.4092727661132813, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.42875, | |
| "grad_norm": 6.506767749786377, | |
| "learning_rate": 5.748553459119497e-06, | |
| "loss": 2.461749267578125, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.04523186758160591, | |
| "learning_rate": 5.735974842767296e-06, | |
| "loss": 2.4135496520996096, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.43125, | |
| "grad_norm": 22.646928787231445, | |
| "learning_rate": 5.723396226415094e-06, | |
| "loss": 2.4821810913085938, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.4325, | |
| "grad_norm": 24.81723976135254, | |
| "learning_rate": 5.710817610062893e-06, | |
| "loss": 2.5469622802734375, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.43375, | |
| "grad_norm": 13.340218544006348, | |
| "learning_rate": 5.698238993710692e-06, | |
| "loss": 2.5624932861328125, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.435, | |
| "grad_norm": 0.1735188513994217, | |
| "learning_rate": 5.685660377358491e-06, | |
| "loss": 2.5089077758789062, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.43625, | |
| "grad_norm": 21.547271728515625, | |
| "learning_rate": 5.673081761006289e-06, | |
| "loss": 2.6526663208007815, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.4375, | |
| "grad_norm": 19.01319694519043, | |
| "learning_rate": 5.660503144654088e-06, | |
| "loss": 2.860760498046875, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.43875, | |
| "grad_norm": 34.29476547241211, | |
| "learning_rate": 5.6479245283018866e-06, | |
| "loss": 2.6115411376953124, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 49.28584671020508, | |
| "learning_rate": 5.635345911949686e-06, | |
| "loss": 2.1887362670898436, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.44125, | |
| "grad_norm": 22.770751953125, | |
| "learning_rate": 5.622767295597485e-06, | |
| "loss": 2.5814306640625, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.4425, | |
| "grad_norm": 30.632217407226562, | |
| "learning_rate": 5.610188679245283e-06, | |
| "loss": 2.3137057495117186, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.44375, | |
| "grad_norm": 1.2359341382980347, | |
| "learning_rate": 5.597610062893082e-06, | |
| "loss": 2.060030517578125, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.445, | |
| "grad_norm": 0.3209603428840637, | |
| "learning_rate": 5.58503144654088e-06, | |
| "loss": 2.113879852294922, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.44625, | |
| "grad_norm": 18.14032554626465, | |
| "learning_rate": 5.572452830188679e-06, | |
| "loss": 2.4774960327148436, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.4475, | |
| "grad_norm": 28.4627628326416, | |
| "learning_rate": 5.559874213836478e-06, | |
| "loss": 2.282992248535156, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.44875, | |
| "grad_norm": 9.6217041015625, | |
| "learning_rate": 5.547295597484277e-06, | |
| "loss": 2.6865158081054688, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.1811428815126419, | |
| "learning_rate": 5.5347169811320755e-06, | |
| "loss": 2.6177047729492187, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.45125, | |
| "grad_norm": 29.922924041748047, | |
| "learning_rate": 5.522138364779874e-06, | |
| "loss": 2.666334533691406, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.4525, | |
| "grad_norm": 22.7872371673584, | |
| "learning_rate": 5.509559748427673e-06, | |
| "loss": 2.8180126953125, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.45375, | |
| "grad_norm": 47.54941177368164, | |
| "learning_rate": 5.496981132075472e-06, | |
| "loss": 2.5696286010742186, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.455, | |
| "grad_norm": 0.02181134559214115, | |
| "learning_rate": 5.484402515723271e-06, | |
| "loss": 2.3324235534667968, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.45625, | |
| "grad_norm": 29.27972412109375, | |
| "learning_rate": 5.471823899371069e-06, | |
| "loss": 2.377205657958984, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.4575, | |
| "grad_norm": 27.066911697387695, | |
| "learning_rate": 5.459245283018868e-06, | |
| "loss": 2.6452157592773435, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.45875, | |
| "grad_norm": 5.8870849609375, | |
| "learning_rate": 5.4466666666666665e-06, | |
| "loss": 2.6054962158203123, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 9.92285442352295, | |
| "learning_rate": 5.434088050314466e-06, | |
| "loss": 2.3585281372070312, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.46125, | |
| "grad_norm": 25.87106704711914, | |
| "learning_rate": 5.4215094339622645e-06, | |
| "loss": 2.3194918823242188, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.4625, | |
| "grad_norm": 24.31088638305664, | |
| "learning_rate": 5.408930817610063e-06, | |
| "loss": 2.558509521484375, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.46375, | |
| "grad_norm": 8.893757820129395, | |
| "learning_rate": 5.396352201257862e-06, | |
| "loss": 2.501906433105469, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.465, | |
| "grad_norm": 0.19256582856178284, | |
| "learning_rate": 5.38377358490566e-06, | |
| "loss": 2.4328764343261717, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.46625, | |
| "grad_norm": 36.3577995300293, | |
| "learning_rate": 5.371194968553459e-06, | |
| "loss": 2.593095397949219, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.4675, | |
| "grad_norm": 28.993858337402344, | |
| "learning_rate": 5.358616352201258e-06, | |
| "loss": 2.5382254028320315, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.46875, | |
| "grad_norm": 1.2644715309143066, | |
| "learning_rate": 5.346037735849057e-06, | |
| "loss": 2.5988037109375, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.758081078529358, | |
| "learning_rate": 5.3334591194968554e-06, | |
| "loss": 2.3407969665527344, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.47125, | |
| "grad_norm": 29.136577606201172, | |
| "learning_rate": 5.320880503144654e-06, | |
| "loss": 2.649151611328125, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.4725, | |
| "grad_norm": 25.28328514099121, | |
| "learning_rate": 5.308301886792453e-06, | |
| "loss": 2.5671429443359375, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.47375, | |
| "grad_norm": 15.502847671508789, | |
| "learning_rate": 5.295723270440252e-06, | |
| "loss": 2.6158074951171875, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.475, | |
| "grad_norm": 0.22777187824249268, | |
| "learning_rate": 5.283144654088051e-06, | |
| "loss": 2.15365966796875, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.47625, | |
| "grad_norm": 11.893803596496582, | |
| "learning_rate": 5.270566037735849e-06, | |
| "loss": 2.3156301879882815, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.4775, | |
| "grad_norm": 25.753210067749023, | |
| "learning_rate": 5.257987421383648e-06, | |
| "loss": 2.503589324951172, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.47875, | |
| "grad_norm": 16.314041137695312, | |
| "learning_rate": 5.245408805031446e-06, | |
| "loss": 2.639173583984375, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.021451743319630623, | |
| "learning_rate": 5.232830188679246e-06, | |
| "loss": 2.311227111816406, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.48125, | |
| "grad_norm": 23.82429313659668, | |
| "learning_rate": 5.220251572327044e-06, | |
| "loss": 2.3693087768554686, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.4825, | |
| "grad_norm": 27.395193099975586, | |
| "learning_rate": 5.207672955974843e-06, | |
| "loss": 2.6940673828125, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.48375, | |
| "grad_norm": 4.3216071128845215, | |
| "learning_rate": 5.1950943396226416e-06, | |
| "loss": 2.763636474609375, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.485, | |
| "grad_norm": 7.015088081359863, | |
| "learning_rate": 5.18251572327044e-06, | |
| "loss": 2.432607879638672, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.48625, | |
| "grad_norm": 22.117116928100586, | |
| "learning_rate": 5.169937106918239e-06, | |
| "loss": 2.6036077880859376, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.4875, | |
| "grad_norm": 19.56247329711914, | |
| "learning_rate": 5.157358490566038e-06, | |
| "loss": 2.742894287109375, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.48875, | |
| "grad_norm": 9.732338905334473, | |
| "learning_rate": 5.144779874213837e-06, | |
| "loss": 2.3442169189453126, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 12.13438892364502, | |
| "learning_rate": 5.132201257861635e-06, | |
| "loss": 2.3807391357421874, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.49125, | |
| "grad_norm": 20.819271087646484, | |
| "learning_rate": 5.119622641509434e-06, | |
| "loss": 2.489746551513672, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.4925, | |
| "grad_norm": 22.794889450073242, | |
| "learning_rate": 5.1070440251572325e-06, | |
| "loss": 2.7172930908203123, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.49375, | |
| "grad_norm": 9.96747875213623, | |
| "learning_rate": 5.094465408805032e-06, | |
| "loss": 2.375674133300781, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.495, | |
| "grad_norm": 0.012898732908070087, | |
| "learning_rate": 5.0818867924528305e-06, | |
| "loss": 2.238426513671875, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.49625, | |
| "grad_norm": 23.519365310668945, | |
| "learning_rate": 5.069308176100629e-06, | |
| "loss": 2.3515843200683593, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.4975, | |
| "grad_norm": 24.20486068725586, | |
| "learning_rate": 5.056729559748428e-06, | |
| "loss": 2.6381719970703124, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.49875, | |
| "grad_norm": 25.931121826171875, | |
| "learning_rate": 5.044150943396226e-06, | |
| "loss": 2.2815740966796874, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.4895442724227905, | |
| "learning_rate": 5.031572327044026e-06, | |
| "loss": 2.1325482177734374, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.50125, | |
| "grad_norm": 22.385295867919922, | |
| "learning_rate": 5.018993710691824e-06, | |
| "loss": 2.376383514404297, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.5025, | |
| "grad_norm": 24.816694259643555, | |
| "learning_rate": 5.006415094339623e-06, | |
| "loss": 2.4902183532714846, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.50375, | |
| "grad_norm": 0.2846035957336426, | |
| "learning_rate": 4.9938364779874215e-06, | |
| "loss": 2.5287326049804686, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.505, | |
| "grad_norm": 2.7115321159362793, | |
| "learning_rate": 4.98125786163522e-06, | |
| "loss": 2.357637939453125, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.50625, | |
| "grad_norm": 23.814891815185547, | |
| "learning_rate": 4.968679245283019e-06, | |
| "loss": 2.194998016357422, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.5075, | |
| "grad_norm": 22.298978805541992, | |
| "learning_rate": 4.956100628930818e-06, | |
| "loss": 2.3045025634765626, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.50875, | |
| "grad_norm": 1.843037724494934, | |
| "learning_rate": 4.943522012578617e-06, | |
| "loss": 2.532041473388672, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 0.05613021180033684, | |
| "learning_rate": 4.930943396226415e-06, | |
| "loss": 2.358630828857422, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.51125, | |
| "grad_norm": 26.92227554321289, | |
| "learning_rate": 4.918364779874214e-06, | |
| "loss": 2.375510711669922, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.5125, | |
| "grad_norm": 24.53324317932129, | |
| "learning_rate": 4.9057861635220124e-06, | |
| "loss": 2.3742355346679687, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.51375, | |
| "grad_norm": 1.7757785320281982, | |
| "learning_rate": 4.893207547169812e-06, | |
| "loss": 2.1228904724121094, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.515, | |
| "grad_norm": 1.370865821838379, | |
| "learning_rate": 4.8806289308176104e-06, | |
| "loss": 2.2730003356933595, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.51625, | |
| "grad_norm": 27.052255630493164, | |
| "learning_rate": 4.868050314465409e-06, | |
| "loss": 2.1805772399902343, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.5175, | |
| "grad_norm": 19.289770126342773, | |
| "learning_rate": 4.855471698113208e-06, | |
| "loss": 2.3394677734375, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.51875, | |
| "grad_norm": 0.14031943678855896, | |
| "learning_rate": 4.842893081761006e-06, | |
| "loss": 2.16936767578125, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.3180188536643982, | |
| "learning_rate": 4.830314465408806e-06, | |
| "loss": 2.288330383300781, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.52125, | |
| "grad_norm": 28.790279388427734, | |
| "learning_rate": 4.817735849056604e-06, | |
| "loss": 2.2974285888671875, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.5225, | |
| "grad_norm": 29.82061767578125, | |
| "learning_rate": 4.805157232704403e-06, | |
| "loss": 2.2505938720703127, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.52375, | |
| "grad_norm": 1.3207628726959229, | |
| "learning_rate": 4.792578616352201e-06, | |
| "loss": 2.1726535034179686, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.525, | |
| "grad_norm": 19.538122177124023, | |
| "learning_rate": 4.78e-06, | |
| "loss": 2.3601947021484375, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.52625, | |
| "grad_norm": 23.45235252380371, | |
| "learning_rate": 4.7674213836477986e-06, | |
| "loss": 2.3694439697265626, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.5275, | |
| "grad_norm": 29.030534744262695, | |
| "learning_rate": 4.754842767295598e-06, | |
| "loss": 2.375699462890625, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.52875, | |
| "grad_norm": 0.05739065632224083, | |
| "learning_rate": 4.742264150943397e-06, | |
| "loss": 2.2624081420898436, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.09302949905395508, | |
| "learning_rate": 4.729685534591195e-06, | |
| "loss": 2.418585205078125, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.53125, | |
| "grad_norm": 20.36932945251465, | |
| "learning_rate": 4.717106918238994e-06, | |
| "loss": 2.7232342529296876, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.5325, | |
| "grad_norm": 25.583152770996094, | |
| "learning_rate": 4.704528301886792e-06, | |
| "loss": 2.5170713806152345, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.53375, | |
| "grad_norm": 27.926185607910156, | |
| "learning_rate": 4.691949685534592e-06, | |
| "loss": 2.2554306030273437, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.535, | |
| "grad_norm": 0.015048661269247532, | |
| "learning_rate": 4.67937106918239e-06, | |
| "loss": 2.410753173828125, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.53625, | |
| "grad_norm": 22.782848358154297, | |
| "learning_rate": 4.666792452830189e-06, | |
| "loss": 2.506884765625, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.5375, | |
| "grad_norm": 27.83270263671875, | |
| "learning_rate": 4.6542138364779875e-06, | |
| "loss": 2.42444091796875, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.53875, | |
| "grad_norm": 14.226387023925781, | |
| "learning_rate": 4.641635220125786e-06, | |
| "loss": 2.2237681579589843, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 30.74934196472168, | |
| "learning_rate": 4.6290566037735855e-06, | |
| "loss": 1.8440298461914062, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.54125, | |
| "grad_norm": 26.56260871887207, | |
| "learning_rate": 4.616477987421384e-06, | |
| "loss": 2.5520880126953127, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.5425, | |
| "grad_norm": 26.363876342773438, | |
| "learning_rate": 4.603899371069183e-06, | |
| "loss": 2.6202496337890624, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.54375, | |
| "grad_norm": 24.67633628845215, | |
| "learning_rate": 4.591320754716981e-06, | |
| "loss": 2.253356170654297, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.545, | |
| "grad_norm": 0.01842404529452324, | |
| "learning_rate": 4.57874213836478e-06, | |
| "loss": 2.283622283935547, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.54625, | |
| "grad_norm": 15.855024337768555, | |
| "learning_rate": 4.5661635220125785e-06, | |
| "loss": 2.501055908203125, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.5475, | |
| "grad_norm": 26.492700576782227, | |
| "learning_rate": 4.553584905660378e-06, | |
| "loss": 2.426959228515625, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.54875, | |
| "grad_norm": 7.623685836791992, | |
| "learning_rate": 4.5410062893081765e-06, | |
| "loss": 1.9829179382324218, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 17.01411247253418, | |
| "learning_rate": 4.528427672955975e-06, | |
| "loss": 2.1798980712890623, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.55125, | |
| "grad_norm": 21.383556365966797, | |
| "learning_rate": 4.515849056603774e-06, | |
| "loss": 2.0453337097167967, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.5525, | |
| "grad_norm": 22.872831344604492, | |
| "learning_rate": 4.503270440251572e-06, | |
| "loss": 2.4065341186523437, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.55375, | |
| "grad_norm": 0.3448590934276581, | |
| "learning_rate": 4.490691823899372e-06, | |
| "loss": 2.0755601501464844, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.555, | |
| "grad_norm": 64.99383544921875, | |
| "learning_rate": 4.47811320754717e-06, | |
| "loss": 2.167461242675781, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.55625, | |
| "grad_norm": 26.849061965942383, | |
| "learning_rate": 4.465534591194969e-06, | |
| "loss": 2.392653045654297, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.5575, | |
| "grad_norm": 22.663536071777344, | |
| "learning_rate": 4.4529559748427674e-06, | |
| "loss": 2.6425860595703123, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.55875, | |
| "grad_norm": 0.09723508358001709, | |
| "learning_rate": 4.440377358490566e-06, | |
| "loss": 2.041617431640625, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.6772779822349548, | |
| "learning_rate": 4.427798742138365e-06, | |
| "loss": 2.0510581970214843, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.56125, | |
| "grad_norm": 23.326744079589844, | |
| "learning_rate": 4.415220125786164e-06, | |
| "loss": 2.28655029296875, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.5625, | |
| "grad_norm": 22.986696243286133, | |
| "learning_rate": 4.402641509433963e-06, | |
| "loss": 2.162388916015625, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.56375, | |
| "grad_norm": 0.11825785040855408, | |
| "learning_rate": 4.390062893081761e-06, | |
| "loss": 2.4120408630371095, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.565, | |
| "grad_norm": 0.07316289842128754, | |
| "learning_rate": 4.37748427672956e-06, | |
| "loss": 1.9428927612304687, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.56625, | |
| "grad_norm": 19.97572135925293, | |
| "learning_rate": 4.364905660377358e-06, | |
| "loss": 2.409757537841797, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.5675, | |
| "grad_norm": 21.209138870239258, | |
| "learning_rate": 4.352327044025158e-06, | |
| "loss": 2.303609619140625, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.56875, | |
| "grad_norm": 0.48939380049705505, | |
| "learning_rate": 4.339748427672956e-06, | |
| "loss": 2.2687278747558595, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.690356492996216, | |
| "learning_rate": 4.327169811320755e-06, | |
| "loss": 2.068523406982422, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.57125, | |
| "grad_norm": 24.331865310668945, | |
| "learning_rate": 4.3145911949685536e-06, | |
| "loss": 2.371395721435547, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.5725, | |
| "grad_norm": 19.168750762939453, | |
| "learning_rate": 4.302012578616352e-06, | |
| "loss": 2.48111083984375, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.57375, | |
| "grad_norm": 1.1316941976547241, | |
| "learning_rate": 4.289433962264152e-06, | |
| "loss": 2.1914462280273437, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.575, | |
| "grad_norm": 0.7238625884056091, | |
| "learning_rate": 4.27685534591195e-06, | |
| "loss": 2.408410949707031, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.57625, | |
| "grad_norm": 18.509540557861328, | |
| "learning_rate": 4.264276729559749e-06, | |
| "loss": 2.5385202026367186, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.5775, | |
| "grad_norm": 24.170326232910156, | |
| "learning_rate": 4.251698113207547e-06, | |
| "loss": 2.726481628417969, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.57875, | |
| "grad_norm": 5.890894889831543, | |
| "learning_rate": 4.239119496855346e-06, | |
| "loss": 2.3738558959960936, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.009730951860547066, | |
| "learning_rate": 4.2265408805031445e-06, | |
| "loss": 2.6054721069335938, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.58125, | |
| "grad_norm": 24.269445419311523, | |
| "learning_rate": 4.213962264150944e-06, | |
| "loss": 2.3926129150390625, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.5825, | |
| "grad_norm": 20.738441467285156, | |
| "learning_rate": 4.2013836477987425e-06, | |
| "loss": 2.491241455078125, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.58375, | |
| "grad_norm": 1.0278619527816772, | |
| "learning_rate": 4.188805031446541e-06, | |
| "loss": 2.247518768310547, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.585, | |
| "grad_norm": 4.836909294128418, | |
| "learning_rate": 4.17622641509434e-06, | |
| "loss": 2.104373016357422, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.58625, | |
| "grad_norm": 31.359527587890625, | |
| "learning_rate": 4.163647798742138e-06, | |
| "loss": 2.190884704589844, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.5875, | |
| "grad_norm": 33.07499313354492, | |
| "learning_rate": 4.151069182389938e-06, | |
| "loss": 2.332211608886719, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.58875, | |
| "grad_norm": 18.392343521118164, | |
| "learning_rate": 4.138490566037736e-06, | |
| "loss": 2.077041015625, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.6778843998908997, | |
| "learning_rate": 4.125911949685535e-06, | |
| "loss": 2.148955993652344, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.59125, | |
| "grad_norm": 29.361854553222656, | |
| "learning_rate": 4.1133333333333335e-06, | |
| "loss": 2.404347381591797, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.5925, | |
| "grad_norm": 21.975435256958008, | |
| "learning_rate": 4.100754716981132e-06, | |
| "loss": 2.4371409606933594, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.59375, | |
| "grad_norm": 0.4038066864013672, | |
| "learning_rate": 4.0881761006289315e-06, | |
| "loss": 2.4506686401367186, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.595, | |
| "grad_norm": 0.0259912870824337, | |
| "learning_rate": 4.07559748427673e-06, | |
| "loss": 2.2095245361328124, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.59625, | |
| "grad_norm": 24.107616424560547, | |
| "learning_rate": 4.063018867924529e-06, | |
| "loss": 2.3150425720214844, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.5975, | |
| "grad_norm": 22.568410873413086, | |
| "learning_rate": 4.050440251572327e-06, | |
| "loss": 2.3050765991210938, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.59875, | |
| "grad_norm": 1.4052597284317017, | |
| "learning_rate": 4.037861635220126e-06, | |
| "loss": 2.3776676940917967, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 5.013542652130127, | |
| "learning_rate": 4.0252830188679244e-06, | |
| "loss": 2.1164060974121095, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.60125, | |
| "grad_norm": 24.288990020751953, | |
| "learning_rate": 4.012704402515724e-06, | |
| "loss": 2.0022381591796874, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.6025, | |
| "grad_norm": 26.07032012939453, | |
| "learning_rate": 4.0001257861635224e-06, | |
| "loss": 2.1755996704101563, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.60375, | |
| "grad_norm": 1.911749005317688, | |
| "learning_rate": 3.987547169811321e-06, | |
| "loss": 2.01885986328125, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.605, | |
| "grad_norm": 12.477325439453125, | |
| "learning_rate": 3.97496855345912e-06, | |
| "loss": 2.368613739013672, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.60625, | |
| "grad_norm": 21.526126861572266, | |
| "learning_rate": 3.962389937106918e-06, | |
| "loss": 2.3319718933105467, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.6075, | |
| "grad_norm": 21.947265625, | |
| "learning_rate": 3.949811320754718e-06, | |
| "loss": 2.5695175170898437, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.60875, | |
| "grad_norm": 9.083207130432129, | |
| "learning_rate": 3.937232704402516e-06, | |
| "loss": 2.4009765625, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 9.588478088378906, | |
| "learning_rate": 3.924654088050315e-06, | |
| "loss": 2.1275369262695314, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.61125, | |
| "grad_norm": 16.444225311279297, | |
| "learning_rate": 3.912075471698113e-06, | |
| "loss": 2.22287841796875, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.6125, | |
| "grad_norm": 20.83136558532715, | |
| "learning_rate": 3.899496855345912e-06, | |
| "loss": 2.3845196533203126, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.61375, | |
| "grad_norm": 10.17677116394043, | |
| "learning_rate": 3.886918238993711e-06, | |
| "loss": 2.190762176513672, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.615, | |
| "grad_norm": 0.006293127313256264, | |
| "learning_rate": 3.87433962264151e-06, | |
| "loss": 2.2727685546875, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.61625, | |
| "grad_norm": 25.517677307128906, | |
| "learning_rate": 3.861761006289309e-06, | |
| "loss": 2.267947082519531, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.6175, | |
| "grad_norm": 28.428192138671875, | |
| "learning_rate": 3.849182389937107e-06, | |
| "loss": 2.418175048828125, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.61875, | |
| "grad_norm": 27.646535873413086, | |
| "learning_rate": 3.836603773584906e-06, | |
| "loss": 2.280270233154297, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.03348470479249954, | |
| "learning_rate": 3.824025157232704e-06, | |
| "loss": 2.0512631225585936, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.62125, | |
| "grad_norm": 29.742656707763672, | |
| "learning_rate": 3.8114465408805033e-06, | |
| "loss": 2.4671990966796873, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.6225, | |
| "grad_norm": 23.064598083496094, | |
| "learning_rate": 3.7988679245283024e-06, | |
| "loss": 2.4474307250976564, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.62375, | |
| "grad_norm": 2.3941636085510254, | |
| "learning_rate": 3.786289308176101e-06, | |
| "loss": 2.03037109375, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 0.19696100056171417, | |
| "learning_rate": 3.7737106918238995e-06, | |
| "loss": 2.0240495300292967, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.62625, | |
| "grad_norm": 26.428871154785156, | |
| "learning_rate": 3.7611320754716985e-06, | |
| "loss": 2.200367889404297, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.6275, | |
| "grad_norm": 23.72710609436035, | |
| "learning_rate": 3.748553459119497e-06, | |
| "loss": 2.7000701904296873, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.62875, | |
| "grad_norm": 27.32769012451172, | |
| "learning_rate": 3.735974842767296e-06, | |
| "loss": 2.0785140991210938, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 32.80459213256836, | |
| "learning_rate": 3.7233962264150947e-06, | |
| "loss": 2.2123362731933596, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.63125, | |
| "grad_norm": 26.444387435913086, | |
| "learning_rate": 3.7108176100628933e-06, | |
| "loss": 2.479073944091797, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.6325, | |
| "grad_norm": 28.786527633666992, | |
| "learning_rate": 3.6982389937106923e-06, | |
| "loss": 2.289686737060547, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.63375, | |
| "grad_norm": 0.1663801074028015, | |
| "learning_rate": 3.685660377358491e-06, | |
| "loss": 2.1976898193359373, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.635, | |
| "grad_norm": 30.683305740356445, | |
| "learning_rate": 3.6730817610062895e-06, | |
| "loss": 2.2509585571289064, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.63625, | |
| "grad_norm": 22.308727264404297, | |
| "learning_rate": 3.6605031446540885e-06, | |
| "loss": 2.5132717895507812, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.6375, | |
| "grad_norm": 19.899017333984375, | |
| "learning_rate": 3.647924528301887e-06, | |
| "loss": 2.246257781982422, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.63875, | |
| "grad_norm": 0.14116325974464417, | |
| "learning_rate": 3.635345911949686e-06, | |
| "loss": 2.05459228515625, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.020385975018143654, | |
| "learning_rate": 3.6227672955974847e-06, | |
| "loss": 2.099994812011719, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.64125, | |
| "grad_norm": 17.793832778930664, | |
| "learning_rate": 3.6101886792452833e-06, | |
| "loss": 2.259058074951172, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.6425, | |
| "grad_norm": 21.187685012817383, | |
| "learning_rate": 3.5976100628930823e-06, | |
| "loss": 2.204525146484375, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.64375, | |
| "grad_norm": 1.0163244009017944, | |
| "learning_rate": 3.585031446540881e-06, | |
| "loss": 2.2062753295898436, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.645, | |
| "grad_norm": 0.44557446241378784, | |
| "learning_rate": 3.5724528301886794e-06, | |
| "loss": 2.1636553955078126, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.64625, | |
| "grad_norm": 22.489730834960938, | |
| "learning_rate": 3.5598742138364784e-06, | |
| "loss": 2.343136444091797, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.6475, | |
| "grad_norm": 27.255525588989258, | |
| "learning_rate": 3.547295597484277e-06, | |
| "loss": 2.585829162597656, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.64875, | |
| "grad_norm": 0.30436962842941284, | |
| "learning_rate": 3.534716981132076e-06, | |
| "loss": 2.0592010498046873, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 0.008368916809558868, | |
| "learning_rate": 3.5221383647798746e-06, | |
| "loss": 2.0964291381835936, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.65125, | |
| "grad_norm": 25.623943328857422, | |
| "learning_rate": 3.509559748427673e-06, | |
| "loss": 2.2132614135742186, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.6525, | |
| "grad_norm": 23.970529556274414, | |
| "learning_rate": 3.4969811320754722e-06, | |
| "loss": 2.2939547729492187, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.65375, | |
| "grad_norm": 0.13999006152153015, | |
| "learning_rate": 3.484402515723271e-06, | |
| "loss": 2.1940087890625, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.655, | |
| "grad_norm": 6.885776519775391, | |
| "learning_rate": 3.4718238993710694e-06, | |
| "loss": 2.3822996520996096, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.65625, | |
| "grad_norm": 28.326892852783203, | |
| "learning_rate": 3.4592452830188684e-06, | |
| "loss": 2.7556744384765626, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.6575, | |
| "grad_norm": 29.53263282775879, | |
| "learning_rate": 3.446666666666667e-06, | |
| "loss": 2.4048992919921877, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.65875, | |
| "grad_norm": 9.168279647827148, | |
| "learning_rate": 3.434088050314466e-06, | |
| "loss": 2.3623193359375, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 51.9276123046875, | |
| "learning_rate": 3.4215094339622646e-06, | |
| "loss": 2.3159774780273437, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.66125, | |
| "grad_norm": 34.09861373901367, | |
| "learning_rate": 3.408930817610063e-06, | |
| "loss": 2.254779968261719, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.6625, | |
| "grad_norm": 24.50155258178711, | |
| "learning_rate": 3.396352201257862e-06, | |
| "loss": 2.4943356323242187, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.66375, | |
| "grad_norm": 1.6096267700195312, | |
| "learning_rate": 3.3837735849056608e-06, | |
| "loss": 2.4191680908203126, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.665, | |
| "grad_norm": 9.347710609436035, | |
| "learning_rate": 3.3711949685534593e-06, | |
| "loss": 2.3084637451171877, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.66625, | |
| "grad_norm": 27.17746925354004, | |
| "learning_rate": 3.3586163522012584e-06, | |
| "loss": 2.3632272338867186, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.6675, | |
| "grad_norm": 19.401647567749023, | |
| "learning_rate": 3.346037735849057e-06, | |
| "loss": 2.4689659118652343, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.66875, | |
| "grad_norm": 0.02893674187362194, | |
| "learning_rate": 3.333459119496856e-06, | |
| "loss": 2.2349473571777345, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.308145672082901, | |
| "learning_rate": 3.3208805031446545e-06, | |
| "loss": 2.4012570190429687, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.67125, | |
| "grad_norm": 27.814510345458984, | |
| "learning_rate": 3.308301886792453e-06, | |
| "loss": 2.364227752685547, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.6725, | |
| "grad_norm": 30.748811721801758, | |
| "learning_rate": 3.295723270440252e-06, | |
| "loss": 2.298586883544922, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.67375, | |
| "grad_norm": 12.556225776672363, | |
| "learning_rate": 3.2831446540880507e-06, | |
| "loss": 2.026658935546875, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.675, | |
| "grad_norm": 32.828857421875, | |
| "learning_rate": 3.2705660377358493e-06, | |
| "loss": 2.2101339721679687, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.67625, | |
| "grad_norm": 41.711971282958984, | |
| "learning_rate": 3.2579874213836483e-06, | |
| "loss": 2.2615397644042967, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.6775, | |
| "grad_norm": 22.22212791442871, | |
| "learning_rate": 3.245408805031447e-06, | |
| "loss": 2.264314880371094, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.67875, | |
| "grad_norm": 0.31344112753868103, | |
| "learning_rate": 3.232830188679246e-06, | |
| "loss": 1.843946990966797, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 11.150541305541992, | |
| "learning_rate": 3.2202515723270445e-06, | |
| "loss": 2.107444305419922, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.68125, | |
| "grad_norm": 27.848705291748047, | |
| "learning_rate": 3.207672955974843e-06, | |
| "loss": 2.6434942626953126, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.6825, | |
| "grad_norm": 20.511205673217773, | |
| "learning_rate": 3.195094339622642e-06, | |
| "loss": 2.2614511108398436, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.68375, | |
| "grad_norm": 15.717256546020508, | |
| "learning_rate": 3.1825157232704407e-06, | |
| "loss": 2.4019781494140626, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.685, | |
| "grad_norm": 15.28030014038086, | |
| "learning_rate": 3.1699371069182393e-06, | |
| "loss": 2.1407426452636718, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.68625, | |
| "grad_norm": 26.36769676208496, | |
| "learning_rate": 3.1573584905660383e-06, | |
| "loss": 2.4883404541015626, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.6875, | |
| "grad_norm": 30.11307144165039, | |
| "learning_rate": 3.144779874213837e-06, | |
| "loss": 2.4928182983398437, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.68875, | |
| "grad_norm": 3.050213575363159, | |
| "learning_rate": 3.132201257861636e-06, | |
| "loss": 2.242222900390625, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.08332010358572006, | |
| "learning_rate": 3.1196226415094344e-06, | |
| "loss": 2.095365447998047, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.69125, | |
| "grad_norm": 25.02206802368164, | |
| "learning_rate": 3.107044025157233e-06, | |
| "loss": 2.2118115234375, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.6925, | |
| "grad_norm": 21.8033390045166, | |
| "learning_rate": 3.094465408805032e-06, | |
| "loss": 2.372667236328125, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.69375, | |
| "grad_norm": 0.31131941080093384, | |
| "learning_rate": 3.0818867924528306e-06, | |
| "loss": 2.4498031616210936, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.695, | |
| "grad_norm": 1.234559416770935, | |
| "learning_rate": 3.069308176100629e-06, | |
| "loss": 2.095816650390625, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.69625, | |
| "grad_norm": 24.62751007080078, | |
| "learning_rate": 3.0567295597484282e-06, | |
| "loss": 2.246084442138672, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.6975, | |
| "grad_norm": 32.08023452758789, | |
| "learning_rate": 3.044150943396227e-06, | |
| "loss": 2.2728422546386717, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.69875, | |
| "grad_norm": 1.7580287456512451, | |
| "learning_rate": 3.0315723270440254e-06, | |
| "loss": 1.912986297607422, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.024206427857279778, | |
| "learning_rate": 3.0189937106918244e-06, | |
| "loss": 2.12907470703125, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.70125, | |
| "grad_norm": 26.95330810546875, | |
| "learning_rate": 3.006415094339623e-06, | |
| "loss": 2.421792297363281, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.7025, | |
| "grad_norm": 22.763641357421875, | |
| "learning_rate": 2.993836477987422e-06, | |
| "loss": 2.2753536987304686, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.70375, | |
| "grad_norm": 6.763653755187988, | |
| "learning_rate": 2.9812578616352206e-06, | |
| "loss": 2.017315673828125, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.705, | |
| "grad_norm": 0.030549824237823486, | |
| "learning_rate": 2.968679245283019e-06, | |
| "loss": 2.2717471313476563, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.70625, | |
| "grad_norm": 30.307323455810547, | |
| "learning_rate": 2.956100628930818e-06, | |
| "loss": 2.375959777832031, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.7075, | |
| "grad_norm": 22.114675521850586, | |
| "learning_rate": 2.9435220125786168e-06, | |
| "loss": 2.2014059448242187, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.70875, | |
| "grad_norm": 0.3245386779308319, | |
| "learning_rate": 2.9309433962264153e-06, | |
| "loss": 1.9942924499511718, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.012478250078856945, | |
| "learning_rate": 2.9183647798742144e-06, | |
| "loss": 2.166055908203125, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.71125, | |
| "grad_norm": 25.91703987121582, | |
| "learning_rate": 2.905786163522013e-06, | |
| "loss": 2.2955760192871093, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.7125, | |
| "grad_norm": 23.741256713867188, | |
| "learning_rate": 2.893207547169812e-06, | |
| "loss": 2.308643493652344, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.71375, | |
| "grad_norm": 32.72561264038086, | |
| "learning_rate": 2.8806289308176105e-06, | |
| "loss": 2.2564122009277345, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.715, | |
| "grad_norm": 0.09459064900875092, | |
| "learning_rate": 2.868050314465409e-06, | |
| "loss": 2.263880615234375, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.71625, | |
| "grad_norm": 31.514862060546875, | |
| "learning_rate": 2.855471698113208e-06, | |
| "loss": 2.22376953125, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.7175, | |
| "grad_norm": 23.945999145507812, | |
| "learning_rate": 2.8428930817610067e-06, | |
| "loss": 2.121052703857422, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.71875, | |
| "grad_norm": 11.593914985656738, | |
| "learning_rate": 2.8303144654088053e-06, | |
| "loss": 2.0279052734375, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.03185856342315674, | |
| "learning_rate": 2.8177358490566043e-06, | |
| "loss": 1.9632949829101562, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.72125, | |
| "grad_norm": 26.430574417114258, | |
| "learning_rate": 2.805157232704403e-06, | |
| "loss": 2.224911804199219, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.7225, | |
| "grad_norm": 23.202377319335938, | |
| "learning_rate": 2.792578616352202e-06, | |
| "loss": 2.3928271484375, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.72375, | |
| "grad_norm": 6.8675127029418945, | |
| "learning_rate": 2.7800000000000005e-06, | |
| "loss": 2.345365447998047, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.725, | |
| "grad_norm": 7.9162750244140625, | |
| "learning_rate": 2.767421383647799e-06, | |
| "loss": 2.40666015625, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.72625, | |
| "grad_norm": 28.883695602416992, | |
| "learning_rate": 2.754842767295598e-06, | |
| "loss": 2.1290104675292967, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.7275, | |
| "grad_norm": 28.86813735961914, | |
| "learning_rate": 2.7422641509433967e-06, | |
| "loss": 2.3013229370117188, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.72875, | |
| "grad_norm": 0.05526283383369446, | |
| "learning_rate": 2.7296855345911953e-06, | |
| "loss": 2.349634246826172, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.46888020634651184, | |
| "learning_rate": 2.7171069182389943e-06, | |
| "loss": 2.269898681640625, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.73125, | |
| "grad_norm": 21.197877883911133, | |
| "learning_rate": 2.704528301886793e-06, | |
| "loss": 2.294650115966797, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.7325, | |
| "grad_norm": 28.490921020507812, | |
| "learning_rate": 2.691949685534592e-06, | |
| "loss": 2.507839813232422, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.73375, | |
| "grad_norm": 0.9312068819999695, | |
| "learning_rate": 2.6793710691823904e-06, | |
| "loss": 2.1638980102539063, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.735, | |
| "grad_norm": 7.32960319519043, | |
| "learning_rate": 2.666792452830189e-06, | |
| "loss": 2.1146189880371096, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.73625, | |
| "grad_norm": 16.3245792388916, | |
| "learning_rate": 2.654213836477988e-06, | |
| "loss": 2.353025360107422, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.7375, | |
| "grad_norm": 28.876869201660156, | |
| "learning_rate": 2.641635220125786e-06, | |
| "loss": 2.3220205688476563, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.73875, | |
| "grad_norm": 1.1006479263305664, | |
| "learning_rate": 2.6290566037735848e-06, | |
| "loss": 2.3847764587402343, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.052632566541433334, | |
| "learning_rate": 2.616477987421384e-06, | |
| "loss": 1.9747433471679687, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.74125, | |
| "grad_norm": 27.36806297302246, | |
| "learning_rate": 2.6038993710691824e-06, | |
| "loss": 2.234722137451172, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.7425, | |
| "grad_norm": 32.45076370239258, | |
| "learning_rate": 2.591320754716981e-06, | |
| "loss": 2.2224740600585937, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.74375, | |
| "grad_norm": 0.18155953288078308, | |
| "learning_rate": 2.57874213836478e-06, | |
| "loss": 2.1847653198242187, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.745, | |
| "grad_norm": 21.358049392700195, | |
| "learning_rate": 2.5661635220125786e-06, | |
| "loss": 2.1930168151855467, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.74625, | |
| "grad_norm": 23.599361419677734, | |
| "learning_rate": 2.553584905660377e-06, | |
| "loss": 2.13337646484375, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.7475, | |
| "grad_norm": 25.336759567260742, | |
| "learning_rate": 2.541006289308176e-06, | |
| "loss": 2.245163879394531, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.74875, | |
| "grad_norm": 14.410560607910156, | |
| "learning_rate": 2.5284276729559747e-06, | |
| "loss": 2.1882379150390623, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 17.1163330078125, | |
| "learning_rate": 2.5158490566037737e-06, | |
| "loss": 1.899642791748047, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.75125, | |
| "grad_norm": 32.30154800415039, | |
| "learning_rate": 2.5032704402515723e-06, | |
| "loss": 2.124136962890625, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.7525, | |
| "grad_norm": 34.646514892578125, | |
| "learning_rate": 2.4906918238993713e-06, | |
| "loss": 2.2636474609375, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.75375, | |
| "grad_norm": 0.17916053533554077, | |
| "learning_rate": 2.47811320754717e-06, | |
| "loss": 2.10548095703125, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.755, | |
| "grad_norm": 0.3836560845375061, | |
| "learning_rate": 2.465534591194969e-06, | |
| "loss": 2.248681640625, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.75625, | |
| "grad_norm": 25.696950912475586, | |
| "learning_rate": 2.4529559748427675e-06, | |
| "loss": 2.4749154663085937, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.7575, | |
| "grad_norm": 11.523337364196777, | |
| "learning_rate": 2.440377358490566e-06, | |
| "loss": 2.1865379333496096, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.75875, | |
| "grad_norm": 0.2725467383861542, | |
| "learning_rate": 2.427798742138365e-06, | |
| "loss": 2.069666442871094, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.008206835016608238, | |
| "learning_rate": 2.4152201257861637e-06, | |
| "loss": 2.1584426879882814, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.76125, | |
| "grad_norm": 28.906057357788086, | |
| "learning_rate": 2.4026415094339627e-06, | |
| "loss": 2.254979705810547, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.7625, | |
| "grad_norm": 12.576886177062988, | |
| "learning_rate": 2.3900628930817613e-06, | |
| "loss": 2.25433349609375, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.76375, | |
| "grad_norm": 10.839340209960938, | |
| "learning_rate": 2.37748427672956e-06, | |
| "loss": 2.084885711669922, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.765, | |
| "grad_norm": 0.06599520146846771, | |
| "learning_rate": 2.364905660377359e-06, | |
| "loss": 2.23760986328125, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.76625, | |
| "grad_norm": 31.791736602783203, | |
| "learning_rate": 2.3523270440251575e-06, | |
| "loss": 2.42175537109375, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.7675, | |
| "grad_norm": 29.30803680419922, | |
| "learning_rate": 2.339748427672956e-06, | |
| "loss": 2.215085754394531, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.76875, | |
| "grad_norm": 5.6741461753845215, | |
| "learning_rate": 2.327169811320755e-06, | |
| "loss": 2.066580810546875, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.006725333631038666, | |
| "learning_rate": 2.3145911949685537e-06, | |
| "loss": 2.1789097595214844, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.77125, | |
| "grad_norm": 21.000843048095703, | |
| "learning_rate": 2.3020125786163527e-06, | |
| "loss": 2.4083587646484377, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.7725, | |
| "grad_norm": 24.83283042907715, | |
| "learning_rate": 2.2894339622641513e-06, | |
| "loss": 2.813940124511719, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.77375, | |
| "grad_norm": 0.34976866841316223, | |
| "learning_rate": 2.27685534591195e-06, | |
| "loss": 2.1371139526367187, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.775, | |
| "grad_norm": 0.2197369635105133, | |
| "learning_rate": 2.264276729559749e-06, | |
| "loss": 1.9622064208984376, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.77625, | |
| "grad_norm": 23.31989860534668, | |
| "learning_rate": 2.2516981132075474e-06, | |
| "loss": 2.1933587646484374, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.7775, | |
| "grad_norm": 26.234939575195312, | |
| "learning_rate": 2.239119496855346e-06, | |
| "loss": 2.341946716308594, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.77875, | |
| "grad_norm": 27.059640884399414, | |
| "learning_rate": 2.226540880503145e-06, | |
| "loss": 2.026497497558594, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 10.47779369354248, | |
| "learning_rate": 2.2139622641509436e-06, | |
| "loss": 2.1496810913085938, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.78125, | |
| "grad_norm": 14.565492630004883, | |
| "learning_rate": 2.2013836477987426e-06, | |
| "loss": 2.1073020935058593, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.7825, | |
| "grad_norm": 26.76126480102539, | |
| "learning_rate": 2.188805031446541e-06, | |
| "loss": 2.1374913024902344, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.78375, | |
| "grad_norm": 1.1460466384887695, | |
| "learning_rate": 2.17622641509434e-06, | |
| "loss": 2.4106900024414064, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.785, | |
| "grad_norm": 0.04275045916438103, | |
| "learning_rate": 2.163647798742139e-06, | |
| "loss": 1.977836151123047, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.78625, | |
| "grad_norm": 29.69572639465332, | |
| "learning_rate": 2.1510691823899374e-06, | |
| "loss": 2.211481628417969, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.7875, | |
| "grad_norm": 27.114084243774414, | |
| "learning_rate": 2.138490566037736e-06, | |
| "loss": 2.3308370971679686, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.78875, | |
| "grad_norm": 0.024876583367586136, | |
| "learning_rate": 2.125911949685535e-06, | |
| "loss": 2.2788821411132814, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.011269732378423214, | |
| "learning_rate": 2.1133333333333336e-06, | |
| "loss": 2.2254403686523436, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.79125, | |
| "grad_norm": 29.721452713012695, | |
| "learning_rate": 2.1007547169811326e-06, | |
| "loss": 2.2361138916015624, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.7925, | |
| "grad_norm": 26.52519416809082, | |
| "learning_rate": 2.088176100628931e-06, | |
| "loss": 2.304778747558594, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.79375, | |
| "grad_norm": 0.33907511830329895, | |
| "learning_rate": 2.0755974842767297e-06, | |
| "loss": 2.1444125366210938, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.795, | |
| "grad_norm": 0.848170280456543, | |
| "learning_rate": 2.0630188679245288e-06, | |
| "loss": 2.224850311279297, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.79625, | |
| "grad_norm": 30.878385543823242, | |
| "learning_rate": 2.0504402515723273e-06, | |
| "loss": 2.3003367614746093, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.7975, | |
| "grad_norm": 21.298805236816406, | |
| "learning_rate": 2.037861635220126e-06, | |
| "loss": 2.251806640625, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.79875, | |
| "grad_norm": 1.4935413599014282, | |
| "learning_rate": 2.025283018867925e-06, | |
| "loss": 2.0355984497070314, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.23573997616767883, | |
| "learning_rate": 2.012704402515723e-06, | |
| "loss": 2.3094590759277343, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.80125, | |
| "grad_norm": 21.646190643310547, | |
| "learning_rate": 2.000125786163522e-06, | |
| "loss": 2.45231689453125, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.8025, | |
| "grad_norm": 26.4865779876709, | |
| "learning_rate": 1.9875471698113207e-06, | |
| "loss": 2.481203918457031, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.80375, | |
| "grad_norm": 16.815086364746094, | |
| "learning_rate": 1.9749685534591197e-06, | |
| "loss": 2.274866180419922, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.805, | |
| "grad_norm": 1.5168429613113403, | |
| "learning_rate": 1.9623899371069183e-06, | |
| "loss": 2.090848083496094, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.80625, | |
| "grad_norm": 24.665088653564453, | |
| "learning_rate": 1.949811320754717e-06, | |
| "loss": 2.149015197753906, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.8075, | |
| "grad_norm": 44.3161735534668, | |
| "learning_rate": 1.937232704402516e-06, | |
| "loss": 2.267424774169922, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.80875, | |
| "grad_norm": 14.768138885498047, | |
| "learning_rate": 1.9246540880503145e-06, | |
| "loss": 2.1676406860351562, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 23.735509872436523, | |
| "learning_rate": 1.912075471698113e-06, | |
| "loss": 2.066508026123047, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.81125, | |
| "grad_norm": 27.178951263427734, | |
| "learning_rate": 1.899496855345912e-06, | |
| "loss": 2.202220916748047, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.8125, | |
| "grad_norm": 27.76671600341797, | |
| "learning_rate": 1.8869182389937107e-06, | |
| "loss": 2.390093994140625, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.81375, | |
| "grad_norm": 22.02654266357422, | |
| "learning_rate": 1.8743396226415094e-06, | |
| "loss": 2.02405517578125, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.815, | |
| "grad_norm": 0.019850876182317734, | |
| "learning_rate": 1.8617610062893082e-06, | |
| "loss": 2.1032846069335935, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.81625, | |
| "grad_norm": 26.451114654541016, | |
| "learning_rate": 1.849182389937107e-06, | |
| "loss": 2.0573655700683595, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.8175, | |
| "grad_norm": 25.203014373779297, | |
| "learning_rate": 1.8366037735849056e-06, | |
| "loss": 2.23822265625, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.81875, | |
| "grad_norm": 0.07573448121547699, | |
| "learning_rate": 1.8240251572327044e-06, | |
| "loss": 2.170495910644531, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.6197527647018433, | |
| "learning_rate": 1.8114465408805032e-06, | |
| "loss": 2.0060690307617186, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.82125, | |
| "grad_norm": 29.55573081970215, | |
| "learning_rate": 1.798867924528302e-06, | |
| "loss": 2.104353942871094, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.8225, | |
| "grad_norm": 13.039762496948242, | |
| "learning_rate": 1.7862893081761006e-06, | |
| "loss": 1.940672607421875, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.82375, | |
| "grad_norm": 10.955610275268555, | |
| "learning_rate": 1.7737106918238994e-06, | |
| "loss": 1.850831298828125, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.825, | |
| "grad_norm": 28.8968505859375, | |
| "learning_rate": 1.7611320754716982e-06, | |
| "loss": 2.2148202514648436, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.82625, | |
| "grad_norm": 20.05891990661621, | |
| "learning_rate": 1.748553459119497e-06, | |
| "loss": 2.0501080322265626, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.8275, | |
| "grad_norm": 29.854055404663086, | |
| "learning_rate": 1.7359748427672956e-06, | |
| "loss": 2.338354187011719, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.82875, | |
| "grad_norm": 0.17976756393909454, | |
| "learning_rate": 1.7233962264150944e-06, | |
| "loss": 2.372988739013672, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 14.72498893737793, | |
| "learning_rate": 1.7108176100628932e-06, | |
| "loss": 2.317147674560547, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.83125, | |
| "grad_norm": 25.00243377685547, | |
| "learning_rate": 1.698238993710692e-06, | |
| "loss": 2.546220550537109, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.8325, | |
| "grad_norm": 28.58661460876465, | |
| "learning_rate": 1.6856603773584906e-06, | |
| "loss": 2.251160125732422, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.83375, | |
| "grad_norm": 24.616119384765625, | |
| "learning_rate": 1.6730817610062894e-06, | |
| "loss": 1.8937945556640625, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.835, | |
| "grad_norm": 0.021497901529073715, | |
| "learning_rate": 1.6605031446540882e-06, | |
| "loss": 2.2238792419433593, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.83625, | |
| "grad_norm": 20.581586837768555, | |
| "learning_rate": 1.647924528301887e-06, | |
| "loss": 2.025327606201172, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.8375, | |
| "grad_norm": 29.323177337646484, | |
| "learning_rate": 1.6353459119496855e-06, | |
| "loss": 2.465900115966797, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.83875, | |
| "grad_norm": 39.928653717041016, | |
| "learning_rate": 1.6227672955974843e-06, | |
| "loss": 2.1216752624511717, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.043776318430900574, | |
| "learning_rate": 1.6101886792452831e-06, | |
| "loss": 2.0655088806152344, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.84125, | |
| "grad_norm": 25.438045501708984, | |
| "learning_rate": 1.597610062893082e-06, | |
| "loss": 2.087451171875, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.8425, | |
| "grad_norm": 27.504056930541992, | |
| "learning_rate": 1.5850314465408805e-06, | |
| "loss": 2.1888687133789064, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.84375, | |
| "grad_norm": 3.616384744644165, | |
| "learning_rate": 1.5724528301886793e-06, | |
| "loss": 2.11557861328125, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.845, | |
| "grad_norm": 0.0108040077611804, | |
| "learning_rate": 1.5598742138364781e-06, | |
| "loss": 1.8791226196289061, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.84625, | |
| "grad_norm": 24.401247024536133, | |
| "learning_rate": 1.547295597484277e-06, | |
| "loss": 2.1741673278808595, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.8475, | |
| "grad_norm": 30.91659164428711, | |
| "learning_rate": 1.5347169811320755e-06, | |
| "loss": 2.1437338256835936, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.84875, | |
| "grad_norm": 0.09920002520084381, | |
| "learning_rate": 1.5221383647798743e-06, | |
| "loss": 2.361597900390625, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.24567686021327972, | |
| "learning_rate": 1.509559748427673e-06, | |
| "loss": 2.162798309326172, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.85125, | |
| "grad_norm": 30.701915740966797, | |
| "learning_rate": 1.4969811320754719e-06, | |
| "loss": 2.3576284790039064, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.8525, | |
| "grad_norm": 27.654489517211914, | |
| "learning_rate": 1.4844025157232705e-06, | |
| "loss": 2.233973083496094, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.85375, | |
| "grad_norm": 3.693110227584839, | |
| "learning_rate": 1.4718238993710693e-06, | |
| "loss": 2.0266494750976562, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.855, | |
| "grad_norm": 24.848302841186523, | |
| "learning_rate": 1.459245283018868e-06, | |
| "loss": 2.010597839355469, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.85625, | |
| "grad_norm": 29.43360137939453, | |
| "learning_rate": 1.4466666666666669e-06, | |
| "loss": 2.4028634643554687, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.8575, | |
| "grad_norm": 19.056543350219727, | |
| "learning_rate": 1.4340880503144654e-06, | |
| "loss": 2.272985076904297, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.85875, | |
| "grad_norm": 1.6909524202346802, | |
| "learning_rate": 1.4215094339622642e-06, | |
| "loss": 2.3617742919921874, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 21.4754695892334, | |
| "learning_rate": 1.408930817610063e-06, | |
| "loss": 2.270048828125, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.86125, | |
| "grad_norm": 23.546085357666016, | |
| "learning_rate": 1.3963522012578618e-06, | |
| "loss": 2.2229489135742186, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.8625, | |
| "grad_norm": 31.1220760345459, | |
| "learning_rate": 1.3837735849056604e-06, | |
| "loss": 2.3392955017089845, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.86375, | |
| "grad_norm": 0.902050256729126, | |
| "learning_rate": 1.3711949685534592e-06, | |
| "loss": 2.018695526123047, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.865, | |
| "grad_norm": 0.15184037387371063, | |
| "learning_rate": 1.358616352201258e-06, | |
| "loss": 2.03200439453125, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.86625, | |
| "grad_norm": 25.513137817382812, | |
| "learning_rate": 1.3460377358490568e-06, | |
| "loss": 2.394747619628906, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.8675, | |
| "grad_norm": 22.844642639160156, | |
| "learning_rate": 1.3334591194968554e-06, | |
| "loss": 2.346148681640625, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.86875, | |
| "grad_norm": 0.07020768523216248, | |
| "learning_rate": 1.3208805031446542e-06, | |
| "loss": 2.269253387451172, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.2192593514919281, | |
| "learning_rate": 1.308301886792453e-06, | |
| "loss": 2.37726806640625, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.87125, | |
| "grad_norm": 33.360755920410156, | |
| "learning_rate": 1.2957232704402518e-06, | |
| "loss": 2.214630584716797, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.8725, | |
| "grad_norm": 23.013324737548828, | |
| "learning_rate": 1.2831446540880504e-06, | |
| "loss": 2.24765869140625, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.87375, | |
| "grad_norm": 0.6654048562049866, | |
| "learning_rate": 1.2705660377358492e-06, | |
| "loss": 2.5640155029296876, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "grad_norm": 3.4219138622283936, | |
| "learning_rate": 1.257987421383648e-06, | |
| "loss": 2.13403564453125, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.87625, | |
| "grad_norm": 29.68235969543457, | |
| "learning_rate": 1.2454088050314468e-06, | |
| "loss": 2.29338134765625, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.8775, | |
| "grad_norm": 23.957015991210938, | |
| "learning_rate": 1.2328301886792454e-06, | |
| "loss": 2.2298233032226564, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.87875, | |
| "grad_norm": 17.787887573242188, | |
| "learning_rate": 1.2202515723270442e-06, | |
| "loss": 2.1456781005859376, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 6.154158115386963, | |
| "learning_rate": 1.207672955974843e-06, | |
| "loss": 2.085262908935547, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.88125, | |
| "grad_norm": 27.724740982055664, | |
| "learning_rate": 1.1950943396226417e-06, | |
| "loss": 2.1977059936523435, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.8825, | |
| "grad_norm": 25.416337966918945, | |
| "learning_rate": 1.1825157232704403e-06, | |
| "loss": 2.048145751953125, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.88375, | |
| "grad_norm": 30.56559181213379, | |
| "learning_rate": 1.1699371069182391e-06, | |
| "loss": 2.236173553466797, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.885, | |
| "grad_norm": 0.02905646152794361, | |
| "learning_rate": 1.157358490566038e-06, | |
| "loss": 2.1224407958984375, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.88625, | |
| "grad_norm": 22.59400749206543, | |
| "learning_rate": 1.1447798742138367e-06, | |
| "loss": 2.273807373046875, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.8875, | |
| "grad_norm": 20.05291175842285, | |
| "learning_rate": 1.1322012578616353e-06, | |
| "loss": 2.122344207763672, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.88875, | |
| "grad_norm": 0.9370853900909424, | |
| "learning_rate": 1.1196226415094341e-06, | |
| "loss": 2.123062744140625, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 33.95699691772461, | |
| "learning_rate": 1.107044025157233e-06, | |
| "loss": 2.0445166015625, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.89125, | |
| "grad_norm": 18.907957077026367, | |
| "learning_rate": 1.0944654088050317e-06, | |
| "loss": 1.96984619140625, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.8925, | |
| "grad_norm": 28.72160530090332, | |
| "learning_rate": 1.0818867924528303e-06, | |
| "loss": 2.2017242431640627, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.89375, | |
| "grad_norm": 0.1414874643087387, | |
| "learning_rate": 1.0693081761006289e-06, | |
| "loss": 2.244500732421875, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.895, | |
| "grad_norm": 9.050920486450195, | |
| "learning_rate": 1.0567295597484277e-06, | |
| "loss": 2.038321990966797, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.89625, | |
| "grad_norm": 20.498157501220703, | |
| "learning_rate": 1.0441509433962265e-06, | |
| "loss": 2.200545349121094, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.8975, | |
| "grad_norm": 28.807994842529297, | |
| "learning_rate": 1.0315723270440253e-06, | |
| "loss": 2.4168437194824217, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.89875, | |
| "grad_norm": 13.557960510253906, | |
| "learning_rate": 1.0189937106918239e-06, | |
| "loss": 1.8951638793945313, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.04173203930258751, | |
| "learning_rate": 1.0064150943396227e-06, | |
| "loss": 1.8145158386230469, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.90125, | |
| "grad_norm": 26.1096248626709, | |
| "learning_rate": 9.938364779874214e-07, | |
| "loss": 1.9488568115234375, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.9025, | |
| "grad_norm": 26.35661506652832, | |
| "learning_rate": 9.812578616352202e-07, | |
| "loss": 2.003428649902344, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.90375, | |
| "grad_norm": 0.6807858943939209, | |
| "learning_rate": 9.686792452830188e-07, | |
| "loss": 2.1505252075195314, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.905, | |
| "grad_norm": 8.778648376464844, | |
| "learning_rate": 9.561006289308176e-07, | |
| "loss": 1.9629728698730469, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.90625, | |
| "grad_norm": 21.047719955444336, | |
| "learning_rate": 9.435220125786164e-07, | |
| "loss": 2.397852020263672, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.9075, | |
| "grad_norm": 28.41111946105957, | |
| "learning_rate": 9.309433962264151e-07, | |
| "loss": 2.4121713256835937, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.90875, | |
| "grad_norm": 0.15218345820903778, | |
| "learning_rate": 9.183647798742139e-07, | |
| "loss": 2.357157897949219, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.017881672829389572, | |
| "learning_rate": 9.057861635220126e-07, | |
| "loss": 1.9150712585449219, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.91125, | |
| "grad_norm": 25.772994995117188, | |
| "learning_rate": 8.932075471698114e-07, | |
| "loss": 2.163759460449219, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.9125, | |
| "grad_norm": 23.19663429260254, | |
| "learning_rate": 8.806289308176101e-07, | |
| "loss": 2.012795867919922, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.91375, | |
| "grad_norm": 4.303622722625732, | |
| "learning_rate": 8.680503144654089e-07, | |
| "loss": 2.200050048828125, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.915, | |
| "grad_norm": 0.030221115797758102, | |
| "learning_rate": 8.554716981132076e-07, | |
| "loss": 2.2937960815429688, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.91625, | |
| "grad_norm": 26.862483978271484, | |
| "learning_rate": 8.428930817610064e-07, | |
| "loss": 2.150227508544922, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.9175, | |
| "grad_norm": 21.228776931762695, | |
| "learning_rate": 8.303144654088051e-07, | |
| "loss": 2.2184078979492186, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.91875, | |
| "grad_norm": 4.426553726196289, | |
| "learning_rate": 8.177358490566039e-07, | |
| "loss": 2.0852456665039063, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 5.594974994659424, | |
| "learning_rate": 8.051572327044026e-07, | |
| "loss": 2.157823944091797, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.92125, | |
| "grad_norm": 27.547351837158203, | |
| "learning_rate": 7.925786163522014e-07, | |
| "loss": 2.1868157958984376, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.9225, | |
| "grad_norm": 29.176279067993164, | |
| "learning_rate": 7.8e-07, | |
| "loss": 2.2395770263671877, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.92375, | |
| "grad_norm": 0.281236857175827, | |
| "learning_rate": 7.674213836477988e-07, | |
| "loss": 2.0369967651367187, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.925, | |
| "grad_norm": 16.027692794799805, | |
| "learning_rate": 7.548427672955975e-07, | |
| "loss": 2.113655853271484, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.92625, | |
| "grad_norm": 31.675695419311523, | |
| "learning_rate": 7.422641509433963e-07, | |
| "loss": 2.0642041015625, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.9275, | |
| "grad_norm": 36.27760696411133, | |
| "learning_rate": 7.29685534591195e-07, | |
| "loss": 1.925348358154297, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.92875, | |
| "grad_norm": 0.32410475611686707, | |
| "learning_rate": 7.171069182389938e-07, | |
| "loss": 1.969832763671875, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 0.028427617624402046, | |
| "learning_rate": 7.045283018867925e-07, | |
| "loss": 1.9005357360839843, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.93125, | |
| "grad_norm": 34.29441833496094, | |
| "learning_rate": 6.919496855345913e-07, | |
| "loss": 2.174887390136719, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.9325, | |
| "grad_norm": 11.873321533203125, | |
| "learning_rate": 6.7937106918239e-07, | |
| "loss": 2.1516876220703125, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.93375, | |
| "grad_norm": 3.312832832336426, | |
| "learning_rate": 6.667924528301888e-07, | |
| "loss": 2.204561767578125, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.935, | |
| "grad_norm": 0.09160123765468597, | |
| "learning_rate": 6.542138364779875e-07, | |
| "loss": 2.13657958984375, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.93625, | |
| "grad_norm": 23.657917022705078, | |
| "learning_rate": 6.416352201257863e-07, | |
| "loss": 2.3265130615234373, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 21.326000213623047, | |
| "learning_rate": 6.29056603773585e-07, | |
| "loss": 2.133411865234375, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.93875, | |
| "grad_norm": 0.5653764009475708, | |
| "learning_rate": 6.164779874213837e-07, | |
| "loss": 1.9549205017089843, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 20.32880210876465, | |
| "learning_rate": 6.038993710691824e-07, | |
| "loss": 1.80646240234375, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.94125, | |
| "grad_norm": 24.76996421813965, | |
| "learning_rate": 5.913207547169812e-07, | |
| "loss": 2.3081170654296876, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.9425, | |
| "grad_norm": 26.569751739501953, | |
| "learning_rate": 5.787421383647799e-07, | |
| "loss": 2.0019149780273438, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.94375, | |
| "grad_norm": 0.3244832158088684, | |
| "learning_rate": 5.661635220125787e-07, | |
| "loss": 2.1780990600585937, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.945, | |
| "grad_norm": 0.22316910326480865, | |
| "learning_rate": 5.535849056603773e-07, | |
| "loss": 2.0430670166015625, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.94625, | |
| "grad_norm": 27.824186325073242, | |
| "learning_rate": 5.410062893081761e-07, | |
| "loss": 2.4939804077148438, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.9475, | |
| "grad_norm": 31.06242561340332, | |
| "learning_rate": 5.284276729559748e-07, | |
| "loss": 2.15638916015625, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.94875, | |
| "grad_norm": 3.928506851196289, | |
| "learning_rate": 5.158490566037736e-07, | |
| "loss": 2.05089111328125, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 11.446708679199219, | |
| "learning_rate": 5.032704402515723e-07, | |
| "loss": 2.08630615234375, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.95125, | |
| "grad_norm": 26.42768096923828, | |
| "learning_rate": 4.906918238993711e-07, | |
| "loss": 2.343414764404297, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.9525, | |
| "grad_norm": 28.124576568603516, | |
| "learning_rate": 4.781132075471698e-07, | |
| "loss": 2.227467498779297, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.95375, | |
| "grad_norm": 4.6327290534973145, | |
| "learning_rate": 4.655345911949686e-07, | |
| "loss": 2.305023193359375, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.955, | |
| "grad_norm": 0.009622328914701939, | |
| "learning_rate": 4.5295597484276735e-07, | |
| "loss": 1.961500701904297, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.95625, | |
| "grad_norm": 24.499088287353516, | |
| "learning_rate": 4.403773584905661e-07, | |
| "loss": 2.338931121826172, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.9575, | |
| "grad_norm": 32.421878814697266, | |
| "learning_rate": 4.2779874213836484e-07, | |
| "loss": 2.53737548828125, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.95875, | |
| "grad_norm": 0.04140196368098259, | |
| "learning_rate": 4.152201257861636e-07, | |
| "loss": 2.1324703979492186, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.056152064353227615, | |
| "learning_rate": 4.0264150943396233e-07, | |
| "loss": 2.1852513122558594, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.96125, | |
| "grad_norm": 28.970626831054688, | |
| "learning_rate": 3.9006289308176107e-07, | |
| "loss": 2.2562815856933596, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.9625, | |
| "grad_norm": 28.90065574645996, | |
| "learning_rate": 3.774842767295598e-07, | |
| "loss": 2.4266934204101562, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.96375, | |
| "grad_norm": 0.12267394363880157, | |
| "learning_rate": 3.6490566037735856e-07, | |
| "loss": 2.2760604858398437, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.965, | |
| "grad_norm": 55.57836151123047, | |
| "learning_rate": 3.523270440251573e-07, | |
| "loss": 2.051964111328125, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.96625, | |
| "grad_norm": 27.030414581298828, | |
| "learning_rate": 3.3974842767295605e-07, | |
| "loss": 2.140052490234375, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.9675, | |
| "grad_norm": 30.74704933166504, | |
| "learning_rate": 3.271698113207548e-07, | |
| "loss": 2.362886657714844, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.96875, | |
| "grad_norm": 0.04671184718608856, | |
| "learning_rate": 3.1459119496855354e-07, | |
| "loss": 2.0699423217773436, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 0.007876750081777573, | |
| "learning_rate": 3.0201257861635223e-07, | |
| "loss": 1.9259161376953124, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.97125, | |
| "grad_norm": 29.24554443359375, | |
| "learning_rate": 2.89433962264151e-07, | |
| "loss": 2.4339639282226564, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.9725, | |
| "grad_norm": 29.591955184936523, | |
| "learning_rate": 2.768553459119497e-07, | |
| "loss": 2.3606378173828126, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.97375, | |
| "grad_norm": 12.657386779785156, | |
| "learning_rate": 2.6427672955974846e-07, | |
| "loss": 2.428155822753906, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.975, | |
| "grad_norm": 0.024583876132965088, | |
| "learning_rate": 2.516981132075472e-07, | |
| "loss": 2.105915985107422, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.97625, | |
| "grad_norm": 21.956266403198242, | |
| "learning_rate": 2.3911949685534595e-07, | |
| "loss": 2.1736614990234373, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.9775, | |
| "grad_norm": 25.43096923828125, | |
| "learning_rate": 2.2654088050314464e-07, | |
| "loss": 2.178233337402344, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.97875, | |
| "grad_norm": 25.53718376159668, | |
| "learning_rate": 2.139622641509434e-07, | |
| "loss": 2.3168376159667967, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.009442003443837166, | |
| "learning_rate": 2.0138364779874213e-07, | |
| "loss": 2.333406677246094, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.98125, | |
| "grad_norm": 33.688865661621094, | |
| "learning_rate": 1.8880503144654088e-07, | |
| "loss": 2.119158935546875, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.9825, | |
| "grad_norm": 19.21613883972168, | |
| "learning_rate": 1.7622641509433962e-07, | |
| "loss": 2.3394792175292967, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.98375, | |
| "grad_norm": 10.80718994140625, | |
| "learning_rate": 1.6364779874213836e-07, | |
| "loss": 2.2544096374511717, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.985, | |
| "grad_norm": 0.019625332206487656, | |
| "learning_rate": 1.510691823899371e-07, | |
| "loss": 2.086898193359375, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.98625, | |
| "grad_norm": 23.536130905151367, | |
| "learning_rate": 1.3849056603773585e-07, | |
| "loss": 2.2242425537109374, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.9875, | |
| "grad_norm": 28.006027221679688, | |
| "learning_rate": 1.259119496855346e-07, | |
| "loss": 2.1889492797851564, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.98875, | |
| "grad_norm": 0.1388503462076187, | |
| "learning_rate": 1.1333333333333336e-07, | |
| "loss": 2.131824188232422, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.1504867821931839, | |
| "learning_rate": 1.0075471698113207e-07, | |
| "loss": 2.0242990112304686, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.99125, | |
| "grad_norm": 26.41069793701172, | |
| "learning_rate": 8.817610062893082e-08, | |
| "loss": 2.210938720703125, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.9925, | |
| "grad_norm": 17.642717361450195, | |
| "learning_rate": 7.559748427672956e-08, | |
| "loss": 1.8704595947265625, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.99375, | |
| "grad_norm": 0.1886824667453766, | |
| "learning_rate": 6.30188679245283e-08, | |
| "loss": 2.071743621826172, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.995, | |
| "grad_norm": 0.07498449087142944, | |
| "learning_rate": 5.0440251572327044e-08, | |
| "loss": 2.117955780029297, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.99625, | |
| "grad_norm": 22.661165237426758, | |
| "learning_rate": 3.786163522012579e-08, | |
| "loss": 2.199887542724609, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.9975, | |
| "grad_norm": 22.61822509765625, | |
| "learning_rate": 2.5283018867924533e-08, | |
| "loss": 2.135641784667969, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.99875, | |
| "grad_norm": 0.6577712297439575, | |
| "learning_rate": 1.2704402515723271e-08, | |
| "loss": 1.8882171630859375, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.028230739757418633, | |
| "learning_rate": 1.257861635220126e-10, | |
| "loss": 2.1253326416015623, | |
| "step": 80000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 80000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.228147083365581e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |