| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9999715839665373, | |
| "eval_steps": 500, | |
| "global_step": 21994, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.977266527234701e-05, | |
| "loss": 2.6296, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 9.954533054469402e-05, | |
| "loss": 2.6156, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.931799581704102e-05, | |
| "loss": 2.6035, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.909066108938801e-05, | |
| "loss": 2.5399, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.886332636173502e-05, | |
| "loss": 2.5857, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.863599163408202e-05, | |
| "loss": 2.6078, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.840865690642903e-05, | |
| "loss": 2.5931, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.818132217877604e-05, | |
| "loss": 2.5919, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.795398745112304e-05, | |
| "loss": 2.59, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.772665272347005e-05, | |
| "loss": 2.605, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.749931799581704e-05, | |
| "loss": 2.6026, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.727198326816404e-05, | |
| "loss": 2.5839, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.704464854051105e-05, | |
| "loss": 2.5862, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.681731381285806e-05, | |
| "loss": 2.609, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 9.658997908520506e-05, | |
| "loss": 2.5759, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.636264435755207e-05, | |
| "loss": 2.6046, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.613530962989907e-05, | |
| "loss": 2.5811, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.590797490224606e-05, | |
| "loss": 2.5797, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.568064017459307e-05, | |
| "loss": 2.5867, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.545330544694008e-05, | |
| "loss": 2.5927, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.522597071928708e-05, | |
| "loss": 2.568, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.499863599163409e-05, | |
| "loss": 2.6024, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.477130126398109e-05, | |
| "loss": 2.5936, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.45439665363281e-05, | |
| "loss": 2.605, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.431663180867509e-05, | |
| "loss": 2.5775, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.40892970810221e-05, | |
| "loss": 2.5752, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.38619623533691e-05, | |
| "loss": 2.5679, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.36346276257161e-05, | |
| "loss": 2.5856, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.340729289806311e-05, | |
| "loss": 2.5787, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.317995817041012e-05, | |
| "loss": 2.5875, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.295262344275712e-05, | |
| "loss": 2.5631, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.272528871510412e-05, | |
| "loss": 2.583, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.249795398745112e-05, | |
| "loss": 2.5609, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.227061925979813e-05, | |
| "loss": 2.587, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.204328453214513e-05, | |
| "loss": 2.5555, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.181594980449214e-05, | |
| "loss": 2.5488, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.158861507683914e-05, | |
| "loss": 2.5554, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.136128034918615e-05, | |
| "loss": 2.5408, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.113394562153314e-05, | |
| "loss": 2.582, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.090661089388015e-05, | |
| "loss": 2.5533, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.067927616622715e-05, | |
| "loss": 2.5432, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.045194143857416e-05, | |
| "loss": 2.5867, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.022460671092116e-05, | |
| "loss": 2.5343, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 8.999727198326817e-05, | |
| "loss": 2.585, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 8.976993725561517e-05, | |
| "loss": 2.5679, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 8.954260252796217e-05, | |
| "loss": 2.5515, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 8.931526780030917e-05, | |
| "loss": 2.5713, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 8.908793307265618e-05, | |
| "loss": 2.5587, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 8.886059834500318e-05, | |
| "loss": 2.5774, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 8.863326361735019e-05, | |
| "loss": 2.551, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.84059288896972e-05, | |
| "loss": 2.5685, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.81785941620442e-05, | |
| "loss": 2.5707, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.795125943439119e-05, | |
| "loss": 2.568, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.77239247067382e-05, | |
| "loss": 2.5536, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.74965899790852e-05, | |
| "loss": 2.5406, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.726925525143221e-05, | |
| "loss": 2.5572, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.704192052377921e-05, | |
| "loss": 2.5749, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.681458579612622e-05, | |
| "loss": 2.5607, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.658725106847322e-05, | |
| "loss": 2.5612, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.635991634082023e-05, | |
| "loss": 2.5626, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.613258161316724e-05, | |
| "loss": 2.5677, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.590524688551423e-05, | |
| "loss": 2.5072, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 8.567791215786123e-05, | |
| "loss": 2.562, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.545057743020824e-05, | |
| "loss": 2.5786, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.522324270255524e-05, | |
| "loss": 2.5388, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.499590797490225e-05, | |
| "loss": 2.538, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.476857324724926e-05, | |
| "loss": 2.5448, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.454123851959626e-05, | |
| "loss": 2.5306, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.431390379194327e-05, | |
| "loss": 2.5647, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.408656906429027e-05, | |
| "loss": 2.5386, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.385923433663728e-05, | |
| "loss": 2.5376, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.363189960898427e-05, | |
| "loss": 2.535, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.340456488133128e-05, | |
| "loss": 2.5476, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.317723015367828e-05, | |
| "loss": 2.5462, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.294989542602529e-05, | |
| "loss": 2.5795, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 8.272256069837229e-05, | |
| "loss": 2.5425, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.24952259707193e-05, | |
| "loss": 2.5663, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.22678912430663e-05, | |
| "loss": 2.5376, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.204055651541331e-05, | |
| "loss": 2.5695, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.18132217877603e-05, | |
| "loss": 2.5112, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 8.15858870601073e-05, | |
| "loss": 2.5255, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.135855233245431e-05, | |
| "loss": 2.5448, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.113121760480132e-05, | |
| "loss": 2.5483, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.090388287714832e-05, | |
| "loss": 2.5319, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 8.067654814949533e-05, | |
| "loss": 2.5655, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 8.044921342184233e-05, | |
| "loss": 2.5399, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 8.022187869418933e-05, | |
| "loss": 2.5485, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.999454396653633e-05, | |
| "loss": 2.5196, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.976720923888334e-05, | |
| "loss": 2.5554, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 7.953987451123034e-05, | |
| "loss": 2.583, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.931253978357735e-05, | |
| "loss": 2.5643, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.908520505592435e-05, | |
| "loss": 2.5345, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.885787032827136e-05, | |
| "loss": 2.5393, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 7.863053560061835e-05, | |
| "loss": 2.5349, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.840320087296536e-05, | |
| "loss": 2.54, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.817586614531236e-05, | |
| "loss": 2.5526, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.794853141765937e-05, | |
| "loss": 2.5419, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 7.772119669000637e-05, | |
| "loss": 2.5122, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.749386196235338e-05, | |
| "loss": 2.5247, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.726652723470039e-05, | |
| "loss": 2.5516, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.703919250704738e-05, | |
| "loss": 2.5321, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.681185777939438e-05, | |
| "loss": 2.5453, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 7.658452305174139e-05, | |
| "loss": 2.5453, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.63571883240884e-05, | |
| "loss": 2.5522, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.61298535964354e-05, | |
| "loss": 2.5417, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.59025188687824e-05, | |
| "loss": 2.5241, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 7.567518414112941e-05, | |
| "loss": 2.5574, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.54478494134764e-05, | |
| "loss": 2.5127, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.522051468582341e-05, | |
| "loss": 2.5346, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.499317995817041e-05, | |
| "loss": 2.5164, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.476584523051742e-05, | |
| "loss": 2.5571, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 7.453851050286442e-05, | |
| "loss": 2.5455, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.431117577521143e-05, | |
| "loss": 2.544, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.408384104755844e-05, | |
| "loss": 2.5271, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.385650631990543e-05, | |
| "loss": 2.525, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 7.362917159225243e-05, | |
| "loss": 2.5278, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.340183686459944e-05, | |
| "loss": 2.5161, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.317450213694644e-05, | |
| "loss": 2.5296, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.294716740929345e-05, | |
| "loss": 2.5454, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 7.271983268164046e-05, | |
| "loss": 2.5319, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.249249795398746e-05, | |
| "loss": 2.5282, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.226516322633445e-05, | |
| "loss": 2.5359, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.203782849868146e-05, | |
| "loss": 2.494, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.181049377102846e-05, | |
| "loss": 2.5289, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 7.158315904337547e-05, | |
| "loss": 2.4985, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 7.135582431572248e-05, | |
| "loss": 2.5156, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 7.112848958806948e-05, | |
| "loss": 2.53, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 7.090115486041649e-05, | |
| "loss": 2.5157, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 7.067382013276348e-05, | |
| "loss": 2.5303, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 7.044648540511048e-05, | |
| "loss": 2.5286, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 7.021915067745749e-05, | |
| "loss": 2.5039, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 6.99918159498045e-05, | |
| "loss": 2.5161, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 6.97644812221515e-05, | |
| "loss": 2.5105, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 6.95371464944985e-05, | |
| "loss": 2.5151, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 6.930981176684551e-05, | |
| "loss": 2.5425, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 6.90824770391925e-05, | |
| "loss": 2.5357, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 6.885514231153951e-05, | |
| "loss": 2.4989, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 6.862780758388652e-05, | |
| "loss": 2.5413, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 6.840047285623352e-05, | |
| "loss": 2.4909, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 6.817313812858053e-05, | |
| "loss": 2.5177, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 6.794580340092753e-05, | |
| "loss": 2.5107, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 6.771846867327454e-05, | |
| "loss": 2.5343, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.749113394562153e-05, | |
| "loss": 2.5247, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.726379921796854e-05, | |
| "loss": 2.5202, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.703646449031554e-05, | |
| "loss": 2.5156, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.680912976266255e-05, | |
| "loss": 2.5431, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 6.658179503500955e-05, | |
| "loss": 2.5221, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 6.635446030735656e-05, | |
| "loss": 2.516, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 6.612712557970356e-05, | |
| "loss": 2.5297, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 6.589979085205056e-05, | |
| "loss": 2.5052, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 6.567245612439756e-05, | |
| "loss": 2.4981, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.544512139674457e-05, | |
| "loss": 2.5292, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.521778666909157e-05, | |
| "loss": 2.4853, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.499045194143858e-05, | |
| "loss": 2.5181, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.476311721378558e-05, | |
| "loss": 2.5599, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 6.453578248613259e-05, | |
| "loss": 2.5093, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.430844775847958e-05, | |
| "loss": 2.5449, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.408111303082659e-05, | |
| "loss": 2.5013, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.385377830317359e-05, | |
| "loss": 2.5366, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 6.36264435755206e-05, | |
| "loss": 2.49, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.33991088478676e-05, | |
| "loss": 2.5496, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.317177412021461e-05, | |
| "loss": 2.5071, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.294443939256161e-05, | |
| "loss": 2.5374, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 6.27171046649086e-05, | |
| "loss": 2.5284, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.248976993725561e-05, | |
| "loss": 2.4792, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.226243520960262e-05, | |
| "loss": 2.5061, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.203510048194962e-05, | |
| "loss": 2.5183, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.180776575429663e-05, | |
| "loss": 2.4886, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 6.158043102664363e-05, | |
| "loss": 2.5116, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 6.135309629899064e-05, | |
| "loss": 2.5364, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 6.112576157133763e-05, | |
| "loss": 2.5205, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 6.0898426843684644e-05, | |
| "loss": 2.5125, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 6.067109211603165e-05, | |
| "loss": 2.5089, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.0443757388378655e-05, | |
| "loss": 2.5088, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.021642266072566e-05, | |
| "loss": 2.5264, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5.9989087933072666e-05, | |
| "loss": 2.5045, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5.976175320541967e-05, | |
| "loss": 2.5085, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 5.9534418477766663e-05, | |
| "loss": 2.4801, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 5.930708375011367e-05, | |
| "loss": 2.5017, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 5.9079749022460675e-05, | |
| "loss": 2.5109, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 5.885241429480768e-05, | |
| "loss": 2.5052, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 5.8625079567154686e-05, | |
| "loss": 2.5139, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.839774483950169e-05, | |
| "loss": 2.4941, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.817041011184868e-05, | |
| "loss": 2.5137, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.794307538419569e-05, | |
| "loss": 2.5101, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5.7715740656542694e-05, | |
| "loss": 2.5009, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.74884059288897e-05, | |
| "loss": 2.5395, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.7261071201236706e-05, | |
| "loss": 2.5108, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.703373647358371e-05, | |
| "loss": 2.5238, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.680640174593072e-05, | |
| "loss": 2.5037, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 5.657906701827771e-05, | |
| "loss": 2.5038, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5.6351732290624714e-05, | |
| "loss": 2.5324, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5.612439756297172e-05, | |
| "loss": 2.5054, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5.5897062835318725e-05, | |
| "loss": 2.5119, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5.566972810766573e-05, | |
| "loss": 2.5214, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.5442393380012737e-05, | |
| "loss": 2.5404, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.521505865235974e-05, | |
| "loss": 2.516, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.4987723924706734e-05, | |
| "loss": 2.5166, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.476038919705374e-05, | |
| "loss": 2.4983, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 5.4533054469400745e-05, | |
| "loss": 2.5101, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5.430571974174775e-05, | |
| "loss": 2.4998, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5.4078385014094756e-05, | |
| "loss": 2.5116, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5.385105028644176e-05, | |
| "loss": 2.5136, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5.362371555878877e-05, | |
| "loss": 2.5313, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.339638083113576e-05, | |
| "loss": 2.4989, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.3169046103482765e-05, | |
| "loss": 2.5062, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.294171137582977e-05, | |
| "loss": 2.531, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.2714376648176776e-05, | |
| "loss": 2.4975, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.248704192052378e-05, | |
| "loss": 2.4922, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.225970719287079e-05, | |
| "loss": 2.5128, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.203237246521779e-05, | |
| "loss": 2.504, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.1805037737564785e-05, | |
| "loss": 2.5093, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.157770300991179e-05, | |
| "loss": 2.491, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.1350368282258796e-05, | |
| "loss": 2.5008, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.11230335546058e-05, | |
| "loss": 2.5103, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.089569882695281e-05, | |
| "loss": 2.5167, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.066836409929981e-05, | |
| "loss": 2.5062, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.044102937164682e-05, | |
| "loss": 2.5135, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.021369464399382e-05, | |
| "loss": 2.489, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.998635991634082e-05, | |
| "loss": 2.5071, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.975902518868782e-05, | |
| "loss": 2.5181, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.953169046103483e-05, | |
| "loss": 2.4997, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.930435573338183e-05, | |
| "loss": 2.5127, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.907702100572884e-05, | |
| "loss": 2.4906, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.884968627807584e-05, | |
| "loss": 2.5129, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.862235155042284e-05, | |
| "loss": 2.5015, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.839501682276985e-05, | |
| "loss": 2.5049, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.8167682095116854e-05, | |
| "loss": 2.4971, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.794034736746386e-05, | |
| "loss": 2.5177, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.771301263981086e-05, | |
| "loss": 2.5056, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.7485677912157864e-05, | |
| "loss": 2.4831, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.725834318450487e-05, | |
| "loss": 2.4972, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.7031008456851875e-05, | |
| "loss": 2.5103, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.680367372919888e-05, | |
| "loss": 2.5083, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.657633900154588e-05, | |
| "loss": 2.5027, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.6349004273892885e-05, | |
| "loss": 2.4846, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.612166954623989e-05, | |
| "loss": 2.5193, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.589433481858689e-05, | |
| "loss": 2.5123, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.5667000090933895e-05, | |
| "loss": 2.5219, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.54396653632809e-05, | |
| "loss": 2.4979, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.5212330635627906e-05, | |
| "loss": 2.4849, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.4984995907974905e-05, | |
| "loss": 2.4783, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.475766118032191e-05, | |
| "loss": 2.5035, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.4530326452668916e-05, | |
| "loss": 2.4879, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.4302991725015914e-05, | |
| "loss": 2.4972, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.407565699736292e-05, | |
| "loss": 2.5043, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.3848322269709926e-05, | |
| "loss": 2.491, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.362098754205693e-05, | |
| "loss": 2.5032, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.339365281440393e-05, | |
| "loss": 2.5227, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.3166318086750935e-05, | |
| "loss": 2.5245, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.293898335909794e-05, | |
| "loss": 2.4927, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.271164863144494e-05, | |
| "loss": 2.5002, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.2484313903791945e-05, | |
| "loss": 2.4997, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.225697917613895e-05, | |
| "loss": 2.4939, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.2029644448485957e-05, | |
| "loss": 2.5223, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.1802309720832955e-05, | |
| "loss": 2.4963, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.157497499317996e-05, | |
| "loss": 2.5334, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.1347640265526966e-05, | |
| "loss": 2.5085, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.1120305537873965e-05, | |
| "loss": 2.4901, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.089297081022097e-05, | |
| "loss": 2.5268, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.0665636082567976e-05, | |
| "loss": 2.5237, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.043830135491498e-05, | |
| "loss": 2.4928, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.021096662726198e-05, | |
| "loss": 2.4852, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.9983631899608986e-05, | |
| "loss": 2.5139, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.975629717195599e-05, | |
| "loss": 2.5336, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.952896244430299e-05, | |
| "loss": 2.527, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9301627716649996e-05, | |
| "loss": 2.4877, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9074292988997e-05, | |
| "loss": 2.4992, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.884695826134401e-05, | |
| "loss": 2.4909, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.8619623533691006e-05, | |
| "loss": 2.4983, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.839228880603801e-05, | |
| "loss": 2.5146, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.816495407838502e-05, | |
| "loss": 2.5058, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.7937619350732016e-05, | |
| "loss": 2.4943, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.771028462307902e-05, | |
| "loss": 2.5002, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.748294989542603e-05, | |
| "loss": 2.4918, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.7255615167773026e-05, | |
| "loss": 2.4915, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.702828044012003e-05, | |
| "loss": 2.5089, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.680094571246704e-05, | |
| "loss": 2.5048, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.657361098481404e-05, | |
| "loss": 2.5108, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.634627625716104e-05, | |
| "loss": 2.4959, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.611894152950805e-05, | |
| "loss": 2.5154, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.589160680185505e-05, | |
| "loss": 2.5092, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.566427207420205e-05, | |
| "loss": 2.5265, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.543693734654906e-05, | |
| "loss": 2.4678, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.520960261889606e-05, | |
| "loss": 2.5236, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.498226789124307e-05, | |
| "loss": 2.5156, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.475493316359007e-05, | |
| "loss": 2.508, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.452759843593707e-05, | |
| "loss": 2.4949, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.430026370828408e-05, | |
| "loss": 2.4898, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.4072928980631084e-05, | |
| "loss": 2.5006, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.384559425297808e-05, | |
| "loss": 2.4878, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.361825952532509e-05, | |
| "loss": 2.5073, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.3390924797672094e-05, | |
| "loss": 2.5176, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.31635900700191e-05, | |
| "loss": 2.5078, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.2936255342366105e-05, | |
| "loss": 2.5101, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.2708920614713103e-05, | |
| "loss": 2.5076, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.248158588706011e-05, | |
| "loss": 2.4916, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.2254251159407115e-05, | |
| "loss": 2.4919, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.202691643175412e-05, | |
| "loss": 2.5042, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.1799581704101126e-05, | |
| "loss": 2.5191, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.1572246976448124e-05, | |
| "loss": 2.5034, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.134491224879513e-05, | |
| "loss": 2.4878, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.1117577521142136e-05, | |
| "loss": 2.5072, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.0890242793489134e-05, | |
| "loss": 2.506, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.066290806583614e-05, | |
| "loss": 2.4885, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.0435573338183142e-05, | |
| "loss": 2.488, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.0208238610530148e-05, | |
| "loss": 2.4939, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.998090388287715e-05, | |
| "loss": 2.5397, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.9753569155224152e-05, | |
| "loss": 2.5131, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.9526234427571158e-05, | |
| "loss": 2.5287, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.929889969991816e-05, | |
| "loss": 2.4852, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.9071564972265165e-05, | |
| "loss": 2.4941, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.884423024461217e-05, | |
| "loss": 2.508, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.8616895516959173e-05, | |
| "loss": 2.5011, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.8389560789306175e-05, | |
| "loss": 2.5029, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.816222606165318e-05, | |
| "loss": 2.4956, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.7934891334000186e-05, | |
| "loss": 2.4998, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.7707556606347185e-05, | |
| "loss": 2.4954, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.748022187869419e-05, | |
| "loss": 2.5171, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.7252887151041196e-05, | |
| "loss": 2.476, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.7025552423388202e-05, | |
| "loss": 2.506, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.67982176957352e-05, | |
| "loss": 2.5201, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.6570882968082206e-05, | |
| "loss": 2.5205, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.6343548240429212e-05, | |
| "loss": 2.4971, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.611621351277621e-05, | |
| "loss": 2.5135, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.5888878785123216e-05, | |
| "loss": 2.4894, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.5661544057470222e-05, | |
| "loss": 2.5127, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.5434209329817227e-05, | |
| "loss": 2.4999, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.5206874602164226e-05, | |
| "loss": 2.5048, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.4979539874511232e-05, | |
| "loss": 2.5208, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.4752205146858234e-05, | |
| "loss": 2.5155, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 2.452487041920524e-05, | |
| "loss": 2.5196, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.429753569155224e-05, | |
| "loss": 2.5205, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.4070200963899247e-05, | |
| "loss": 2.5083, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.384286623624625e-05, | |
| "loss": 2.506, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.361553150859325e-05, | |
| "loss": 2.5251, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.3388196780940257e-05, | |
| "loss": 2.5124, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.316086205328726e-05, | |
| "loss": 2.4869, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.2933527325634265e-05, | |
| "loss": 2.5066, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.2706192597981267e-05, | |
| "loss": 2.4888, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.2478857870328273e-05, | |
| "loss": 2.5086, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.2251523142675275e-05, | |
| "loss": 2.5449, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.202418841502228e-05, | |
| "loss": 2.5186, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.1796853687369283e-05, | |
| "loss": 2.482, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.1569518959716288e-05, | |
| "loss": 2.4895, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.1342184232063294e-05, | |
| "loss": 2.4988, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.1114849504410296e-05, | |
| "loss": 2.5107, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.0887514776757298e-05, | |
| "loss": 2.5257, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.0660180049104304e-05, | |
| "loss": 2.5139, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0432845321451306e-05, | |
| "loss": 2.5364, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.020551059379831e-05, | |
| "loss": 2.5242, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9978175866145313e-05, | |
| "loss": 2.482, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.975084113849232e-05, | |
| "loss": 2.4981, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.952350641083932e-05, | |
| "loss": 2.5049, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.9296171683186323e-05, | |
| "loss": 2.5089, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.906883695553333e-05, | |
| "loss": 2.4937, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.884150222788033e-05, | |
| "loss": 2.4983, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8614167500227337e-05, | |
| "loss": 2.5152, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.838683277257434e-05, | |
| "loss": 2.5198, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.8159498044921344e-05, | |
| "loss": 2.5108, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.7932163317268347e-05, | |
| "loss": 2.5362, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.770482858961535e-05, | |
| "loss": 2.5186, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7477493861962354e-05, | |
| "loss": 2.5331, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7250159134309357e-05, | |
| "loss": 2.506, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.7022824406656362e-05, | |
| "loss": 2.4932, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.6795489679003364e-05, | |
| "loss": 2.4975, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.656815495135037e-05, | |
| "loss": 2.4996, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6340820223697372e-05, | |
| "loss": 2.4987, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6113485496044374e-05, | |
| "loss": 2.5013, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.588615076839138e-05, | |
| "loss": 2.4971, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.5658816040738382e-05, | |
| "loss": 2.5349, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.5431481313085388e-05, | |
| "loss": 2.5176, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.5204146585432391e-05, | |
| "loss": 2.4829, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.4976811857779397e-05, | |
| "loss": 2.5258, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.47494771301264e-05, | |
| "loss": 2.5232, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.4522142402473401e-05, | |
| "loss": 2.5032, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.4294807674820407e-05, | |
| "loss": 2.5197, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.4067472947167409e-05, | |
| "loss": 2.5035, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.3840138219514415e-05, | |
| "loss": 2.5016, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.3612803491861417e-05, | |
| "loss": 2.5291, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.3385468764208419e-05, | |
| "loss": 2.5092, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.3158134036555425e-05, | |
| "loss": 2.4956, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.2930799308902428e-05, | |
| "loss": 2.4947, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.2703464581249432e-05, | |
| "loss": 2.519, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.2476129853596436e-05, | |
| "loss": 2.5452, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.224879512594344e-05, | |
| "loss": 2.5248, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.2021460398290444e-05, | |
| "loss": 2.5179, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.1794125670637448e-05, | |
| "loss": 2.5047, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 1.1566790942984452e-05, | |
| "loss": 2.5084, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.1339456215331456e-05, | |
| "loss": 2.5044, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.1112121487678458e-05, | |
| "loss": 2.4969, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.0884786760025462e-05, | |
| "loss": 2.5069, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.0657452032372465e-05, | |
| "loss": 2.4756, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.043011730471947e-05, | |
| "loss": 2.489, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.0202782577066473e-05, | |
| "loss": 2.4985, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 9.975447849413477e-06, | |
| "loss": 2.5375, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 9.748113121760481e-06, | |
| "loss": 2.4924, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 9.520778394107483e-06, | |
| "loss": 2.4879, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 9.293443666454487e-06, | |
| "loss": 2.5486, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 9.066108938801491e-06, | |
| "loss": 2.5187, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 8.838774211148495e-06, | |
| "loss": 2.5102, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 8.6114394834955e-06, | |
| "loss": 2.4975, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.384104755842504e-06, | |
| "loss": 2.5036, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 8.156770028189506e-06, | |
| "loss": 2.5323, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 7.92943530053651e-06, | |
| "loss": 2.5228, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 7.702100572883514e-06, | |
| "loss": 2.5411, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.474765845230518e-06, | |
| "loss": 2.4923, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.247431117577522e-06, | |
| "loss": 2.5178, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.020096389924526e-06, | |
| "loss": 2.5121, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.79276166227153e-06, | |
| "loss": 2.5414, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 6.565426934618532e-06, | |
| "loss": 2.5222, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.338092206965536e-06, | |
| "loss": 2.5261, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.11075747931254e-06, | |
| "loss": 2.5274, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5.883422751659544e-06, | |
| "loss": 2.4965, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 5.656088024006548e-06, | |
| "loss": 2.5141, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5.428753296353551e-06, | |
| "loss": 2.5101, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5.201418568700555e-06, | |
| "loss": 2.5011, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.974083841047559e-06, | |
| "loss": 2.5091, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.746749113394562e-06, | |
| "loss": 2.5237, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.519414385741566e-06, | |
| "loss": 2.4949, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.29207965808857e-06, | |
| "loss": 2.503, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.0647449304355735e-06, | |
| "loss": 2.5068, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.837410202782577e-06, | |
| "loss": 2.4922, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.6100754751295813e-06, | |
| "loss": 2.5199, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.382740747476585e-06, | |
| "loss": 2.5058, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.155406019823588e-06, | |
| "loss": 2.5294, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.928071292170592e-06, | |
| "loss": 2.4969, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.700736564517596e-06, | |
| "loss": 2.5419, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.4734018368645998e-06, | |
| "loss": 2.5299, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.2460671092116032e-06, | |
| "loss": 2.5275, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.0187323815586067e-06, | |
| "loss": 2.4891, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.7913976539056108e-06, | |
| "loss": 2.5108, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.5640629262526144e-06, | |
| "loss": 2.5246, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.336728198599618e-06, | |
| "loss": 2.5304, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.109393470946622e-06, | |
| "loss": 2.5159, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.820587432936256e-07, | |
| "loss": 2.5071, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.547240156406293e-07, | |
| "loss": 2.5091, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.2738928798763303e-07, | |
| "loss": 2.5386, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.0005456033463672e-07, | |
| "loss": 2.5228, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 21994, | |
| "total_flos": 3.604860407937761e+17, | |
| "train_loss": 2.5232514588412474, | |
| "train_runtime": 5052.6511, | |
| "train_samples_per_second": 34.825, | |
| "train_steps_per_second": 4.353 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 21994, | |
| "num_train_epochs": 1, | |
| "save_steps": 2500, | |
| "total_flos": 3.604860407937761e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |