{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 60130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 4.9584234159321475e-05,
      "loss": 0.6379,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.916846831864294e-05,
      "loss": 0.4095,
      "step": 1000
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.875270247796441e-05,
      "loss": 0.3395,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.8336936637285885e-05,
      "loss": 0.3274,
      "step": 2000
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.792117079660735e-05,
      "loss": 0.3223,
      "step": 2500
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.750540495592882e-05,
      "loss": 0.3417,
      "step": 3000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.7089639115250295e-05,
      "loss": 0.3072,
      "step": 3500
    },
    {
      "epoch": 0.67,
      "learning_rate": 4.667387327457176e-05,
      "loss": 0.2396,
      "step": 4000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.625810743389323e-05,
      "loss": 0.2277,
      "step": 4500
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.5842341593214705e-05,
      "loss": 0.224,
      "step": 5000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.542657575253617e-05,
      "loss": 0.2246,
      "step": 5500
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.501080991185764e-05,
      "loss": 0.2104,
      "step": 6000
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.4595044071179116e-05,
      "loss": 0.183,
      "step": 6500
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.417927823050058e-05,
      "loss": 0.2578,
      "step": 7000
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.3763512389822053e-05,
      "loss": 0.2557,
      "step": 7500
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.3347746549143526e-05,
      "loss": 0.2352,
      "step": 8000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.2931980708465e-05,
      "loss": 0.2227,
      "step": 8500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.2516214867786464e-05,
      "loss": 0.1994,
      "step": 9000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.2100449027107936e-05,
      "loss": 0.2145,
      "step": 9500
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.168468318642941e-05,
      "loss": 0.1487,
      "step": 10000
    },
    {
      "epoch": 1.75,
      "learning_rate": 4.1268917345750874e-05,
      "loss": 0.159,
      "step": 10500
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.0853151505072346e-05,
      "loss": 0.1803,
      "step": 11000
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.043738566439382e-05,
      "loss": 0.1679,
      "step": 11500
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.0021619823715284e-05,
      "loss": 0.1332,
      "step": 12000
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.9605853983036756e-05,
      "loss": 0.1288,
      "step": 12500
    },
    {
      "epoch": 2.16,
      "learning_rate": 3.919008814235823e-05,
      "loss": 0.1164,
      "step": 13000
    },
    {
      "epoch": 2.25,
      "learning_rate": 3.8774322301679694e-05,
      "loss": 0.1237,
      "step": 13500
    },
    {
      "epoch": 2.33,
      "learning_rate": 3.8358556461001167e-05,
      "loss": 0.1533,
      "step": 14000
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.794279062032264e-05,
      "loss": 0.1282,
      "step": 14500
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.7527024779644104e-05,
      "loss": 0.5156,
      "step": 15000
    },
    {
      "epoch": 2.58,
      "learning_rate": 3.711125893896558e-05,
      "loss": 0.1689,
      "step": 15500
    },
    {
      "epoch": 2.66,
      "learning_rate": 3.669549309828705e-05,
      "loss": 0.2002,
      "step": 16000
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.6279727257608515e-05,
      "loss": 0.1226,
      "step": 16500
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.586396141692999e-05,
      "loss": 0.1192,
      "step": 17000
    },
    {
      "epoch": 2.91,
      "learning_rate": 3.544819557625146e-05,
      "loss": 0.1264,
      "step": 17500
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.5032429735572925e-05,
      "loss": 0.138,
      "step": 18000
    },
    {
      "epoch": 3.08,
      "learning_rate": 3.46166638948944e-05,
      "loss": 0.116,
      "step": 18500
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.420089805421587e-05,
      "loss": 0.168,
      "step": 19000
    },
    {
      "epoch": 3.24,
      "learning_rate": 3.3785132213537335e-05,
      "loss": 0.0882,
      "step": 19500
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.336936637285881e-05,
      "loss": 0.0959,
      "step": 20000
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.295360053218028e-05,
      "loss": 0.0865,
      "step": 20500
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.2537834691501745e-05,
      "loss": 0.0853,
      "step": 21000
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.212206885082322e-05,
      "loss": 0.0783,
      "step": 21500
    },
    {
      "epoch": 3.66,
      "learning_rate": 3.170630301014469e-05,
      "loss": 0.0671,
      "step": 22000
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.129053716946616e-05,
      "loss": 0.0762,
      "step": 22500
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.087477132878763e-05,
      "loss": 0.0759,
      "step": 23000
    },
    {
      "epoch": 3.91,
      "learning_rate": 3.04590054881091e-05,
      "loss": 0.0786,
      "step": 23500
    },
    {
      "epoch": 3.99,
      "learning_rate": 3.0043239647430572e-05,
      "loss": 0.0696,
      "step": 24000
    },
    {
      "epoch": 4.07,
      "learning_rate": 2.9627473806752038e-05,
      "loss": 0.0561,
      "step": 24500
    },
    {
      "epoch": 4.16,
      "learning_rate": 2.921170796607351e-05,
      "loss": 0.0567,
      "step": 25000
    },
    {
      "epoch": 4.24,
      "learning_rate": 2.8795942125394983e-05,
      "loss": 0.0553,
      "step": 25500
    },
    {
      "epoch": 4.32,
      "learning_rate": 2.8380176284716448e-05,
      "loss": 0.0618,
      "step": 26000
    },
    {
      "epoch": 4.41,
      "learning_rate": 2.796441044403792e-05,
      "loss": 0.0601,
      "step": 26500
    },
    {
      "epoch": 4.49,
      "learning_rate": 2.7548644603359393e-05,
      "loss": 0.0572,
      "step": 27000
    },
    {
      "epoch": 4.57,
      "learning_rate": 2.713287876268086e-05,
      "loss": 0.055,
      "step": 27500
    },
    {
      "epoch": 4.66,
      "learning_rate": 2.671711292200233e-05,
      "loss": 0.0592,
      "step": 28000
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.6301347081323803e-05,
      "loss": 0.0582,
      "step": 28500
    },
    {
      "epoch": 4.82,
      "learning_rate": 2.588558124064527e-05,
      "loss": 0.0605,
      "step": 29000
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.546981539996674e-05,
      "loss": 0.0511,
      "step": 29500
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.5054049559288213e-05,
      "loss": 0.052,
      "step": 30000
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.4638283718609682e-05,
      "loss": 0.0387,
      "step": 30500
    },
    {
      "epoch": 5.16,
      "learning_rate": 2.422251787793115e-05,
      "loss": 0.0408,
      "step": 31000
    },
    {
      "epoch": 5.24,
      "learning_rate": 2.380675203725262e-05,
      "loss": 0.038,
      "step": 31500
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.3390986196574092e-05,
      "loss": 0.0415,
      "step": 32000
    },
    {
      "epoch": 5.4,
      "learning_rate": 2.297522035589556e-05,
      "loss": 0.0529,
      "step": 32500
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.2559454515217034e-05,
      "loss": 0.0436,
      "step": 33000
    },
    {
      "epoch": 5.57,
      "learning_rate": 2.2143688674538503e-05,
      "loss": 0.0388,
      "step": 33500
    },
    {
      "epoch": 5.65,
      "learning_rate": 2.172792283385997e-05,
      "loss": 0.0362,
      "step": 34000
    },
    {
      "epoch": 5.74,
      "learning_rate": 2.1312156993181444e-05,
      "loss": 0.0369,
      "step": 34500
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.0896391152502913e-05,
      "loss": 0.0385,
      "step": 35000
    },
    {
      "epoch": 5.9,
      "learning_rate": 2.048062531182438e-05,
      "loss": 0.0391,
      "step": 35500
    },
    {
      "epoch": 5.99,
      "learning_rate": 2.0064859471145854e-05,
      "loss": 0.0395,
      "step": 36000
    },
    {
      "epoch": 6.07,
      "learning_rate": 1.9649093630467323e-05,
      "loss": 0.0262,
      "step": 36500
    },
    {
      "epoch": 6.15,
      "learning_rate": 1.9233327789788792e-05,
      "loss": 0.0473,
      "step": 37000
    },
    {
      "epoch": 6.24,
      "learning_rate": 1.8817561949110264e-05,
      "loss": 0.0306,
      "step": 37500
    },
    {
      "epoch": 6.32,
      "learning_rate": 1.8401796108431733e-05,
      "loss": 0.0258,
      "step": 38000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.7986030267753202e-05,
      "loss": 0.0284,
      "step": 38500
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.7570264427074674e-05,
      "loss": 0.0223,
      "step": 39000
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.7154498586396143e-05,
      "loss": 0.0259,
      "step": 39500
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.6738732745717612e-05,
      "loss": 0.0282,
      "step": 40000
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.6322966905039085e-05,
      "loss": 0.024,
      "step": 40500
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.5907201064360554e-05,
      "loss": 0.026,
      "step": 41000
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.5491435223682026e-05,
      "loss": 0.0263,
      "step": 41500
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.5075669383003493e-05,
      "loss": 0.0257,
      "step": 42000
    },
    {
      "epoch": 7.07,
      "learning_rate": 1.4659903542324962e-05,
      "loss": 0.0272,
      "step": 42500
    },
    {
      "epoch": 7.15,
      "learning_rate": 1.4244137701646434e-05,
      "loss": 0.0194,
      "step": 43000
    },
    {
      "epoch": 7.23,
      "learning_rate": 1.3828371860967903e-05,
      "loss": 0.014,
      "step": 43500
    },
    {
      "epoch": 7.32,
      "learning_rate": 1.3412606020289372e-05,
      "loss": 0.0151,
      "step": 44000
    },
    {
      "epoch": 7.4,
      "learning_rate": 1.2996840179610845e-05,
      "loss": 0.0173,
      "step": 44500
    },
    {
      "epoch": 7.48,
      "learning_rate": 1.2581074338932313e-05,
      "loss": 0.0138,
      "step": 45000
    },
    {
      "epoch": 7.57,
      "learning_rate": 1.2165308498253784e-05,
      "loss": 0.0245,
      "step": 45500
    },
    {
      "epoch": 7.65,
      "learning_rate": 1.1749542657575255e-05,
      "loss": 0.0162,
      "step": 46000
    },
    {
      "epoch": 7.73,
      "learning_rate": 1.1333776816896724e-05,
      "loss": 0.0162,
      "step": 46500
    },
    {
      "epoch": 7.82,
      "learning_rate": 1.0918010976218194e-05,
      "loss": 0.0158,
      "step": 47000
    },
    {
      "epoch": 7.9,
      "learning_rate": 1.0502245135539665e-05,
      "loss": 0.0186,
      "step": 47500
    },
    {
      "epoch": 7.98,
      "learning_rate": 1.0086479294861134e-05,
      "loss": 0.0189,
      "step": 48000
    },
    {
      "epoch": 8.07,
      "learning_rate": 9.670713454182605e-06,
      "loss": 0.0138,
      "step": 48500
    },
    {
      "epoch": 8.15,
      "learning_rate": 9.254947613504075e-06,
      "loss": 0.0076,
      "step": 49000
    },
    {
      "epoch": 8.23,
      "learning_rate": 8.839181772825546e-06,
      "loss": 0.0109,
      "step": 49500
    },
    {
      "epoch": 8.32,
      "learning_rate": 8.423415932147015e-06,
      "loss": 0.0107,
      "step": 50000
    },
    {
      "epoch": 8.4,
      "learning_rate": 8.007650091468485e-06,
      "loss": 0.0086,
      "step": 50500
    },
    {
      "epoch": 8.48,
      "learning_rate": 7.591884250789956e-06,
      "loss": 0.0132,
      "step": 51000
    },
    {
      "epoch": 8.56,
      "learning_rate": 7.176118410111425e-06,
      "loss": 0.0095,
      "step": 51500
    },
    {
      "epoch": 8.65,
      "learning_rate": 6.7603525694328956e-06,
      "loss": 0.0101,
      "step": 52000
    },
    {
      "epoch": 8.73,
      "learning_rate": 6.344586728754366e-06,
      "loss": 0.0108,
      "step": 52500
    },
    {
      "epoch": 8.81,
      "learning_rate": 5.928820888075836e-06,
      "loss": 0.0088,
      "step": 53000
    },
    {
      "epoch": 8.9,
      "learning_rate": 5.513055047397306e-06,
      "loss": 0.0082,
      "step": 53500
    },
    {
      "epoch": 8.98,
      "learning_rate": 5.097289206718776e-06,
      "loss": 0.0086,
      "step": 54000
    },
    {
      "epoch": 9.06,
      "learning_rate": 4.681523366040246e-06,
      "loss": 0.0053,
      "step": 54500
    },
    {
      "epoch": 9.15,
      "learning_rate": 4.265757525361717e-06,
      "loss": 0.0053,
      "step": 55000
    },
    {
      "epoch": 9.23,
      "learning_rate": 3.849991684683187e-06,
      "loss": 0.0066,
      "step": 55500
    },
    {
      "epoch": 9.31,
      "learning_rate": 3.4342258440046572e-06,
      "loss": 0.0041,
      "step": 56000
    },
    {
      "epoch": 9.4,
      "learning_rate": 3.018460003326127e-06,
      "loss": 0.0047,
      "step": 56500
    },
    {
      "epoch": 9.48,
      "learning_rate": 2.6026941626475972e-06,
      "loss": 0.0065,
      "step": 57000
    },
    {
      "epoch": 9.56,
      "learning_rate": 2.1869283219690674e-06,
      "loss": 0.0059,
      "step": 57500
    },
    {
      "epoch": 9.65,
      "learning_rate": 1.7711624812905372e-06,
      "loss": 0.0066,
      "step": 58000
    },
    {
      "epoch": 9.73,
      "learning_rate": 1.3553966406120074e-06,
      "loss": 0.0043,
      "step": 58500
    },
    {
      "epoch": 9.81,
      "learning_rate": 9.396307999334775e-07,
      "loss": 0.0055,
      "step": 59000
    },
    {
      "epoch": 9.9,
      "learning_rate": 5.238649592549476e-07,
      "loss": 0.0042,
      "step": 59500
    },
    {
      "epoch": 9.98,
      "learning_rate": 1.0809911857641776e-07,
      "loss": 0.0031,
      "step": 60000
    },
    {
      "epoch": 10.0,
      "step": 60130,
      "total_flos": 2.8632348630306816e+17,
      "train_runtime": 40850.056,
      "train_samples_per_second": 1.472
    }
  ],
  "max_steps": 60130,
  "num_train_epochs": 10,
  "total_flos": 2.8632348630306816e+17,
  "trial_name": null,
  "trial_params": null
}