{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 58893,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 4.9575501332925815e-05,
      "loss": 4.3519,
      "step": 500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9151002665851634e-05,
      "loss": 4.193,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.8726503998777446e-05,
      "loss": 4.1011,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.830200533170326e-05,
      "loss": 4.0682,
      "step": 2000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.787750666462907e-05,
      "loss": 4.0022,
      "step": 2500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.745300799755489e-05,
      "loss": 3.9978,
      "step": 3000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.70285093304807e-05,
      "loss": 3.9382,
      "step": 3500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.6604010663406515e-05,
      "loss": 3.897,
      "step": 4000
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.6179511996332334e-05,
      "loss": 3.8495,
      "step": 4500
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.575501332925815e-05,
      "loss": 3.8095,
      "step": 5000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.5330514662183965e-05,
      "loss": 3.8615,
      "step": 5500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.490601599510978e-05,
      "loss": 3.7891,
      "step": 6000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.44815173280356e-05,
      "loss": 3.828,
      "step": 6500
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.405701866096141e-05,
      "loss": 3.7765,
      "step": 7000
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.363251999388722e-05,
      "loss": 3.7298,
      "step": 7500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.3208021326813034e-05,
      "loss": 3.705,
      "step": 8000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.278352265973885e-05,
      "loss": 3.6773,
      "step": 8500
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.2359023992664665e-05,
      "loss": 3.7184,
      "step": 9000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.193452532559048e-05,
      "loss": 3.6683,
      "step": 9500
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.151002665851629e-05,
      "loss": 3.6707,
      "step": 10000
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.108552799144211e-05,
      "loss": 3.6498,
      "step": 10500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.066102932436792e-05,
      "loss": 3.6855,
      "step": 11000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.0236530657293734e-05,
      "loss": 3.6427,
      "step": 11500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.981203199021955e-05,
      "loss": 3.5934,
      "step": 12000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9387533323145365e-05,
      "loss": 3.6448,
      "step": 12500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.8963034656071184e-05,
      "loss": 3.597,
      "step": 13000
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.8538535988997e-05,
      "loss": 3.6137,
      "step": 13500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.8114037321922816e-05,
      "loss": 3.6207,
      "step": 14000
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.768953865484863e-05,
      "loss": 3.6042,
      "step": 14500
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.726503998777444e-05,
      "loss": 3.6019,
      "step": 15000
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.684054132070025e-05,
      "loss": 3.6258,
      "step": 15500
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.641604265362607e-05,
      "loss": 3.5868,
      "step": 16000
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.5991543986551884e-05,
      "loss": 3.6198,
      "step": 16500
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.55670453194777e-05,
      "loss": 3.5661,
      "step": 17000
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.5142546652403516e-05,
      "loss": 3.5257,
      "step": 17500
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.471804798532933e-05,
      "loss": 3.5351,
      "step": 18000
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.429354931825514e-05,
      "loss": 3.5588,
      "step": 18500
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.386905065118095e-05,
      "loss": 3.5219,
      "step": 19000
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.344455198410677e-05,
      "loss": 3.5402,
      "step": 19500
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.3020053317032584e-05,
      "loss": 3.4953,
      "step": 20000
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.25955546499584e-05,
      "loss": 3.4226,
      "step": 20500
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.2171055982884216e-05,
      "loss": 3.4332,
      "step": 21000
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.174655731581003e-05,
      "loss": 3.478,
      "step": 21500
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.132205864873585e-05,
      "loss": 3.3677,
      "step": 22000
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.089755998166166e-05,
      "loss": 3.3704,
      "step": 22500
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.0473061314587476e-05,
      "loss": 3.3964,
      "step": 23000
    },
    {
      "epoch": 1.2,
      "learning_rate": 3.004856264751329e-05,
      "loss": 3.3695,
      "step": 23500
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.9624063980439104e-05,
      "loss": 3.397,
      "step": 24000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.919956531336492e-05,
      "loss": 3.4309,
      "step": 24500
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.8775066646290732e-05,
      "loss": 3.3663,
      "step": 25000
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8350567979216547e-05,
      "loss": 3.3603,
      "step": 25500
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.792606931214236e-05,
      "loss": 3.4048,
      "step": 26000
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.7501570645068176e-05,
      "loss": 3.3188,
      "step": 26500
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.707707197799399e-05,
      "loss": 3.3426,
      "step": 27000
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6652573310919804e-05,
      "loss": 3.4131,
      "step": 27500
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.622807464384562e-05,
      "loss": 3.3769,
      "step": 28000
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5803575976771432e-05,
      "loss": 3.4038,
      "step": 28500
    },
    {
      "epoch": 1.48,
      "learning_rate": 2.5379077309697247e-05,
      "loss": 3.3665,
      "step": 29000
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.4954578642623063e-05,
      "loss": 3.3207,
      "step": 29500
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.453007997554888e-05,
      "loss": 3.3491,
      "step": 30000
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.410558130847469e-05,
      "loss": 3.3479,
      "step": 30500
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.3681082641400507e-05,
      "loss": 3.3752,
      "step": 31000
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.325658397432632e-05,
      "loss": 3.2715,
      "step": 31500
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.2832085307252135e-05,
      "loss": 3.359,
      "step": 32000
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.240758664017795e-05,
      "loss": 3.2759,
      "step": 32500
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.1983087973103767e-05,
      "loss": 3.2409,
      "step": 33000
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.1558589306029582e-05,
      "loss": 3.3038,
      "step": 33500
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1134090638955395e-05,
      "loss": 3.324,
      "step": 34000
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.070959197188121e-05,
      "loss": 3.2846,
      "step": 34500
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.0285093304807023e-05,
      "loss": 3.2611,
      "step": 35000
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.986059463773284e-05,
      "loss": 3.2476,
      "step": 35500
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.943609597065865e-05,
      "loss": 3.276,
      "step": 36000
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.9011597303584467e-05,
      "loss": 3.3021,
      "step": 36500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8587098636510282e-05,
      "loss": 3.2986,
      "step": 37000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8162599969436098e-05,
      "loss": 3.2816,
      "step": 37500
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.773810130236191e-05,
      "loss": 3.239,
      "step": 38000
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.7313602635287726e-05,
      "loss": 3.2367,
      "step": 38500
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.6889103968213542e-05,
      "loss": 3.265,
      "step": 39000
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.6464605301139354e-05,
      "loss": 3.2244,
      "step": 39500
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.604010663406517e-05,
      "loss": 3.1561,
      "step": 40000
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5615607966990982e-05,
      "loss": 3.2015,
      "step": 40500
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.51911092999168e-05,
      "loss": 3.1714,
      "step": 41000
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.4766610632842614e-05,
      "loss": 3.2022,
      "step": 41500
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.434211196576843e-05,
      "loss": 3.1444,
      "step": 42000
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.3917613298694244e-05,
      "loss": 3.1848,
      "step": 42500
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3493114631620058e-05,
      "loss": 3.1778,
      "step": 43000
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.3068615964545872e-05,
      "loss": 3.1487,
      "step": 43500
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.2644117297471686e-05,
      "loss": 3.171,
      "step": 44000
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.2219618630397501e-05,
      "loss": 3.1315,
      "step": 44500
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.1795119963323315e-05,
      "loss": 3.1574,
      "step": 45000
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.137062129624913e-05,
      "loss": 3.1521,
      "step": 45500
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.0946122629174944e-05,
      "loss": 3.1825,
      "step": 46000
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.052162396210076e-05,
      "loss": 3.1498,
      "step": 46500
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.0097125295026575e-05,
      "loss": 3.1321,
      "step": 47000
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.672626627952389e-06,
      "loss": 3.1487,
      "step": 47500
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.248127960878203e-06,
      "loss": 3.1348,
      "step": 48000
    },
    {
      "epoch": 2.47,
      "learning_rate": 8.823629293804019e-06,
      "loss": 3.133,
      "step": 48500
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.399130626729833e-06,
      "loss": 3.1578,
      "step": 49000
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.974631959655647e-06,
      "loss": 3.1419,
      "step": 49500
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.550133292581461e-06,
      "loss": 3.1395,
      "step": 50000
    },
    {
      "epoch": 2.57,
      "learning_rate": 7.125634625507276e-06,
      "loss": 3.1368,
      "step": 50500
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.701135958433092e-06,
      "loss": 3.1381,
      "step": 51000
    },
    {
      "epoch": 2.62,
      "learning_rate": 6.276637291358906e-06,
      "loss": 3.0896,
      "step": 51500
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.85213862428472e-06,
      "loss": 3.172,
      "step": 52000
    },
    {
      "epoch": 2.67,
      "learning_rate": 5.427639957210535e-06,
      "loss": 3.052,
      "step": 52500
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.003141290136349e-06,
      "loss": 3.1082,
      "step": 53000
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.578642623062164e-06,
      "loss": 3.1094,
      "step": 53500
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.1541439559879784e-06,
      "loss": 3.0889,
      "step": 54000
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.7296452889137933e-06,
      "loss": 3.1371,
      "step": 54500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.3051466218396074e-06,
      "loss": 3.0704,
      "step": 55000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.8806479547654223e-06,
      "loss": 3.0833,
      "step": 55500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.4561492876912367e-06,
      "loss": 3.1675,
      "step": 56000
    },
    {
      "epoch": 2.88,
      "learning_rate": 2.031650620617051e-06,
      "loss": 3.065,
      "step": 56500
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.607151953542866e-06,
      "loss": 3.0977,
      "step": 57000
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.1826532864686806e-06,
      "loss": 3.1069,
      "step": 57500
    },
    {
      "epoch": 2.95,
      "learning_rate": 7.581546193944951e-07,
      "loss": 3.1149,
      "step": 58000
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.336559523203097e-07,
      "loss": 3.059,
      "step": 58500
    },
    {
      "epoch": 3.0,
      "step": 58893,
      "total_flos": 6554895179700852.0,
      "train_loss": 3.4053042012168784,
      "train_runtime": 6317.2087,
      "train_samples_per_second": 74.58,
      "train_steps_per_second": 9.323
    }
  ],
  "max_steps": 58893,
  "num_train_epochs": 3,
  "total_flos": 6554895179700852.0,
  "trial_name": null,
  "trial_params": null
}