{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.944110060189166,
  "global_step": 11500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 9.920034393809114e-05,
      "loss": 1.8949,
      "step": 100
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.834049871023217e-05,
      "loss": 1.0475,
      "step": 200
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.748065348237318e-05,
      "loss": 1.0269,
      "step": 300
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.66208082545142e-05,
      "loss": 1.0098,
      "step": 400
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.57609630266552e-05,
      "loss": 0.9949,
      "step": 500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.490111779879622e-05,
      "loss": 0.9739,
      "step": 600
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.404127257093723e-05,
      "loss": 0.9786,
      "step": 700
    },
    {
      "epoch": 0.34,
      "learning_rate": 9.318142734307826e-05,
      "loss": 0.9689,
      "step": 800
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.232158211521927e-05,
      "loss": 0.9606,
      "step": 900
    },
    {
      "epoch": 0.43,
      "learning_rate": 9.146173688736028e-05,
      "loss": 0.9493,
      "step": 1000
    },
    {
      "epoch": 0.47,
      "learning_rate": 9.060189165950129e-05,
      "loss": 0.9472,
      "step": 1100
    },
    {
      "epoch": 0.52,
      "learning_rate": 8.974204643164231e-05,
      "loss": 0.9372,
      "step": 1200
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.888220120378332e-05,
      "loss": 0.9439,
      "step": 1300
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.802235597592435e-05,
      "loss": 0.9453,
      "step": 1400
    },
    {
      "epoch": 0.64,
      "learning_rate": 8.716251074806535e-05,
      "loss": 0.9414,
      "step": 1500
    },
    {
      "epoch": 0.69,
      "learning_rate": 8.630266552020637e-05,
      "loss": 0.9247,
      "step": 1600
    },
    {
      "epoch": 0.73,
      "learning_rate": 8.544282029234738e-05,
      "loss": 0.9228,
      "step": 1700
    },
    {
      "epoch": 0.77,
      "learning_rate": 8.45829750644884e-05,
      "loss": 0.9209,
      "step": 1800
    },
    {
      "epoch": 0.82,
      "learning_rate": 8.372312983662941e-05,
      "loss": 0.9226,
      "step": 1900
    },
    {
      "epoch": 0.86,
      "learning_rate": 8.286328460877043e-05,
      "loss": 0.9172,
      "step": 2000
    },
    {
      "epoch": 0.9,
      "learning_rate": 8.200343938091144e-05,
      "loss": 0.8962,
      "step": 2100
    },
    {
      "epoch": 0.95,
      "learning_rate": 8.114359415305245e-05,
      "loss": 0.9071,
      "step": 2200
    },
    {
      "epoch": 0.99,
      "learning_rate": 8.028374892519347e-05,
      "loss": 0.9003,
      "step": 2300
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8224605321884155,
      "eval_runtime": 0.2074,
      "eval_samples_per_second": 920.75,
      "eval_steps_per_second": 14.462,
      "step": 2326
    },
    {
      "epoch": 1.03,
      "learning_rate": 7.942390369733448e-05,
      "loss": 0.8754,
      "step": 2400
    },
    {
      "epoch": 1.07,
      "learning_rate": 7.856405846947549e-05,
      "loss": 0.8697,
      "step": 2500
    },
    {
      "epoch": 1.12,
      "learning_rate": 7.77042132416165e-05,
      "loss": 0.8656,
      "step": 2600
    },
    {
      "epoch": 1.16,
      "learning_rate": 7.684436801375753e-05,
      "loss": 0.8792,
      "step": 2700
    },
    {
      "epoch": 1.2,
      "learning_rate": 7.598452278589854e-05,
      "loss": 0.8648,
      "step": 2800
    },
    {
      "epoch": 1.25,
      "learning_rate": 7.512467755803956e-05,
      "loss": 0.8529,
      "step": 2900
    },
    {
      "epoch": 1.29,
      "learning_rate": 7.426483233018057e-05,
      "loss": 0.8492,
      "step": 3000
    },
    {
      "epoch": 1.33,
      "learning_rate": 7.340498710232158e-05,
      "loss": 0.8615,
      "step": 3100
    },
    {
      "epoch": 1.38,
      "learning_rate": 7.25451418744626e-05,
      "loss": 0.8562,
      "step": 3200
    },
    {
      "epoch": 1.42,
      "learning_rate": 7.168529664660362e-05,
      "loss": 0.8594,
      "step": 3300
    },
    {
      "epoch": 1.46,
      "learning_rate": 7.082545141874463e-05,
      "loss": 0.8602,
      "step": 3400
    },
    {
      "epoch": 1.5,
      "learning_rate": 6.996560619088564e-05,
      "loss": 0.8553,
      "step": 3500
    },
    {
      "epoch": 1.55,
      "learning_rate": 6.910576096302665e-05,
      "loss": 0.8616,
      "step": 3600
    },
    {
      "epoch": 1.59,
      "learning_rate": 6.824591573516768e-05,
      "loss": 0.8533,
      "step": 3700
    },
    {
      "epoch": 1.63,
      "learning_rate": 6.738607050730869e-05,
      "loss": 0.845,
      "step": 3800
    },
    {
      "epoch": 1.68,
      "learning_rate": 6.652622527944971e-05,
      "loss": 0.8574,
      "step": 3900
    },
    {
      "epoch": 1.72,
      "learning_rate": 6.566638005159072e-05,
      "loss": 0.8518,
      "step": 4000
    },
    {
      "epoch": 1.76,
      "learning_rate": 6.480653482373173e-05,
      "loss": 0.856,
      "step": 4100
    },
    {
      "epoch": 1.81,
      "learning_rate": 6.394668959587274e-05,
      "loss": 0.8442,
      "step": 4200
    },
    {
      "epoch": 1.85,
      "learning_rate": 6.308684436801377e-05,
      "loss": 0.8353,
      "step": 4300
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.222699914015478e-05,
      "loss": 0.8407,
      "step": 4400
    },
    {
      "epoch": 1.93,
      "learning_rate": 6.136715391229579e-05,
      "loss": 0.8474,
      "step": 4500
    },
    {
      "epoch": 1.98,
      "learning_rate": 6.0507308684436805e-05,
      "loss": 0.8506,
      "step": 4600
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8073627352714539,
      "eval_runtime": 0.2077,
      "eval_samples_per_second": 919.421,
      "eval_steps_per_second": 14.441,
      "step": 4652
    },
    {
      "epoch": 2.02,
      "learning_rate": 5.964746345657782e-05,
      "loss": 0.8277,
      "step": 4700
    },
    {
      "epoch": 2.06,
      "learning_rate": 5.8787618228718834e-05,
      "loss": 0.8104,
      "step": 4800
    },
    {
      "epoch": 2.11,
      "learning_rate": 5.792777300085985e-05,
      "loss": 0.8169,
      "step": 4900
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.706792777300086e-05,
      "loss": 0.8068,
      "step": 5000
    },
    {
      "epoch": 2.19,
      "learning_rate": 5.620808254514188e-05,
      "loss": 0.8026,
      "step": 5100
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.534823731728289e-05,
      "loss": 0.8069,
      "step": 5200
    },
    {
      "epoch": 2.28,
      "learning_rate": 5.44883920894239e-05,
      "loss": 0.8118,
      "step": 5300
    },
    {
      "epoch": 2.32,
      "learning_rate": 5.362854686156492e-05,
      "loss": 0.8104,
      "step": 5400
    },
    {
      "epoch": 2.36,
      "learning_rate": 5.276870163370593e-05,
      "loss": 0.8071,
      "step": 5500
    },
    {
      "epoch": 2.41,
      "learning_rate": 5.190885640584695e-05,
      "loss": 0.8068,
      "step": 5600
    },
    {
      "epoch": 2.45,
      "learning_rate": 5.104901117798796e-05,
      "loss": 0.8033,
      "step": 5700
    },
    {
      "epoch": 2.49,
      "learning_rate": 5.018916595012898e-05,
      "loss": 0.8196,
      "step": 5800
    },
    {
      "epoch": 2.54,
      "learning_rate": 4.932932072226999e-05,
      "loss": 0.8016,
      "step": 5900
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.846947549441101e-05,
      "loss": 0.7977,
      "step": 6000
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.760963026655203e-05,
      "loss": 0.8107,
      "step": 6100
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.674978503869304e-05,
      "loss": 0.8128,
      "step": 6200
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.5889939810834055e-05,
      "loss": 0.8018,
      "step": 6300
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.5030094582975066e-05,
      "loss": 0.7966,
      "step": 6400
    },
    {
      "epoch": 2.79,
      "learning_rate": 4.417024935511608e-05,
      "loss": 0.8029,
      "step": 6500
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.33104041272571e-05,
      "loss": 0.7989,
      "step": 6600
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.245055889939811e-05,
      "loss": 0.8102,
      "step": 6700
    },
    {
      "epoch": 2.92,
      "learning_rate": 4.159071367153913e-05,
      "loss": 0.7975,
      "step": 6800
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.073086844368014e-05,
      "loss": 0.7997,
      "step": 6900
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.8013436794281006,
      "eval_runtime": 0.2081,
      "eval_samples_per_second": 917.66,
      "eval_steps_per_second": 14.414,
      "step": 6978
    },
    {
      "epoch": 3.01,
      "learning_rate": 3.987102321582115e-05,
      "loss": 0.7997,
      "step": 7000
    },
    {
      "epoch": 3.05,
      "learning_rate": 3.901117798796217e-05,
      "loss": 0.7764,
      "step": 7100
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.815133276010318e-05,
      "loss": 0.7773,
      "step": 7200
    },
    {
      "epoch": 3.14,
      "learning_rate": 3.7291487532244196e-05,
      "loss": 0.7808,
      "step": 7300
    },
    {
      "epoch": 3.18,
      "learning_rate": 3.643164230438521e-05,
      "loss": 0.7676,
      "step": 7400
    },
    {
      "epoch": 3.22,
      "learning_rate": 3.5571797076526224e-05,
      "loss": 0.7632,
      "step": 7500
    },
    {
      "epoch": 3.27,
      "learning_rate": 3.471195184866724e-05,
      "loss": 0.776,
      "step": 7600
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.385210662080825e-05,
      "loss": 0.7804,
      "step": 7700
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.299226139294927e-05,
      "loss": 0.781,
      "step": 7800
    },
    {
      "epoch": 3.4,
      "learning_rate": 3.213241616509029e-05,
      "loss": 0.7792,
      "step": 7900
    },
    {
      "epoch": 3.44,
      "learning_rate": 3.12725709372313e-05,
      "loss": 0.7799,
      "step": 8000
    },
    {
      "epoch": 3.48,
      "learning_rate": 3.0412725709372315e-05,
      "loss": 0.7824,
      "step": 8100
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.955288048151333e-05,
      "loss": 0.7701,
      "step": 8200
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.8693035253654343e-05,
      "loss": 0.7752,
      "step": 8300
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.7833190025795357e-05,
      "loss": 0.7668,
      "step": 8400
    },
    {
      "epoch": 3.65,
      "learning_rate": 2.697334479793637e-05,
      "loss": 0.7746,
      "step": 8500
    },
    {
      "epoch": 3.7,
      "learning_rate": 2.611349957007739e-05,
      "loss": 0.7695,
      "step": 8600
    },
    {
      "epoch": 3.74,
      "learning_rate": 2.5253654342218403e-05,
      "loss": 0.7742,
      "step": 8700
    },
    {
      "epoch": 3.78,
      "learning_rate": 2.4393809114359417e-05,
      "loss": 0.7833,
      "step": 8800
    },
    {
      "epoch": 3.83,
      "learning_rate": 2.353396388650043e-05,
      "loss": 0.7753,
      "step": 8900
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.2674118658641445e-05,
      "loss": 0.7688,
      "step": 9000
    },
    {
      "epoch": 3.91,
      "learning_rate": 2.1814273430782463e-05,
      "loss": 0.773,
      "step": 9100
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.0954428202923474e-05,
      "loss": 0.7658,
      "step": 9200
    },
    {
      "epoch": 4.0,
      "learning_rate": 2.0094582975064488e-05,
      "loss": 0.7776,
      "step": 9300
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.7975377440452576,
      "eval_runtime": 0.2075,
      "eval_samples_per_second": 920.648,
      "eval_steps_per_second": 14.46,
      "step": 9304
    },
    {
      "epoch": 4.04,
      "learning_rate": 1.9234737747205502e-05,
      "loss": 0.7533,
      "step": 9400
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.837489251934652e-05,
      "loss": 0.7556,
      "step": 9500
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.7515047291487533e-05,
      "loss": 0.7644,
      "step": 9600
    },
    {
      "epoch": 4.17,
      "learning_rate": 1.6655202063628547e-05,
      "loss": 0.7531,
      "step": 9700
    },
    {
      "epoch": 4.21,
      "learning_rate": 1.579535683576956e-05,
      "loss": 0.7543,
      "step": 9800
    },
    {
      "epoch": 4.26,
      "learning_rate": 1.4935511607910577e-05,
      "loss": 0.7586,
      "step": 9900
    },
    {
      "epoch": 4.3,
      "learning_rate": 1.4075666380051591e-05,
      "loss": 0.7643,
      "step": 10000
    },
    {
      "epoch": 4.34,
      "learning_rate": 1.3215821152192605e-05,
      "loss": 0.757,
      "step": 10100
    },
    {
      "epoch": 4.39,
      "learning_rate": 1.2355975924333621e-05,
      "loss": 0.7444,
      "step": 10200
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.1496130696474635e-05,
      "loss": 0.7653,
      "step": 10300
    },
    {
      "epoch": 4.47,
      "learning_rate": 1.063628546861565e-05,
      "loss": 0.7693,
      "step": 10400
    },
    {
      "epoch": 4.51,
      "learning_rate": 9.776440240756663e-06,
      "loss": 0.7615,
      "step": 10500
    },
    {
      "epoch": 4.56,
      "learning_rate": 8.91659501289768e-06,
      "loss": 0.7517,
      "step": 10600
    },
    {
      "epoch": 4.6,
      "learning_rate": 8.056749785038693e-06,
      "loss": 0.7584,
      "step": 10700
    },
    {
      "epoch": 4.64,
      "learning_rate": 7.196904557179708e-06,
      "loss": 0.752,
      "step": 10800
    },
    {
      "epoch": 4.69,
      "learning_rate": 6.337059329320723e-06,
      "loss": 0.7509,
      "step": 10900
    },
    {
      "epoch": 4.73,
      "learning_rate": 5.477214101461737e-06,
      "loss": 0.7483,
      "step": 11000
    },
    {
      "epoch": 4.77,
      "learning_rate": 4.617368873602751e-06,
      "loss": 0.755,
      "step": 11100
    },
    {
      "epoch": 4.82,
      "learning_rate": 3.7575236457437662e-06,
      "loss": 0.7585,
      "step": 11200
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.8976784178847807e-06,
      "loss": 0.7537,
      "step": 11300
    },
    {
      "epoch": 4.9,
      "learning_rate": 2.0378331900257957e-06,
      "loss": 0.7511,
      "step": 11400
    },
    {
      "epoch": 4.94,
      "learning_rate": 1.17798796216681e-06,
      "loss": 0.7521,
      "step": 11500
    }
  ],
  "max_steps": 11630,
  "num_train_epochs": 5,
  "total_flos": 6.009030844416e+16,
  "trial_name": null,
  "trial_params": null
}