| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.994219653179191, |
| "eval_steps": 500, |
| "global_step": 777, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9999666208982518e-05, |
| "loss": 28.454, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9987985862949325e-05, |
| "loss": 8.849, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9959638242644855e-05, |
| "loss": 8.2489, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.991467065265775e-05, |
| "loss": 4.2986, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.985315813185629e-05, |
| "loss": 4.8588, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.9775203328168643e-05, |
| "loss": 9.355, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.9680936327290924e-05, |
| "loss": 5.7211, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.957051443560902e-05, |
| "loss": 10.0795, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9444121917696335e-05, |
| "loss": 3.6404, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.930196968882556e-05, |
| "loss": 30.7862, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.9144294963007542e-05, |
| "loss": 6.9002, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.8971360857144616e-05, |
| "loss": 8.5785, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.8783455951958948e-05, |
| "loss": 5.644, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 1.8580893810428562e-05, |
| "loss": 14.8062, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.8364012454534687e-05, |
| "loss": 14.919, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 1.813317380119356e-05, |
| "loss": 6.5775, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.7888763058314016e-05, |
| "loss": 6.5815, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.763118808198859e-05, |
| "loss": 4.1731, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.736087869589092e-05, |
| "loss": 9.1267, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.7078285974015103e-05, |
| "loss": 3.9908, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.678388148795397e-05, |
| "loss": 5.6814, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.6478156519972354e-05, |
| "loss": 5.0232, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.6161621243188528e-05, |
| "loss": 3.5724, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.5834803870231846e-05, |
| "loss": 11.7568, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.549824977179731e-05, |
| "loss": 5.393, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.5152520566567873e-05, |
| "loss": 6.7241, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.4798193184023233e-05, |
| "loss": 3.7898, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1.4435858901698995e-05, |
| "loss": 15.7335, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.4066122358502772e-05, |
| "loss": 2.6745, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.3689600545733713e-05, |
| "loss": 5.8789, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.330692177748925e-05, |
| "loss": 2.7451, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.2918724642177054e-05, |
| "loss": 3.0655, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.252565693688198e-05, |
| "loss": 1.6089, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 1.2128374586366159e-05, |
| "loss": 9.6673, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.172754054850619e-05, |
| "loss": 1.9807, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.1323823707993937e-05, |
| "loss": 0.7278, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.091789776014706e-05, |
| "loss": 5.7635, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 1.0510440086691911e-05, |
| "loss": 3.151, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.0102130625394776e-05, |
| "loss": 16.2403, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 9.693650735427808e-06, |
| "loss": 2.06, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.285682060362974e-06, |
| "loss": 3.2416, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 8.878905390691437e-06, |
| "loss": 1.7038, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.473999527766503e-06, |
| "loss": 1.4826, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.071640151065902e-06, |
| "loss": 5.8947, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 7.672498690663632e-06, |
| "loss": 6.4248, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.277241206792944e-06, |
| "loss": 4.6138, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 6.886527278370131e-06, |
| "loss": 2.0887, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.501008902333912e-06, |
| "loss": 3.9643, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.121329405637111e-06, |
| "loss": 4.3215, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 5.748122371706198e-06, |
| "loss": 3.4914, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 5.382010583160201e-06, |
| "loss": 13.7674, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.0236049825532355e-06, |
| "loss": 3.2402, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 4.673503652874977e-06, |
| "loss": 0.7369, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.33229081951025e-06, |
| "loss": 0.707, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.000535875323307e-06, |
| "loss": 1.7793, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 3.6787924304935696e-06, |
| "loss": 0.5325, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.3675973886884506e-06, |
| "loss": 0.4873, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.0674700511149057e-06, |
| "loss": 0.3436, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.7789112499447312e-06, |
| "loss": 0.3319, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.502402512559773e-06, |
| "loss": 0.4382, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.2384052580116465e-06, |
| "loss": 0.2655, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.9873600270368664e-06, |
| "loss": 1.3714, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.749685746912323e-06, |
| "loss": 0.1256, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.52577903237781e-06, |
| "loss": 0.2344, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.3160135237922011e-06, |
| "loss": 0.4926, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1207392636277502e-06, |
| "loss": 0.369, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 9.402821123429017e-07, |
| "loss": 0.1464, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 7.749432046084471e-07, |
| "loss": 0.5273, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.249984467943737e-07, |
| "loss": 0.4998, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.906980565560004e-07, |
| "loss": 2.2237, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.722661452877163e-07, |
| "loss": 0.2594, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 2.699003441410508e-07, |
| "loss": 0.3388, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.8377147423120467e-07, |
| "loss": 0.4497, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.1402326158234e-07, |
| "loss": 0.3246, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 6.077209728732492e-08, |
| "loss": 2.0268, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.4106843282165615e-08, |
| "loss": 0.1469, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.088684059220249e-09, |
| "loss": 0.5057, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.99, |
| "step": 777, |
| "total_flos": 1.0509820178622054e+17, |
| "train_loss": 4.876755037362972, |
| "train_runtime": 2316.9087, |
| "train_samples_per_second": 21.497, |
| "train_steps_per_second": 0.335 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 777, |
| "num_train_epochs": 3, |
| "save_steps": 300, |
| "total_flos": 1.0509820178622054e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|