| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.994219653179191, | |
| "eval_steps": 500, | |
| "global_step": 777, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9999666208982518e-05, | |
| "loss": 28.454, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9987985862949325e-05, | |
| "loss": 8.849, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9959638242644855e-05, | |
| "loss": 8.2489, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.991467065265775e-05, | |
| "loss": 4.2986, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.985315813185629e-05, | |
| "loss": 4.8588, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.9775203328168643e-05, | |
| "loss": 9.355, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.9680936327290924e-05, | |
| "loss": 5.7211, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.957051443560902e-05, | |
| "loss": 10.0795, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.9444121917696335e-05, | |
| "loss": 3.6404, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.930196968882556e-05, | |
| "loss": 30.7862, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.9144294963007542e-05, | |
| "loss": 6.9002, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.8971360857144616e-05, | |
| "loss": 8.5785, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.8783455951958948e-05, | |
| "loss": 5.644, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.8580893810428562e-05, | |
| "loss": 14.8062, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.8364012454534687e-05, | |
| "loss": 14.919, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.813317380119356e-05, | |
| "loss": 6.5775, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7888763058314016e-05, | |
| "loss": 6.5815, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.763118808198859e-05, | |
| "loss": 4.1731, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.736087869589092e-05, | |
| "loss": 9.1267, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.7078285974015103e-05, | |
| "loss": 3.9908, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.678388148795397e-05, | |
| "loss": 5.6814, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.6478156519972354e-05, | |
| "loss": 5.0232, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.6161621243188528e-05, | |
| "loss": 3.5724, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.5834803870231846e-05, | |
| "loss": 11.7568, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.549824977179731e-05, | |
| "loss": 5.393, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.5152520566567873e-05, | |
| "loss": 6.7241, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.4798193184023233e-05, | |
| "loss": 3.7898, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 1.4435858901698995e-05, | |
| "loss": 15.7335, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 1.4066122358502772e-05, | |
| "loss": 2.6745, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.3689600545733713e-05, | |
| "loss": 5.8789, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.330692177748925e-05, | |
| "loss": 2.7451, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.2918724642177054e-05, | |
| "loss": 3.0655, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.252565693688198e-05, | |
| "loss": 1.6089, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.2128374586366159e-05, | |
| "loss": 9.6673, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.172754054850619e-05, | |
| "loss": 1.9807, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.1323823707993937e-05, | |
| "loss": 0.7278, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.091789776014706e-05, | |
| "loss": 5.7635, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.0510440086691911e-05, | |
| "loss": 3.151, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.0102130625394776e-05, | |
| "loss": 16.2403, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 9.693650735427808e-06, | |
| "loss": 2.06, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 9.285682060362974e-06, | |
| "loss": 3.2416, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 8.878905390691437e-06, | |
| "loss": 1.7038, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 8.473999527766503e-06, | |
| "loss": 1.4826, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.071640151065902e-06, | |
| "loss": 5.8947, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.672498690663632e-06, | |
| "loss": 6.4248, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 7.277241206792944e-06, | |
| "loss": 4.6138, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 6.886527278370131e-06, | |
| "loss": 2.0887, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.501008902333912e-06, | |
| "loss": 3.9643, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 6.121329405637111e-06, | |
| "loss": 4.3215, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 5.748122371706198e-06, | |
| "loss": 3.4914, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5.382010583160201e-06, | |
| "loss": 13.7674, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 5.0236049825532355e-06, | |
| "loss": 3.2402, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.673503652874977e-06, | |
| "loss": 0.7369, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 4.33229081951025e-06, | |
| "loss": 0.707, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 4.000535875323307e-06, | |
| "loss": 1.7793, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.6787924304935696e-06, | |
| "loss": 0.5325, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.3675973886884506e-06, | |
| "loss": 0.4873, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.0674700511149057e-06, | |
| "loss": 0.3436, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 2.7789112499447312e-06, | |
| "loss": 0.3319, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.502402512559773e-06, | |
| "loss": 0.4382, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.2384052580116465e-06, | |
| "loss": 0.2655, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.9873600270368664e-06, | |
| "loss": 1.3714, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.749685746912323e-06, | |
| "loss": 0.1256, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.52577903237781e-06, | |
| "loss": 0.2344, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.3160135237922011e-06, | |
| "loss": 0.4926, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.1207392636277502e-06, | |
| "loss": 0.369, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.402821123429017e-07, | |
| "loss": 0.1464, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 7.749432046084471e-07, | |
| "loss": 0.5273, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.249984467943737e-07, | |
| "loss": 0.4998, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.906980565560004e-07, | |
| "loss": 2.2237, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.722661452877163e-07, | |
| "loss": 0.2594, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.699003441410508e-07, | |
| "loss": 0.3388, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.8377147423120467e-07, | |
| "loss": 0.4497, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.1402326158234e-07, | |
| "loss": 0.3246, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 6.077209728732492e-08, | |
| "loss": 2.0268, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.4106843282165615e-08, | |
| "loss": 0.1469, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.088684059220249e-09, | |
| "loss": 0.5057, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "step": 777, | |
| "total_flos": 1.0509820178622054e+17, | |
| "train_loss": 4.876755037362972, | |
| "train_runtime": 2316.9087, | |
| "train_samples_per_second": 21.497, | |
| "train_steps_per_second": 0.335 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 777, | |
| "num_train_epochs": 3, | |
| "save_steps": 300, | |
| "total_flos": 1.0509820178622054e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |