{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 29.850746268656717,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 2.7034552830322406e-05,
      "loss": 4.2127,
      "step": 10
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5172764151612024e-05,
      "loss": 3.3881,
      "step": 20
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.993331259751083e-05,
      "loss": 3.0315,
      "step": 30
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.331097547290165e-05,
      "loss": 2.8793,
      "step": 40
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.5930894339355186e-05,
      "loss": 2.7883,
      "step": 50
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.8071523918800455e-05,
      "loss": 2.7364,
      "step": 60
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.9881400439889756e-05,
      "loss": 2.6109,
      "step": 70
    },
    {
      "epoch": 1.19,
      "learning_rate": 5.1449186794191275e-05,
      "loss": 2.5014,
      "step": 80
    },
    {
      "epoch": 1.34,
      "learning_rate": 5.283207236469926e-05,
      "loss": 2.4571,
      "step": 90
    },
    {
      "epoch": 1.49,
      "learning_rate": 5.406910566064481e-05,
      "loss": 2.5222,
      "step": 100
    },
    {
      "epoch": 1.64,
      "learning_rate": 5.518813839434375e-05,
      "loss": 2.4918,
      "step": 110
    },
    {
      "epoch": 1.79,
      "learning_rate": 5.620973524009008e-05,
      "loss": 2.4835,
      "step": 120
    },
    {
      "epoch": 1.94,
      "learning_rate": 5.714951323824802e-05,
      "loss": 2.4628,
      "step": 130
    },
    {
      "epoch": 2.09,
      "learning_rate": 5.8019611761179374e-05,
      "loss": 2.3972,
      "step": 140
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.882965410654361e-05,
      "loss": 2.3233,
      "step": 150
    },
    {
      "epoch": 2.39,
      "learning_rate": 5.95873981154809e-05,
      "loss": 2.3075,
      "step": 160
    },
    {
      "epoch": 2.54,
      "learning_rate": 6.029918920033657e-05,
      "loss": 2.321,
      "step": 170
    },
    {
      "epoch": 2.69,
      "learning_rate": 6.0970283685988885e-05,
      "loss": 2.2946,
      "step": 180
    },
    {
      "epoch": 2.84,
      "learning_rate": 6.160508461224668e-05,
      "loss": 2.2593,
      "step": 190
    },
    {
      "epoch": 2.99,
      "learning_rate": 6.220731698193443e-05,
      "loss": 2.2308,
      "step": 200
    },
    {
      "epoch": 3.13,
      "learning_rate": 6.278016020707817e-05,
      "loss": 2.0929,
      "step": 210
    },
    {
      "epoch": 3.28,
      "learning_rate": 6.332634971563337e-05,
      "loss": 2.156,
      "step": 220
    },
    {
      "epoch": 3.43,
      "learning_rate": 6.384825595366063e-05,
      "loss": 2.1504,
      "step": 230
    },
    {
      "epoch": 3.58,
      "learning_rate": 6.43479465613797e-05,
      "loss": 2.182,
      "step": 240
    },
    {
      "epoch": 3.73,
      "learning_rate": 6.482723584838796e-05,
      "loss": 2.1635,
      "step": 250
    },
    {
      "epoch": 3.88,
      "learning_rate": 6.528772455953764e-05,
      "loss": 2.1302,
      "step": 260
    },
    {
      "epoch": 4.03,
      "learning_rate": 6.573083213188768e-05,
      "loss": 2.1364,
      "step": 270
    },
    {
      "epoch": 4.18,
      "learning_rate": 6.6157823082469e-05,
      "loss": 2.015,
      "step": 280
    },
    {
      "epoch": 4.33,
      "learning_rate": 6.656982876347945e-05,
      "loss": 2.0479,
      "step": 290
    },
    {
      "epoch": 4.48,
      "learning_rate": 6.696786542783324e-05,
      "loss": 1.9968,
      "step": 300
    },
    {
      "epoch": 4.63,
      "learning_rate": 6.735284933140416e-05,
      "loss": 2.012,
      "step": 310
    },
    {
      "epoch": 4.78,
      "learning_rate": 6.772560943677052e-05,
      "loss": 2.0458,
      "step": 320
    },
    {
      "epoch": 4.93,
      "learning_rate": 6.808689816153217e-05,
      "loss": 2.0991,
      "step": 330
    },
    {
      "epoch": 5.07,
      "learning_rate": 6.84374005216262e-05,
      "loss": 1.9845,
      "step": 340
    },
    {
      "epoch": 5.22,
      "learning_rate": 6.877774194892253e-05,
      "loss": 1.9325,
      "step": 350
    },
    {
      "epoch": 5.37,
      "learning_rate": 6.910849500727851e-05,
      "loss": 1.9603,
      "step": 360
    },
    {
      "epoch": 5.52,
      "learning_rate": 6.943018518821426e-05,
      "loss": 1.9095,
      "step": 370
    },
    {
      "epoch": 5.67,
      "learning_rate": 6.97432959335363e-05,
      "loss": 1.9443,
      "step": 380
    },
    {
      "epoch": 5.82,
      "learning_rate": 7.004827300543644e-05,
      "loss": 1.9461,
      "step": 390
    },
    {
      "epoch": 5.97,
      "learning_rate": 7.034552830322405e-05,
      "loss": 1.9462,
      "step": 400
    },
    {
      "epoch": 6.12,
      "learning_rate": 7.063544320870321e-05,
      "loss": 1.8685,
      "step": 410
    },
    {
      "epoch": 6.27,
      "learning_rate": 7.09183715283678e-05,
      "loss": 1.8694,
      "step": 420
    },
    {
      "epoch": 6.42,
      "learning_rate": 7.119464208935388e-05,
      "loss": 1.8429,
      "step": 430
    },
    {
      "epoch": 6.57,
      "learning_rate": 7.146456103692298e-05,
      "loss": 1.8458,
      "step": 440
    },
    {
      "epoch": 6.72,
      "learning_rate": 7.172841387373204e-05,
      "loss": 1.9065,
      "step": 450
    },
    {
      "epoch": 6.87,
      "learning_rate": 7.198646727495026e-05,
      "loss": 1.911,
      "step": 460
    },
    {
      "epoch": 7.01,
      "learning_rate": 7.223897070815449e-05,
      "loss": 1.8694,
      "step": 470
    },
    {
      "epoch": 7.16,
      "learning_rate": 7.248615788266932e-05,
      "loss": 1.7973,
      "step": 480
    },
    {
      "epoch": 7.31,
      "learning_rate": 7.272824804945709e-05,
      "loss": 1.8114,
      "step": 490
    },
    {
      "epoch": 7.46,
      "learning_rate": 7.296544716967758e-05,
      "loss": 1.7718,
      "step": 500
    },
    {
      "epoch": 7.46,
      "eval_loss": 2.8234732151031494,
      "eval_runtime": 35.5202,
      "eval_samples_per_second": 18.328,
      "eval_steps_per_second": 0.253,
      "step": 500
    },
    {
      "epoch": 7.61,
      "learning_rate": 7.319794896752499e-05,
      "loss": 1.7947,
      "step": 510
    },
    {
      "epoch": 7.76,
      "learning_rate": 7.342593588082727e-05,
      "loss": 1.8117,
      "step": 520
    },
    {
      "epoch": 7.91,
      "learning_rate": 7.364957992109503e-05,
      "loss": 1.8188,
      "step": 530
    },
    {
      "epoch": 8.06,
      "learning_rate": 7.386904345317732e-05,
      "loss": 1.8015,
      "step": 540
    },
    {
      "epoch": 8.21,
      "learning_rate": 7.408447990337652e-05,
      "loss": 1.734,
      "step": 550
    },
    {
      "epoch": 8.36,
      "learning_rate": 7.429603440375862e-05,
      "loss": 1.7217,
      "step": 560
    },
    {
      "epoch": 8.51,
      "learning_rate": 7.450384437943511e-05,
      "loss": 1.7398,
      "step": 570
    },
    {
      "epoch": 8.66,
      "learning_rate": 7.470804008476907e-05,
      "loss": 1.7452,
      "step": 580
    },
    {
      "epoch": 8.81,
      "learning_rate": 7.490874509374465e-05,
      "loss": 1.778,
      "step": 590
    },
    {
      "epoch": 8.96,
      "learning_rate": 7.510607674912285e-05,
      "loss": 1.7332,
      "step": 600
    },
    {
      "epoch": 9.1,
      "learning_rate": 7.530014657447177e-05,
      "loss": 1.6922,
      "step": 610
    },
    {
      "epoch": 9.25,
      "learning_rate": 7.549106065269378e-05,
      "loss": 1.6818,
      "step": 620
    },
    {
      "epoch": 9.4,
      "learning_rate": 7.567891997426661e-05,
      "loss": 1.6757,
      "step": 630
    },
    {
      "epoch": 9.55,
      "learning_rate": 7.586382075806015e-05,
      "loss": 1.752,
      "step": 640
    },
    {
      "epoch": 9.7,
      "learning_rate": 7.604585474728082e-05,
      "loss": 1.7074,
      "step": 650
    },
    {
      "epoch": 9.85,
      "learning_rate": 7.62251094828218e-05,
      "loss": 1.7052,
      "step": 660
    },
    {
      "epoch": 10.0,
      "learning_rate": 7.640166855605846e-05,
      "loss": 1.742,
      "step": 670
    },
    {
      "epoch": 10.15,
      "learning_rate": 7.65756118429158e-05,
      "loss": 1.6759,
      "step": 680
    },
    {
      "epoch": 10.3,
      "learning_rate": 7.674701572084905e-05,
      "loss": 1.6935,
      "step": 690
    },
    {
      "epoch": 10.45,
      "learning_rate": 7.691595327021215e-05,
      "loss": 1.6563,
      "step": 700
    },
    {
      "epoch": 10.6,
      "learning_rate": 7.708249446134367e-05,
      "loss": 1.6941,
      "step": 710
    },
    {
      "epoch": 10.75,
      "learning_rate": 7.724670632856813e-05,
      "loss": 1.676,
      "step": 720
    },
    {
      "epoch": 10.9,
      "learning_rate": 7.740865313219632e-05,
      "loss": 1.6948,
      "step": 730
    },
    {
      "epoch": 11.04,
      "learning_rate": 7.756839650950389e-05,
      "loss": 1.6687,
      "step": 740
    },
    {
      "epoch": 11.19,
      "learning_rate": 7.772599561557638e-05,
      "loss": 1.6469,
      "step": 750
    },
    {
      "epoch": 11.34,
      "learning_rate": 7.788150725482592e-05,
      "loss": 1.6783,
      "step": 760
    },
    {
      "epoch": 11.49,
      "learning_rate": 7.803498600391108e-05,
      "loss": 1.6408,
      "step": 770
    },
    {
      "epoch": 11.64,
      "learning_rate": 7.818648432672608e-05,
      "loss": 1.6521,
      "step": 780
    },
    {
      "epoch": 11.79,
      "learning_rate": 7.833605268206489e-05,
      "loss": 1.6451,
      "step": 790
    },
    {
      "epoch": 11.94,
      "learning_rate": 7.848373962451368e-05,
      "loss": 1.6504,
      "step": 800
    },
    {
      "epoch": 12.09,
      "learning_rate": 7.862959189907611e-05,
      "loss": 1.6431,
      "step": 810
    },
    {
      "epoch": 12.24,
      "learning_rate": 7.877365452999284e-05,
      "loss": 1.6131,
      "step": 820
    },
    {
      "epoch": 12.39,
      "learning_rate": 7.89159709041777e-05,
      "loss": 1.6256,
      "step": 830
    },
    {
      "epoch": 12.54,
      "learning_rate": 7.905658284965742e-05,
      "loss": 1.6257,
      "step": 840
    },
    {
      "epoch": 12.69,
      "learning_rate": 7.919553070936936e-05,
      "loss": 1.6143,
      "step": 850
    },
    {
      "epoch": 12.84,
      "learning_rate": 7.933285341064351e-05,
      "loss": 1.6383,
      "step": 860
    },
    {
      "epoch": 12.99,
      "learning_rate": 7.946858853066788e-05,
      "loss": 1.6234,
      "step": 870
    },
    {
      "epoch": 13.13,
      "learning_rate": 7.960277235821263e-05,
      "loss": 1.5871,
      "step": 880
    },
    {
      "epoch": 13.28,
      "learning_rate": 7.973543995186684e-05,
      "loss": 1.6028,
      "step": 890
    },
    {
      "epoch": 13.43,
      "learning_rate": 7.986662519502166e-05,
      "loss": 1.5723,
      "step": 900
    },
    {
      "epoch": 13.58,
      "learning_rate": 7.999636084781537e-05,
      "loss": 1.5936,
      "step": 910
    },
    {
      "epoch": 13.73,
      "learning_rate": 8.012467859623988e-05,
      "loss": 1.5869,
      "step": 920
    },
    {
      "epoch": 13.88,
      "learning_rate": 8.025160909859258e-05,
      "loss": 1.6018,
      "step": 930
    },
    {
      "epoch": 14.03,
      "learning_rate": 8.037718202944411e-05,
      "loss": 1.5926,
      "step": 940
    },
    {
      "epoch": 14.18,
      "learning_rate": 8.050142612127945e-05,
      "loss": 1.5546,
      "step": 950
    },
    {
      "epoch": 14.33,
      "learning_rate": 8.062436920395896e-05,
      "loss": 1.5601,
      "step": 960
    },
    {
      "epoch": 14.48,
      "learning_rate": 8.074603824213446e-05,
      "loss": 1.5668,
      "step": 970
    },
    {
      "epoch": 14.63,
      "learning_rate": 8.086645937074672e-05,
      "loss": 1.5623,
      "step": 980
    },
    {
      "epoch": 14.78,
      "learning_rate": 8.09856579287206e-05,
      "loss": 1.579,
      "step": 990
    },
    {
      "epoch": 14.93,
      "learning_rate": 8.110365849096721e-05,
      "loss": 1.5637,
      "step": 1000
    },
    {
      "epoch": 14.93,
      "eval_loss": 3.1092050075531006,
      "eval_runtime": 35.1461,
      "eval_samples_per_second": 18.523,
      "eval_steps_per_second": 0.256,
      "step": 1000
    },
    {
      "epoch": 15.07,
      "learning_rate": 8.122048489879363e-05,
      "loss": 1.5647,
      "step": 1010
    },
    {
      "epoch": 15.22,
      "learning_rate": 8.133616028881462e-05,
      "loss": 1.5349,
      "step": 1020
    },
    {
      "epoch": 15.37,
      "learning_rate": 8.145070712045392e-05,
      "loss": 1.542,
      "step": 1030
    },
    {
      "epoch": 15.52,
      "learning_rate": 8.15641472021169e-05,
      "loss": 1.5345,
      "step": 1040
    },
    {
      "epoch": 15.67,
      "learning_rate": 8.167650171611095e-05,
      "loss": 1.5491,
      "step": 1050
    },
    {
      "epoch": 15.82,
      "learning_rate": 8.178779124238466e-05,
      "loss": 1.5469,
      "step": 1060
    },
    {
      "epoch": 15.97,
      "learning_rate": 8.189803578115246e-05,
      "loss": 1.5825,
      "step": 1070
    },
    {
      "epoch": 16.12,
      "learning_rate": 8.200725477446693e-05,
      "loss": 1.5314,
      "step": 1080
    },
    {
      "epoch": 16.27,
      "learning_rate": 8.211546712679696e-05,
      "loss": 1.5126,
      "step": 1090
    },
    {
      "epoch": 16.42,
      "learning_rate": 8.222269122466616e-05,
      "loss": 1.5194,
      "step": 1100
    },
    {
      "epoch": 16.57,
      "learning_rate": 8.232894495540269e-05,
      "loss": 1.5276,
      "step": 1110
    },
    {
      "epoch": 16.72,
      "learning_rate": 8.243424572504824e-05,
      "loss": 1.5376,
      "step": 1120
    },
    {
      "epoch": 16.87,
      "learning_rate": 8.2538610475471e-05,
      "loss": 1.5393,
      "step": 1130
    },
    {
      "epoch": 17.01,
      "learning_rate": 8.264205570072473e-05,
      "loss": 1.5298,
      "step": 1140
    },
    {
      "epoch": 17.16,
      "learning_rate": 8.27445974626934e-05,
      "loss": 1.5135,
      "step": 1150
    },
    {
      "epoch": 17.31,
      "learning_rate": 8.284625140605869e-05,
      "loss": 1.5175,
      "step": 1160
    },
    {
      "epoch": 17.46,
      "learning_rate": 8.294703277262488e-05,
      "loss": 1.5106,
      "step": 1170
    },
    {
      "epoch": 17.61,
      "learning_rate": 8.304695641503428e-05,
      "loss": 1.5276,
      "step": 1180
    },
    {
      "epoch": 17.76,
      "learning_rate": 8.31460368099039e-05,
      "loss": 1.5227,
      "step": 1190
    },
    {
      "epoch": 17.91,
      "learning_rate": 8.324428807041249e-05,
      "loss": 1.5241,
      "step": 1200
    },
    {
      "epoch": 18.06,
      "learning_rate": 8.334172395836509e-05,
      "loss": 1.5187,
      "step": 1210
    },
    {
      "epoch": 18.21,
      "learning_rate": 8.34383578957614e-05,
      "loss": 1.4929,
      "step": 1220
    },
    {
      "epoch": 18.36,
      "learning_rate": 8.353420297589165e-05,
      "loss": 1.4934,
      "step": 1230
    },
    {
      "epoch": 18.51,
      "learning_rate": 8.362927197398341e-05,
      "loss": 1.5061,
      "step": 1240
    },
    {
      "epoch": 18.66,
      "learning_rate": 8.372357735742074e-05,
      "loss": 1.5068,
      "step": 1250
    },
    {
      "epoch": 18.81,
      "learning_rate": 8.381713129555623e-05,
      "loss": 1.5058,
      "step": 1260
    },
    {
      "epoch": 18.96,
      "learning_rate": 8.390994566913507e-05,
      "loss": 1.4944,
      "step": 1270
    },
    {
      "epoch": 19.1,
      "learning_rate": 8.400203207934977e-05,
      "loss": 1.4905,
      "step": 1280
    },
    {
      "epoch": 19.25,
      "learning_rate": 8.409340185654231e-05,
      "loss": 1.4908,
      "step": 1290
    },
    {
      "epoch": 19.4,
      "learning_rate": 8.418406606857043e-05,
      "loss": 1.4788,
      "step": 1300
    },
    {
      "epoch": 19.55,
      "learning_rate": 8.427403552885332e-05,
      "loss": 1.4851,
      "step": 1310
    },
    {
      "epoch": 19.7,
      "learning_rate": 8.436332080411142e-05,
      "loss": 1.4934,
      "step": 1320
    },
    {
      "epoch": 19.85,
      "learning_rate": 8.445193222181402e-05,
      "loss": 1.4862,
      "step": 1330
    },
    {
      "epoch": 20.0,
      "learning_rate": 8.453987987734808e-05,
      "loss": 1.4922,
      "step": 1340
    },
    {
      "epoch": 20.15,
      "learning_rate": 8.462717364092046e-05,
      "loss": 1.48,
      "step": 1350
    },
    {
      "epoch": 20.3,
      "learning_rate": 8.471382316420545e-05,
      "loss": 1.4731,
      "step": 1360
    },
    {
      "epoch": 20.45,
      "learning_rate": 8.479983788674874e-05,
      "loss": 1.4746,
      "step": 1370
    },
    {
      "epoch": 20.6,
      "learning_rate": 8.488522704213867e-05,
      "loss": 1.48,
      "step": 1380
    },
    {
      "epoch": 20.75,
      "learning_rate": 8.496999966395455e-05,
      "loss": 1.4743,
      "step": 1390
    },
    {
      "epoch": 20.9,
      "learning_rate": 8.505416459150177e-05,
      "loss": 1.4758,
      "step": 1400
    },
    {
      "epoch": 21.04,
      "learning_rate": 8.513773047534291e-05,
      "loss": 1.4738,
      "step": 1410
    },
    {
      "epoch": 21.19,
      "learning_rate": 8.522070578263329e-05,
      "loss": 1.4589,
      "step": 1420
    },
    {
      "epoch": 21.34,
      "learning_rate": 8.530309880226936e-05,
      "loss": 1.4783,
      "step": 1430
    },
    {
      "epoch": 21.49,
      "learning_rate": 8.538491764985775e-05,
      "loss": 1.4656,
      "step": 1440
    },
    {
      "epoch": 21.64,
      "learning_rate": 8.546617027251222e-05,
      "loss": 1.4702,
      "step": 1450
    },
    {
      "epoch": 21.79,
      "learning_rate": 8.554686445348594e-05,
      "loss": 1.4768,
      "step": 1460
    },
    {
      "epoch": 21.94,
      "learning_rate": 8.562700781664552e-05,
      "loss": 1.4802,
      "step": 1470
    },
    {
      "epoch": 22.09,
      "learning_rate": 8.57066078307935e-05,
      "loss": 1.463,
      "step": 1480
    },
    {
      "epoch": 22.24,
      "learning_rate": 8.578567181384524e-05,
      "loss": 1.4582,
      "step": 1490
    },
    {
      "epoch": 22.39,
      "learning_rate": 8.586420693686602e-05,
      "loss": 1.4588,
      "step": 1500
    },
    {
      "epoch": 22.39,
      "eval_loss": 3.2750725746154785,
      "eval_runtime": 34.9007,
      "eval_samples_per_second": 18.653,
      "eval_steps_per_second": 0.258,
      "step": 1500
    },
    {
      "epoch": 22.54,
      "learning_rate": 8.594222022797423e-05,
      "loss": 1.462,
      "step": 1510
    },
    {
      "epoch": 22.69,
      "learning_rate": 8.601971857611555e-05,
      "loss": 1.4671,
      "step": 1520
    },
    {
      "epoch": 22.84,
      "learning_rate": 8.609670873471342e-05,
      "loss": 1.4637,
      "step": 1530
    },
    {
      "epoch": 22.99,
      "learning_rate": 8.617319732520071e-05,
      "loss": 1.4661,
      "step": 1540
    },
    {
      "epoch": 23.13,
      "learning_rate": 8.624919084043694e-05,
      "loss": 1.4601,
      "step": 1550
    },
    {
      "epoch": 23.28,
      "learning_rate": 8.632469564801571e-05,
      "loss": 1.4553,
      "step": 1560
    },
    {
      "epoch": 23.43,
      "learning_rate": 8.639971799346644e-05,
      "loss": 1.4543,
      "step": 1570
    },
    {
      "epoch": 23.58,
      "learning_rate": 8.647426400335451e-05,
      "loss": 1.4667,
      "step": 1580
    },
    {
      "epoch": 23.73,
      "learning_rate": 8.654833968828348e-05,
      "loss": 1.4622,
      "step": 1590
    },
    {
      "epoch": 23.88,
      "learning_rate": 8.66219509458033e-05,
      "loss": 1.4654,
      "step": 1600
    },
    {
      "epoch": 24.03,
      "learning_rate": 8.669510356322798e-05,
      "loss": 1.4532,
      "step": 1610
    },
    {
      "epoch": 24.18,
      "learning_rate": 8.676780322036573e-05,
      "loss": 1.4525,
      "step": 1620
    },
    {
      "epoch": 24.33,
      "learning_rate": 8.684005549216557e-05,
      "loss": 1.4508,
      "step": 1630
    },
    {
      "epoch": 24.48,
      "learning_rate": 8.691186585128246e-05,
      "loss": 1.4526,
      "step": 1640
    },
    {
      "epoch": 24.63,
      "learning_rate": 8.698323967056495e-05,
      "loss": 1.4499,
      "step": 1650
    },
    {
      "epoch": 24.78,
      "learning_rate": 8.705418222546732e-05,
      "loss": 1.4633,
      "step": 1660
    },
    {
      "epoch": 24.93,
      "learning_rate": 8.712469869638952e-05,
      "loss": 1.4513,
      "step": 1670
    },
    {
      "epoch": 25.07,
      "learning_rate": 8.719479417094704e-05,
      "loss": 1.4543,
      "step": 1680
    },
    {
      "epoch": 25.22,
      "learning_rate": 8.726447364617366e-05,
      "loss": 1.4454,
      "step": 1690
    },
    {
      "epoch": 25.37,
      "learning_rate": 8.733374203065898e-05,
      "loss": 1.4462,
      "step": 1700
    },
    {
      "epoch": 25.52,
      "learning_rate": 8.740260414662352e-05,
      "loss": 1.4561,
      "step": 1710
    },
    {
      "epoch": 25.67,
      "learning_rate": 8.747106473193313e-05,
      "loss": 1.4503,
      "step": 1720
    },
    {
      "epoch": 25.82,
      "learning_rate": 8.753912844205501e-05,
      "loss": 1.453,
      "step": 1730
    },
    {
      "epoch": 25.97,
      "learning_rate": 8.76067998519575e-05,
      "loss": 1.4593,
      "step": 1740
    },
    {
      "epoch": 26.12,
      "learning_rate": 8.76740834579553e-05,
      "loss": 1.4412,
      "step": 1750
    },
    {
      "epoch": 26.27,
      "learning_rate": 8.774098367950224e-05,
      "loss": 1.4476,
      "step": 1760
    },
    {
      "epoch": 26.42,
      "learning_rate": 8.780750486093308e-05,
      "loss": 1.4412,
      "step": 1770
    },
    {
      "epoch": 26.57,
      "learning_rate": 8.787365127315646e-05,
      "loss": 1.4481,
      "step": 1780
    },
    {
      "epoch": 26.72,
      "learning_rate": 8.79394271153003e-05,
      "loss": 1.4471,
      "step": 1790
    },
    {
      "epoch": 26.87,
      "learning_rate": 8.800483651631128e-05,
      "loss": 1.447,
      "step": 1800
    },
    {
      "epoch": 27.01,
      "learning_rate": 8.806988353651037e-05,
      "loss": 1.4507,
      "step": 1810
    },
    {
      "epoch": 27.16,
      "learning_rate": 8.813457216910499e-05,
      "loss": 1.435,
      "step": 1820
    },
    {
      "epoch": 27.31,
      "learning_rate": 8.81989063416602e-05,
      "loss": 1.4361,
      "step": 1830
    },
    {
      "epoch": 27.46,
      "learning_rate": 8.82628899175295e-05,
      "loss": 1.4359,
      "step": 1840
    },
    {
      "epoch": 27.61,
      "learning_rate": 8.832652669724704e-05,
      "loss": 1.4379,
      "step": 1850
    },
    {
      "epoch": 27.76,
      "learning_rate": 8.838982041988221e-05,
      "loss": 1.4476,
      "step": 1860
    },
    {
      "epoch": 27.91,
      "learning_rate": 8.845277476435792e-05,
      "loss": 1.4395,
      "step": 1870
    },
    {
      "epoch": 28.06,
      "learning_rate": 8.851539335073373e-05,
      "loss": 1.4403,
      "step": 1880
    },
    {
      "epoch": 28.21,
      "learning_rate": 8.857767974145503e-05,
      "loss": 1.4387,
      "step": 1890
    },
    {
      "epoch": 28.36,
      "learning_rate": 8.863963744256908e-05,
      "loss": 1.4388,
      "step": 1900
    },
    {
      "epoch": 28.51,
      "learning_rate": 8.87012699049093e-05,
      "loss": 1.4377,
      "step": 1910
    },
    {
      "epoch": 28.66,
      "learning_rate": 8.876258052524857e-05,
      "loss": 1.4367,
      "step": 1920
    },
    {
      "epoch": 28.81,
      "learning_rate": 8.882357264742258e-05,
      "loss": 1.4482,
      "step": 1930
    },
    {
      "epoch": 28.96,
      "learning_rate": 8.88842495634241e-05,
      "loss": 1.4354,
      "step": 1940
    },
    {
      "epoch": 29.1,
      "learning_rate": 8.894461451446924e-05,
      "loss": 1.4333,
      "step": 1950
    },
    {
      "epoch": 29.25,
      "learning_rate": 8.900467069203634e-05,
      "loss": 1.4334,
      "step": 1960
    },
    {
      "epoch": 29.4,
      "learning_rate": 8.906442123887845e-05,
      "loss": 1.4454,
      "step": 1970
    },
    {
      "epoch": 29.55,
      "learning_rate": 8.912386925001022e-05,
      "loss": 1.4368,
      "step": 1980
    },
    {
      "epoch": 29.7,
      "learning_rate": 8.918301777366981e-05,
      "loss": 1.4319,
      "step": 1990
    },
    {
      "epoch": 29.85,
      "learning_rate": 8.924186981225684e-05,
      "loss": 1.4337,
      "step": 2000
    },
    {
      "epoch": 29.85,
      "eval_loss": 3.362933874130249,
      "eval_runtime": 35.3655,
      "eval_samples_per_second": 18.408,
      "eval_steps_per_second": 0.254,
      "step": 2000
    }
  ],
  "max_steps": 50000,
  "num_train_epochs": 747,
  "total_flos": 348961395840.0,
  "trial_name": null,
  "trial_params": null
}