| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2820078962210942, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.7773, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.7255, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3e-06, |
| "loss": 1.5729, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3541, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5e-06, |
| "loss": 1.0505, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-06, |
| "loss": 0.7144, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.3478, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.1306, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9e-06, |
| "loss": 0.0424, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1e-05, |
| "loss": 0.0169, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.0355, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0346, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0904, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.0351, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0337, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.033, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.0901, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0081, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9e-05, |
| "loss": 0.0335, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2e-05, |
| "loss": 0.0036, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.1e-05, |
| "loss": 0.0652, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.0346, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.0891, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0925, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.5e-05, |
| "loss": 0.1139, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.0577, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.0073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0298, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.9e-05, |
| "loss": 0.0357, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3e-05, |
| "loss": 0.1385, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.1e-05, |
| "loss": 0.0063, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.002, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.3e-05, |
| "loss": 0.0694, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.0315, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0035, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.6e-05, |
| "loss": 0.0327, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.7e-05, |
| "loss": 0.0324, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.8e-05, |
| "loss": 0.0025, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.0014, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4e-05, |
| "loss": 0.074, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.1e-05, |
| "loss": 0.0029, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.2e-05, |
| "loss": 0.0335, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.3e-05, |
| "loss": 0.0599, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0321, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0862, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.1408, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.7e-05, |
| "loss": 0.0924, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0052, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.9e-05, |
| "loss": 0.0319, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 5e-05, |
| "loss": 0.0654, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.995068060761492e-05, |
| "loss": 0.0847, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.990136121522983e-05, |
| "loss": 0.0073, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9852041822844744e-05, |
| "loss": 0.0351, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.980272243045966e-05, |
| "loss": 0.123, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.975340303807458e-05, |
| "loss": 0.0059, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9704083645689486e-05, |
| "loss": 0.0299, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.96547642533044e-05, |
| "loss": 0.0552, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.960544486091931e-05, |
| "loss": 0.0616, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.955612546853423e-05, |
| "loss": 0.0049, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9506806076149144e-05, |
| "loss": 0.003, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.945748668376406e-05, |
| "loss": 0.0688, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.940816729137897e-05, |
| "loss": 0.1185, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9358847898993886e-05, |
| "loss": 0.007, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.9309528506608796e-05, |
| "loss": 0.058, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.926020911422372e-05, |
| "loss": 0.0864, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.921088972183863e-05, |
| "loss": 0.0788, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9161570329453544e-05, |
| "loss": 0.0614, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.911225093706846e-05, |
| "loss": 0.0632, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.906293154468337e-05, |
| "loss": 0.0834, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.9013612152298286e-05, |
| "loss": 0.0792, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.89642927599132e-05, |
| "loss": 0.0096, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.891497336752812e-05, |
| "loss": 0.0601, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.886565397514303e-05, |
| "loss": 0.0041, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.8816334582757945e-05, |
| "loss": 0.0025, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.8767015190372854e-05, |
| "loss": 0.0326, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.871769579798777e-05, |
| "loss": 0.0017, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.8668376405602686e-05, |
| "loss": 0.0015, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.86190570132176e-05, |
| "loss": 0.0913, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.856973762083251e-05, |
| "loss": 0.1069, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.852041822844743e-05, |
| "loss": 0.0625, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.847109883606234e-05, |
| "loss": 0.0062, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.8421779443677254e-05, |
| "loss": 0.061, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.837246005129217e-05, |
| "loss": 0.0311, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.8323140658907086e-05, |
| "loss": 0.0339, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.8273821266522e-05, |
| "loss": 0.0028, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.822450187413691e-05, |
| "loss": 0.0588, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.817518248175183e-05, |
| "loss": 0.0638, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.812586308936674e-05, |
| "loss": 0.0035, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.8076543696981654e-05, |
| "loss": 0.0297, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.802722430459657e-05, |
| "loss": 0.0028, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7977904912211487e-05, |
| "loss": 0.0307, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7928585519826396e-05, |
| "loss": 0.0638, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.787926612744131e-05, |
| "loss": 0.0292, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.782994673505622e-05, |
| "loss": 0.0308, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.7780627342671145e-05, |
| "loss": 0.0305, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.7731307950286054e-05, |
| "loss": 0.0589, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.768198855790097e-05, |
| "loss": 0.0821, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.763266916551588e-05, |
| "loss": 0.032, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.7583349773130796e-05, |
| "loss": 0.0051, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.7534030380745706e-05, |
| "loss": 0.0464, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.748471098836063e-05, |
| "loss": 0.1152, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.7435391595975545e-05, |
| "loss": 0.0045, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.7386072203590454e-05, |
| "loss": 0.0552, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.733675281120537e-05, |
| "loss": 0.0593, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.728743341882028e-05, |
| "loss": 0.0318, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.7238114026435196e-05, |
| "loss": 0.0297, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.718879463405011e-05, |
| "loss": 0.0424, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.713947524166503e-05, |
| "loss": 0.0565, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.709015584927994e-05, |
| "loss": 0.0029, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.7040836456894854e-05, |
| "loss": 0.0593, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.6991517064509764e-05, |
| "loss": 0.0327, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.694219767212468e-05, |
| "loss": 0.03, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.6892878279739596e-05, |
| "loss": 0.0304, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.684355888735451e-05, |
| "loss": 0.0619, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.679423949496942e-05, |
| "loss": 0.0385, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.674492010258434e-05, |
| "loss": 0.0313, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.669560071019925e-05, |
| "loss": 0.0281, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.6646281317814164e-05, |
| "loss": 0.0304, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.659696192542909e-05, |
| "loss": 0.0319, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.6547642533043996e-05, |
| "loss": 0.0021, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.649832314065891e-05, |
| "loss": 0.0018, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.644900374827382e-05, |
| "loss": 0.0625, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.639968435588874e-05, |
| "loss": 0.0023, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.635036496350365e-05, |
| "loss": 0.0303, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.630104557111857e-05, |
| "loss": 0.0886, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.625172617873348e-05, |
| "loss": 0.0029, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.6202406786348396e-05, |
| "loss": 0.0836, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.6153087393963306e-05, |
| "loss": 0.0289, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.610376800157822e-05, |
| "loss": 0.0665, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.605444860919313e-05, |
| "loss": 0.0053, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.6005129216808054e-05, |
| "loss": 0.0335, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.5955809824422964e-05, |
| "loss": 0.0027, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.590649043203788e-05, |
| "loss": 0.0021, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.585717103965279e-05, |
| "loss": 0.0326, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.5807851647267706e-05, |
| "loss": 0.03, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.575853225488262e-05, |
| "loss": 0.1424, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.570921286249754e-05, |
| "loss": 0.0091, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.5659893470112455e-05, |
| "loss": 0.0032, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.5610574077727364e-05, |
| "loss": 0.0319, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.556125468534228e-05, |
| "loss": 0.0602, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.551193529295719e-05, |
| "loss": 0.0325, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.5462615900572106e-05, |
| "loss": 0.0871, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.541329650818702e-05, |
| "loss": 0.0039, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.536397711580194e-05, |
| "loss": 0.119, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.531465772341685e-05, |
| "loss": 0.0058, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.5265338331031764e-05, |
| "loss": 0.0586, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.5216018938646674e-05, |
| "loss": 0.03, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.516669954626159e-05, |
| "loss": 0.0594, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.5117380153876506e-05, |
| "loss": 0.0663, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.506806076149142e-05, |
| "loss": 0.032, |
| "step": 1500 |
| } |
| ], |
| "max_steps": 10638, |
| "num_train_epochs": 2, |
| "total_flos": 9472338173952000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|