{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9956458635703918,
  "eval_steps": 500,
  "global_step": 1548,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.999747735219333e-05,
      "loss": 1.6087,
      "step": 10
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.998512275175808e-05,
      "loss": 1.4212,
      "step": 20
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9962477937145644e-05,
      "loss": 1.0602,
      "step": 30
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.992955223470575e-05,
      "loss": 1.0232,
      "step": 40
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9886359205009334e-05,
      "loss": 1.0354,
      "step": 50
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9832916637263665e-05,
      "loss": 0.8403,
      "step": 60
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.976924654198569e-05,
      "loss": 0.7628,
      "step": 70
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.9695375141937e-05,
      "loss": 0.7421,
      "step": 80
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.9611332861323875e-05,
      "loss": 0.7773,
      "step": 90
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.95171543132669e-05,
      "loss": 0.7814,
      "step": 100
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.941287828554553e-05,
      "loss": 0.7798,
      "step": 110
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.929854772462312e-05,
      "loss": 0.7717,
      "step": 120
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.9174209717959294e-05,
      "loss": 0.8173,
      "step": 130
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.9039915474616805e-05,
      "loss": 0.8009,
      "step": 140
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.889572030417091e-05,
      "loss": 0.7499,
      "step": 150
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.874168359392987e-05,
      "loss": 0.6257,
      "step": 160
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.857786878447612e-05,
      "loss": 0.7029,
      "step": 170
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.8404343343538014e-05,
      "loss": 0.6505,
      "step": 180
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.822117873820301e-05,
      "loss": 0.6647,
      "step": 190
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.802845040548363e-05,
      "loss": 0.6787,
      "step": 200
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.782623772124855e-05,
      "loss": 0.5937,
      "step": 210
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.7614623967531244e-05,
      "loss": 0.649,
      "step": 220
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.7393696298230084e-05,
      "loss": 0.729,
      "step": 230
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.716354570321361e-05,
      "loss": 0.6169,
      "step": 240
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.692426697084605e-05,
      "loss": 0.6484,
      "step": 250
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.6675958648948394e-05,
      "loss": 0.6714,
      "step": 260
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.6418723004211075e-05,
      "loss": 0.6608,
      "step": 270
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.615266598007512e-05,
      "loss": 0.6982,
      "step": 280
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.587789715309888e-05,
      "loss": 0.6304,
      "step": 290
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.559452968782861e-05,
      "loss": 0.6261,
      "step": 300
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.530268029019117e-05,
      "loss": 0.6385,
      "step": 310
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.500246915942827e-05,
      "loss": 0.6703,
      "step": 320
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.469401993859201e-05,
      "loss": 0.62,
      "step": 330
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.437745966362201e-05,
      "loss": 0.7172,
      "step": 340
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.4052918711025194e-05,
      "loss": 0.5989,
      "step": 350
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.372053074417975e-05,
      "loss": 0.6586,
      "step": 360
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.3380432658285367e-05,
      "loss": 0.6459,
      "step": 370
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.3032764523982496e-05,
      "loss": 0.6918,
      "step": 380
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.267766952966369e-05,
      "loss": 0.6366,
      "step": 390
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.231529392250095e-05,
      "loss": 0.6692,
      "step": 400
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.194578694821332e-05,
      "loss": 0.715,
      "step": 410
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.156930078959946e-05,
      "loss": 0.6242,
      "step": 420
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1185990503860626e-05,
      "loss": 0.7372,
      "step": 430
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.079601395873979e-05,
      "loss": 0.6203,
      "step": 440
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.0399531767503204e-05,
      "loss": 0.7101,
      "step": 450
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.999670722279131e-05,
      "loss": 0.6866,
      "step": 460
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.958770622936599e-05,
      "loss": 0.5917,
      "step": 470
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.917269723578212e-05,
      "loss": 0.664,
      "step": 480
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.8751851165011413e-05,
      "loss": 0.6763,
      "step": 490
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.8325341344047174e-05,
      "loss": 0.615,
      "step": 500
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.7893343432518946e-05,
      "loss": 0.5707,
      "step": 510
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.745603535034641e-05,
      "loss": 0.6939,
      "step": 520
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.701359720446249e-05,
      "loss": 0.6863,
      "step": 530
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.656621121463557e-05,
      "loss": 0.6671,
      "step": 540
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.611406163842168e-05,
      "loss": 0.518,
      "step": 550
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.565733469527731e-05,
      "loss": 0.6747,
      "step": 560
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.519621848986428e-05,
      "loss": 0.5754,
      "step": 570
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.473090293457811e-05,
      "loss": 0.6061,
      "step": 580
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.426157967133192e-05,
      "loss": 0.5859,
      "step": 590
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.3788441992628026e-05,
      "loss": 0.546,
      "step": 600
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.331168476194969e-05,
      "loss": 0.6333,
      "step": 610
    },
    {
      "epoch": 1.2,
      "learning_rate": 3.283150433350589e-05,
      "loss": 0.6332,
      "step": 620
    },
    {
      "epoch": 1.22,
      "learning_rate": 3.234809847136213e-05,
      "loss": 0.5709,
      "step": 630
    },
    {
      "epoch": 1.24,
      "learning_rate": 3.1861666267990566e-05,
      "loss": 0.578,
      "step": 640
    },
    {
      "epoch": 1.26,
      "learning_rate": 3.137240806227306e-05,
      "loss": 0.537,
      "step": 650
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.08805253569909e-05,
      "loss": 0.5905,
      "step": 660
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.038622073583507e-05,
      "loss": 0.6015,
      "step": 670
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.9889697779971504e-05,
      "loss": 0.5735,
      "step": 680
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.9391160984195382e-05,
      "loss": 0.5791,
      "step": 690
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.8890815672709225e-05,
      "loss": 0.5683,
      "step": 700
    },
    {
      "epoch": 1.37,
      "learning_rate": 2.8388867914559347e-05,
      "loss": 0.6583,
      "step": 710
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.7885524438765603e-05,
      "loss": 0.6091,
      "step": 720
    },
    {
      "epoch": 1.41,
      "learning_rate": 2.7380992549179235e-05,
      "loss": 0.5614,
      "step": 730
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.6875480039104085e-05,
      "loss": 0.6208,
      "step": 740
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.636919510571609e-05,
      "loss": 0.5686,
      "step": 750
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5862346264316605e-05,
      "loss": 0.5565,
      "step": 760
    },
    {
      "epoch": 1.49,
      "learning_rate": 2.5355142262454506e-05,
      "loss": 0.5936,
      "step": 770
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.484779199395285e-05,
      "loss": 0.6469,
      "step": 780
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.4340504412875113e-05,
      "loss": 0.5551,
      "step": 790
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.3833488447466746e-05,
      "loss": 0.6454,
      "step": 800
    },
    {
      "epoch": 1.57,
      "learning_rate": 2.3326952914107268e-05,
      "loss": 0.61,
      "step": 810
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.2821106431308544e-05,
      "loss": 0.6171,
      "step": 820
    },
    {
      "epoch": 1.61,
      "learning_rate": 2.2316157333794414e-05,
      "loss": 0.5636,
      "step": 830
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.1812313586697307e-05,
      "loss": 0.5094,
      "step": 840
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.1309782699907042e-05,
      "loss": 0.6278,
      "step": 850
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.0808771642607146e-05,
      "loss": 0.5556,
      "step": 860
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.0309486758033773e-05,
      "loss": 0.5103,
      "step": 870
    },
    {
      "epoch": 1.7,
      "learning_rate": 1.9812133678492554e-05,
      "loss": 0.5002,
      "step": 880
    },
    {
      "epoch": 1.72,
      "learning_rate": 1.9316917240668133e-05,
      "loss": 0.626,
      "step": 890
    },
    {
      "epoch": 1.74,
      "learning_rate": 1.8824041401261462e-05,
      "loss": 0.5432,
      "step": 900
    },
    {
      "epoch": 1.76,
      "learning_rate": 1.833370915298948e-05,
      "loss": 0.5423,
      "step": 910
    },
    {
      "epoch": 1.78,
      "learning_rate": 1.784612244098181e-05,
      "loss": 0.5179,
      "step": 920
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.7361482079608914e-05,
      "loss": 0.5541,
      "step": 930
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.687998766977597e-05,
      "loss": 0.6554,
      "step": 940
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.6401837516716546e-05,
      "loss": 0.601,
      "step": 950
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.5927228548319767e-05,
      "loss": 0.5267,
      "step": 960
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.545635623402496e-05,
      "loss": 0.5281,
      "step": 970
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.4989414504316748e-05,
      "loss": 0.5823,
      "step": 980
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.4526595670854159e-05,
      "loss": 0.5789,
      "step": 990
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.4068090347266311e-05,
      "loss": 0.6653,
      "step": 1000
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.3614087370647479e-05,
      "loss": 0.5199,
      "step": 1010
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.3164773723783918e-05,
      "loss": 0.6317,
      "step": 1020
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.2720334458144235e-05,
      "loss": 0.4948,
      "step": 1030
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.2280952617665334e-05,
      "loss": 0.6401,
      "step": 1040
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.1846809163365052e-05,
      "loss": 0.5579,
      "step": 1050
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.1418082898812721e-05,
      "loss": 0.4654,
      "step": 1060
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.0994950396488275e-05,
      "loss": 0.5219,
      "step": 1070
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.057758592506022e-05,
      "loss": 0.6016,
      "step": 1080
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.0166161377612437e-05,
      "loss": 0.515,
      "step": 1090
    },
    {
      "epoch": 2.13,
      "learning_rate": 9.760846200849388e-06,
      "loss": 0.5687,
      "step": 1100
    },
    {
      "epoch": 2.15,
      "learning_rate": 9.361807325308861e-06,
      "loss": 0.5587,
      "step": 1110
    },
    {
      "epoch": 2.17,
      "learning_rate": 8.969209096611092e-06,
      "loss": 0.5365,
      "step": 1120
    },
    {
      "epoch": 2.19,
      "learning_rate": 8.5832132077723e-06,
      "loss": 0.514,
      "step": 1130
    },
    {
      "epoch": 2.21,
      "learning_rate": 8.203978632610915e-06,
      "loss": 0.5671,
      "step": 1140
    },
    {
      "epoch": 2.23,
      "learning_rate": 7.831661560273585e-06,
      "loss": 0.5803,
      "step": 1150
    },
    {
      "epoch": 2.24,
      "learning_rate": 7.466415330908147e-06,
      "loss": 0.6003,
      "step": 1160
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.108390372509893e-06,
      "loss": 0.59,
      "step": 1170
    },
    {
      "epoch": 2.28,
      "learning_rate": 6.757734138967248e-06,
      "loss": 0.6142,
      "step": 1180
    },
    {
      "epoch": 2.3,
      "learning_rate": 6.414591049332366e-06,
      "loss": 0.5202,
      "step": 1190
    },
    {
      "epoch": 2.32,
      "learning_rate": 6.079102428341588e-06,
      "loss": 0.5333,
      "step": 1200
    },
    {
      "epoch": 2.34,
      "learning_rate": 5.7514064482104e-06,
      "loss": 0.5676,
      "step": 1210
    },
    {
      "epoch": 2.36,
      "learning_rate": 5.431638071726602e-06,
      "loss": 0.5741,
      "step": 1220
    },
    {
      "epoch": 2.38,
      "learning_rate": 5.11992899666546e-06,
      "loss": 0.5904,
      "step": 1230
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.8164076015494695e-06,
      "loss": 0.5816,
      "step": 1240
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.521198892775203e-06,
      "loss": 0.5774,
      "step": 1250
    },
    {
      "epoch": 2.44,
      "learning_rate": 4.234424453128974e-06,
      "loss": 0.5844,
      "step": 1260
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.9562023917124905e-06,
      "loss": 0.5407,
      "step": 1270
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.6866472952992226e-06,
      "loss": 0.5709,
      "step": 1280
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.425870181141394e-06,
      "loss": 0.5139,
      "step": 1290
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.173978451247153e-06,
      "loss": 0.5911,
      "step": 1300
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.931075848146647e-06,
      "loss": 0.5552,
      "step": 1310
    },
    {
      "epoch": 2.55,
      "learning_rate": 2.697262412165261e-06,
      "loss": 0.5725,
      "step": 1320
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.4726344402216662e-06,
      "loss": 0.5334,
      "step": 1330
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.2572844461675902e-06,
      "loss": 0.4746,
      "step": 1340
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.051301122685634e-06,
      "loss": 0.5849,
      "step": 1350
    },
    {
      "epoch": 2.63,
      "learning_rate": 1.8547693047608588e-06,
      "loss": 0.5648,
      "step": 1360
    },
    {
      "epoch": 2.65,
      "learning_rate": 1.6677699347412035e-06,
      "loss": 0.5596,
      "step": 1370
    },
    {
      "epoch": 2.67,
      "learning_rate": 1.4903800290010817e-06,
      "loss": 0.508,
      "step": 1380
    },
    {
      "epoch": 2.69,
      "learning_rate": 1.3226726462218897e-06,
      "loss": 0.5818,
      "step": 1390
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.1647168573025474e-06,
      "loss": 0.5936,
      "step": 1400
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.0165777169123703e-06,
      "loss": 0.5606,
      "step": 1410
    },
    {
      "epoch": 2.75,
      "learning_rate": 8.783162366980763e-07,
      "loss": 0.6648,
      "step": 1420
    },
    {
      "epoch": 2.77,
      "learning_rate": 7.499893601559255e-07,
      "loss": 0.6249,
      "step": 1430
    },
    {
      "epoch": 2.79,
      "learning_rate": 6.316499391793212e-07,
      "loss": 0.5482,
      "step": 1440
    },
    {
      "epoch": 2.81,
      "learning_rate": 5.233467122915642e-07,
      "loss": 0.4884,
      "step": 1450
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.2512428457271435e-07,
      "loss": 0.6196,
      "step": 1460
    },
    {
      "epoch": 2.84,
      "learning_rate": 3.370231092888365e-07,
      "loss": 0.5377,
      "step": 1470
    },
    {
      "epoch": 2.86,
      "learning_rate": 2.590794712311606e-07,
      "loss": 0.5291,
      "step": 1480
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.913254717720664e-07,
      "loss": 0.5918,
      "step": 1490
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.3378901564400636e-07,
      "loss": 0.5625,
      "step": 1500
    },
    {
      "epoch": 2.92,
      "learning_rate": 8.649379944685732e-08,
      "loss": 0.4924,
      "step": 1510
    },
    {
      "epoch": 2.94,
      "learning_rate": 4.9459301888366004e-08,
      "loss": 0.5611,
      "step": 1520
    },
    {
      "epoch": 2.96,
      "learning_rate": 2.2700775761791416e-08,
      "loss": 0.5466,
      "step": 1530
    },
    {
      "epoch": 2.98,
      "learning_rate": 6.229241663974206e-09,
      "loss": 0.5867,
      "step": 1540
    },
    {
      "epoch": 3.0,
      "step": 1548,
      "total_flos": 5.030540427342643e+17,
      "train_loss": 0.6278958536241713,
      "train_runtime": 4523.4188,
      "train_samples_per_second": 5.483,
      "train_steps_per_second": 0.342
    }
  ],
  "logging_steps": 10,
  "max_steps": 1548,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "total_flos": 5.030540427342643e+17,
  "trial_name": null,
  "trial_params": null
}