{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9956458635703918, "eval_steps": 500, "global_step": 1548, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.999747735219333e-05, "loss": 1.6087, "step": 10 }, { "epoch": 0.04, "learning_rate": 4.998512275175808e-05, "loss": 1.4212, "step": 20 }, { "epoch": 0.06, "learning_rate": 4.9962477937145644e-05, "loss": 1.0602, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.992955223470575e-05, "loss": 1.0232, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.9886359205009334e-05, "loss": 1.0354, "step": 50 }, { "epoch": 0.12, "learning_rate": 4.9832916637263665e-05, "loss": 0.8403, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.976924654198569e-05, "loss": 0.7628, "step": 70 }, { "epoch": 0.15, "learning_rate": 4.9695375141937e-05, "loss": 0.7421, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.9611332861323875e-05, "loss": 0.7773, "step": 90 }, { "epoch": 0.19, "learning_rate": 4.95171543132669e-05, "loss": 0.7814, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.941287828554553e-05, "loss": 0.7798, "step": 110 }, { "epoch": 0.23, "learning_rate": 4.929854772462312e-05, "loss": 0.7717, "step": 120 }, { "epoch": 0.25, "learning_rate": 4.9174209717959294e-05, "loss": 0.8173, "step": 130 }, { "epoch": 0.27, "learning_rate": 4.9039915474616805e-05, "loss": 0.8009, "step": 140 }, { "epoch": 0.29, "learning_rate": 4.889572030417091e-05, "loss": 0.7499, "step": 150 }, { "epoch": 0.31, "learning_rate": 4.874168359392987e-05, "loss": 0.6257, "step": 160 }, { "epoch": 0.33, "learning_rate": 4.857786878447612e-05, "loss": 0.7029, "step": 170 }, { "epoch": 0.35, "learning_rate": 4.8404343343538014e-05, "loss": 0.6505, "step": 180 }, { "epoch": 0.37, "learning_rate": 4.822117873820301e-05, "loss": 0.6647, "step": 190 }, { "epoch": 0.39, "learning_rate": 4.802845040548363e-05, "loss": 0.6787, "step": 200 }, { "epoch": 0.41, "learning_rate": 4.782623772124855e-05, "loss": 0.5937, "step": 210 }, { "epoch": 0.43, "learning_rate": 4.7614623967531244e-05, "loss": 0.649, "step": 220 }, { "epoch": 0.45, "learning_rate": 4.7393696298230084e-05, "loss": 0.729, "step": 230 }, { "epoch": 0.46, "learning_rate": 4.716354570321361e-05, "loss": 0.6169, "step": 240 }, { "epoch": 0.48, "learning_rate": 4.692426697084605e-05, "loss": 0.6484, "step": 250 }, { "epoch": 0.5, "learning_rate": 4.6675958648948394e-05, "loss": 0.6714, "step": 260 }, { "epoch": 0.52, "learning_rate": 4.6418723004211075e-05, "loss": 0.6608, "step": 270 }, { "epoch": 0.54, "learning_rate": 4.615266598007512e-05, "loss": 0.6982, "step": 280 }, { "epoch": 0.56, "learning_rate": 4.587789715309888e-05, "loss": 0.6304, "step": 290 }, { "epoch": 0.58, "learning_rate": 4.559452968782861e-05, "loss": 0.6261, "step": 300 }, { "epoch": 0.6, "learning_rate": 4.530268029019117e-05, "loss": 0.6385, "step": 310 }, { "epoch": 0.62, "learning_rate": 4.500246915942827e-05, "loss": 0.6703, "step": 320 }, { "epoch": 0.64, "learning_rate": 4.469401993859201e-05, "loss": 0.62, "step": 330 }, { "epoch": 0.66, "learning_rate": 4.437745966362201e-05, "loss": 0.7172, "step": 340 }, { "epoch": 0.68, "learning_rate": 4.4052918711025194e-05, "loss": 0.5989, "step": 350 }, { "epoch": 0.7, "learning_rate": 4.372053074417975e-05, "loss": 0.6586, "step": 360 }, { "epoch": 0.72, "learning_rate": 4.3380432658285367e-05, "loss": 0.6459, "step": 370 }, { "epoch": 0.74, "learning_rate": 4.3032764523982496e-05, "loss": 0.6918, "step": 380 }, { "epoch": 0.75, "learning_rate": 4.267766952966369e-05, "loss": 0.6366, "step": 390 }, { "epoch": 0.77, "learning_rate": 4.231529392250095e-05, "loss": 0.6692, "step": 400 }, { "epoch": 0.79, "learning_rate": 4.194578694821332e-05, "loss": 0.715, "step": 410 }, { "epoch": 0.81, "learning_rate": 4.156930078959946e-05, "loss": 0.6242, "step": 420 }, { "epoch": 0.83, "learning_rate": 4.1185990503860626e-05, "loss": 0.7372, "step": 430 }, { "epoch": 0.85, "learning_rate": 4.079601395873979e-05, "loss": 0.6203, "step": 440 }, { "epoch": 0.87, "learning_rate": 4.0399531767503204e-05, "loss": 0.7101, "step": 450 }, { "epoch": 0.89, "learning_rate": 3.999670722279131e-05, "loss": 0.6866, "step": 460 }, { "epoch": 0.91, "learning_rate": 3.958770622936599e-05, "loss": 0.5917, "step": 470 }, { "epoch": 0.93, "learning_rate": 3.917269723578212e-05, "loss": 0.664, "step": 480 }, { "epoch": 0.95, "learning_rate": 3.8751851165011413e-05, "loss": 0.6763, "step": 490 }, { "epoch": 0.97, "learning_rate": 3.8325341344047174e-05, "loss": 0.615, "step": 500 }, { "epoch": 0.99, "learning_rate": 3.7893343432518946e-05, "loss": 0.5707, "step": 510 }, { "epoch": 1.01, "learning_rate": 3.745603535034641e-05, "loss": 0.6939, "step": 520 }, { "epoch": 1.03, "learning_rate": 3.701359720446249e-05, "loss": 0.6863, "step": 530 }, { "epoch": 1.04, "learning_rate": 3.656621121463557e-05, "loss": 0.6671, "step": 540 }, { "epoch": 1.06, "learning_rate": 3.611406163842168e-05, "loss": 0.518, "step": 550 }, { "epoch": 1.08, "learning_rate": 3.565733469527731e-05, "loss": 0.6747, "step": 560 }, { "epoch": 1.1, "learning_rate": 3.519621848986428e-05, "loss": 0.5754, "step": 570 }, { "epoch": 1.12, "learning_rate": 3.473090293457811e-05, "loss": 0.6061, "step": 580 }, { "epoch": 1.14, "learning_rate": 3.426157967133192e-05, "loss": 0.5859, "step": 590 }, { "epoch": 1.16, "learning_rate": 3.3788441992628026e-05, "loss": 0.546, "step": 600 }, { "epoch": 1.18, "learning_rate": 3.331168476194969e-05, "loss": 0.6333, "step": 610 }, { "epoch": 1.2, "learning_rate": 3.283150433350589e-05, "loss": 0.6332, "step": 620 }, { "epoch": 1.22, "learning_rate": 3.234809847136213e-05, "loss": 0.5709, "step": 630 }, { "epoch": 1.24, "learning_rate": 3.1861666267990566e-05, "loss": 0.578, "step": 640 }, { "epoch": 1.26, "learning_rate": 3.137240806227306e-05, "loss": 0.537, "step": 650 }, { "epoch": 1.28, "learning_rate": 3.08805253569909e-05, "loss": 0.5905, "step": 660 }, { "epoch": 1.3, "learning_rate": 3.038622073583507e-05, "loss": 0.6015, "step": 670 }, { "epoch": 1.32, "learning_rate": 2.9889697779971504e-05, "loss": 0.5735, "step": 680 }, { "epoch": 1.34, "learning_rate": 2.9391160984195382e-05, "loss": 0.5791, "step": 690 }, { "epoch": 1.35, "learning_rate": 2.8890815672709225e-05, "loss": 0.5683, "step": 700 }, { "epoch": 1.37, "learning_rate": 2.8388867914559347e-05, "loss": 0.6583, "step": 710 }, { "epoch": 1.39, "learning_rate": 2.7885524438765603e-05, "loss": 0.6091, "step": 720 }, { "epoch": 1.41, "learning_rate": 2.7380992549179235e-05, "loss": 0.5614, "step": 730 }, { "epoch": 1.43, "learning_rate": 2.6875480039104085e-05, "loss": 0.6208, "step": 740 }, { "epoch": 1.45, "learning_rate": 2.636919510571609e-05, "loss": 0.5686, "step": 750 }, { "epoch": 1.47, "learning_rate": 2.5862346264316605e-05, "loss": 0.5565, "step": 760 }, { "epoch": 1.49, "learning_rate": 2.5355142262454506e-05, "loss": 0.5936, "step": 770 }, { "epoch": 1.51, "learning_rate": 2.484779199395285e-05, "loss": 0.6469, "step": 780 }, { "epoch": 1.53, "learning_rate": 2.4340504412875113e-05, "loss": 0.5551, "step": 790 }, { "epoch": 1.55, "learning_rate": 2.3833488447466746e-05, "loss": 0.6454, "step": 800 }, { "epoch": 1.57, "learning_rate": 2.3326952914107268e-05, "loss": 0.61, "step": 810 }, { "epoch": 1.59, "learning_rate": 2.2821106431308544e-05, "loss": 0.6171, "step": 820 }, { "epoch": 1.61, "learning_rate": 2.2316157333794414e-05, "loss": 0.5636, "step": 830 }, { "epoch": 1.63, "learning_rate": 2.1812313586697307e-05, "loss": 0.5094, "step": 840 }, { "epoch": 1.64, "learning_rate": 2.1309782699907042e-05, "loss": 0.6278, "step": 850 }, { "epoch": 1.66, "learning_rate": 2.0808771642607146e-05, "loss": 0.5556, "step": 860 }, { "epoch": 1.68, "learning_rate": 2.0309486758033773e-05, "loss": 0.5103, "step": 870 }, { "epoch": 1.7, "learning_rate": 1.9812133678492554e-05, "loss": 0.5002, "step": 880 }, { "epoch": 1.72, "learning_rate": 1.9316917240668133e-05, "loss": 0.626, "step": 890 }, { "epoch": 1.74, "learning_rate": 1.8824041401261462e-05, "loss": 0.5432, "step": 900 }, { "epoch": 1.76, "learning_rate": 1.833370915298948e-05, "loss": 0.5423, "step": 910 }, { "epoch": 1.78, "learning_rate": 1.784612244098181e-05, "loss": 0.5179, "step": 920 }, { "epoch": 1.8, "learning_rate": 1.7361482079608914e-05, "loss": 0.5541, "step": 930 }, { "epoch": 1.82, "learning_rate": 1.687998766977597e-05, "loss": 0.6554, "step": 940 }, { "epoch": 1.84, "learning_rate": 1.6401837516716546e-05, "loss": 0.601, "step": 950 }, { "epoch": 1.86, "learning_rate": 1.5927228548319767e-05, "loss": 0.5267, "step": 960 }, { "epoch": 1.88, "learning_rate": 1.545635623402496e-05, "loss": 0.5281, "step": 970 }, { "epoch": 1.9, "learning_rate": 1.4989414504316748e-05, "loss": 0.5823, "step": 980 }, { "epoch": 1.92, "learning_rate": 1.4526595670854159e-05, "loss": 0.5789, "step": 990 }, { "epoch": 1.94, "learning_rate": 1.4068090347266311e-05, "loss": 0.6653, "step": 1000 }, { "epoch": 1.95, "learning_rate": 1.3614087370647479e-05, "loss": 0.5199, "step": 1010 }, { "epoch": 1.97, "learning_rate": 1.3164773723783918e-05, "loss": 0.6317, "step": 1020 }, { "epoch": 1.99, "learning_rate": 1.2720334458144235e-05, "loss": 0.4948, "step": 1030 }, { "epoch": 2.01, "learning_rate": 1.2280952617665334e-05, "loss": 0.6401, "step": 1040 }, { "epoch": 2.03, "learning_rate": 1.1846809163365052e-05, "loss": 0.5579, "step": 1050 }, { "epoch": 2.05, "learning_rate": 1.1418082898812721e-05, "loss": 0.4654, "step": 1060 }, { "epoch": 2.07, "learning_rate": 1.0994950396488275e-05, "loss": 0.5219, "step": 1070 }, { "epoch": 2.09, "learning_rate": 1.057758592506022e-05, "loss": 0.6016, "step": 1080 }, { "epoch": 2.11, "learning_rate": 1.0166161377612437e-05, "loss": 0.515, "step": 1090 }, { "epoch": 2.13, "learning_rate": 9.760846200849388e-06, "loss": 0.5687, "step": 1100 }, { "epoch": 2.15, "learning_rate": 9.361807325308861e-06, "loss": 0.5587, "step": 1110 }, { "epoch": 2.17, "learning_rate": 8.969209096611092e-06, "loss": 0.5365, "step": 1120 }, { "epoch": 2.19, "learning_rate": 8.5832132077723e-06, "loss": 0.514, "step": 1130 }, { "epoch": 2.21, "learning_rate": 8.203978632610915e-06, "loss": 0.5671, "step": 1140 }, { "epoch": 2.23, "learning_rate": 7.831661560273585e-06, "loss": 0.5803, "step": 1150 }, { "epoch": 2.24, "learning_rate": 7.466415330908147e-06, "loss": 0.6003, "step": 1160 }, { "epoch": 2.26, "learning_rate": 7.108390372509893e-06, "loss": 0.59, "step": 1170 }, { "epoch": 2.28, "learning_rate": 6.757734138967248e-06, "loss": 0.6142, "step": 1180 }, { "epoch": 2.3, "learning_rate": 6.414591049332366e-06, "loss": 0.5202, "step": 1190 }, { "epoch": 2.32, "learning_rate": 6.079102428341588e-06, "loss": 0.5333, "step": 1200 }, { "epoch": 2.34, "learning_rate": 5.7514064482104e-06, "loss": 0.5676, "step": 1210 }, { "epoch": 2.36, "learning_rate": 5.431638071726602e-06, "loss": 0.5741, "step": 1220 }, { "epoch": 2.38, "learning_rate": 5.11992899666546e-06, "loss": 0.5904, "step": 1230 }, { "epoch": 2.4, "learning_rate": 4.8164076015494695e-06, "loss": 0.5816, "step": 1240 }, { "epoch": 2.42, "learning_rate": 4.521198892775203e-06, "loss": 0.5774, "step": 1250 }, { "epoch": 2.44, "learning_rate": 4.234424453128974e-06, "loss": 0.5844, "step": 1260 }, { "epoch": 2.46, "learning_rate": 3.9562023917124905e-06, "loss": 0.5407, "step": 1270 }, { "epoch": 2.48, "learning_rate": 3.6866472952992226e-06, "loss": 0.5709, "step": 1280 }, { "epoch": 2.5, "learning_rate": 3.425870181141394e-06, "loss": 0.5139, "step": 1290 }, { "epoch": 2.52, "learning_rate": 3.173978451247153e-06, "loss": 0.5911, "step": 1300 }, { "epoch": 2.54, "learning_rate": 2.931075848146647e-06, "loss": 0.5552, "step": 1310 }, { "epoch": 2.55, "learning_rate": 2.697262412165261e-06, "loss": 0.5725, "step": 1320 }, { "epoch": 2.57, "learning_rate": 2.4726344402216662e-06, "loss": 0.5334, "step": 1330 }, { "epoch": 2.59, "learning_rate": 2.2572844461675902e-06, "loss": 0.4746, "step": 1340 }, { "epoch": 2.61, "learning_rate": 2.051301122685634e-06, "loss": 0.5849, "step": 1350 }, { "epoch": 2.63, "learning_rate": 1.8547693047608588e-06, "loss": 0.5648, "step": 1360 }, { "epoch": 2.65, "learning_rate": 1.6677699347412035e-06, "loss": 0.5596, "step": 1370 }, { "epoch": 2.67, "learning_rate": 1.4903800290010817e-06, "loss": 0.508, "step": 1380 }, { "epoch": 2.69, "learning_rate": 1.3226726462218897e-06, "loss": 0.5818, "step": 1390 }, { "epoch": 2.71, "learning_rate": 1.1647168573025474e-06, "loss": 0.5936, "step": 1400 }, { "epoch": 2.73, "learning_rate": 1.0165777169123703e-06, "loss": 0.5606, "step": 1410 }, { "epoch": 2.75, "learning_rate": 8.783162366980763e-07, "loss": 0.6648, "step": 1420 }, { "epoch": 2.77, "learning_rate": 7.499893601559255e-07, "loss": 0.6249, "step": 1430 }, { "epoch": 2.79, "learning_rate": 6.316499391793212e-07, "loss": 0.5482, "step": 1440 }, { "epoch": 2.81, "learning_rate": 5.233467122915642e-07, "loss": 0.4884, "step": 1450 }, { "epoch": 2.83, "learning_rate": 4.2512428457271435e-07, "loss": 0.6196, "step": 1460 }, { "epoch": 2.84, "learning_rate": 3.370231092888365e-07, "loss": 0.5377, "step": 1470 }, { "epoch": 2.86, "learning_rate": 2.590794712311606e-07, "loss": 0.5291, "step": 1480 }, { "epoch": 2.88, "learning_rate": 1.913254717720664e-07, "loss": 0.5918, "step": 1490 }, { "epoch": 2.9, "learning_rate": 1.3378901564400636e-07, "loss": 0.5625, "step": 1500 }, { "epoch": 2.92, "learning_rate": 8.649379944685732e-08, "loss": 0.4924, "step": 1510 }, { "epoch": 2.94, "learning_rate": 4.9459301888366004e-08, "loss": 0.5611, "step": 1520 }, { "epoch": 2.96, "learning_rate": 2.2700775761791416e-08, "loss": 0.5466, "step": 1530 }, { "epoch": 2.98, "learning_rate": 6.229241663974206e-09, "loss": 0.5867, "step": 1540 }, { "epoch": 3.0, "step": 1548, "total_flos": 5.030540427342643e+17, "train_loss": 0.6278958536241713, "train_runtime": 4523.4188, "train_samples_per_second": 5.483, "train_steps_per_second": 0.342 } ], "logging_steps": 10, "max_steps": 1548, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 5.030540427342643e+17, "trial_name": null, "trial_params": null }