{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9991220368744512, "global_step": 569, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 9.997256670306478e-06, "loss": 0.6949, "step": 10 }, { "epoch": 0.04, "learning_rate": 9.980502778148438e-06, "loss": 0.6937, "step": 20 }, { "epoch": 0.05, "learning_rate": 9.948570063910216e-06, "loss": 0.6942, "step": 30 }, { "epoch": 0.07, "learning_rate": 9.901555847282123e-06, "loss": 0.6937, "step": 40 }, { "epoch": 0.09, "learning_rate": 9.839603411073388e-06, "loss": 0.675, "step": 50 }, { "epoch": 0.11, "learning_rate": 9.762901564536523e-06, "loss": 0.657, "step": 60 }, { "epoch": 0.12, "learning_rate": 9.671684067943056e-06, "loss": 0.6437, "step": 70 }, { "epoch": 0.14, "learning_rate": 9.566228920164405e-06, "loss": 0.6067, "step": 80 }, { "epoch": 0.16, "learning_rate": 9.446857511429e-06, "loss": 0.5389, "step": 90 }, { "epoch": 0.18, "learning_rate": 9.313933643837825e-06, "loss": 0.3593, "step": 100 }, { "epoch": 0.19, "learning_rate": 9.167862422623474e-06, "loss": 0.1982, "step": 110 }, { "epoch": 0.21, "learning_rate": 9.009089021531777e-06, "loss": 0.1791, "step": 120 }, { "epoch": 0.23, "learning_rate": 8.838097326088667e-06, "loss": 0.1522, "step": 130 }, { "epoch": 0.25, "learning_rate": 8.65540845888717e-06, "loss": 0.1841, "step": 140 }, { "epoch": 0.26, "learning_rate": 8.46157919138889e-06, "loss": 0.1299, "step": 150 }, { "epoch": 0.28, "learning_rate": 8.257200247080249e-06, "loss": 0.1695, "step": 160 }, { "epoch": 0.3, "learning_rate": 8.042894501154937e-06, "loss": 0.1536, "step": 170 }, { "epoch": 0.32, "learning_rate": 7.819315082209217e-06, "loss": 0.1233, "step": 180 }, { "epoch": 0.33, "learning_rate": 7.587143381735498e-06, "loss": 0.1254, "step": 190 }, { "epoch": 0.35, "learning_rate": 7.347086977480552e-06, "loss": 0.1153, "step": 200 }, { "epoch": 0.37, "learning_rate": 7.09987747699721e-06, "loss": 0.1524, "step": 210 }, { "epoch": 0.39, "learning_rate": 6.846268287961667e-06, "loss": 0.1299, "step": 220 }, { "epoch": 0.4, "learning_rate": 6.587032322051667e-06, "loss": 0.1232, "step": 230 }, { "epoch": 0.42, "learning_rate": 6.32295963938335e-06, "loss": 0.1385, "step": 240 }, { "epoch": 0.44, "learning_rate": 6.05485504068568e-06, "loss": 0.1248, "step": 250 }, { "epoch": 0.46, "learning_rate": 5.783535614550666e-06, "loss": 0.1271, "step": 260 }, { "epoch": 0.47, "learning_rate": 5.509828247234505e-06, "loss": 0.073, "step": 270 }, { "epoch": 0.49, "learning_rate": 5.234567102598881e-06, "loss": 0.1234, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.958591079872667e-06, "loss": 0.1003, "step": 290 }, { "epoch": 0.53, "learning_rate": 4.682741256981922e-06, "loss": 0.1757, "step": 300 }, { "epoch": 0.54, "learning_rate": 4.407858327239952e-06, "loss": 0.1151, "step": 310 }, { "epoch": 0.56, "learning_rate": 4.134780037209563e-06, "loss": 0.1124, "step": 320 }, { "epoch": 0.58, "learning_rate": 3.864338633545956e-06, "loss": 0.0899, "step": 330 }, { "epoch": 0.6, "learning_rate": 3.597358326601413e-06, "loss": 0.1251, "step": 340 }, { "epoch": 0.61, "learning_rate": 3.334652778521813e-06, "loss": 0.171, "step": 350 }, { "epoch": 0.63, "learning_rate": 3.077022623490371e-06, "loss": 0.1447, "step": 360 }, { "epoch": 0.65, "learning_rate": 2.825253027676026e-06, "loss": 0.1192, "step": 370 }, { "epoch": 0.67, "learning_rate": 2.580111296322904e-06, "loss": 0.1158, "step": 380 }, { "epoch": 0.68, "learning_rate": 2.342344535273608e-06, "loss": 0.1028, "step": 390 }, { "epoch": 0.7, "learning_rate": 2.112677374053164e-06, "loss": 0.1171, "step": 400 }, { "epoch": 0.72, "learning_rate": 1.8918097574529193e-06, "loss": 0.1017, "step": 410 }, { "epoch": 0.74, "learning_rate": 1.68041481234479e-06, "loss": 0.0763, "step": 420 }, { "epoch": 0.76, "learning_rate": 1.4791367962271425e-06, "loss": 0.1185, "step": 430 }, { "epoch": 0.77, "learning_rate": 1.2885891337543539e-06, "loss": 0.0701, "step": 440 }, { "epoch": 0.79, "learning_rate": 1.1093525472340471e-06, "loss": 0.0873, "step": 450 }, { "epoch": 0.81, "learning_rate": 9.419732867896048e-07, "loss": 0.127, "step": 460 }, { "epoch": 0.83, "learning_rate": 7.869614655817576e-07, "loss": 0.1289, "step": 470 }, { "epoch": 0.84, "learning_rate": 6.44789505162955e-07, "loss": 0.1211, "step": 480 }, { "epoch": 0.86, "learning_rate": 5.158906957025079e-07, "loss": 0.094, "step": 490 }, { "epoch": 0.88, "learning_rate": 4.0065787547042543e-07, "loss": 0.09, "step": 500 }, { "epoch": 0.9, "learning_rate": 2.994422336044345e-07, "loss": 0.1208, "step": 510 }, { "epoch": 0.91, "learning_rate": 2.1255223980891027e-07, "loss": 0.0839, "step": 520 }, { "epoch": 0.93, "learning_rate": 1.402527042476276e-07, "loss": 0.0934, "step": 530 }, { "epoch": 0.95, "learning_rate": 8.276397049545359e-08, "loss": 0.094, "step": 540 }, { "epoch": 0.97, "learning_rate": 4.026124400856479e-08, "loss": 0.0984, "step": 550 }, { "epoch": 0.98, "learning_rate": 1.2874058159796366e-08, "loss": 0.0783, "step": 560 }, { "epoch": 1.0, "step": 569, "total_flos": 0.0, "train_loss": 0.21165546916281192, "train_runtime": 6592.6141, "train_samples_per_second": 5.528, "train_steps_per_second": 0.086 } ], "max_steps": 569, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }