| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9991220368744512, |
| "global_step": 569, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.997256670306478e-06, |
| "loss": 0.6949, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.980502778148438e-06, |
| "loss": 0.6937, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.948570063910216e-06, |
| "loss": 0.6942, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 9.901555847282123e-06, |
| "loss": 0.6937, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 9.839603411073388e-06, |
| "loss": 0.675, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 9.762901564536523e-06, |
| "loss": 0.657, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 9.671684067943056e-06, |
| "loss": 0.6437, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 9.566228920164405e-06, |
| "loss": 0.6067, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.446857511429e-06, |
| "loss": 0.5389, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 9.313933643837825e-06, |
| "loss": 0.3593, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 9.167862422623474e-06, |
| "loss": 0.1982, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.009089021531777e-06, |
| "loss": 0.1791, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 8.838097326088667e-06, |
| "loss": 0.1522, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 8.65540845888717e-06, |
| "loss": 0.1841, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 8.46157919138889e-06, |
| "loss": 0.1299, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 8.257200247080249e-06, |
| "loss": 0.1695, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.042894501154937e-06, |
| "loss": 0.1536, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.819315082209217e-06, |
| "loss": 0.1233, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 7.587143381735498e-06, |
| "loss": 0.1254, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 7.347086977480552e-06, |
| "loss": 0.1153, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 7.09987747699721e-06, |
| "loss": 0.1524, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 6.846268287961667e-06, |
| "loss": 0.1299, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 6.587032322051667e-06, |
| "loss": 0.1232, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 6.32295963938335e-06, |
| "loss": 0.1385, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 6.05485504068568e-06, |
| "loss": 0.1248, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 5.783535614550666e-06, |
| "loss": 0.1271, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 5.509828247234505e-06, |
| "loss": 0.073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 5.234567102598881e-06, |
| "loss": 0.1234, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.958591079872667e-06, |
| "loss": 0.1003, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.682741256981922e-06, |
| "loss": 0.1757, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.407858327239952e-06, |
| "loss": 0.1151, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.134780037209563e-06, |
| "loss": 0.1124, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 3.864338633545956e-06, |
| "loss": 0.0899, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.597358326601413e-06, |
| "loss": 0.1251, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.334652778521813e-06, |
| "loss": 0.171, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.077022623490371e-06, |
| "loss": 0.1447, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 2.825253027676026e-06, |
| "loss": 0.1192, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 2.580111296322904e-06, |
| "loss": 0.1158, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 2.342344535273608e-06, |
| "loss": 0.1028, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 2.112677374053164e-06, |
| "loss": 0.1171, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.8918097574529193e-06, |
| "loss": 0.1017, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 1.68041481234479e-06, |
| "loss": 0.0763, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 1.4791367962271425e-06, |
| "loss": 0.1185, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.2885891337543539e-06, |
| "loss": 0.0701, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 1.1093525472340471e-06, |
| "loss": 0.0873, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.419732867896048e-07, |
| "loss": 0.127, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.869614655817576e-07, |
| "loss": 0.1289, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 6.44789505162955e-07, |
| "loss": 0.1211, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 5.158906957025079e-07, |
| "loss": 0.094, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.0065787547042543e-07, |
| "loss": 0.09, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.994422336044345e-07, |
| "loss": 0.1208, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 2.1255223980891027e-07, |
| "loss": 0.0839, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 1.402527042476276e-07, |
| "loss": 0.0934, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 8.276397049545359e-08, |
| "loss": 0.094, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.026124400856479e-08, |
| "loss": 0.0984, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.2874058159796366e-08, |
| "loss": 0.0783, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 569, |
| "total_flos": 0.0, |
| "train_loss": 0.21165546916281192, |
| "train_runtime": 6592.6141, |
| "train_samples_per_second": 5.528, |
| "train_steps_per_second": 0.086 |
| } |
| ], |
| "max_steps": 569, |
| "num_train_epochs": 1, |
| "total_flos": 0.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|