{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.999401161746212,
  "global_step": 83490,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 2.9826326506168404e-06,
      "loss": 1.509,
      "step": 500
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.9652653012336808e-06,
      "loss": 1.5092,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.947897951850521e-06,
      "loss": 1.5089,
      "step": 1500
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.9305306024673614e-06,
      "loss": 1.5082,
      "step": 2000
    },
    {
      "epoch": 0.3,
      "learning_rate": 2.9131632530842018e-06,
      "loss": 1.5086,
      "step": 2500
    },
    {
      "epoch": 0.36,
      "learning_rate": 2.895795903701042e-06,
      "loss": 1.5082,
      "step": 3000
    },
    {
      "epoch": 0.42,
      "learning_rate": 2.8784285543178824e-06,
      "loss": 1.5077,
      "step": 3500
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.8610612049347228e-06,
      "loss": 1.5077,
      "step": 4000
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.843693855551563e-06,
      "loss": 1.5067,
      "step": 4500
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.8263265061684034e-06,
      "loss": 1.507,
      "step": 5000
    },
    {
      "epoch": 0.66,
      "learning_rate": 2.8089591567852438e-06,
      "loss": 1.5067,
      "step": 5500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.791591807402084e-06,
      "loss": 1.5067,
      "step": 6000
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.7742244580189245e-06,
      "loss": 1.5061,
      "step": 6500
    },
    {
      "epoch": 0.84,
      "learning_rate": 2.756857108635765e-06,
      "loss": 1.5054,
      "step": 7000
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.739489759252605e-06,
      "loss": 1.5058,
      "step": 7500
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.7221224098694455e-06,
      "loss": 1.5052,
      "step": 8000
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.5115793943405151,
      "eval_runtime": 99.986,
      "eval_samples_per_second": 300.042,
      "eval_steps_per_second": 1.88,
      "step": 8349
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.704755060486286e-06,
      "loss": 1.5046,
      "step": 8500
    },
    {
      "epoch": 1.08,
      "learning_rate": 2.687387711103126e-06,
      "loss": 1.5045,
      "step": 9000
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.6700203617199665e-06,
      "loss": 1.5038,
      "step": 9500
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.652653012336807e-06,
      "loss": 1.5042,
      "step": 10000
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.635285662953647e-06,
      "loss": 1.5039,
      "step": 10500
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.6179183135704875e-06,
      "loss": 1.5033,
      "step": 11000
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.600550964187328e-06,
      "loss": 1.5028,
      "step": 11500
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.583183614804168e-06,
      "loss": 1.5036,
      "step": 12000
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.5658162654210085e-06,
      "loss": 1.5032,
      "step": 12500
    },
    {
      "epoch": 1.56,
      "learning_rate": 2.548448916037849e-06,
      "loss": 1.5029,
      "step": 13000
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.5310815666546896e-06,
      "loss": 1.5028,
      "step": 13500
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.5137142172715295e-06,
      "loss": 1.5024,
      "step": 14000
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.49634686788837e-06,
      "loss": 1.5022,
      "step": 14500
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.47897951850521e-06,
      "loss": 1.5022,
      "step": 15000
    },
    {
      "epoch": 1.86,
      "learning_rate": 2.4616121691220505e-06,
      "loss": 1.5019,
      "step": 15500
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.444244819738891e-06,
      "loss": 1.502,
      "step": 16000
    },
    {
      "epoch": 1.98,
      "learning_rate": 2.426877470355731e-06,
      "loss": 1.5019,
      "step": 16500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.509125828742981,
      "eval_runtime": 102.1448,
      "eval_samples_per_second": 293.701,
      "eval_steps_per_second": 1.841,
      "step": 16699
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.4095101209725715e-06,
      "loss": 1.501,
      "step": 17000
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.392142771589412e-06,
      "loss": 1.5004,
      "step": 17500
    },
    {
      "epoch": 2.16,
      "learning_rate": 2.374775422206252e-06,
      "loss": 1.5009,
      "step": 18000
    },
    {
      "epoch": 2.22,
      "learning_rate": 2.3574080728230925e-06,
      "loss": 1.5004,
      "step": 18500
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.340040723439933e-06,
      "loss": 1.5004,
      "step": 19000
    },
    {
      "epoch": 2.34,
      "learning_rate": 2.3226733740567732e-06,
      "loss": 1.5001,
      "step": 19500
    },
    {
      "epoch": 2.4,
      "learning_rate": 2.3053060246736136e-06,
      "loss": 1.4998,
      "step": 20000
    },
    {
      "epoch": 2.46,
      "learning_rate": 2.2879386752904543e-06,
      "loss": 1.4998,
      "step": 20500
    },
    {
      "epoch": 2.52,
      "learning_rate": 2.2705713259072942e-06,
      "loss": 1.4998,
      "step": 21000
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.2532039765241346e-06,
      "loss": 1.4997,
      "step": 21500
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.235836627140975e-06,
      "loss": 1.4992,
      "step": 22000
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.2184692777578152e-06,
      "loss": 1.4991,
      "step": 22500
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.2011019283746556e-06,
      "loss": 1.499,
      "step": 23000
    },
    {
      "epoch": 2.81,
      "learning_rate": 2.183734578991496e-06,
      "loss": 1.4987,
      "step": 23500
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.1663672296083363e-06,
      "loss": 1.4991,
      "step": 24000
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.1489998802251766e-06,
      "loss": 1.4986,
      "step": 24500
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.131632530842017e-06,
      "loss": 1.4989,
      "step": 25000
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.5069462060928345,
      "eval_runtime": 101.4916,
      "eval_samples_per_second": 295.591,
      "eval_steps_per_second": 1.852,
      "step": 25048
    },
    {
      "epoch": 3.05,
      "learning_rate": 2.1142651814588573e-06,
      "loss": 1.498,
      "step": 25500
    },
    {
      "epoch": 3.11,
      "learning_rate": 2.096897832075698e-06,
      "loss": 1.4977,
      "step": 26000
    },
    {
      "epoch": 3.17,
      "learning_rate": 2.079530482692538e-06,
      "loss": 1.4975,
      "step": 26500
    },
    {
      "epoch": 3.23,
      "learning_rate": 2.0621631333093783e-06,
      "loss": 1.4974,
      "step": 27000
    },
    {
      "epoch": 3.29,
      "learning_rate": 2.044795783926219e-06,
      "loss": 1.4976,
      "step": 27500
    },
    {
      "epoch": 3.35,
      "learning_rate": 2.027428434543059e-06,
      "loss": 1.4978,
      "step": 28000
    },
    {
      "epoch": 3.41,
      "learning_rate": 2.0100610851598993e-06,
      "loss": 1.4971,
      "step": 28500
    },
    {
      "epoch": 3.47,
      "learning_rate": 1.99269373577674e-06,
      "loss": 1.4974,
      "step": 29000
    },
    {
      "epoch": 3.53,
      "learning_rate": 1.97532638639358e-06,
      "loss": 1.4972,
      "step": 29500
    },
    {
      "epoch": 3.59,
      "learning_rate": 1.9579590370104203e-06,
      "loss": 1.4969,
      "step": 30000
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.940591687627261e-06,
      "loss": 1.4967,
      "step": 30500
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.923224338244101e-06,
      "loss": 1.4969,
      "step": 31000
    },
    {
      "epoch": 3.77,
      "learning_rate": 1.9058569888609415e-06,
      "loss": 1.497,
      "step": 31500
    },
    {
      "epoch": 3.83,
      "learning_rate": 1.888489639477782e-06,
      "loss": 1.4967,
      "step": 32000
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.871122290094622e-06,
      "loss": 1.4965,
      "step": 32500
    },
    {
      "epoch": 3.95,
      "learning_rate": 1.8537549407114627e-06,
      "loss": 1.4969,
      "step": 33000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.5056320428848267,
      "eval_runtime": 101.7674,
      "eval_samples_per_second": 294.79,
      "eval_steps_per_second": 1.847,
      "step": 33398
    },
    {
      "epoch": 4.01,
      "learning_rate": 1.8363875913283029e-06,
      "loss": 1.4962,
      "step": 33500
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.8190202419451432e-06,
      "loss": 1.4956,
      "step": 34000
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.8016528925619838e-06,
      "loss": 1.4955,
      "step": 34500
    },
    {
      "epoch": 4.19,
      "learning_rate": 1.7842855431788235e-06,
      "loss": 1.4955,
      "step": 35000
    },
    {
      "epoch": 4.25,
      "learning_rate": 1.7669181937956644e-06,
      "loss": 1.4953,
      "step": 35500
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.7495508444125048e-06,
      "loss": 1.4954,
      "step": 36000
    },
    {
      "epoch": 4.37,
      "learning_rate": 1.7321834950293447e-06,
      "loss": 1.4952,
      "step": 36500
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.714816145646185e-06,
      "loss": 1.4956,
      "step": 37000
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.6974487962630258e-06,
      "loss": 1.4953,
      "step": 37500
    },
    {
      "epoch": 4.55,
      "learning_rate": 1.6800814468798657e-06,
      "loss": 1.4951,
      "step": 38000
    },
    {
      "epoch": 4.61,
      "learning_rate": 1.6627140974967064e-06,
      "loss": 1.4953,
      "step": 38500
    },
    {
      "epoch": 4.67,
      "learning_rate": 1.645346748113547e-06,
      "loss": 1.4954,
      "step": 39000
    },
    {
      "epoch": 4.73,
      "learning_rate": 1.6279793987303871e-06,
      "loss": 1.495,
      "step": 39500
    },
    {
      "epoch": 4.79,
      "learning_rate": 1.6106120493472275e-06,
      "loss": 1.4951,
      "step": 40000
    },
    {
      "epoch": 4.85,
      "learning_rate": 1.5932446999640674e-06,
      "loss": 1.4946,
      "step": 40500
    },
    {
      "epoch": 4.91,
      "learning_rate": 1.5758773505809081e-06,
      "loss": 1.4948,
      "step": 41000
    },
    {
      "epoch": 4.97,
      "learning_rate": 1.5585100011977485e-06,
      "loss": 1.4946,
      "step": 41500
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.504227638244629,
      "eval_runtime": 101.5567,
      "eval_samples_per_second": 295.402,
      "eval_steps_per_second": 1.851,
      "step": 41747
    },
    {
      "epoch": 5.03,
      "learning_rate": 1.5411426518145884e-06,
      "loss": 1.4944,
      "step": 42000
    },
    {
      "epoch": 5.09,
      "learning_rate": 1.5237753024314291e-06,
      "loss": 1.494,
      "step": 42500
    },
    {
      "epoch": 5.15,
      "learning_rate": 1.5064079530482697e-06,
      "loss": 1.4942,
      "step": 43000
    },
    {
      "epoch": 5.21,
      "learning_rate": 1.4890406036651098e-06,
      "loss": 1.4938,
      "step": 43500
    },
    {
      "epoch": 5.27,
      "learning_rate": 1.4716732542819497e-06,
      "loss": 1.4939,
      "step": 44000
    },
    {
      "epoch": 5.33,
      "learning_rate": 1.4543059048987905e-06,
      "loss": 1.4938,
      "step": 44500
    },
    {
      "epoch": 5.39,
      "learning_rate": 1.4369385555156308e-06,
      "loss": 1.4937,
      "step": 45000
    },
    {
      "epoch": 5.45,
      "learning_rate": 1.4195712061324707e-06,
      "loss": 1.494,
      "step": 45500
    },
    {
      "epoch": 5.51,
      "learning_rate": 1.4022038567493115e-06,
      "loss": 1.4936,
      "step": 46000
    },
    {
      "epoch": 5.57,
      "learning_rate": 1.3848365073661518e-06,
      "loss": 1.4936,
      "step": 46500
    },
    {
      "epoch": 5.63,
      "learning_rate": 1.3674691579829922e-06,
      "loss": 1.4934,
      "step": 47000
    },
    {
      "epoch": 5.69,
      "learning_rate": 1.350101808599832e-06,
      "loss": 1.4936,
      "step": 47500
    },
    {
      "epoch": 5.75,
      "learning_rate": 1.3327344592166729e-06,
      "loss": 1.4933,
      "step": 48000
    },
    {
      "epoch": 5.81,
      "learning_rate": 1.3153671098335132e-06,
      "loss": 1.4934,
      "step": 48500
    },
    {
      "epoch": 5.87,
      "learning_rate": 1.297999760450353e-06,
      "loss": 1.493,
      "step": 49000
    },
    {
      "epoch": 5.93,
      "learning_rate": 1.2806324110671939e-06,
      "loss": 1.4932,
      "step": 49500
    },
    {
      "epoch": 5.99,
      "learning_rate": 1.2632650616840342e-06,
      "loss": 1.4933,
      "step": 50000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.5031880140304565,
      "eval_runtime": 101.923,
      "eval_samples_per_second": 294.34,
      "eval_steps_per_second": 1.845,
      "step": 50097
    },
    {
      "epoch": 6.05,
      "learning_rate": 1.2458977123008745e-06,
      "loss": 1.4928,
      "step": 50500
    },
    {
      "epoch": 6.11,
      "learning_rate": 1.2285303629177145e-06,
      "loss": 1.4928,
      "step": 51000
    },
    {
      "epoch": 6.17,
      "learning_rate": 1.2111630135345552e-06,
      "loss": 1.4927,
      "step": 51500
    },
    {
      "epoch": 6.23,
      "learning_rate": 1.1937956641513955e-06,
      "loss": 1.4926,
      "step": 52000
    },
    {
      "epoch": 6.29,
      "learning_rate": 1.1764283147682355e-06,
      "loss": 1.4929,
      "step": 52500
    },
    {
      "epoch": 6.35,
      "learning_rate": 1.1590609653850762e-06,
      "loss": 1.4927,
      "step": 53000
    },
    {
      "epoch": 6.41,
      "learning_rate": 1.1416936160019166e-06,
      "loss": 1.4924,
      "step": 53500
    },
    {
      "epoch": 6.47,
      "learning_rate": 1.124326266618757e-06,
      "loss": 1.4924,
      "step": 54000
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.1069589172355968e-06,
      "loss": 1.4921,
      "step": 54500
    },
    {
      "epoch": 6.59,
      "learning_rate": 1.0895915678524376e-06,
      "loss": 1.4922,
      "step": 55000
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.072224218469278e-06,
      "loss": 1.4925,
      "step": 55500
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.0548568690861182e-06,
      "loss": 1.4925,
      "step": 56000
    },
    {
      "epoch": 6.77,
      "learning_rate": 1.0374895197029584e-06,
      "loss": 1.4922,
      "step": 56500
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.020122170319799e-06,
      "loss": 1.4924,
      "step": 57000
    },
    {
      "epoch": 6.89,
      "learning_rate": 1.0027548209366393e-06,
      "loss": 1.492,
      "step": 57500
    },
    {
      "epoch": 6.95,
      "learning_rate": 9.853874715534794e-07,
      "loss": 1.4923,
      "step": 58000
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.502358317375183,
      "eval_runtime": 100.8628,
      "eval_samples_per_second": 297.434,
      "eval_steps_per_second": 1.864,
      "step": 58446
    },
    {
      "epoch": 7.01,
      "learning_rate": 9.6802012217032e-07,
      "loss": 1.4919,
      "step": 58500
    },
    {
      "epoch": 7.07,
      "learning_rate": 9.506527727871603e-07,
      "loss": 1.4919,
      "step": 59000
    },
    {
      "epoch": 7.13,
      "learning_rate": 9.332854234040007e-07,
      "loss": 1.4917,
      "step": 59500
    },
    {
      "epoch": 7.19,
      "learning_rate": 9.159180740208407e-07,
      "loss": 1.4917,
      "step": 60000
    },
    {
      "epoch": 7.25,
      "learning_rate": 8.985507246376812e-07,
      "loss": 1.4914,
      "step": 60500
    },
    {
      "epoch": 7.31,
      "learning_rate": 8.811833752545216e-07,
      "loss": 1.4917,
      "step": 61000
    },
    {
      "epoch": 7.37,
      "learning_rate": 8.638160258713617e-07,
      "loss": 1.4917,
      "step": 61500
    },
    {
      "epoch": 7.43,
      "learning_rate": 8.464486764882022e-07,
      "loss": 1.4914,
      "step": 62000
    },
    {
      "epoch": 7.49,
      "learning_rate": 8.290813271050427e-07,
      "loss": 1.4917,
      "step": 62500
    },
    {
      "epoch": 7.55,
      "learning_rate": 8.11713977721883e-07,
      "loss": 1.4914,
      "step": 63000
    },
    {
      "epoch": 7.61,
      "learning_rate": 7.943466283387231e-07,
      "loss": 1.4915,
      "step": 63500
    },
    {
      "epoch": 7.67,
      "learning_rate": 7.769792789555636e-07,
      "loss": 1.4915,
      "step": 64000
    },
    {
      "epoch": 7.73,
      "learning_rate": 7.596119295724041e-07,
      "loss": 1.4912,
      "step": 64500
    },
    {
      "epoch": 7.78,
      "learning_rate": 7.422445801892441e-07,
      "loss": 1.4913,
      "step": 65000
    },
    {
      "epoch": 7.84,
      "learning_rate": 7.248772308060845e-07,
      "loss": 1.4913,
      "step": 65500
    },
    {
      "epoch": 7.9,
      "learning_rate": 7.07509881422925e-07,
      "loss": 1.4912,
      "step": 66000
    },
    {
      "epoch": 7.96,
      "learning_rate": 6.901425320397654e-07,
      "loss": 1.4914,
      "step": 66500
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.5018346309661865,
      "eval_runtime": 100.3999,
      "eval_samples_per_second": 298.805,
      "eval_steps_per_second": 1.873,
      "step": 66796
    },
    {
      "epoch": 8.02,
      "learning_rate": 6.727751826566056e-07,
      "loss": 1.4912,
      "step": 67000
    },
    {
      "epoch": 8.08,
      "learning_rate": 6.554078332734459e-07,
      "loss": 1.4909,
      "step": 67500
    },
    {
      "epoch": 8.14,
      "learning_rate": 6.380404838902863e-07,
      "loss": 1.4912,
      "step": 68000
    },
    {
      "epoch": 8.2,
      "learning_rate": 6.206731345071265e-07,
      "loss": 1.4909,
      "step": 68500
    },
    {
      "epoch": 8.26,
      "learning_rate": 6.033057851239669e-07,
      "loss": 1.4911,
      "step": 69000
    },
    {
      "epoch": 8.32,
      "learning_rate": 5.859384357408073e-07,
      "loss": 1.4908,
      "step": 69500
    },
    {
      "epoch": 8.38,
      "learning_rate": 5.685710863576478e-07,
      "loss": 1.491,
      "step": 70000
    },
    {
      "epoch": 8.44,
      "learning_rate": 5.512037369744878e-07,
      "loss": 1.491,
      "step": 70500
    },
    {
      "epoch": 8.5,
      "learning_rate": 5.338363875913284e-07,
      "loss": 1.491,
      "step": 71000
    },
    {
      "epoch": 8.56,
      "learning_rate": 5.164690382081687e-07,
      "loss": 1.4911,
      "step": 71500
    },
    {
      "epoch": 8.62,
      "learning_rate": 4.991016888250091e-07,
      "loss": 1.4908,
      "step": 72000
    },
    {
      "epoch": 8.68,
      "learning_rate": 4.817343394418493e-07,
      "loss": 1.4907,
      "step": 72500
    },
    {
      "epoch": 8.74,
      "learning_rate": 4.643669900586897e-07,
      "loss": 1.4907,
      "step": 73000
    },
    {
      "epoch": 8.8,
      "learning_rate": 4.4699964067553015e-07,
      "loss": 1.4906,
      "step": 73500
    },
    {
      "epoch": 8.86,
      "learning_rate": 4.296322912923702e-07,
      "loss": 1.4907,
      "step": 74000
    },
    {
      "epoch": 8.92,
      "learning_rate": 4.1226494190921066e-07,
      "loss": 1.491,
      "step": 74500
    },
    {
      "epoch": 8.98,
      "learning_rate": 3.948975925260511e-07,
      "loss": 1.4907,
      "step": 75000
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.5016120672225952,
      "eval_runtime": 100.2408,
      "eval_samples_per_second": 299.279,
      "eval_steps_per_second": 1.875,
      "step": 75145
    },
    {
      "epoch": 9.04,
      "learning_rate": 3.775302431428916e-07,
      "loss": 1.4908,
      "step": 75500
    },
    {
      "epoch": 9.1,
      "learning_rate": 3.601628937597316e-07,
      "loss": 1.4906,
      "step": 76000
    },
    {
      "epoch": 9.16,
      "learning_rate": 3.4279554437657206e-07,
      "loss": 1.4906,
      "step": 76500
    },
    {
      "epoch": 9.22,
      "learning_rate": 3.254281949934125e-07,
      "loss": 1.4903,
      "step": 77000
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.0806084561025263e-07,
      "loss": 1.4905,
      "step": 77500
    },
    {
      "epoch": 9.34,
      "learning_rate": 2.90693496227093e-07,
      "loss": 1.4907,
      "step": 78000
    },
    {
      "epoch": 9.4,
      "learning_rate": 2.7332614684393346e-07,
      "loss": 1.4905,
      "step": 78500
    },
    {
      "epoch": 9.46,
      "learning_rate": 2.5595879746077385e-07,
      "loss": 1.4904,
      "step": 79000
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.38591448077614e-07,
      "loss": 1.4905,
      "step": 79500
    },
    {
      "epoch": 9.58,
      "learning_rate": 2.2122409869445442e-07,
      "loss": 1.4906,
      "step": 80000
    },
    {
      "epoch": 9.64,
      "learning_rate": 2.0385674931129484e-07,
      "loss": 1.4904,
      "step": 80500
    },
    {
      "epoch": 9.7,
      "learning_rate": 1.8648939992813496e-07,
      "loss": 1.4903,
      "step": 81000
    },
    {
      "epoch": 9.76,
      "learning_rate": 1.6912205054497538e-07,
      "loss": 1.4906,
      "step": 81500
    },
    {
      "epoch": 9.82,
      "learning_rate": 1.5175470116181582e-07,
      "loss": 1.4908,
      "step": 82000
    },
    {
      "epoch": 9.88,
      "learning_rate": 1.3438735177865624e-07,
      "loss": 1.49,
      "step": 82500
    },
    {
      "epoch": 9.94,
      "learning_rate": 1.1702000239549635e-07,
      "loss": 1.4905,
      "step": 83000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.5012842416763306,
      "eval_runtime": 100.3947,
      "eval_samples_per_second": 298.821,
      "eval_steps_per_second": 1.873,
      "step": 83490
    },
    {
      "epoch": 10.0,
      "step": 83490,
      "total_flos": 1.463646111228887e+18,
      "train_loss": 1.4961074900378695,
      "train_runtime": 199226.4751,
      "train_samples_per_second": 268.221,
      "train_steps_per_second": 0.419
    }
  ],
  "max_steps": 83490,
  "num_train_epochs": 10,
  "total_flos": 1.463646111228887e+18,
  "trial_name": null,
  "trial_params": null
}