{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999401161746212, "global_step": 83490, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 2.9826326506168404e-06, "loss": 1.509, "step": 500 }, { "epoch": 0.12, "learning_rate": 2.9652653012336808e-06, "loss": 1.5092, "step": 1000 }, { "epoch": 0.18, "learning_rate": 2.947897951850521e-06, "loss": 1.5089, "step": 1500 }, { "epoch": 0.24, "learning_rate": 2.9305306024673614e-06, "loss": 1.5082, "step": 2000 }, { "epoch": 0.3, "learning_rate": 2.9131632530842018e-06, "loss": 1.5086, "step": 2500 }, { "epoch": 0.36, "learning_rate": 2.895795903701042e-06, "loss": 1.5082, "step": 3000 }, { "epoch": 0.42, "learning_rate": 2.8784285543178824e-06, "loss": 1.5077, "step": 3500 }, { "epoch": 0.48, "learning_rate": 2.8610612049347228e-06, "loss": 1.5077, "step": 4000 }, { "epoch": 0.54, "learning_rate": 2.843693855551563e-06, "loss": 1.5067, "step": 4500 }, { "epoch": 0.6, "learning_rate": 2.8263265061684034e-06, "loss": 1.507, "step": 5000 }, { "epoch": 0.66, "learning_rate": 2.8089591567852438e-06, "loss": 1.5067, "step": 5500 }, { "epoch": 0.72, "learning_rate": 2.791591807402084e-06, "loss": 1.5067, "step": 6000 }, { "epoch": 0.78, "learning_rate": 2.7742244580189245e-06, "loss": 1.5061, "step": 6500 }, { "epoch": 0.84, "learning_rate": 2.756857108635765e-06, "loss": 1.5054, "step": 7000 }, { "epoch": 0.9, "learning_rate": 2.739489759252605e-06, "loss": 1.5058, "step": 7500 }, { "epoch": 0.96, "learning_rate": 2.7221224098694455e-06, "loss": 1.5052, "step": 8000 }, { "epoch": 1.0, "eval_loss": 1.5115793943405151, "eval_runtime": 99.986, "eval_samples_per_second": 300.042, "eval_steps_per_second": 1.88, "step": 8349 }, { "epoch": 1.02, "learning_rate": 2.704755060486286e-06, "loss": 1.5046, "step": 8500 }, { "epoch": 1.08, "learning_rate": 2.687387711103126e-06, "loss": 1.5045, "step": 9000 }, { "epoch": 1.14, "learning_rate": 2.6700203617199665e-06, "loss": 1.5038, "step": 9500 }, { "epoch": 1.2, "learning_rate": 2.652653012336807e-06, "loss": 1.5042, "step": 10000 }, { "epoch": 1.26, "learning_rate": 2.635285662953647e-06, "loss": 1.5039, "step": 10500 }, { "epoch": 1.32, "learning_rate": 2.6179183135704875e-06, "loss": 1.5033, "step": 11000 }, { "epoch": 1.38, "learning_rate": 2.600550964187328e-06, "loss": 1.5028, "step": 11500 }, { "epoch": 1.44, "learning_rate": 2.583183614804168e-06, "loss": 1.5036, "step": 12000 }, { "epoch": 1.5, "learning_rate": 2.5658162654210085e-06, "loss": 1.5032, "step": 12500 }, { "epoch": 1.56, "learning_rate": 2.548448916037849e-06, "loss": 1.5029, "step": 13000 }, { "epoch": 1.62, "learning_rate": 2.5310815666546896e-06, "loss": 1.5028, "step": 13500 }, { "epoch": 1.68, "learning_rate": 2.5137142172715295e-06, "loss": 1.5024, "step": 14000 }, { "epoch": 1.74, "learning_rate": 2.49634686788837e-06, "loss": 1.5022, "step": 14500 }, { "epoch": 1.8, "learning_rate": 2.47897951850521e-06, "loss": 1.5022, "step": 15000 }, { "epoch": 1.86, "learning_rate": 2.4616121691220505e-06, "loss": 1.5019, "step": 15500 }, { "epoch": 1.92, "learning_rate": 2.444244819738891e-06, "loss": 1.502, "step": 16000 }, { "epoch": 1.98, "learning_rate": 2.426877470355731e-06, "loss": 1.5019, "step": 16500 }, { "epoch": 2.0, "eval_loss": 1.509125828742981, "eval_runtime": 102.1448, "eval_samples_per_second": 293.701, "eval_steps_per_second": 1.841, "step": 16699 }, { "epoch": 2.04, "learning_rate": 2.4095101209725715e-06, "loss": 1.501, "step": 17000 }, { "epoch": 2.1, "learning_rate": 2.392142771589412e-06, "loss": 1.5004, "step": 17500 }, { "epoch": 2.16, "learning_rate": 2.374775422206252e-06, "loss": 1.5009, "step": 18000 }, { "epoch": 2.22, "learning_rate": 2.3574080728230925e-06, "loss": 1.5004, "step": 18500 }, { "epoch": 2.28, "learning_rate": 2.340040723439933e-06, "loss": 1.5004, "step": 19000 }, { "epoch": 2.34, "learning_rate": 2.3226733740567732e-06, "loss": 1.5001, "step": 19500 }, { "epoch": 2.4, "learning_rate": 2.3053060246736136e-06, "loss": 1.4998, "step": 20000 }, { "epoch": 2.46, "learning_rate": 2.2879386752904543e-06, "loss": 1.4998, "step": 20500 }, { "epoch": 2.52, "learning_rate": 2.2705713259072942e-06, "loss": 1.4998, "step": 21000 }, { "epoch": 2.58, "learning_rate": 2.2532039765241346e-06, "loss": 1.4997, "step": 21500 }, { "epoch": 2.63, "learning_rate": 2.235836627140975e-06, "loss": 1.4992, "step": 22000 }, { "epoch": 2.69, "learning_rate": 2.2184692777578152e-06, "loss": 1.4991, "step": 22500 }, { "epoch": 2.75, "learning_rate": 2.2011019283746556e-06, "loss": 1.499, "step": 23000 }, { "epoch": 2.81, "learning_rate": 2.183734578991496e-06, "loss": 1.4987, "step": 23500 }, { "epoch": 2.87, "learning_rate": 2.1663672296083363e-06, "loss": 1.4991, "step": 24000 }, { "epoch": 2.93, "learning_rate": 2.1489998802251766e-06, "loss": 1.4986, "step": 24500 }, { "epoch": 2.99, "learning_rate": 2.131632530842017e-06, "loss": 1.4989, "step": 25000 }, { "epoch": 3.0, "eval_loss": 1.5069462060928345, "eval_runtime": 101.4916, "eval_samples_per_second": 295.591, "eval_steps_per_second": 1.852, "step": 25048 }, { "epoch": 3.05, "learning_rate": 2.1142651814588573e-06, "loss": 1.498, "step": 25500 }, { "epoch": 3.11, "learning_rate": 2.096897832075698e-06, "loss": 1.4977, "step": 26000 }, { "epoch": 3.17, "learning_rate": 2.079530482692538e-06, "loss": 1.4975, "step": 26500 }, { "epoch": 3.23, "learning_rate": 2.0621631333093783e-06, "loss": 1.4974, "step": 27000 }, { "epoch": 3.29, "learning_rate": 2.044795783926219e-06, "loss": 1.4976, "step": 27500 }, { "epoch": 3.35, "learning_rate": 2.027428434543059e-06, "loss": 1.4978, "step": 28000 }, { "epoch": 3.41, "learning_rate": 2.0100610851598993e-06, "loss": 1.4971, "step": 28500 }, { "epoch": 3.47, "learning_rate": 1.99269373577674e-06, "loss": 1.4974, "step": 29000 }, { "epoch": 3.53, "learning_rate": 1.97532638639358e-06, "loss": 1.4972, "step": 29500 }, { "epoch": 3.59, "learning_rate": 1.9579590370104203e-06, "loss": 1.4969, "step": 30000 }, { "epoch": 3.65, "learning_rate": 1.940591687627261e-06, "loss": 1.4967, "step": 30500 }, { "epoch": 3.71, "learning_rate": 1.923224338244101e-06, "loss": 1.4969, "step": 31000 }, { "epoch": 3.77, "learning_rate": 1.9058569888609415e-06, "loss": 1.497, "step": 31500 }, { "epoch": 3.83, "learning_rate": 1.888489639477782e-06, "loss": 1.4967, "step": 32000 }, { "epoch": 3.89, "learning_rate": 1.871122290094622e-06, "loss": 1.4965, "step": 32500 }, { "epoch": 3.95, "learning_rate": 1.8537549407114627e-06, "loss": 1.4969, "step": 33000 }, { "epoch": 4.0, "eval_loss": 1.5056320428848267, "eval_runtime": 101.7674, "eval_samples_per_second": 294.79, "eval_steps_per_second": 1.847, "step": 33398 }, { "epoch": 4.01, "learning_rate": 1.8363875913283029e-06, "loss": 1.4962, "step": 33500 }, { "epoch": 4.07, "learning_rate": 1.8190202419451432e-06, "loss": 1.4956, "step": 34000 }, { "epoch": 4.13, "learning_rate": 1.8016528925619838e-06, "loss": 1.4955, "step": 34500 }, { "epoch": 4.19, "learning_rate": 1.7842855431788235e-06, "loss": 1.4955, "step": 35000 }, { "epoch": 4.25, "learning_rate": 1.7669181937956644e-06, "loss": 1.4953, "step": 35500 }, { "epoch": 4.31, "learning_rate": 1.7495508444125048e-06, "loss": 1.4954, "step": 36000 }, { "epoch": 4.37, "learning_rate": 1.7321834950293447e-06, "loss": 1.4952, "step": 36500 }, { "epoch": 4.43, "learning_rate": 1.714816145646185e-06, "loss": 1.4956, "step": 37000 }, { "epoch": 4.49, "learning_rate": 1.6974487962630258e-06, "loss": 1.4953, "step": 37500 }, { "epoch": 4.55, "learning_rate": 1.6800814468798657e-06, "loss": 1.4951, "step": 38000 }, { "epoch": 4.61, "learning_rate": 1.6627140974967064e-06, "loss": 1.4953, "step": 38500 }, { "epoch": 4.67, "learning_rate": 1.645346748113547e-06, "loss": 1.4954, "step": 39000 }, { "epoch": 4.73, "learning_rate": 1.6279793987303871e-06, "loss": 1.495, "step": 39500 }, { "epoch": 4.79, "learning_rate": 1.6106120493472275e-06, "loss": 1.4951, "step": 40000 }, { "epoch": 4.85, "learning_rate": 1.5932446999640674e-06, "loss": 1.4946, "step": 40500 }, { "epoch": 4.91, "learning_rate": 1.5758773505809081e-06, "loss": 1.4948, "step": 41000 }, { "epoch": 4.97, "learning_rate": 1.5585100011977485e-06, "loss": 1.4946, "step": 41500 }, { "epoch": 5.0, "eval_loss": 1.504227638244629, "eval_runtime": 101.5567, "eval_samples_per_second": 295.402, "eval_steps_per_second": 1.851, "step": 41747 }, { "epoch": 5.03, "learning_rate": 1.5411426518145884e-06, "loss": 1.4944, "step": 42000 }, { "epoch": 5.09, "learning_rate": 1.5237753024314291e-06, "loss": 1.494, "step": 42500 }, { "epoch": 5.15, "learning_rate": 1.5064079530482697e-06, "loss": 1.4942, "step": 43000 }, { "epoch": 5.21, "learning_rate": 1.4890406036651098e-06, "loss": 1.4938, "step": 43500 }, { "epoch": 5.27, "learning_rate": 1.4716732542819497e-06, "loss": 1.4939, "step": 44000 }, { "epoch": 5.33, "learning_rate": 1.4543059048987905e-06, "loss": 1.4938, "step": 44500 }, { "epoch": 5.39, "learning_rate": 1.4369385555156308e-06, "loss": 1.4937, "step": 45000 }, { "epoch": 5.45, "learning_rate": 1.4195712061324707e-06, "loss": 1.494, "step": 45500 }, { "epoch": 5.51, "learning_rate": 1.4022038567493115e-06, "loss": 1.4936, "step": 46000 }, { "epoch": 5.57, "learning_rate": 1.3848365073661518e-06, "loss": 1.4936, "step": 46500 }, { "epoch": 5.63, "learning_rate": 1.3674691579829922e-06, "loss": 1.4934, "step": 47000 }, { "epoch": 5.69, "learning_rate": 1.350101808599832e-06, "loss": 1.4936, "step": 47500 }, { "epoch": 5.75, "learning_rate": 1.3327344592166729e-06, "loss": 1.4933, "step": 48000 }, { "epoch": 5.81, "learning_rate": 1.3153671098335132e-06, "loss": 1.4934, "step": 48500 }, { "epoch": 5.87, "learning_rate": 1.297999760450353e-06, "loss": 1.493, "step": 49000 }, { "epoch": 5.93, "learning_rate": 1.2806324110671939e-06, "loss": 1.4932, "step": 49500 }, { "epoch": 5.99, "learning_rate": 1.2632650616840342e-06, "loss": 1.4933, "step": 50000 }, { "epoch": 6.0, "eval_loss": 1.5031880140304565, "eval_runtime": 101.923, "eval_samples_per_second": 294.34, "eval_steps_per_second": 1.845, "step": 50097 }, { "epoch": 6.05, "learning_rate": 1.2458977123008745e-06, "loss": 1.4928, "step": 50500 }, { "epoch": 6.11, "learning_rate": 1.2285303629177145e-06, "loss": 1.4928, "step": 51000 }, { "epoch": 6.17, "learning_rate": 1.2111630135345552e-06, "loss": 1.4927, "step": 51500 }, { "epoch": 6.23, "learning_rate": 1.1937956641513955e-06, "loss": 1.4926, "step": 52000 }, { "epoch": 6.29, "learning_rate": 1.1764283147682355e-06, "loss": 1.4929, "step": 52500 }, { "epoch": 6.35, "learning_rate": 1.1590609653850762e-06, "loss": 1.4927, "step": 53000 }, { "epoch": 6.41, "learning_rate": 1.1416936160019166e-06, "loss": 1.4924, "step": 53500 }, { "epoch": 6.47, "learning_rate": 1.124326266618757e-06, "loss": 1.4924, "step": 54000 }, { "epoch": 6.53, "learning_rate": 1.1069589172355968e-06, "loss": 1.4921, "step": 54500 }, { "epoch": 6.59, "learning_rate": 1.0895915678524376e-06, "loss": 1.4922, "step": 55000 }, { "epoch": 6.65, "learning_rate": 1.072224218469278e-06, "loss": 1.4925, "step": 55500 }, { "epoch": 6.71, "learning_rate": 1.0548568690861182e-06, "loss": 1.4925, "step": 56000 }, { "epoch": 6.77, "learning_rate": 1.0374895197029584e-06, "loss": 1.4922, "step": 56500 }, { "epoch": 6.83, "learning_rate": 1.020122170319799e-06, "loss": 1.4924, "step": 57000 }, { "epoch": 6.89, "learning_rate": 1.0027548209366393e-06, "loss": 1.492, "step": 57500 }, { "epoch": 6.95, "learning_rate": 9.853874715534794e-07, "loss": 1.4923, "step": 58000 }, { "epoch": 7.0, "eval_loss": 1.502358317375183, "eval_runtime": 100.8628, "eval_samples_per_second": 297.434, "eval_steps_per_second": 1.864, "step": 58446 }, { "epoch": 7.01, "learning_rate": 9.6802012217032e-07, "loss": 1.4919, "step": 58500 }, { "epoch": 7.07, "learning_rate": 9.506527727871603e-07, "loss": 1.4919, "step": 59000 }, { "epoch": 7.13, "learning_rate": 9.332854234040007e-07, "loss": 1.4917, "step": 59500 }, { "epoch": 7.19, "learning_rate": 9.159180740208407e-07, "loss": 1.4917, "step": 60000 }, { "epoch": 7.25, "learning_rate": 8.985507246376812e-07, "loss": 1.4914, "step": 60500 }, { "epoch": 7.31, "learning_rate": 8.811833752545216e-07, "loss": 1.4917, "step": 61000 }, { "epoch": 7.37, "learning_rate": 8.638160258713617e-07, "loss": 1.4917, "step": 61500 }, { "epoch": 7.43, "learning_rate": 8.464486764882022e-07, "loss": 1.4914, "step": 62000 }, { "epoch": 7.49, "learning_rate": 8.290813271050427e-07, "loss": 1.4917, "step": 62500 }, { "epoch": 7.55, "learning_rate": 8.11713977721883e-07, "loss": 1.4914, "step": 63000 }, { "epoch": 7.61, "learning_rate": 7.943466283387231e-07, "loss": 1.4915, "step": 63500 }, { "epoch": 7.67, "learning_rate": 7.769792789555636e-07, "loss": 1.4915, "step": 64000 }, { "epoch": 7.73, "learning_rate": 7.596119295724041e-07, "loss": 1.4912, "step": 64500 }, { "epoch": 7.78, "learning_rate": 7.422445801892441e-07, "loss": 1.4913, "step": 65000 }, { "epoch": 7.84, "learning_rate": 7.248772308060845e-07, "loss": 1.4913, "step": 65500 }, { "epoch": 7.9, "learning_rate": 7.07509881422925e-07, "loss": 1.4912, "step": 66000 }, { "epoch": 7.96, "learning_rate": 6.901425320397654e-07, "loss": 1.4914, "step": 66500 }, { "epoch": 8.0, "eval_loss": 1.5018346309661865, "eval_runtime": 100.3999, "eval_samples_per_second": 298.805, "eval_steps_per_second": 1.873, "step": 66796 }, { "epoch": 8.02, "learning_rate": 6.727751826566056e-07, "loss": 1.4912, "step": 67000 }, { "epoch": 8.08, "learning_rate": 6.554078332734459e-07, "loss": 1.4909, "step": 67500 }, { "epoch": 8.14, "learning_rate": 6.380404838902863e-07, "loss": 1.4912, "step": 68000 }, { "epoch": 8.2, "learning_rate": 6.206731345071265e-07, "loss": 1.4909, "step": 68500 }, { "epoch": 8.26, "learning_rate": 6.033057851239669e-07, "loss": 1.4911, "step": 69000 }, { "epoch": 8.32, "learning_rate": 5.859384357408073e-07, "loss": 1.4908, "step": 69500 }, { "epoch": 8.38, "learning_rate": 5.685710863576478e-07, "loss": 1.491, "step": 70000 }, { "epoch": 8.44, "learning_rate": 5.512037369744878e-07, "loss": 1.491, "step": 70500 }, { "epoch": 8.5, "learning_rate": 5.338363875913284e-07, "loss": 1.491, "step": 71000 }, { "epoch": 8.56, "learning_rate": 5.164690382081687e-07, "loss": 1.4911, "step": 71500 }, { "epoch": 8.62, "learning_rate": 4.991016888250091e-07, "loss": 1.4908, "step": 72000 }, { "epoch": 8.68, "learning_rate": 4.817343394418493e-07, "loss": 1.4907, "step": 72500 }, { "epoch": 8.74, "learning_rate": 4.643669900586897e-07, "loss": 1.4907, "step": 73000 }, { "epoch": 8.8, "learning_rate": 4.4699964067553015e-07, "loss": 1.4906, "step": 73500 }, { "epoch": 8.86, "learning_rate": 4.296322912923702e-07, "loss": 1.4907, "step": 74000 }, { "epoch": 8.92, "learning_rate": 4.1226494190921066e-07, "loss": 1.491, "step": 74500 }, { "epoch": 8.98, "learning_rate": 3.948975925260511e-07, "loss": 1.4907, "step": 75000 }, { "epoch": 9.0, "eval_loss": 1.5016120672225952, "eval_runtime": 100.2408, "eval_samples_per_second": 299.279, "eval_steps_per_second": 1.875, "step": 75145 }, { "epoch": 9.04, "learning_rate": 3.775302431428916e-07, "loss": 1.4908, "step": 75500 }, { "epoch": 9.1, "learning_rate": 3.601628937597316e-07, "loss": 1.4906, "step": 76000 }, { "epoch": 9.16, "learning_rate": 3.4279554437657206e-07, "loss": 1.4906, "step": 76500 }, { "epoch": 9.22, "learning_rate": 3.254281949934125e-07, "loss": 1.4903, "step": 77000 }, { "epoch": 9.28, "learning_rate": 3.0806084561025263e-07, "loss": 1.4905, "step": 77500 }, { "epoch": 9.34, "learning_rate": 2.90693496227093e-07, "loss": 1.4907, "step": 78000 }, { "epoch": 9.4, "learning_rate": 2.7332614684393346e-07, "loss": 1.4905, "step": 78500 }, { "epoch": 9.46, "learning_rate": 2.5595879746077385e-07, "loss": 1.4904, "step": 79000 }, { "epoch": 9.52, "learning_rate": 2.38591448077614e-07, "loss": 1.4905, "step": 79500 }, { "epoch": 9.58, "learning_rate": 2.2122409869445442e-07, "loss": 1.4906, "step": 80000 }, { "epoch": 9.64, "learning_rate": 2.0385674931129484e-07, "loss": 1.4904, "step": 80500 }, { "epoch": 9.7, "learning_rate": 1.8648939992813496e-07, "loss": 1.4903, "step": 81000 }, { "epoch": 9.76, "learning_rate": 1.6912205054497538e-07, "loss": 1.4906, "step": 81500 }, { "epoch": 9.82, "learning_rate": 1.5175470116181582e-07, "loss": 1.4908, "step": 82000 }, { "epoch": 9.88, "learning_rate": 1.3438735177865624e-07, "loss": 1.49, "step": 82500 }, { "epoch": 9.94, "learning_rate": 1.1702000239549635e-07, "loss": 1.4905, "step": 83000 }, { "epoch": 10.0, "eval_loss": 1.5012842416763306, "eval_runtime": 100.3947, "eval_samples_per_second": 298.821, "eval_steps_per_second": 1.873, "step": 83490 }, { "epoch": 10.0, "step": 83490, "total_flos": 1.463646111228887e+18, "train_loss": 1.4961074900378695, "train_runtime": 199226.4751, "train_samples_per_second": 268.221, "train_steps_per_second": 0.419 } ], "max_steps": 83490, "num_train_epochs": 10, "total_flos": 1.463646111228887e+18, "trial_name": null, "trial_params": null }