| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 59688, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00019832462136442838, |
| "loss": 70.136, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00019664924272885673, |
| "loss": 54.3396, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_loss": 55.01993179321289, |
| "eval_runtime": 0.5271, |
| "eval_samples_per_second": 94.864, |
| "eval_steps_per_second": 3.795, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00019497386409328508, |
| "loss": 52.5498, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00019329848545771345, |
| "loss": 51.633, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_loss": 53.25694274902344, |
| "eval_runtime": 0.5629, |
| "eval_samples_per_second": 88.83, |
| "eval_steps_per_second": 3.553, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.00019162310682214182, |
| "loss": 51.0517, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0001899477281865702, |
| "loss": 50.6422, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_loss": 52.43913269042969, |
| "eval_runtime": 0.506, |
| "eval_samples_per_second": 98.817, |
| "eval_steps_per_second": 3.953, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.00018827234955099854, |
| "loss": 50.2641, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0001865969709154269, |
| "loss": 50.023, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_loss": 52.02763748168945, |
| "eval_runtime": 0.5162, |
| "eval_samples_per_second": 96.869, |
| "eval_steps_per_second": 3.875, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.00018492159227985526, |
| "loss": 49.8153, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00018324621364428363, |
| "loss": 49.7182, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_loss": 51.66868209838867, |
| "eval_runtime": 0.4836, |
| "eval_samples_per_second": 103.392, |
| "eval_steps_per_second": 4.136, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00018157083500871198, |
| "loss": 49.5574, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00017989545637314033, |
| "loss": 49.4063, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 51.36139678955078, |
| "eval_runtime": 0.487, |
| "eval_samples_per_second": 102.67, |
| "eval_steps_per_second": 4.107, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0001782200777375687, |
| "loss": 49.3062, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00017654469910199707, |
| "loss": 49.2278, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_loss": 51.16618347167969, |
| "eval_runtime": 0.4852, |
| "eval_samples_per_second": 103.045, |
| "eval_steps_per_second": 4.122, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00017486932046642542, |
| "loss": 49.1404, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0001731939418308538, |
| "loss": 49.0291, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 50.956336975097656, |
| "eval_runtime": 0.4843, |
| "eval_samples_per_second": 103.238, |
| "eval_steps_per_second": 4.13, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00017151856319528214, |
| "loss": 48.966, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0001698431845597105, |
| "loss": 48.8771, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_loss": 50.9631233215332, |
| "eval_runtime": 0.4832, |
| "eval_samples_per_second": 103.477, |
| "eval_steps_per_second": 4.139, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00016816780592413886, |
| "loss": 48.842, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00016649242728856723, |
| "loss": 48.8414, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 50.825313568115234, |
| "eval_runtime": 0.484, |
| "eval_samples_per_second": 103.302, |
| "eval_steps_per_second": 4.132, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00016481704865299558, |
| "loss": 48.7524, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00016314167001742395, |
| "loss": 48.6891, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_loss": 50.6945686340332, |
| "eval_runtime": 0.4902, |
| "eval_samples_per_second": 101.994, |
| "eval_steps_per_second": 4.08, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0001614662913818523, |
| "loss": 48.6808, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00015979091274628067, |
| "loss": 48.6235, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 50.51723098754883, |
| "eval_runtime": 0.4899, |
| "eval_samples_per_second": 102.054, |
| "eval_steps_per_second": 4.082, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00015811553411070902, |
| "loss": 48.5341, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0001564401554751374, |
| "loss": 48.5, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_loss": 50.48223114013672, |
| "eval_runtime": 0.4899, |
| "eval_samples_per_second": 102.056, |
| "eval_steps_per_second": 4.082, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00015476477683956574, |
| "loss": 48.4708, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0001530893982039941, |
| "loss": 48.4386, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 50.327213287353516, |
| "eval_runtime": 0.4814, |
| "eval_samples_per_second": 103.863, |
| "eval_steps_per_second": 4.155, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00015141401956842248, |
| "loss": 48.4488, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00014973864093285083, |
| "loss": 48.3875, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_loss": 50.33498001098633, |
| "eval_runtime": 0.5229, |
| "eval_samples_per_second": 95.612, |
| "eval_steps_per_second": 3.824, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00014806326229727917, |
| "loss": 48.3842, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00014638788366170755, |
| "loss": 48.3353, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 50.22550964355469, |
| "eval_runtime": 0.5249, |
| "eval_samples_per_second": 95.248, |
| "eval_steps_per_second": 3.81, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00014471250502613592, |
| "loss": 48.3718, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00014303712639056427, |
| "loss": 48.3404, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_loss": 50.19435501098633, |
| "eval_runtime": 0.5549, |
| "eval_samples_per_second": 90.105, |
| "eval_steps_per_second": 3.604, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00014136174775499264, |
| "loss": 48.2976, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.000139686369119421, |
| "loss": 48.2946, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 50.1772346496582, |
| "eval_runtime": 0.6058, |
| "eval_samples_per_second": 82.531, |
| "eval_steps_per_second": 3.301, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00013801099048384936, |
| "loss": 48.2515, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00013633561184827773, |
| "loss": 48.2941, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 50.101707458496094, |
| "eval_runtime": 0.55, |
| "eval_samples_per_second": 90.916, |
| "eval_steps_per_second": 3.637, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00013466023321270608, |
| "loss": 48.1908, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00013298485457713443, |
| "loss": 48.1547, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_loss": 50.0978889465332, |
| "eval_runtime": 0.4898, |
| "eval_samples_per_second": 102.081, |
| "eval_steps_per_second": 4.083, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0001313094759415628, |
| "loss": 47.9981, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00012963409730599117, |
| "loss": 48.0147, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_loss": 50.161685943603516, |
| "eval_runtime": 0.4871, |
| "eval_samples_per_second": 102.644, |
| "eval_steps_per_second": 4.106, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00012795871867041952, |
| "loss": 47.9851, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00012628334003484786, |
| "loss": 47.936, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_loss": 50.107810974121094, |
| "eval_runtime": 0.4846, |
| "eval_samples_per_second": 103.186, |
| "eval_steps_per_second": 4.127, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00012460796139927624, |
| "loss": 48.0228, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0001229325827637046, |
| "loss": 47.9642, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_loss": 50.060237884521484, |
| "eval_runtime": 0.4856, |
| "eval_samples_per_second": 102.961, |
| "eval_steps_per_second": 4.118, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00012125720412813297, |
| "loss": 47.9917, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00011958182549256132, |
| "loss": 47.9531, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 50.01976013183594, |
| "eval_runtime": 0.483, |
| "eval_samples_per_second": 103.524, |
| "eval_steps_per_second": 4.141, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00011790644685698968, |
| "loss": 47.9594, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00011623106822141805, |
| "loss": 47.9126, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_loss": 50.01350021362305, |
| "eval_runtime": 0.4811, |
| "eval_samples_per_second": 103.929, |
| "eval_steps_per_second": 4.157, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00011455568958584641, |
| "loss": 47.9141, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00011288031095027476, |
| "loss": 47.9471, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_loss": 49.97874450683594, |
| "eval_runtime": 0.5117, |
| "eval_samples_per_second": 97.707, |
| "eval_steps_per_second": 3.908, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00011120493231470313, |
| "loss": 47.9071, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00010952955367913149, |
| "loss": 47.9708, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_loss": 49.936649322509766, |
| "eval_runtime": 0.4804, |
| "eval_samples_per_second": 104.085, |
| "eval_steps_per_second": 4.163, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00010785417504355986, |
| "loss": 47.9294, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010617879640798821, |
| "loss": 47.8889, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 49.88063049316406, |
| "eval_runtime": 0.5134, |
| "eval_samples_per_second": 97.396, |
| "eval_steps_per_second": 3.896, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010450341777241657, |
| "loss": 47.9306, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00010282803913684493, |
| "loss": 47.909, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_loss": 49.8420524597168, |
| "eval_runtime": 0.556, |
| "eval_samples_per_second": 89.932, |
| "eval_steps_per_second": 3.597, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.0001011526605012733, |
| "loss": 47.9044, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 9.947728186570166e-05, |
| "loss": 47.8723, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_loss": 49.780277252197266, |
| "eval_runtime": 0.4985, |
| "eval_samples_per_second": 100.306, |
| "eval_steps_per_second": 4.012, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 9.780190323013e-05, |
| "loss": 47.8765, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 9.612652459455838e-05, |
| "loss": 47.865, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_loss": 49.72420883178711, |
| "eval_runtime": 0.5031, |
| "eval_samples_per_second": 99.377, |
| "eval_steps_per_second": 3.975, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.445114595898673e-05, |
| "loss": 47.8886, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.27757673234151e-05, |
| "loss": 47.8624, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 49.74140548706055, |
| "eval_runtime": 0.5052, |
| "eval_samples_per_second": 98.962, |
| "eval_steps_per_second": 3.958, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.110038868784345e-05, |
| "loss": 47.8765, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 8.942501005227182e-05, |
| "loss": 47.8475, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 49.755550384521484, |
| "eval_runtime": 0.5482, |
| "eval_samples_per_second": 91.209, |
| "eval_steps_per_second": 3.648, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.774963141670018e-05, |
| "loss": 47.8899, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.607425278112854e-05, |
| "loss": 47.8309, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_loss": 49.78474044799805, |
| "eval_runtime": 0.511, |
| "eval_samples_per_second": 97.843, |
| "eval_steps_per_second": 3.914, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.439887414555691e-05, |
| "loss": 47.8192, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 8.272349550998526e-05, |
| "loss": 47.8468, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 49.781124114990234, |
| "eval_runtime": 0.5263, |
| "eval_samples_per_second": 95.0, |
| "eval_steps_per_second": 3.8, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 8.104811687441363e-05, |
| "loss": 47.7881, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.937273823884198e-05, |
| "loss": 47.85, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_loss": 49.724666595458984, |
| "eval_runtime": 0.5014, |
| "eval_samples_per_second": 99.722, |
| "eval_steps_per_second": 3.989, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.769735960327035e-05, |
| "loss": 47.7805, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.60219809676987e-05, |
| "loss": 47.7769, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 49.74330139160156, |
| "eval_runtime": 0.5536, |
| "eval_samples_per_second": 90.316, |
| "eval_steps_per_second": 3.613, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 7.434660233212707e-05, |
| "loss": 47.8081, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 7.267122369655543e-05, |
| "loss": 47.8395, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_loss": 49.658382415771484, |
| "eval_runtime": 0.5279, |
| "eval_samples_per_second": 94.711, |
| "eval_steps_per_second": 3.788, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 7.099584506098379e-05, |
| "loss": 47.8198, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.932046642541215e-05, |
| "loss": 47.7978, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_loss": 49.645263671875, |
| "eval_runtime": 0.4826, |
| "eval_samples_per_second": 103.596, |
| "eval_steps_per_second": 4.144, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 6.764508778984051e-05, |
| "loss": 47.8224, |
| "step": 39500 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 6.596970915426887e-05, |
| "loss": 47.7541, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_loss": 49.628944396972656, |
| "eval_runtime": 0.484, |
| "eval_samples_per_second": 103.305, |
| "eval_steps_per_second": 4.132, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 6.429433051869723e-05, |
| "loss": 47.6855, |
| "step": 40500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 6.261895188312559e-05, |
| "loss": 47.6644, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_loss": 49.626243591308594, |
| "eval_runtime": 0.5063, |
| "eval_samples_per_second": 98.747, |
| "eval_steps_per_second": 3.95, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 6.0943573247553954e-05, |
| "loss": 47.6146, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 5.926819461198231e-05, |
| "loss": 47.6472, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_loss": 49.61125183105469, |
| "eval_runtime": 0.483, |
| "eval_samples_per_second": 103.514, |
| "eval_steps_per_second": 4.141, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 5.7592815976410674e-05, |
| "loss": 47.6434, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 5.5917437340839026e-05, |
| "loss": 47.6436, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_loss": 49.59366226196289, |
| "eval_runtime": 0.4865, |
| "eval_samples_per_second": 102.777, |
| "eval_steps_per_second": 4.111, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 5.424205870526739e-05, |
| "loss": 47.6443, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 5.256668006969575e-05, |
| "loss": 47.6082, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_loss": 49.62874984741211, |
| "eval_runtime": 0.4801, |
| "eval_samples_per_second": 104.141, |
| "eval_steps_per_second": 4.166, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 5.089130143412412e-05, |
| "loss": 47.6407, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 4.921592279855248e-05, |
| "loss": 47.63, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_loss": 49.61036682128906, |
| "eval_runtime": 0.4794, |
| "eval_samples_per_second": 104.297, |
| "eval_steps_per_second": 4.172, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 4.754054416298084e-05, |
| "loss": 47.6592, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 4.58651655274092e-05, |
| "loss": 47.6281, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_loss": 49.57414245605469, |
| "eval_runtime": 0.4781, |
| "eval_samples_per_second": 104.584, |
| "eval_steps_per_second": 4.183, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 4.418978689183756e-05, |
| "loss": 47.6234, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 4.251440825626592e-05, |
| "loss": 47.661, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_loss": 49.538631439208984, |
| "eval_runtime": 0.4811, |
| "eval_samples_per_second": 103.938, |
| "eval_steps_per_second": 4.158, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 4.083902962069428e-05, |
| "loss": 47.6321, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.9163650985122644e-05, |
| "loss": 47.5848, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_loss": 49.5694580078125, |
| "eval_runtime": 0.4826, |
| "eval_samples_per_second": 103.601, |
| "eval_steps_per_second": 4.144, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.7488272349551004e-05, |
| "loss": 47.6428, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.581289371397936e-05, |
| "loss": 47.6209, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_loss": 49.554813385009766, |
| "eval_runtime": 0.4889, |
| "eval_samples_per_second": 102.277, |
| "eval_steps_per_second": 4.091, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.413751507840772e-05, |
| "loss": 47.6301, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.246213644283608e-05, |
| "loss": 47.637, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_loss": 49.56814956665039, |
| "eval_runtime": 0.483, |
| "eval_samples_per_second": 103.514, |
| "eval_steps_per_second": 4.141, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.078675780726444e-05, |
| "loss": 47.6194, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 2.9111379171692806e-05, |
| "loss": 47.6155, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_loss": 49.561641693115234, |
| "eval_runtime": 0.4803, |
| "eval_samples_per_second": 104.094, |
| "eval_steps_per_second": 4.164, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 2.7436000536121165e-05, |
| "loss": 47.5766, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2.5760621900549525e-05, |
| "loss": 47.605, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_loss": 49.548133850097656, |
| "eval_runtime": 0.4783, |
| "eval_samples_per_second": 104.528, |
| "eval_steps_per_second": 4.181, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 2.4085243264977885e-05, |
| "loss": 47.5708, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 2.2409864629406248e-05, |
| "loss": 47.6354, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_loss": 49.530643463134766, |
| "eval_runtime": 0.4813, |
| "eval_samples_per_second": 103.878, |
| "eval_steps_per_second": 4.155, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 2.0734485993834608e-05, |
| "loss": 47.5549, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.9059107358262967e-05, |
| "loss": 47.6402, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_loss": 49.539710998535156, |
| "eval_runtime": 0.4791, |
| "eval_samples_per_second": 104.352, |
| "eval_steps_per_second": 4.174, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 1.7383728722691327e-05, |
| "loss": 47.5883, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.570835008711969e-05, |
| "loss": 47.607, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_loss": 49.52818298339844, |
| "eval_runtime": 0.5295, |
| "eval_samples_per_second": 94.423, |
| "eval_steps_per_second": 3.777, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 1.403297145154805e-05, |
| "loss": 47.5659, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1.2357592815976411e-05, |
| "loss": 47.577, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.81, |
| "eval_loss": 49.54225540161133, |
| "eval_runtime": 0.478, |
| "eval_samples_per_second": 104.606, |
| "eval_steps_per_second": 4.184, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 1.0682214180404771e-05, |
| "loss": 47.5713, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 9.006835544833132e-06, |
| "loss": 47.5776, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.86, |
| "eval_loss": 49.539093017578125, |
| "eval_runtime": 0.5132, |
| "eval_samples_per_second": 97.431, |
| "eval_steps_per_second": 3.897, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 7.331456909261493e-06, |
| "loss": 47.5852, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 5.656078273689854e-06, |
| "loss": 47.6098, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_loss": 49.5291748046875, |
| "eval_runtime": 0.5718, |
| "eval_samples_per_second": 87.44, |
| "eval_steps_per_second": 3.498, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 3.980699638118215e-06, |
| "loss": 47.5699, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 2.3053210025465755e-06, |
| "loss": 47.596, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_loss": 49.524417877197266, |
| "eval_runtime": 0.5058, |
| "eval_samples_per_second": 98.86, |
| "eval_steps_per_second": 3.954, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 6.299423669749365e-07, |
| "loss": 47.5627, |
| "step": 59500 |
| } |
| ], |
| "max_steps": 59688, |
| "num_train_epochs": 3, |
| "total_flos": 1.0794540774520259e+19, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|