| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.380952380952381, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 2e-05, |
| "loss": 1.6335, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4e-05, |
| "loss": 1.5176, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 6e-05, |
| "loss": 1.4883, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 8e-05, |
| "loss": 1.6, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0001, |
| "loss": 1.5088, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 1.7048434019088745, |
| "eval_runtime": 2.1875, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00012, |
| "loss": 1.4985, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00014, |
| "loss": 1.4626, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00016, |
| "loss": 1.3285, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.00018, |
| "loss": 1.6476, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0002, |
| "loss": 1.5266, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_loss": 1.692796230316162, |
| "eval_runtime": 2.1867, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0001999915737775817, |
| "loss": 1.6152, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.00019996629653035126, |
| "loss": 1.505, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.00019992417251814282, |
| "loss": 1.3107, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.00019986520883988232, |
| "loss": 1.3979, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0001997894154323911, |
| "loss": 1.2276, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 1.6662951707839966, |
| "eval_runtime": 2.186, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.00019969680506871137, |
| "loss": 1.7369, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0001995873933559535, |
| "loss": 1.6659, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.00019946119873266613, |
| "loss": 1.1324, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0001993182424657285, |
| "loss": 1.9695, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00019915854864676664, |
| "loss": 2.5525, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_loss": 1.6528061628341675, |
| "eval_runtime": 2.1857, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.458, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0001989821441880933, |
| "loss": 1.3183, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.00019878905881817252, |
| "loss": 1.5486, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0001985793250766098, |
| "loss": 1.5504, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.00019835297830866826, |
| "loss": 1.4022, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00019811005665931205, |
| "loss": 1.4385, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 1.6313893795013428, |
| "eval_runtime": 2.1922, |
| "eval_samples_per_second": 0.912, |
| "eval_steps_per_second": 0.456, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00019785060106677818, |
| "loss": 1.4413, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0001975746552556772, |
| "loss": 1.2569, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00019728226572962473, |
| "loss": 1.4904, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0001969734817634044, |
| "loss": 1.5558, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0001966483553946637, |
| "loss": 1.2282, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_loss": 1.6210927963256836, |
| "eval_runtime": 2.1889, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00019630694141514464, |
| "loss": 1.4598, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00019594929736144976, |
| "loss": 1.48, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0001955754835053459, |
| "loss": 1.3934, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00019518556284360696, |
| "loss": 1.1312, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0001947796010873974, |
| "loss": 1.6493, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 1.6185638904571533, |
| "eval_runtime": 2.1911, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0001943576666511982, |
| "loss": 1.587, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0001939198306412775, |
| "loss": 1.5798, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0001934661668437073, |
| "loss": 1.4308, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0001929967517119289, |
| "loss": 1.0766, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0001925116643538684, |
| "loss": 2.082, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 1.633681297302246, |
| "eval_runtime": 2.1862, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0001920109865186052, |
| "loss": 1.8061, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00019149480258259533, |
| "loss": 1.4312, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00019096319953545185, |
| "loss": 1.737, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00019041626696528503, |
| "loss": 1.5035, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00018985409704360456, |
| "loss": 1.4689, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_loss": 1.6150808334350586, |
| "eval_runtime": 2.1952, |
| "eval_samples_per_second": 0.911, |
| "eval_steps_per_second": 0.456, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0001892767845097864, |
| "loss": 1.2483, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00018868442665510678, |
| "loss": 1.1436, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00018807712330634642, |
| "loss": 1.0488, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00018745497680896722, |
| "loss": 1.3745, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0001868180920098644, |
| "loss": 0.9061, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 1.6097811460494995, |
| "eval_runtime": 2.1875, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0001861665762396974, |
| "loss": 1.1305, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00018550053929480202, |
| "loss": 1.2315, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00018482009341868697, |
| "loss": 1.4964, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00018412535328311814, |
| "loss": 1.0928, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00018341643596879367, |
| "loss": 0.9473, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_loss": 1.6084190607070923, |
| "eval_runtime": 2.1902, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0001826934609456129, |
| "loss": 1.362, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00018195655005254273, |
| "loss": 1.5478, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00018120582747708502, |
| "loss": 1.4831, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00018044141973434758, |
| "loss": 1.7483, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0001796634556457236, |
| "loss": 1.4993, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_loss": 1.6235202550888062, |
| "eval_runtime": 2.1859, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00017887206631718203, |
| "loss": 1.5076, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0001780673851171728, |
| "loss": 1.6395, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00017724954765415137, |
| "loss": 1.6389, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00017641869175372493, |
| "loss": 1.7769, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00017557495743542585, |
| "loss": 1.2022, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 1.6078369617462158, |
| "eval_runtime": 2.1883, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00017471848688911464, |
| "loss": 1.5265, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00017384942445101772, |
| "loss": 1.4065, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.000172967916579403, |
| "loss": 1.4326, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00017207411182989832, |
| "loss": 1.571, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00017116816083045602, |
| "loss": 1.5233, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 1.6035966873168945, |
| "eval_runtime": 2.1894, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00017025021625596853, |
| "loss": 1.5745, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0001693204328025389, |
| "loss": 1.608, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0001683789671614107, |
| "loss": 1.4234, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00016742597799256182, |
| "loss": 1.2839, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00016646162589796615, |
| "loss": 1.3248, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_loss": 1.6052225828170776, |
| "eval_runtime": 2.1914, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00016548607339452853, |
| "loss": 1.0683, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00016449948488669639, |
| "loss": 1.5298, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00016350202663875386, |
| "loss": 1.5696, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00016249386674680184, |
| "loss": 1.1743, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0001614751751104301, |
| "loss": 1.8626, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 1.6131467819213867, |
| "eval_runtime": 2.1907, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00016044612340408466, |
| "loss": 1.4832, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00015940688504813662, |
| "loss": 1.4476, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00015835763517965673, |
| "loss": 1.3783, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00015729855062290022, |
| "loss": 1.6671, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0001562298098595078, |
| "loss": 1.4658, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_loss": 1.6062182188034058, |
| "eval_runtime": 2.1944, |
| "eval_samples_per_second": 0.911, |
| "eval_steps_per_second": 0.456, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00015515159299842707, |
| "loss": 1.64, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00015406408174555976, |
| "loss": 1.2125, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00015296745937313987, |
| "loss": 1.5001, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00015186191068884775, |
| "loss": 1.4294, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00015074762200466556, |
| "loss": 1.3162, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_loss": 1.5980761051177979, |
| "eval_runtime": 2.191, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00014962478110547918, |
| "loss": 1.3707, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00014849357721743168, |
| "loss": 1.4644, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0001473542009760343, |
| "loss": 1.427, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00014620684439403962, |
| "loss": 1.3337, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0001450517008290827, |
| "loss": 1.4111, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_loss": 1.5972555875778198, |
| "eval_runtime": 2.1901, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0001438889649510956, |
| "loss": 1.441, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00014271883270950073, |
| "loss": 0.7424, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00014154150130018866, |
| "loss": 1.3582, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00014035716913228568, |
| "loss": 1.3479, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00013916603579471705, |
| "loss": 1.2211, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_loss": 1.6037462949752808, |
| "eval_runtime": 2.1932, |
| "eval_samples_per_second": 0.912, |
| "eval_steps_per_second": 0.456, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.0001379683020225714, |
| "loss": 2.0387, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.000136764169663272, |
| "loss": 1.3237, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00013555384164256048, |
| "loss": 1.4286, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00013433752193029886, |
| "loss": 1.7905, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00013311541550609565, |
| "loss": 1.7277, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 1.5989317893981934, |
| "eval_runtime": 2.1896, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00013188772832476188, |
| "loss": 1.5016, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00013065466728160252, |
| "loss": 1.7159, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00012941644017754964, |
| "loss": 1.2701, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00012817325568414297, |
| "loss": 1.4085, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00012692532330836346, |
| "loss": 1.246, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_loss": 1.597010850906372, |
| "eval_runtime": 2.1881, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00012567285335732633, |
| "loss": 1.3382, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00012441605690283915, |
| "loss": 0.9305, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00012315514574583113, |
| "loss": 1.388, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001218903323806595, |
| "loss": 1.2634, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012062182995929882, |
| "loss": 1.1971, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_loss": 1.5930073261260986, |
| "eval_runtime": 2.1881, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00011934985225541998, |
| "loss": 1.2645, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001180746136283638, |
| "loss": 1.6775, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011679632898701649, |
| "loss": 1.018, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00011551521375359206, |
| "loss": 1.225, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00011423148382732853, |
| "loss": 1.166, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_loss": 1.593321681022644, |
| "eval_runtime": 2.1858, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00011294535554810354, |
| "loss": 1.7995, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 0.00011165704565997593, |
| "loss": 0.7254, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 0.00011036677127465889, |
| "loss": 1.4558, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 0.00010907474983493144, |
| "loss": 1.5358, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 0.00010778119907799398, |
| "loss": 1.5007, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_loss": 1.5938643217086792, |
| "eval_runtime": 2.189, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 0.0001064863369987743, |
| "loss": 1.6357, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 0.00010519038181318999, |
| "loss": 1.7524, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 0.00010389355192137377, |
| "loss": 1.6955, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 0.00010259606587086783, |
| "loss": 1.4174, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 0.0001012981423197931, |
| "loss": 1.2135, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_loss": 1.5910111665725708, |
| "eval_runtime": 2.1873, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001, |
| "loss": 1.0919, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 9.870185768020693e-05, |
| "loss": 1.4658, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 9.740393412913219e-05, |
| "loss": 1.1472, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 9.610644807862625e-05, |
| "loss": 1.2626, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 9.480961818681004e-05, |
| "loss": 1.3915, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 1.5905121564865112, |
| "eval_runtime": 2.1919, |
| "eval_samples_per_second": 0.912, |
| "eval_steps_per_second": 0.456, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 9.35136630012257e-05, |
| "loss": 1.8036, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 9.221880092200601e-05, |
| "loss": 1.1988, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 9.092525016506858e-05, |
| "loss": 1.1454, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 8.963322872534114e-05, |
| "loss": 1.3185, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 8.83429543400241e-05, |
| "loss": 1.6912, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 1.5902668237686157, |
| "eval_runtime": 2.1897, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 8.705464445189647e-05, |
| "loss": 1.6251, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 8.57685161726715e-05, |
| "loss": 1.4459, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8.448478624640797e-05, |
| "loss": 1.3483, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 8.320367101298351e-05, |
| "loss": 1.7937, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 8.192538637163621e-05, |
| "loss": 1.6808, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_loss": 1.587677240371704, |
| "eval_runtime": 2.1912, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 8.065014774458003e-05, |
| "loss": 1.453, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 7.93781700407012e-05, |
| "loss": 1.3279, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 7.810966761934053e-05, |
| "loss": 1.6721, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 7.684485425416888e-05, |
| "loss": 1.1307, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 7.558394309716088e-05, |
| "loss": 1.249, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 1.5859589576721191, |
| "eval_runtime": 2.1868, |
| "eval_samples_per_second": 0.915, |
| "eval_steps_per_second": 0.457, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 7.432714664267373e-05, |
| "loss": 1.1872, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 7.307467669163655e-05, |
| "loss": 1.4116, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 7.182674431585704e-05, |
| "loss": 1.2309, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 7.058355982245037e-05, |
| "loss": 1.3953, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 6.934533271839752e-05, |
| "loss": 1.43, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_loss": 1.5868343114852905, |
| "eval_runtime": 2.1915, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.811227167523815e-05, |
| "loss": 1.9049, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.688458449390437e-05, |
| "loss": 0.8853, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 6.566247806970119e-05, |
| "loss": 1.6253, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 6.444615835743955e-05, |
| "loss": 1.3031, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 6.323583033672799e-05, |
| "loss": 0.8793, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_loss": 1.5895787477493286, |
| "eval_runtime": 2.1923, |
| "eval_samples_per_second": 0.912, |
| "eval_steps_per_second": 0.456, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 6.203169797742861e-05, |
| "loss": 1.3793, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 6.083396420528298e-05, |
| "loss": 1.5299, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 5.964283086771435e-05, |
| "loss": 1.3525, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 5.845849869981137e-05, |
| "loss": 1.4941, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.728116729049928e-05, |
| "loss": 1.1564, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_loss": 1.5867228507995605, |
| "eval_runtime": 2.1914, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.456, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 5.611103504890444e-05, |
| "loss": 1.5568, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 5.4948299170917325e-05, |
| "loss": 1.2441, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 5.379315560596038e-05, |
| "loss": 0.9717, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 5.26457990239657e-05, |
| "loss": 1.5905, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 5.1506422782568345e-05, |
| "loss": 1.4259, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_loss": 1.5872297286987305, |
| "eval_runtime": 2.1903, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 5.0375218894520834e-05, |
| "loss": 1.4877, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.9252377995334444e-05, |
| "loss": 1.4578, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 4.813808931115228e-05, |
| "loss": 1.0967, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 4.703254062686017e-05, |
| "loss": 1.3642, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 4.593591825444028e-05, |
| "loss": 1.4059, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_loss": 1.5853421688079834, |
| "eval_runtime": 2.1875, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.484840700157295e-05, |
| "loss": 1.3578, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 4.377019014049223e-05, |
| "loss": 1.178, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 4.270144937709981e-05, |
| "loss": 1.6276, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 4.164236482034327e-05, |
| "loss": 1.5173, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 4.059311495186338e-05, |
| "loss": 1.3487, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_loss": 1.5867466926574707, |
| "eval_runtime": 2.1894, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.9553876595915375e-05, |
| "loss": 1.0898, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.852482488956992e-05, |
| "loss": 0.8375, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.750613325319817e-05, |
| "loss": 1.1532, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.649797336124615e-05, |
| "loss": 1.603, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 3.550051511330361e-05, |
| "loss": 1.7306, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_loss": 1.5883917808532715, |
| "eval_runtime": 2.189, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.45139266054715e-05, |
| "loss": 1.4042, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.3538374102033866e-05, |
| "loss": 1.1013, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 3.257402200743821e-05, |
| "loss": 1.1465, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.1621032838589305e-05, |
| "loss": 1.7603, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.0679567197461134e-05, |
| "loss": 1.6117, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_loss": 1.5858465433120728, |
| "eval_runtime": 2.1875, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.457, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 2.974978374403147e-05, |
| "loss": 1.4448, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 2.8831839169543996e-05, |
| "loss": 1.2446, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 2.7925888170101665e-05, |
| "loss": 1.2843, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 2.7032083420597e-05, |
| "loss": 0.9528, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 2.6150575548982292e-05, |
| "loss": 1.1751, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_loss": 1.5852700471878052, |
| "eval_runtime": 2.1934, |
| "eval_samples_per_second": 0.912, |
| "eval_steps_per_second": 0.456, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 2.528151311088537e-05, |
| "loss": 1.2334, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.4425042564574184e-05, |
| "loss": 1.4127, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 2.3581308246275103e-05, |
| "loss": 1.1989, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 2.2750452345848682e-05, |
| "loss": 1.0506, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 2.1932614882827197e-05, |
| "loss": 1.5642, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_loss": 1.5846655368804932, |
| "eval_runtime": 2.1905, |
| "eval_samples_per_second": 0.913, |
| "eval_steps_per_second": 0.457, |
| "step": 200 |
| } |
| ], |
| "max_steps": 252, |
| "num_train_epochs": 3, |
| "total_flos": 1.0096652091466526e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|