| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.002146082027547257, | |
| "global_step": 29000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.6876, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.4209991693496704, | |
| "eval_runtime": 29774.1937, | |
| "eval_samples_per_second": 24.175, | |
| "eval_steps_per_second": 6.044, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.4255, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.4117300510406494, | |
| "eval_runtime": 29117.2533, | |
| "eval_samples_per_second": 24.72, | |
| "eval_steps_per_second": 6.18, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2975, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3992412090301514, | |
| "eval_runtime": 29718.2883, | |
| "eval_samples_per_second": 24.22, | |
| "eval_steps_per_second": 6.055, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3514, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.4060639142990112, | |
| "eval_runtime": 28900.2542, | |
| "eval_samples_per_second": 24.906, | |
| "eval_steps_per_second": 6.226, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3757, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.399295687675476, | |
| "eval_runtime": 29509.4537, | |
| "eval_samples_per_second": 24.392, | |
| "eval_steps_per_second": 6.098, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.295, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.393278956413269, | |
| "eval_runtime": 28449.3173, | |
| "eval_samples_per_second": 25.301, | |
| "eval_steps_per_second": 6.325, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2565, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3863052129745483, | |
| "eval_runtime": 28789.7552, | |
| "eval_samples_per_second": 25.001, | |
| "eval_steps_per_second": 6.25, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3095, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3882980346679688, | |
| "eval_runtime": 28567.3896, | |
| "eval_samples_per_second": 25.196, | |
| "eval_steps_per_second": 6.299, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3021, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3863459825515747, | |
| "eval_runtime": 28424.5337, | |
| "eval_samples_per_second": 25.323, | |
| "eval_steps_per_second": 6.331, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2299, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3776334524154663, | |
| "eval_runtime": 28753.4447, | |
| "eval_samples_per_second": 25.033, | |
| "eval_steps_per_second": 6.258, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2306, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3795045614242554, | |
| "eval_runtime": 28113.3911, | |
| "eval_samples_per_second": 25.603, | |
| "eval_steps_per_second": 6.401, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2425, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3737467527389526, | |
| "eval_runtime": 28752.4369, | |
| "eval_samples_per_second": 25.034, | |
| "eval_steps_per_second": 6.258, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1932, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3799411058425903, | |
| "eval_runtime": 27577.5799, | |
| "eval_samples_per_second": 26.1, | |
| "eval_steps_per_second": 6.525, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2312, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3715393543243408, | |
| "eval_runtime": 28034.4873, | |
| "eval_samples_per_second": 25.675, | |
| "eval_steps_per_second": 6.419, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2841, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3804839849472046, | |
| "eval_runtime": 28127.7804, | |
| "eval_samples_per_second": 25.59, | |
| "eval_steps_per_second": 6.397, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1463, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3776183128356934, | |
| "eval_runtime": 27577.1163, | |
| "eval_samples_per_second": 26.101, | |
| "eval_steps_per_second": 6.525, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1313, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3699731826782227, | |
| "eval_runtime": 28190.8614, | |
| "eval_samples_per_second": 25.533, | |
| "eval_steps_per_second": 6.383, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2267, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.366495132446289, | |
| "eval_runtime": 28103.2881, | |
| "eval_samples_per_second": 25.612, | |
| "eval_steps_per_second": 6.403, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1866, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3763595819473267, | |
| "eval_runtime": 27696.0614, | |
| "eval_samples_per_second": 25.989, | |
| "eval_steps_per_second": 6.497, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2347, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3777934312820435, | |
| "eval_runtime": 28118.8859, | |
| "eval_samples_per_second": 25.598, | |
| "eval_steps_per_second": 6.399, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1514, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.370295763015747, | |
| "eval_runtime": 27665.6655, | |
| "eval_samples_per_second": 26.017, | |
| "eval_steps_per_second": 6.504, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2867, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.372216820716858, | |
| "eval_runtime": 28029.069, | |
| "eval_samples_per_second": 25.68, | |
| "eval_steps_per_second": 6.42, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3031, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3675533533096313, | |
| "eval_runtime": 28060.4111, | |
| "eval_samples_per_second": 25.651, | |
| "eval_steps_per_second": 6.413, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2353, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3742448091506958, | |
| "eval_runtime": 27439.4619, | |
| "eval_samples_per_second": 26.232, | |
| "eval_steps_per_second": 6.558, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2022, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3690038919448853, | |
| "eval_runtime": 28177.5616, | |
| "eval_samples_per_second": 25.545, | |
| "eval_steps_per_second": 6.386, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1925, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3660128116607666, | |
| "eval_runtime": 28183.7194, | |
| "eval_samples_per_second": 25.539, | |
| "eval_steps_per_second": 6.385, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2097, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3706327676773071, | |
| "eval_runtime": 27638.252, | |
| "eval_samples_per_second": 26.043, | |
| "eval_steps_per_second": 6.511, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1606, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.369661569595337, | |
| "eval_runtime": 28259.5204, | |
| "eval_samples_per_second": 25.47, | |
| "eval_steps_per_second": 6.368, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2216, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.368302822113037, | |
| "eval_runtime": 28149.9769, | |
| "eval_samples_per_second": 25.57, | |
| "eval_steps_per_second": 6.392, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1916, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3689770698547363, | |
| "eval_runtime": 27702.5821, | |
| "eval_samples_per_second": 25.983, | |
| "eval_steps_per_second": 6.496, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2369, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3695650100708008, | |
| "eval_runtime": 28082.9192, | |
| "eval_samples_per_second": 25.631, | |
| "eval_steps_per_second": 6.408, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1862, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3672432899475098, | |
| "eval_runtime": 27790.3978, | |
| "eval_samples_per_second": 25.9, | |
| "eval_steps_per_second": 6.475, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1875, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.372326135635376, | |
| "eval_runtime": 27957.8295, | |
| "eval_samples_per_second": 25.745, | |
| "eval_steps_per_second": 6.436, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2237, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3708332777023315, | |
| "eval_runtime": 28123.0687, | |
| "eval_samples_per_second": 25.594, | |
| "eval_steps_per_second": 6.399, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1123, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3687807321548462, | |
| "eval_runtime": 27597.4681, | |
| "eval_samples_per_second": 26.082, | |
| "eval_steps_per_second": 6.52, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1219, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.376206874847412, | |
| "eval_runtime": 28127.0268, | |
| "eval_samples_per_second": 25.59, | |
| "eval_steps_per_second": 6.398, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1776, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.368283748626709, | |
| "eval_runtime": 28116.6088, | |
| "eval_samples_per_second": 25.6, | |
| "eval_steps_per_second": 6.4, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1627, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3710017204284668, | |
| "eval_runtime": 27585.1366, | |
| "eval_samples_per_second": 26.093, | |
| "eval_steps_per_second": 6.523, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0627, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3697084188461304, | |
| "eval_runtime": 28272.5185, | |
| "eval_samples_per_second": 25.459, | |
| "eval_steps_per_second": 6.365, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0632, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3792474269866943, | |
| "eval_runtime": 28381.2307, | |
| "eval_samples_per_second": 25.361, | |
| "eval_steps_per_second": 6.34, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2426, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.373166799545288, | |
| "eval_runtime": 28634.6137, | |
| "eval_samples_per_second": 25.137, | |
| "eval_steps_per_second": 6.284, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1263, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3743404150009155, | |
| "eval_runtime": 28674.6776, | |
| "eval_samples_per_second": 25.102, | |
| "eval_steps_per_second": 6.275, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1131, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3682280778884888, | |
| "eval_runtime": 28099.2766, | |
| "eval_samples_per_second": 25.616, | |
| "eval_steps_per_second": 6.404, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0595, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3719111680984497, | |
| "eval_runtime": 28695.4529, | |
| "eval_samples_per_second": 25.084, | |
| "eval_steps_per_second": 6.271, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1468, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3663983345031738, | |
| "eval_runtime": 28026.4183, | |
| "eval_samples_per_second": 25.682, | |
| "eval_steps_per_second": 6.421, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.184, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3723489046096802, | |
| "eval_runtime": 28690.9854, | |
| "eval_samples_per_second": 25.087, | |
| "eval_steps_per_second": 6.272, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1262, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3714051246643066, | |
| "eval_runtime": 28156.2291, | |
| "eval_samples_per_second": 25.564, | |
| "eval_steps_per_second": 6.391, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1758, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.368726134300232, | |
| "eval_runtime": 28657.2462, | |
| "eval_samples_per_second": 25.117, | |
| "eval_steps_per_second": 6.279, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0438, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3730684518814087, | |
| "eval_runtime": 28686.5378, | |
| "eval_samples_per_second": 25.091, | |
| "eval_steps_per_second": 6.273, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1404, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3706409931182861, | |
| "eval_runtime": 28123.1244, | |
| "eval_samples_per_second": 25.594, | |
| "eval_steps_per_second": 6.399, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1135, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3763220310211182, | |
| "eval_runtime": 28682.6176, | |
| "eval_samples_per_second": 25.095, | |
| "eval_steps_per_second": 6.274, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0536, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3709115982055664, | |
| "eval_runtime": 28032.6358, | |
| "eval_samples_per_second": 25.677, | |
| "eval_steps_per_second": 6.419, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1203, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3710169792175293, | |
| "eval_runtime": 28678.3157, | |
| "eval_samples_per_second": 25.099, | |
| "eval_steps_per_second": 6.275, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0784, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.370123028755188, | |
| "eval_runtime": 28008.2176, | |
| "eval_samples_per_second": 25.699, | |
| "eval_steps_per_second": 6.425, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0521, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3733536005020142, | |
| "eval_runtime": 17990.0293, | |
| "eval_samples_per_second": 40.01, | |
| "eval_steps_per_second": 10.003, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1773, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3757646083831787, | |
| "eval_runtime": 18000.6521, | |
| "eval_samples_per_second": 39.987, | |
| "eval_steps_per_second": 9.997, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1688, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3776638507843018, | |
| "eval_runtime": 17989.8584, | |
| "eval_samples_per_second": 40.011, | |
| "eval_steps_per_second": 10.003, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5e-05, | |
| "loss": 2.0855, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.3749734163284302, | |
| "eval_runtime": 17990.6544, | |
| "eval_samples_per_second": 40.009, | |
| "eval_steps_per_second": 10.002, | |
| "step": 29000 | |
| } | |
| ], | |
| "max_steps": 30000, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.5154937856e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |