| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.7742370848016797, |
| "eval_steps": 300, |
| "global_step": 37000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.328, |
| "eval_loss": 2.7831807136535645, |
| "eval_runtime": 1186.4739, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.975e-05, |
| "loss": 4.2509, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 0.4605, |
| "eval_loss": 2.0948123931884766, |
| "eval_runtime": 1186.9029, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 0.535, |
| "eval_loss": 1.6745976209640503, |
| "eval_runtime": 1186.3538, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.9500000000000004e-05, |
| "loss": 1.7807, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.579, |
| "eval_loss": 1.4981111288070679, |
| "eval_runtime": 1186.31, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9250000000000004e-05, |
| "loss": 1.572, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_accuracy": 0.587, |
| "eval_loss": 1.4770824909210205, |
| "eval_runtime": 1186.999, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_accuracy": 0.608, |
| "eval_loss": 1.4447531700134277, |
| "eval_runtime": 1186.6764, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9e-05, |
| "loss": 1.4599, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.601, |
| "eval_loss": 1.4452807903289795, |
| "eval_runtime": 1187.0967, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 0.631, |
| "eval_loss": 1.3295071125030518, |
| "eval_runtime": 1187.5516, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.875e-05, |
| "loss": 1.359, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.6205, |
| "eval_loss": 1.3333522081375122, |
| "eval_runtime": 1186.0391, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.85e-05, |
| "loss": 1.3251, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.6255, |
| "eval_loss": 1.363963007926941, |
| "eval_runtime": 1187.0984, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.653, |
| "eval_loss": 1.268043041229248, |
| "eval_runtime": 1187.7128, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.825e-05, |
| "loss": 1.3291, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.27, |
| "eval_accuracy": 0.653, |
| "eval_loss": 1.2229315042495728, |
| "eval_runtime": 1187.3105, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.646, |
| "eval_loss": 1.2400562763214111, |
| "eval_runtime": 1187.2506, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.8e-05, |
| "loss": 1.3028, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 0.6625, |
| "eval_loss": 1.1704109907150269, |
| "eval_runtime": 1187.1809, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.775e-05, |
| "loss": 1.241, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 0.6485, |
| "eval_loss": 1.1750303506851196, |
| "eval_runtime": 1186.827, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_accuracy": 0.6595, |
| "eval_loss": 1.1775925159454346, |
| "eval_runtime": 1186.113, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.75e-05, |
| "loss": 1.1833, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_accuracy": 0.6825, |
| "eval_loss": 1.1013399362564087, |
| "eval_runtime": 1187.1796, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.7125, |
| "eval_loss": 1.0126953125, |
| "eval_runtime": 1186.6585, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.7249999999999997e-05, |
| "loss": 1.1398, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_accuracy": 0.7345, |
| "eval_loss": 0.93663489818573, |
| "eval_runtime": 1185.7147, |
| "eval_samples_per_second": 1.687, |
| "eval_steps_per_second": 0.211, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.7e-05, |
| "loss": 0.924, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_accuracy": 0.734, |
| "eval_loss": 0.8979936838150024, |
| "eval_runtime": 1185.9917, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_accuracy": 0.7415, |
| "eval_loss": 0.9261764287948608, |
| "eval_runtime": 1186.1073, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.6750000000000005e-05, |
| "loss": 0.9069, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 0.7505, |
| "eval_loss": 0.8675258755683899, |
| "eval_runtime": 1186.5035, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_accuracy": 0.747, |
| "eval_loss": 0.9057780504226685, |
| "eval_runtime": 1186.5258, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.6500000000000005e-05, |
| "loss": 0.859, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_accuracy": 0.768, |
| "eval_loss": 0.8729298710823059, |
| "eval_runtime": 1185.9339, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.6250000000000006e-05, |
| "loss": 0.8543, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_accuracy": 0.747, |
| "eval_loss": 0.9086282849311829, |
| "eval_runtime": 1186.632, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_accuracy": 0.7495, |
| "eval_loss": 0.9070839285850525, |
| "eval_runtime": 1185.9595, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.8249, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_accuracy": 0.758, |
| "eval_loss": 0.8381221890449524, |
| "eval_runtime": 1187.0834, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_accuracy": 0.7575, |
| "eval_loss": 0.8683782815933228, |
| "eval_runtime": 1186.7854, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.575e-05, |
| "loss": 0.8244, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 0.758, |
| "eval_loss": 0.8148394227027893, |
| "eval_runtime": 1187.1442, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.55e-05, |
| "loss": 0.8829, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.7475, |
| "eval_loss": 0.8369246125221252, |
| "eval_runtime": 1186.5959, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_accuracy": 0.7685, |
| "eval_loss": 0.7999008297920227, |
| "eval_runtime": 1186.8545, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.525e-05, |
| "loss": 0.8641, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 0.766, |
| "eval_loss": 0.8229847550392151, |
| "eval_runtime": 1186.889, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_accuracy": 0.759, |
| "eval_loss": 0.805006206035614, |
| "eval_runtime": 1187.0511, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.5e-05, |
| "loss": 0.8418, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_accuracy": 0.763, |
| "eval_loss": 0.8287988901138306, |
| "eval_runtime": 1186.3691, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.4750000000000004e-05, |
| "loss": 0.7795, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_accuracy": 0.7595, |
| "eval_loss": 0.829521656036377, |
| "eval_runtime": 1187.0672, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 0.7715, |
| "eval_loss": 0.7910842299461365, |
| "eval_runtime": 1187.3028, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.4500000000000004e-05, |
| "loss": 0.7874, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 0.765, |
| "eval_loss": 0.8317267298698425, |
| "eval_runtime": 1187.1774, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 0.7635, |
| "eval_loss": 0.8163227438926697, |
| "eval_runtime": 1186.9338, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 4.4250000000000005e-05, |
| "loss": 0.8084, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 0.7695, |
| "eval_loss": 0.7647069692611694, |
| "eval_runtime": 1188.3228, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.8158, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 0.762, |
| "eval_loss": 0.7762951850891113, |
| "eval_runtime": 1187.5448, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_accuracy": 0.776, |
| "eval_loss": 0.748843789100647, |
| "eval_runtime": 1186.9009, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.375e-05, |
| "loss": 0.8213, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_accuracy": 0.7635, |
| "eval_loss": 0.8027353286743164, |
| "eval_runtime": 1186.9766, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 0.7815, |
| "eval_loss": 0.7765725255012512, |
| "eval_runtime": 1187.3977, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.35e-05, |
| "loss": 0.7743, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_accuracy": 0.7825, |
| "eval_loss": 0.7971859574317932, |
| "eval_runtime": 1187.3337, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 13200 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.325e-05, |
| "loss": 0.7384, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_accuracy": 0.7715, |
| "eval_loss": 0.8095068335533142, |
| "eval_runtime": 1187.8109, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_accuracy": 0.7665, |
| "eval_loss": 0.8057354092597961, |
| "eval_runtime": 1187.1204, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 13800 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 4.3e-05, |
| "loss": 0.6811, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_accuracy": 0.765, |
| "eval_loss": 0.8190826773643494, |
| "eval_runtime": 1186.898, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 14100 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_accuracy": 0.7815, |
| "eval_loss": 0.8019638657569885, |
| "eval_runtime": 1186.6924, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 14400 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 4.275e-05, |
| "loss": 0.6512, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.774, |
| "eval_loss": 0.8071369528770447, |
| "eval_runtime": 1187.0834, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 4.25e-05, |
| "loss": 0.6752, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_accuracy": 0.781, |
| "eval_loss": 0.7650523781776428, |
| "eval_runtime": 1188.8808, |
| "eval_samples_per_second": 1.682, |
| "eval_steps_per_second": 0.21, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_accuracy": 0.77, |
| "eval_loss": 0.8068825006484985, |
| "eval_runtime": 1187.497, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 4.2250000000000004e-05, |
| "loss": 0.6467, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_accuracy": 0.772, |
| "eval_loss": 0.817916989326477, |
| "eval_runtime": 1188.2442, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_accuracy": 0.7725, |
| "eval_loss": 0.7762767672538757, |
| "eval_runtime": 1188.1044, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 4.2e-05, |
| "loss": 0.6815, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_accuracy": 0.7815, |
| "eval_loss": 0.804487943649292, |
| "eval_runtime": 1186.96, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 4.175e-05, |
| "loss": 0.6652, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_accuracy": 0.7685, |
| "eval_loss": 0.7841760516166687, |
| "eval_runtime": 1192.1415, |
| "eval_samples_per_second": 1.678, |
| "eval_steps_per_second": 0.21, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_accuracy": 0.769, |
| "eval_loss": 0.7735722661018372, |
| "eval_runtime": 1187.6295, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 4.15e-05, |
| "loss": 0.6787, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.7725, |
| "eval_loss": 0.8701850771903992, |
| "eval_runtime": 1186.4462, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_accuracy": 0.781, |
| "eval_loss": 0.7945307493209839, |
| "eval_runtime": 1186.8131, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 4.125e-05, |
| "loss": 0.6731, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.33, |
| "eval_accuracy": 0.772, |
| "eval_loss": 0.8117572069168091, |
| "eval_runtime": 1187.4347, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.1e-05, |
| "loss": 0.7126, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_accuracy": 0.767, |
| "eval_loss": 0.8045957088470459, |
| "eval_runtime": 1187.2145, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_accuracy": 0.778, |
| "eval_loss": 0.84222012758255, |
| "eval_runtime": 1187.2901, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 4.075e-05, |
| "loss": 0.6724, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_accuracy": 0.7785, |
| "eval_loss": 0.8121286630630493, |
| "eval_runtime": 1187.0784, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_accuracy": 0.775, |
| "eval_loss": 0.8152139782905579, |
| "eval_runtime": 1188.0198, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.05e-05, |
| "loss": 0.6797, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_accuracy": 0.768, |
| "eval_loss": 0.7854306697845459, |
| "eval_runtime": 1187.194, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.025e-05, |
| "loss": 0.6684, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_accuracy": 0.7715, |
| "eval_loss": 0.8213361501693726, |
| "eval_runtime": 1186.9831, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_accuracy": 0.7745, |
| "eval_loss": 0.7918490767478943, |
| "eval_runtime": 1186.5427, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 4e-05, |
| "loss": 0.6613, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_accuracy": 0.7845, |
| "eval_loss": 0.7775911092758179, |
| "eval_runtime": 1186.9067, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 0.7755, |
| "eval_loss": 0.8130319714546204, |
| "eval_runtime": 1187.1843, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 3.9750000000000004e-05, |
| "loss": 0.655, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_accuracy": 0.778, |
| "eval_loss": 0.7867146134376526, |
| "eval_runtime": 1185.9114, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 3.9500000000000005e-05, |
| "loss": 0.6778, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_accuracy": 0.781, |
| "eval_loss": 0.7961477637290955, |
| "eval_runtime": 1186.2535, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_accuracy": 0.7745, |
| "eval_loss": 0.7971734404563904, |
| "eval_runtime": 1186.9143, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 3.9250000000000005e-05, |
| "loss": 0.6517, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_accuracy": 0.7775, |
| "eval_loss": 0.7863536477088928, |
| "eval_runtime": 1186.9914, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_accuracy": 0.7915, |
| "eval_loss": 0.8122566938400269, |
| "eval_runtime": 1186.3977, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.6406, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 0.7925, |
| "eval_loss": 0.7533332705497742, |
| "eval_runtime": 1187.9251, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.875e-05, |
| "loss": 0.662, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_accuracy": 0.78, |
| "eval_loss": 0.7849723100662231, |
| "eval_runtime": 1187.4101, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 0.7885, |
| "eval_loss": 0.7343565821647644, |
| "eval_runtime": 1187.2869, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 3.85e-05, |
| "loss": 0.6659, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_accuracy": 0.7785, |
| "eval_loss": 0.789780855178833, |
| "eval_runtime": 1186.7745, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_accuracy": 0.79, |
| "eval_loss": 0.7574155330657959, |
| "eval_runtime": 1187.4419, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 3.825e-05, |
| "loss": 0.641, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 0.7915, |
| "eval_loss": 0.7914082407951355, |
| "eval_runtime": 1187.5239, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 3.8e-05, |
| "loss": 0.6445, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_accuracy": 0.785, |
| "eval_loss": 0.7899203300476074, |
| "eval_runtime": 1189.4198, |
| "eval_samples_per_second": 1.681, |
| "eval_steps_per_second": 0.21, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.82, |
| "eval_accuracy": 0.7785, |
| "eval_loss": 0.7695072293281555, |
| "eval_runtime": 1186.6637, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 3.775e-05, |
| "loss": 0.6667, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_accuracy": 0.793, |
| "eval_loss": 0.812134861946106, |
| "eval_runtime": 1188.2303, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 0.79, |
| "eval_loss": 0.7290639877319336, |
| "eval_runtime": 1188.6526, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.6313, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_accuracy": 0.78, |
| "eval_loss": 0.7741135954856873, |
| "eval_runtime": 1188.1815, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 3.7250000000000004e-05, |
| "loss": 0.6634, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_accuracy": 0.778, |
| "eval_loss": 0.7394410371780396, |
| "eval_runtime": 1188.3415, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_accuracy": 0.776, |
| "eval_loss": 0.7500986456871033, |
| "eval_runtime": 1188.0864, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 3.7e-05, |
| "loss": 0.6614, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_accuracy": 0.787, |
| "eval_loss": 0.7917065620422363, |
| "eval_runtime": 1187.7872, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 26100 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_accuracy": 0.778, |
| "eval_loss": 0.7841246724128723, |
| "eval_runtime": 1188.5012, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 3.675e-05, |
| "loss": 0.6286, |
| "step": 26500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.7825, |
| "eval_loss": 0.7422041296958923, |
| "eval_runtime": 1188.5052, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 26700 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 3.65e-05, |
| "loss": 0.516, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_accuracy": 0.773, |
| "eval_loss": 0.9123806357383728, |
| "eval_runtime": 1187.6306, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 27000 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_accuracy": 0.801, |
| "eval_loss": 0.8635061979293823, |
| "eval_runtime": 1187.6211, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 27300 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 3.625e-05, |
| "loss": 0.4621, |
| "step": 27500 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_accuracy": 0.774, |
| "eval_loss": 0.9402374029159546, |
| "eval_runtime": 1187.828, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 27600 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_accuracy": 0.7975, |
| "eval_loss": 0.8873071074485779, |
| "eval_runtime": 1187.9698, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 27900 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 3.6e-05, |
| "loss": 0.4644, |
| "step": 28000 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_accuracy": 0.788, |
| "eval_loss": 0.8984495401382446, |
| "eval_runtime": 1188.2025, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 28200 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 3.575e-05, |
| "loss": 0.4601, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_accuracy": 0.7745, |
| "eval_loss": 0.9219344258308411, |
| "eval_runtime": 1188.2213, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 28500 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_accuracy": 0.7845, |
| "eval_loss": 0.895272433757782, |
| "eval_runtime": 1188.0314, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 28800 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.55e-05, |
| "loss": 0.4612, |
| "step": 29000 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_accuracy": 0.7855, |
| "eval_loss": 0.9521645903587341, |
| "eval_runtime": 1188.1008, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 29100 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_accuracy": 0.7915, |
| "eval_loss": 0.8539759516716003, |
| "eval_runtime": 1188.3095, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 29400 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.525e-05, |
| "loss": 0.4594, |
| "step": 29500 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_accuracy": 0.79, |
| "eval_loss": 0.8441948890686035, |
| "eval_runtime": 1188.0956, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 29700 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.5e-05, |
| "loss": 0.4502, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_accuracy": 0.7815, |
| "eval_loss": 0.9029179811477661, |
| "eval_runtime": 1187.7265, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 30000 |
| }, |
| { |
| "epoch": 2.27, |
| "eval_accuracy": 0.785, |
| "eval_loss": 0.8536617159843445, |
| "eval_runtime": 1187.8344, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 30300 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 3.475e-05, |
| "loss": 0.4733, |
| "step": 30500 |
| }, |
| { |
| "epoch": 2.29, |
| "eval_accuracy": 0.7855, |
| "eval_loss": 0.9026994705200195, |
| "eval_runtime": 1187.3948, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 30600 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_accuracy": 0.7875, |
| "eval_loss": 0.9556459784507751, |
| "eval_runtime": 1187.5961, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 30900 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 3.45e-05, |
| "loss": 0.4394, |
| "step": 31000 |
| }, |
| { |
| "epoch": 2.34, |
| "eval_accuracy": 0.7905, |
| "eval_loss": 0.8229288458824158, |
| "eval_runtime": 1188.2545, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 31200 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 3.4250000000000006e-05, |
| "loss": 0.4487, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_accuracy": 0.7835, |
| "eval_loss": 0.9420590400695801, |
| "eval_runtime": 1188.0122, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 31500 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 0.792, |
| "eval_loss": 0.8579486608505249, |
| "eval_runtime": 1187.7995, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 31800 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.4713, |
| "step": 32000 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_accuracy": 0.7955, |
| "eval_loss": 0.8385721445083618, |
| "eval_runtime": 1188.3507, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 32100 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_accuracy": 0.794, |
| "eval_loss": 0.8750296831130981, |
| "eval_runtime": 1189.7033, |
| "eval_samples_per_second": 1.681, |
| "eval_steps_per_second": 0.21, |
| "step": 32400 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.375000000000001e-05, |
| "loss": 0.4886, |
| "step": 32500 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_accuracy": 0.789, |
| "eval_loss": 0.8962150812149048, |
| "eval_runtime": 1189.4135, |
| "eval_samples_per_second": 1.682, |
| "eval_steps_per_second": 0.21, |
| "step": 32700 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 3.35e-05, |
| "loss": 0.4904, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_accuracy": 0.783, |
| "eval_loss": 0.8365711569786072, |
| "eval_runtime": 1189.5724, |
| "eval_samples_per_second": 1.681, |
| "eval_steps_per_second": 0.21, |
| "step": 33000 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_accuracy": 0.796, |
| "eval_loss": 0.8707769513130188, |
| "eval_runtime": 1188.4348, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 33300 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 3.325e-05, |
| "loss": 0.4652, |
| "step": 33500 |
| }, |
| { |
| "epoch": 2.52, |
| "eval_accuracy": 0.7945, |
| "eval_loss": 0.8616237640380859, |
| "eval_runtime": 1187.5594, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 33600 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 0.7805, |
| "eval_loss": 0.8876886367797852, |
| "eval_runtime": 1186.5623, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 33900 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 3.3e-05, |
| "loss": 0.4526, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 0.787, |
| "eval_loss": 0.8660940527915955, |
| "eval_runtime": 1187.4762, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 34200 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 3.275e-05, |
| "loss": 0.4691, |
| "step": 34500 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 0.793, |
| "eval_loss": 0.8734720349311829, |
| "eval_runtime": 1188.0976, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 34500 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_accuracy": 0.784, |
| "eval_loss": 0.9059090614318848, |
| "eval_runtime": 1187.6806, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 34800 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 0.4408, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2.63, |
| "eval_accuracy": 0.7855, |
| "eval_loss": 0.8489252924919128, |
| "eval_runtime": 1186.6452, |
| "eval_samples_per_second": 1.685, |
| "eval_steps_per_second": 0.211, |
| "step": 35100 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_accuracy": 0.774, |
| "eval_loss": 0.8704678416252136, |
| "eval_runtime": 1186.5813, |
| "eval_samples_per_second": 1.686, |
| "eval_steps_per_second": 0.211, |
| "step": 35400 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 3.2250000000000005e-05, |
| "loss": 0.4725, |
| "step": 35500 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_accuracy": 0.782, |
| "eval_loss": 0.9732263684272766, |
| "eval_runtime": 1187.7392, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 35700 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.4486, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_accuracy": 0.783, |
| "eval_loss": 0.8399544954299927, |
| "eval_runtime": 1187.666, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 0.792, |
| "eval_loss": 0.8701621294021606, |
| "eval_runtime": 1187.7885, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.21, |
| "step": 36300 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 3.175e-05, |
| "loss": 0.4427, |
| "step": 36500 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_accuracy": 0.793, |
| "eval_loss": 0.8826534152030945, |
| "eval_runtime": 1187.3044, |
| "eval_samples_per_second": 1.684, |
| "eval_steps_per_second": 0.211, |
| "step": 36600 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_accuracy": 0.802, |
| "eval_loss": 0.8090859651565552, |
| "eval_runtime": 1188.0705, |
| "eval_samples_per_second": 1.683, |
| "eval_steps_per_second": 0.21, |
| "step": 36900 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.15e-05, |
| "loss": 0.4576, |
| "step": 37000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 500, |
| "total_flos": 2.5551147393613824e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|