| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.4613642796490236, | |
| "global_step": 42800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.4900000000000002e-05, | |
| "loss": 1.6261, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.4594048261642456, | |
| "eval_loss": 1.6715384721755981, | |
| "eval_runtime": 1195.9203, | |
| "eval_samples_per_second": 10.34, | |
| "eval_steps_per_second": 5.17, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9850000000000006e-05, | |
| "loss": 1.5983, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.4273815155029297, | |
| "eval_loss": 1.8006267547607422, | |
| "eval_runtime": 1200.9941, | |
| "eval_samples_per_second": 10.296, | |
| "eval_steps_per_second": 5.148, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.475000000000001e-05, | |
| "loss": 1.8724, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.3767588436603546, | |
| "eval_loss": 1.967054009437561, | |
| "eval_runtime": 1198.349, | |
| "eval_samples_per_second": 10.319, | |
| "eval_steps_per_second": 5.16, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.975000000000001e-05, | |
| "loss": 1.9731, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.3982694447040558, | |
| "eval_loss": 1.9361200332641602, | |
| "eval_runtime": 1206.207, | |
| "eval_samples_per_second": 10.252, | |
| "eval_steps_per_second": 5.126, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.979820627802691e-05, | |
| "loss": 1.9923, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.3914766311645508, | |
| "eval_loss": 2.0836076736450195, | |
| "eval_runtime": 1190.0672, | |
| "eval_samples_per_second": 10.391, | |
| "eval_steps_per_second": 5.196, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.959437423562985e-05, | |
| "loss": 1.8714, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.47185832262039185, | |
| "eval_loss": 1.6872302293777466, | |
| "eval_runtime": 1192.2479, | |
| "eval_samples_per_second": 10.372, | |
| "eval_steps_per_second": 5.186, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.939054219323278e-05, | |
| "loss": 1.7592, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.4551997482776642, | |
| "eval_loss": 1.7559692859649658, | |
| "eval_runtime": 1199.3651, | |
| "eval_samples_per_second": 10.31, | |
| "eval_steps_per_second": 5.155, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 9.918671015083571e-05, | |
| "loss": 1.6757, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.5049328804016113, | |
| "eval_loss": 1.6301194429397583, | |
| "eval_runtime": 1201.6603, | |
| "eval_samples_per_second": 10.291, | |
| "eval_steps_per_second": 5.145, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 9.898287810843865e-05, | |
| "loss": 1.6694, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.5016981959342957, | |
| "eval_loss": 1.7610756158828735, | |
| "eval_runtime": 1198.381, | |
| "eval_samples_per_second": 10.319, | |
| "eval_steps_per_second": 5.159, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 9.877904606604159e-05, | |
| "loss": 1.6181, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5145560503005981, | |
| "eval_loss": 1.5850120782852173, | |
| "eval_runtime": 1202.0306, | |
| "eval_samples_per_second": 10.288, | |
| "eval_steps_per_second": 5.144, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 9.857562168772932e-05, | |
| "loss": 1.686, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.5050137639045715, | |
| "eval_loss": 1.6993136405944824, | |
| "eval_runtime": 1206.9484, | |
| "eval_samples_per_second": 10.246, | |
| "eval_steps_per_second": 5.123, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 9.837178964533225e-05, | |
| "loss": 1.5614, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.5293546915054321, | |
| "eval_loss": 1.5649951696395874, | |
| "eval_runtime": 1211.1061, | |
| "eval_samples_per_second": 10.211, | |
| "eval_steps_per_second": 5.105, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.816795760293519e-05, | |
| "loss": 1.5995, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.5736697316169739, | |
| "eval_loss": 1.4089938402175903, | |
| "eval_runtime": 1208.3994, | |
| "eval_samples_per_second": 10.233, | |
| "eval_steps_per_second": 5.117, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 9.796412556053812e-05, | |
| "loss": 1.4745, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.5655021667480469, | |
| "eval_loss": 1.4745545387268066, | |
| "eval_runtime": 1213.8281, | |
| "eval_samples_per_second": 10.188, | |
| "eval_steps_per_second": 5.094, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.776029351814105e-05, | |
| "loss": 1.4518, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.5529677867889404, | |
| "eval_loss": 1.5653101205825806, | |
| "eval_runtime": 1212.4544, | |
| "eval_samples_per_second": 10.199, | |
| "eval_steps_per_second": 5.1, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.755646147574398e-05, | |
| "loss": 1.4693, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.5558789968490601, | |
| "eval_loss": 1.499045729637146, | |
| "eval_runtime": 1219.4703, | |
| "eval_samples_per_second": 10.14, | |
| "eval_steps_per_second": 5.07, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.735262943334693e-05, | |
| "loss": 1.4723, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.5532912611961365, | |
| "eval_loss": 1.5437432527542114, | |
| "eval_runtime": 1217.1159, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 5.08, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 9.714879739094986e-05, | |
| "loss": 1.468, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.5289503335952759, | |
| "eval_loss": 1.6594600677490234, | |
| "eval_runtime": 1215.7239, | |
| "eval_samples_per_second": 10.172, | |
| "eval_steps_per_second": 5.086, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 9.69449653485528e-05, | |
| "loss": 1.4135, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.5919456481933594, | |
| "eval_loss": 1.2978076934814453, | |
| "eval_runtime": 1220.3558, | |
| "eval_samples_per_second": 10.133, | |
| "eval_steps_per_second": 5.067, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.674194863432532e-05, | |
| "loss": 1.4042, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5769044160842896, | |
| "eval_loss": 1.4572824239730835, | |
| "eval_runtime": 1236.4044, | |
| "eval_samples_per_second": 10.002, | |
| "eval_steps_per_second": 5.001, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 9.653811659192825e-05, | |
| "loss": 1.3864, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.5811903476715088, | |
| "eval_loss": 1.4342533349990845, | |
| "eval_runtime": 1219.3647, | |
| "eval_samples_per_second": 10.141, | |
| "eval_steps_per_second": 5.071, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 9.63342845495312e-05, | |
| "loss": 1.3847, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.6037521958351135, | |
| "eval_loss": 1.433942437171936, | |
| "eval_runtime": 1217.0747, | |
| "eval_samples_per_second": 10.16, | |
| "eval_steps_per_second": 5.08, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.613045250713413e-05, | |
| "loss": 1.3346, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.6101406812667847, | |
| "eval_loss": 1.3803991079330444, | |
| "eval_runtime": 1217.1935, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 5.08, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.592662046473706e-05, | |
| "loss": 1.3487, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.6148309707641602, | |
| "eval_loss": 1.342442274093628, | |
| "eval_runtime": 1216.3216, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 5.083, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 9.572278842233999e-05, | |
| "loss": 1.3597, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_accuracy": 0.6078764200210571, | |
| "eval_loss": 1.3775306940078735, | |
| "eval_runtime": 1216.7531, | |
| "eval_samples_per_second": 10.163, | |
| "eval_steps_per_second": 5.082, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.551895637994293e-05, | |
| "loss": 1.3021, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_accuracy": 0.6058547496795654, | |
| "eval_loss": 1.3221111297607422, | |
| "eval_runtime": 1220.1747, | |
| "eval_samples_per_second": 10.135, | |
| "eval_steps_per_second": 5.067, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.531512433754587e-05, | |
| "loss": 1.2762, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 0.6061782240867615, | |
| "eval_loss": 1.3001333475112915, | |
| "eval_runtime": 1208.1233, | |
| "eval_samples_per_second": 10.236, | |
| "eval_steps_per_second": 5.118, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.51112922951488e-05, | |
| "loss": 1.3218, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_accuracy": 0.592430830001831, | |
| "eval_loss": 1.4698729515075684, | |
| "eval_runtime": 1219.7567, | |
| "eval_samples_per_second": 10.138, | |
| "eval_steps_per_second": 5.069, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 9.490786791683654e-05, | |
| "loss": 1.3226, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 0.5907326340675354, | |
| "eval_loss": 1.396222710609436, | |
| "eval_runtime": 1210.2892, | |
| "eval_samples_per_second": 10.217, | |
| "eval_steps_per_second": 5.109, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 9.470444353852426e-05, | |
| "loss": 1.3306, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 0.6242924332618713, | |
| "eval_loss": 1.3146133422851562, | |
| "eval_runtime": 1215.13, | |
| "eval_samples_per_second": 10.177, | |
| "eval_steps_per_second": 5.088, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 9.45006114961272e-05, | |
| "loss": 1.307, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_accuracy": 0.6107068061828613, | |
| "eval_loss": 1.3835194110870361, | |
| "eval_runtime": 1216.4262, | |
| "eval_samples_per_second": 10.166, | |
| "eval_steps_per_second": 5.083, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 9.429677945373014e-05, | |
| "loss": 1.239, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_accuracy": 0.6234028935432434, | |
| "eval_loss": 1.333041787147522, | |
| "eval_runtime": 1214.4499, | |
| "eval_samples_per_second": 10.182, | |
| "eval_steps_per_second": 5.091, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 9.409294741133307e-05, | |
| "loss": 1.2486, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_accuracy": 0.644670844078064, | |
| "eval_loss": 1.187103033065796, | |
| "eval_runtime": 1217.2373, | |
| "eval_samples_per_second": 10.159, | |
| "eval_steps_per_second": 5.08, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 9.3889115368936e-05, | |
| "loss": 1.2305, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_accuracy": 0.6330260634422302, | |
| "eval_loss": 1.280613660812378, | |
| "eval_runtime": 1218.1968, | |
| "eval_samples_per_second": 10.151, | |
| "eval_steps_per_second": 5.076, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 9.368528332653894e-05, | |
| "loss": 1.2427, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_accuracy": 0.6369885206222534, | |
| "eval_loss": 1.3734965324401855, | |
| "eval_runtime": 1216.3233, | |
| "eval_samples_per_second": 10.167, | |
| "eval_steps_per_second": 5.083, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 9.348145128414187e-05, | |
| "loss": 1.232, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_accuracy": 0.6478247046470642, | |
| "eval_loss": 1.244884729385376, | |
| "eval_runtime": 1226.612, | |
| "eval_samples_per_second": 10.081, | |
| "eval_steps_per_second": 5.041, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 9.32776192417448e-05, | |
| "loss": 1.2819, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 0.6360180974006653, | |
| "eval_loss": 1.2376329898834229, | |
| "eval_runtime": 1221.3054, | |
| "eval_samples_per_second": 10.125, | |
| "eval_steps_per_second": 5.063, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 9.307378719934773e-05, | |
| "loss": 1.206, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.6438621878623962, | |
| "eval_loss": 1.3146148920059204, | |
| "eval_runtime": 1216.9126, | |
| "eval_samples_per_second": 10.162, | |
| "eval_steps_per_second": 5.081, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 9.286995515695067e-05, | |
| "loss": 1.1787, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_accuracy": 0.6048035025596619, | |
| "eval_loss": 1.5405412912368774, | |
| "eval_runtime": 1222.2408, | |
| "eval_samples_per_second": 10.117, | |
| "eval_steps_per_second": 5.059, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 9.266612311455361e-05, | |
| "loss": 1.2037, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_accuracy": 0.6377162933349609, | |
| "eval_loss": 1.245172142982483, | |
| "eval_runtime": 1222.2415, | |
| "eval_samples_per_second": 10.117, | |
| "eval_steps_per_second": 5.059, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 9.246269873624134e-05, | |
| "loss": 1.2359, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_accuracy": 0.6452369689941406, | |
| "eval_loss": 1.305955171585083, | |
| "eval_runtime": 1226.2844, | |
| "eval_samples_per_second": 10.084, | |
| "eval_steps_per_second": 5.042, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 9.225886669384428e-05, | |
| "loss": 1.258, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_accuracy": 0.6490376591682434, | |
| "eval_loss": 1.2557748556137085, | |
| "eval_runtime": 1225.4315, | |
| "eval_samples_per_second": 10.091, | |
| "eval_steps_per_second": 5.046, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 9.2055442315532e-05, | |
| "loss": 1.124, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 0.6577712893486023, | |
| "eval_loss": 1.201909065246582, | |
| "eval_runtime": 1234.6035, | |
| "eval_samples_per_second": 10.016, | |
| "eval_steps_per_second": 5.008, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 9.185201793721973e-05, | |
| "loss": 1.2008, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.6518680453300476, | |
| "eval_loss": 1.2547849416732788, | |
| "eval_runtime": 1221.6378, | |
| "eval_samples_per_second": 10.122, | |
| "eval_steps_per_second": 5.061, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 9.164818589482268e-05, | |
| "loss": 1.231, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_accuracy": 0.6403040885925293, | |
| "eval_loss": 1.2401455640792847, | |
| "eval_runtime": 1227.1884, | |
| "eval_samples_per_second": 10.077, | |
| "eval_steps_per_second": 5.038, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 9.14443538524256e-05, | |
| "loss": 1.176, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_accuracy": 0.630195677280426, | |
| "eval_loss": 1.3183224201202393, | |
| "eval_runtime": 1230.2141, | |
| "eval_samples_per_second": 10.052, | |
| "eval_steps_per_second": 5.026, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 9.124052181002855e-05, | |
| "loss": 1.1756, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.6433770060539246, | |
| "eval_loss": 1.3021215200424194, | |
| "eval_runtime": 1225.278, | |
| "eval_samples_per_second": 10.092, | |
| "eval_steps_per_second": 5.046, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 9.103709743171626e-05, | |
| "loss": 1.2213, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 0.6450752019882202, | |
| "eval_loss": 1.2812703847885132, | |
| "eval_runtime": 1226.6055, | |
| "eval_samples_per_second": 10.081, | |
| "eval_steps_per_second": 5.041, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 9.08332653893192e-05, | |
| "loss": 1.1343, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_accuracy": 0.6614103317260742, | |
| "eval_loss": 1.2407350540161133, | |
| "eval_runtime": 1227.7031, | |
| "eval_samples_per_second": 10.072, | |
| "eval_steps_per_second": 5.036, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 9.062943334692214e-05, | |
| "loss": 1.1498, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 0.6570435166358948, | |
| "eval_loss": 1.1911596059799194, | |
| "eval_runtime": 1201.1782, | |
| "eval_samples_per_second": 10.295, | |
| "eval_steps_per_second": 5.147, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 9.042560130452508e-05, | |
| "loss": 1.1015, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_accuracy": 0.6508976221084595, | |
| "eval_loss": 1.360363245010376, | |
| "eval_runtime": 1193.6402, | |
| "eval_samples_per_second": 10.36, | |
| "eval_steps_per_second": 5.18, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 9.022176926212801e-05, | |
| "loss": 1.0794, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_accuracy": 0.6508976221084595, | |
| "eval_loss": 1.3214635848999023, | |
| "eval_runtime": 1206.1082, | |
| "eval_samples_per_second": 10.253, | |
| "eval_steps_per_second": 5.126, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 9.001793721973094e-05, | |
| "loss": 1.0692, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_accuracy": 0.662542462348938, | |
| "eval_loss": 1.2083770036697388, | |
| "eval_runtime": 1204.5386, | |
| "eval_samples_per_second": 10.266, | |
| "eval_steps_per_second": 5.133, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 8.981492050550348e-05, | |
| "loss": 1.1552, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_accuracy": 0.6536471247673035, | |
| "eval_loss": 1.2683591842651367, | |
| "eval_runtime": 1207.0957, | |
| "eval_samples_per_second": 10.244, | |
| "eval_steps_per_second": 5.122, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 8.961108846310641e-05, | |
| "loss": 1.0985, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_accuracy": 0.6645641326904297, | |
| "eval_loss": 1.1616308689117432, | |
| "eval_runtime": 1202.0246, | |
| "eval_samples_per_second": 10.288, | |
| "eval_steps_per_second": 5.144, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 8.940725642070934e-05, | |
| "loss": 1.1239, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_accuracy": 0.669173538684845, | |
| "eval_loss": 1.1362210512161255, | |
| "eval_runtime": 1205.0904, | |
| "eval_samples_per_second": 10.261, | |
| "eval_steps_per_second": 5.131, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 8.920342437831227e-05, | |
| "loss": 1.0899, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_accuracy": 0.6521106362342834, | |
| "eval_loss": 1.3762954473495483, | |
| "eval_runtime": 1210.3119, | |
| "eval_samples_per_second": 10.217, | |
| "eval_steps_per_second": 5.109, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 8.89995923359152e-05, | |
| "loss": 1.0953, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_accuracy": 0.666747510433197, | |
| "eval_loss": 1.1568596363067627, | |
| "eval_runtime": 1213.4003, | |
| "eval_samples_per_second": 10.191, | |
| "eval_steps_per_second": 5.096, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 8.879576029351814e-05, | |
| "loss": 1.1148, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_accuracy": 0.6541323065757751, | |
| "eval_loss": 1.2405096292495728, | |
| "eval_runtime": 1215.762, | |
| "eval_samples_per_second": 10.171, | |
| "eval_steps_per_second": 5.086, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 8.859192825112108e-05, | |
| "loss": 1.0715, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_accuracy": 0.6622189879417419, | |
| "eval_loss": 1.2203196287155151, | |
| "eval_runtime": 1237.659, | |
| "eval_samples_per_second": 9.991, | |
| "eval_steps_per_second": 4.996, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.838809620872402e-05, | |
| "loss": 1.1298, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_accuracy": 0.667960524559021, | |
| "eval_loss": 1.258815884590149, | |
| "eval_runtime": 1214.4985, | |
| "eval_samples_per_second": 10.182, | |
| "eval_steps_per_second": 5.091, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.818426416632695e-05, | |
| "loss": 1.1078, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_accuracy": 0.6598738431930542, | |
| "eval_loss": 1.1994160413742065, | |
| "eval_runtime": 1223.4805, | |
| "eval_samples_per_second": 10.107, | |
| "eval_steps_per_second": 5.054, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.79804321239299e-05, | |
| "loss": 1.1229, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_accuracy": 0.6650493144989014, | |
| "eval_loss": 1.1719839572906494, | |
| "eval_runtime": 1217.7937, | |
| "eval_samples_per_second": 10.154, | |
| "eval_steps_per_second": 5.077, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 8.777660008153283e-05, | |
| "loss": 1.1176, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_accuracy": 0.6727316975593567, | |
| "eval_loss": 1.1364519596099854, | |
| "eval_runtime": 1228.5963, | |
| "eval_samples_per_second": 10.065, | |
| "eval_steps_per_second": 5.033, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 8.757276803913576e-05, | |
| "loss": 1.0393, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_accuracy": 0.6387676000595093, | |
| "eval_loss": 1.356237769126892, | |
| "eval_runtime": 1220.1605, | |
| "eval_samples_per_second": 10.135, | |
| "eval_steps_per_second": 5.067, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 8.736893599673869e-05, | |
| "loss": 1.0686, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 0.6770985126495361, | |
| "eval_loss": 1.2882039546966553, | |
| "eval_runtime": 1227.7272, | |
| "eval_samples_per_second": 10.072, | |
| "eval_steps_per_second": 5.036, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 8.716591928251121e-05, | |
| "loss": 1.0984, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 0.6504932641983032, | |
| "eval_loss": 1.2953248023986816, | |
| "eval_runtime": 1228.2391, | |
| "eval_samples_per_second": 10.068, | |
| "eval_steps_per_second": 5.034, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 8.696208724011415e-05, | |
| "loss": 1.0653, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_accuracy": 0.6635128855705261, | |
| "eval_loss": 1.2924034595489502, | |
| "eval_runtime": 1226.8106, | |
| "eval_samples_per_second": 10.08, | |
| "eval_steps_per_second": 5.04, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 8.675825519771708e-05, | |
| "loss": 1.0878, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_accuracy": 0.6699013710021973, | |
| "eval_loss": 1.2249584197998047, | |
| "eval_runtime": 1223.8019, | |
| "eval_samples_per_second": 10.105, | |
| "eval_steps_per_second": 5.052, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 8.655442315532001e-05, | |
| "loss": 1.0446, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_accuracy": 0.6664240956306458, | |
| "eval_loss": 1.3492021560668945, | |
| "eval_runtime": 1227.92, | |
| "eval_samples_per_second": 10.071, | |
| "eval_steps_per_second": 5.035, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 8.635059111292296e-05, | |
| "loss": 1.0754, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 0.6584182381629944, | |
| "eval_loss": 1.275933861732483, | |
| "eval_runtime": 1225.9895, | |
| "eval_samples_per_second": 10.087, | |
| "eval_steps_per_second": 5.043, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 8.614675907052589e-05, | |
| "loss": 1.0441, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_accuracy": 0.6821931004524231, | |
| "eval_loss": 1.1557066440582275, | |
| "eval_runtime": 1227.8238, | |
| "eval_samples_per_second": 10.071, | |
| "eval_steps_per_second": 5.036, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.594374235629842e-05, | |
| "loss": 1.2804, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_accuracy": 0.6013262271881104, | |
| "eval_loss": 1.4437016248703003, | |
| "eval_runtime": 1228.8358, | |
| "eval_samples_per_second": 10.063, | |
| "eval_steps_per_second": 5.032, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 8.573991031390135e-05, | |
| "loss": 1.0833, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.6906032562255859, | |
| "eval_loss": 1.1393189430236816, | |
| "eval_runtime": 1227.4427, | |
| "eval_samples_per_second": 10.075, | |
| "eval_steps_per_second": 5.037, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 8.553607827150428e-05, | |
| "loss": 0.9851, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "eval_accuracy": 0.6836487054824829, | |
| "eval_loss": 1.1196837425231934, | |
| "eval_runtime": 1223.1266, | |
| "eval_samples_per_second": 10.11, | |
| "eval_steps_per_second": 5.055, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 8.533224622910721e-05, | |
| "loss": 0.95, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "eval_accuracy": 0.6299530863761902, | |
| "eval_loss": 1.4798767566680908, | |
| "eval_runtime": 1224.7327, | |
| "eval_samples_per_second": 10.097, | |
| "eval_steps_per_second": 5.048, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 8.512841418671016e-05, | |
| "loss": 1.0258, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "eval_accuracy": 0.6828400492668152, | |
| "eval_loss": 1.1659743785858154, | |
| "eval_runtime": 1235.0907, | |
| "eval_samples_per_second": 10.012, | |
| "eval_steps_per_second": 5.006, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 8.492458214431309e-05, | |
| "loss": 1.0604, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "eval_accuracy": 0.6709526181221008, | |
| "eval_loss": 1.2192966938018799, | |
| "eval_runtime": 1228.4696, | |
| "eval_samples_per_second": 10.066, | |
| "eval_steps_per_second": 5.033, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 8.472075010191602e-05, | |
| "loss": 1.0496, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "eval_accuracy": 0.6782306432723999, | |
| "eval_loss": 1.2562731504440308, | |
| "eval_runtime": 1229.0843, | |
| "eval_samples_per_second": 10.061, | |
| "eval_steps_per_second": 5.031, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 8.451732572360376e-05, | |
| "loss": 2.5985, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "eval_accuracy": 0.1606016457080841, | |
| "eval_loss": 2.8276100158691406, | |
| "eval_runtime": 1227.4333, | |
| "eval_samples_per_second": 10.075, | |
| "eval_steps_per_second": 5.037, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 8.431430900937628e-05, | |
| "loss": 2.5628, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_accuracy": 0.6409509778022766, | |
| "eval_loss": 1.2782047986984253, | |
| "eval_runtime": 1226.8685, | |
| "eval_samples_per_second": 10.079, | |
| "eval_steps_per_second": 5.04, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 8.411047696697921e-05, | |
| "loss": 1.0363, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "eval_accuracy": 0.6780688762664795, | |
| "eval_loss": 1.2363730669021606, | |
| "eval_runtime": 1229.116, | |
| "eval_samples_per_second": 10.061, | |
| "eval_steps_per_second": 5.03, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 8.390664492458214e-05, | |
| "loss": 1.0159, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_accuracy": 0.6617338061332703, | |
| "eval_loss": 1.323940396308899, | |
| "eval_runtime": 1225.656, | |
| "eval_samples_per_second": 10.089, | |
| "eval_steps_per_second": 5.045, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 8.370281288218509e-05, | |
| "loss": 1.0396, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_accuracy": 0.6728934049606323, | |
| "eval_loss": 1.187890887260437, | |
| "eval_runtime": 1229.8463, | |
| "eval_samples_per_second": 10.055, | |
| "eval_steps_per_second": 5.027, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 8.349898083978802e-05, | |
| "loss": 1.0262, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "eval_accuracy": 0.6631894111633301, | |
| "eval_loss": 1.18537437915802, | |
| "eval_runtime": 1238.5652, | |
| "eval_samples_per_second": 9.984, | |
| "eval_steps_per_second": 4.992, | |
| "step": 42500 | |
| } | |
| ], | |
| "max_steps": 247300, | |
| "num_train_epochs": 20, | |
| "total_flos": 2.8496051091712414e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |