| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9922563509994307, |
| "global_step": 173500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9856279714169095e-05, |
| "loss": 2.264, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_accuracy": 0.6029953888515062, |
| "eval_loss": 2.099475383758545, |
| "eval_runtime": 506.0798, |
| "eval_samples_per_second": 76.381, |
| "eval_steps_per_second": 12.731, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9712559428338195e-05, |
| "loss": 2.2782, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.6092498916028328, |
| "eval_loss": 2.065396308898926, |
| "eval_runtime": 505.0033, |
| "eval_samples_per_second": 76.544, |
| "eval_steps_per_second": 12.758, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.956883914250729e-05, |
| "loss": 2.2653, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_accuracy": 0.609359125787774, |
| "eval_loss": 2.0755701065063477, |
| "eval_runtime": 505.2798, |
| "eval_samples_per_second": 76.502, |
| "eval_steps_per_second": 12.751, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.942511885667638e-05, |
| "loss": 2.2549, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_accuracy": 0.6082329787540008, |
| "eval_loss": 2.0734264850616455, |
| "eval_runtime": 505.8877, |
| "eval_samples_per_second": 76.41, |
| "eval_steps_per_second": 12.736, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.9281398570845475e-05, |
| "loss": 2.2545, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 0.6138295862459853, |
| "eval_loss": 2.0366480350494385, |
| "eval_runtime": 505.1671, |
| "eval_samples_per_second": 76.519, |
| "eval_steps_per_second": 12.754, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9137678285014574e-05, |
| "loss": 2.1822, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 0.6128831652341514, |
| "eval_loss": 2.025559663772583, |
| "eval_runtime": 505.5224, |
| "eval_samples_per_second": 76.465, |
| "eval_steps_per_second": 12.745, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.899395799918367e-05, |
| "loss": 2.2251, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_accuracy": 0.6135322892259629, |
| "eval_loss": 2.0310781002044678, |
| "eval_runtime": 505.3262, |
| "eval_samples_per_second": 76.495, |
| "eval_steps_per_second": 12.75, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.885023771335277e-05, |
| "loss": 2.1889, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 0.6140580713202821, |
| "eval_loss": 2.0253171920776367, |
| "eval_runtime": 505.0453, |
| "eval_samples_per_second": 76.538, |
| "eval_steps_per_second": 12.757, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.870651742752186e-05, |
| "loss": 2.1563, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_accuracy": 0.6188487491770902, |
| "eval_loss": 2.0138497352600098, |
| "eval_runtime": 505.0832, |
| "eval_samples_per_second": 76.532, |
| "eval_steps_per_second": 12.756, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.856279714169096e-05, |
| "loss": 2.1504, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.6154928295391541, |
| "eval_loss": 2.0200319290161133, |
| "eval_runtime": 505.1362, |
| "eval_samples_per_second": 76.524, |
| "eval_steps_per_second": 12.755, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.8419076855860053e-05, |
| "loss": 2.1675, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.6187489432946115, |
| "eval_loss": 2.0284132957458496, |
| "eval_runtime": 504.9908, |
| "eval_samples_per_second": 76.546, |
| "eval_steps_per_second": 12.759, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.827535657002915e-05, |
| "loss": 2.1266, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.6206070748456345, |
| "eval_loss": 1.987363338470459, |
| "eval_runtime": 505.0202, |
| "eval_samples_per_second": 76.541, |
| "eval_steps_per_second": 12.758, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.8131636284198246e-05, |
| "loss": 2.1225, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_accuracy": 0.6230235543210437, |
| "eval_loss": 1.9767862558364868, |
| "eval_runtime": 505.1832, |
| "eval_samples_per_second": 76.517, |
| "eval_steps_per_second": 12.754, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.798791599836734e-05, |
| "loss": 2.1301, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_accuracy": 0.6239397066744514, |
| "eval_loss": 1.9729039669036865, |
| "eval_runtime": 505.396, |
| "eval_samples_per_second": 76.485, |
| "eval_steps_per_second": 12.748, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.784419571253643e-05, |
| "loss": 2.0836, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_accuracy": 0.6261581529009562, |
| "eval_loss": 1.964255690574646, |
| "eval_runtime": 504.8882, |
| "eval_samples_per_second": 76.562, |
| "eval_steps_per_second": 12.761, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.770047542670553e-05, |
| "loss": 2.1076, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_accuracy": 0.6274933613289693, |
| "eval_loss": 1.9697866439819336, |
| "eval_runtime": 505.1549, |
| "eval_samples_per_second": 76.521, |
| "eval_steps_per_second": 12.755, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.7556755140874625e-05, |
| "loss": 2.0771, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_accuracy": 0.6285522506681381, |
| "eval_loss": 1.9520649909973145, |
| "eval_runtime": 505.0873, |
| "eval_samples_per_second": 76.531, |
| "eval_steps_per_second": 12.756, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.741303485504372e-05, |
| "loss": 2.1113, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.6283807429914815, |
| "eval_loss": 1.9525035619735718, |
| "eval_runtime": 505.2085, |
| "eval_samples_per_second": 76.513, |
| "eval_steps_per_second": 12.753, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.726931456921282e-05, |
| "loss": 2.0969, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.6321105641490391, |
| "eval_loss": 1.9497127532958984, |
| "eval_runtime": 504.8423, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.712559428338191e-05, |
| "loss": 2.0806, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_accuracy": 0.6330588579023823, |
| "eval_loss": 1.9182238578796387, |
| "eval_runtime": 505.0136, |
| "eval_samples_per_second": 76.542, |
| "eval_steps_per_second": 12.758, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.6981873997551005e-05, |
| "loss": 2.077, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_accuracy": 0.6320653563504697, |
| "eval_loss": 1.9318033456802368, |
| "eval_runtime": 504.8992, |
| "eval_samples_per_second": 76.56, |
| "eval_steps_per_second": 12.761, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.6838153711720104e-05, |
| "loss": 2.0818, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_accuracy": 0.6318708445790271, |
| "eval_loss": 1.9297560453414917, |
| "eval_runtime": 505.1042, |
| "eval_samples_per_second": 76.529, |
| "eval_steps_per_second": 12.756, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.66944334258892e-05, |
| "loss": 2.0634, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.6329302654169371, |
| "eval_loss": 1.91959810256958, |
| "eval_runtime": 505.1282, |
| "eval_samples_per_second": 76.525, |
| "eval_steps_per_second": 12.755, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.65507131400583e-05, |
| "loss": 2.0339, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_accuracy": 0.6344204140460133, |
| "eval_loss": 1.9171222448349, |
| "eval_runtime": 505.0284, |
| "eval_samples_per_second": 76.54, |
| "eval_steps_per_second": 12.758, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.640699285422739e-05, |
| "loss": 2.0653, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.6342928763689412, |
| "eval_loss": 1.914884328842163, |
| "eval_runtime": 505.0567, |
| "eval_samples_per_second": 76.536, |
| "eval_steps_per_second": 12.757, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.626327256839649e-05, |
| "loss": 2.0635, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_accuracy": 0.6342027696817006, |
| "eval_loss": 1.9123356342315674, |
| "eval_runtime": 505.1483, |
| "eval_samples_per_second": 76.522, |
| "eval_steps_per_second": 12.755, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.6119552282565583e-05, |
| "loss": 2.0763, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_accuracy": 0.6322545926583194, |
| "eval_loss": 1.9199713468551636, |
| "eval_runtime": 504.9808, |
| "eval_samples_per_second": 76.547, |
| "eval_steps_per_second": 12.759, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.5975831996734677e-05, |
| "loss": 2.0422, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 0.6341375897055185, |
| "eval_loss": 1.9098281860351562, |
| "eval_runtime": 504.9104, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.5832111710903776e-05, |
| "loss": 2.047, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_accuracy": 0.6391451800998059, |
| "eval_loss": 1.8846170902252197, |
| "eval_runtime": 505.1745, |
| "eval_samples_per_second": 76.518, |
| "eval_steps_per_second": 12.754, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.568839142507287e-05, |
| "loss": 2.05, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_accuracy": 0.6396398250155354, |
| "eval_loss": 1.8791234493255615, |
| "eval_runtime": 504.9829, |
| "eval_samples_per_second": 76.547, |
| "eval_steps_per_second": 12.759, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.554467113924196e-05, |
| "loss": 2.0744, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.27, |
| "eval_accuracy": 0.6405644885814117, |
| "eval_loss": 1.869956612586975, |
| "eval_runtime": 505.1472, |
| "eval_samples_per_second": 76.522, |
| "eval_steps_per_second": 12.755, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.540095085341106e-05, |
| "loss": 1.9902, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_accuracy": 0.6370134439465205, |
| "eval_loss": 1.8874872922897339, |
| "eval_runtime": 505.1562, |
| "eval_samples_per_second": 76.521, |
| "eval_steps_per_second": 12.754, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.5257230567580155e-05, |
| "loss": 2.0111, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_accuracy": 0.6396196539609758, |
| "eval_loss": 1.882645606994629, |
| "eval_runtime": 505.0571, |
| "eval_samples_per_second": 76.536, |
| "eval_steps_per_second": 12.757, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.511351028174925e-05, |
| "loss": 1.9714, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.6377732860325056, |
| "eval_loss": 1.8992263078689575, |
| "eval_runtime": 508.7678, |
| "eval_samples_per_second": 75.978, |
| "eval_steps_per_second": 12.664, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.496978999591834e-05, |
| "loss": 2.062, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_accuracy": 0.6411479312361512, |
| "eval_loss": 1.8557517528533936, |
| "eval_runtime": 505.1236, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.482606971008744e-05, |
| "loss": 1.9647, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_accuracy": 0.6416682790674697, |
| "eval_loss": 1.8758158683776855, |
| "eval_runtime": 522.23, |
| "eval_samples_per_second": 74.019, |
| "eval_steps_per_second": 12.337, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.4682349424256535e-05, |
| "loss": 1.9872, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_accuracy": 0.6421780823047304, |
| "eval_loss": 1.8721212148666382, |
| "eval_runtime": 506.6952, |
| "eval_samples_per_second": 76.288, |
| "eval_steps_per_second": 12.716, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.453862913842563e-05, |
| "loss": 1.9116, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.6428642020045625, |
| "eval_loss": 1.8643959760665894, |
| "eval_runtime": 505.1676, |
| "eval_samples_per_second": 76.519, |
| "eval_steps_per_second": 12.754, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.439490885259473e-05, |
| "loss": 1.9967, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 0.6469490054814687, |
| "eval_loss": 1.8492257595062256, |
| "eval_runtime": 504.9433, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.425118856676382e-05, |
| "loss": 2.0278, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_accuracy": 0.6445656199843615, |
| "eval_loss": 1.8525065183639526, |
| "eval_runtime": 504.7949, |
| "eval_samples_per_second": 76.576, |
| "eval_steps_per_second": 12.764, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.410746828093292e-05, |
| "loss": 1.9533, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.35, |
| "eval_accuracy": 0.6451005128712545, |
| "eval_loss": 1.8465856313705444, |
| "eval_runtime": 504.9056, |
| "eval_samples_per_second": 76.559, |
| "eval_steps_per_second": 12.761, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.396374799510202e-05, |
| "loss": 2.0196, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_accuracy": 0.6454255578059838, |
| "eval_loss": 1.848383903503418, |
| "eval_runtime": 504.8738, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.3820027709271113e-05, |
| "loss": 2.0124, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_accuracy": 0.6437290618903769, |
| "eval_loss": 1.8351905345916748, |
| "eval_runtime": 504.816, |
| "eval_samples_per_second": 76.572, |
| "eval_steps_per_second": 12.763, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.3676307423440206e-05, |
| "loss": 1.9508, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_accuracy": 0.6431495881003088, |
| "eval_loss": 1.8396267890930176, |
| "eval_runtime": 505.2321, |
| "eval_samples_per_second": 76.509, |
| "eval_steps_per_second": 12.753, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.35325871376093e-05, |
| "loss": 1.978, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_accuracy": 0.6489612628141159, |
| "eval_loss": 1.82603120803833, |
| "eval_runtime": 505.1319, |
| "eval_samples_per_second": 76.525, |
| "eval_steps_per_second": 12.755, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.33888668517784e-05, |
| "loss": 1.9732, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_accuracy": 0.6462456128787754, |
| "eval_loss": 1.840147852897644, |
| "eval_runtime": 505.0136, |
| "eval_samples_per_second": 76.542, |
| "eval_steps_per_second": 12.758, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.324514656594749e-05, |
| "loss": 1.9306, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_accuracy": 0.6486216902430177, |
| "eval_loss": 1.8207985162734985, |
| "eval_runtime": 505.1092, |
| "eval_samples_per_second": 76.528, |
| "eval_steps_per_second": 12.756, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.3101426280116586e-05, |
| "loss": 1.9934, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_accuracy": 0.6489328427620152, |
| "eval_loss": 1.8231595754623413, |
| "eval_runtime": 504.8996, |
| "eval_samples_per_second": 76.56, |
| "eval_steps_per_second": 12.761, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.2957705994285685e-05, |
| "loss": 1.8852, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_accuracy": 0.6462696012360696, |
| "eval_loss": 1.8441661596298218, |
| "eval_runtime": 504.7529, |
| "eval_samples_per_second": 76.582, |
| "eval_steps_per_second": 12.765, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.281398570845478e-05, |
| "loss": 1.9778, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_accuracy": 0.6492346149562549, |
| "eval_loss": 1.8168740272521973, |
| "eval_runtime": 504.7968, |
| "eval_samples_per_second": 76.575, |
| "eval_steps_per_second": 12.764, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.267026542262387e-05, |
| "loss": 1.9392, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_accuracy": 0.6482285290298646, |
| "eval_loss": 1.8116620779037476, |
| "eval_runtime": 505.1576, |
| "eval_samples_per_second": 76.521, |
| "eval_steps_per_second": 12.754, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.252654513679297e-05, |
| "loss": 1.9499, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_accuracy": 0.6536515001648533, |
| "eval_loss": 1.788891315460205, |
| "eval_runtime": 505.2601, |
| "eval_samples_per_second": 76.505, |
| "eval_steps_per_second": 12.752, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.2382824850962065e-05, |
| "loss": 1.9246, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_accuracy": 0.6498087424922134, |
| "eval_loss": 1.823807716369629, |
| "eval_runtime": 505.0768, |
| "eval_samples_per_second": 76.533, |
| "eval_steps_per_second": 12.756, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.223910456513116e-05, |
| "loss": 1.9272, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_accuracy": 0.6532794168462969, |
| "eval_loss": 1.7950499057769775, |
| "eval_runtime": 505.1797, |
| "eval_samples_per_second": 76.517, |
| "eval_steps_per_second": 12.754, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.209538427930025e-05, |
| "loss": 1.9407, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_accuracy": 0.6512844165149856, |
| "eval_loss": 1.8170486688613892, |
| "eval_runtime": 505.1731, |
| "eval_samples_per_second": 76.518, |
| "eval_steps_per_second": 12.754, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.195166399346935e-05, |
| "loss": 1.9334, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 0.6513125268621681, |
| "eval_loss": 1.82261061668396, |
| "eval_runtime": 504.8798, |
| "eval_samples_per_second": 76.563, |
| "eval_steps_per_second": 12.761, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.180794370763845e-05, |
| "loss": 1.9449, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 0.6541717049576784, |
| "eval_loss": 1.7894972562789917, |
| "eval_runtime": 505.1505, |
| "eval_samples_per_second": 76.522, |
| "eval_steps_per_second": 12.755, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.1664223421807544e-05, |
| "loss": 1.9267, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_accuracy": 0.6532831089704005, |
| "eval_loss": 1.8102754354476929, |
| "eval_runtime": 505.3042, |
| "eval_samples_per_second": 76.498, |
| "eval_steps_per_second": 12.751, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.1520503135976643e-05, |
| "loss": 1.9297, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_accuracy": 0.6531894741176955, |
| "eval_loss": 1.8069477081298828, |
| "eval_runtime": 505.1253, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.1376782850145736e-05, |
| "loss": 1.9438, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_accuracy": 0.6524309116897132, |
| "eval_loss": 1.7968119382858276, |
| "eval_runtime": 505.1207, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.123306256431483e-05, |
| "loss": 1.9157, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 0.6529667531866351, |
| "eval_loss": 1.7964750528335571, |
| "eval_runtime": 505.3178, |
| "eval_samples_per_second": 76.496, |
| "eval_steps_per_second": 12.75, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.108934227848393e-05, |
| "loss": 1.946, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 0.6551017046858272, |
| "eval_loss": 1.7928993701934814, |
| "eval_runtime": 505.3702, |
| "eval_samples_per_second": 76.488, |
| "eval_steps_per_second": 12.749, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.094562199265302e-05, |
| "loss": 1.9598, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_accuracy": 0.6532876093474065, |
| "eval_loss": 1.7965306043624878, |
| "eval_runtime": 505.186, |
| "eval_samples_per_second": 76.516, |
| "eval_steps_per_second": 12.754, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.0801901706822116e-05, |
| "loss": 1.9061, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_accuracy": 0.6538225518823437, |
| "eval_loss": 1.7989882230758667, |
| "eval_runtime": 504.9892, |
| "eval_samples_per_second": 76.546, |
| "eval_steps_per_second": 12.759, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.065818142099121e-05, |
| "loss": 1.9147, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_accuracy": 0.6573878535643568, |
| "eval_loss": 1.7725856304168701, |
| "eval_runtime": 504.6953, |
| "eval_samples_per_second": 76.591, |
| "eval_steps_per_second": 12.766, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.051446113516031e-05, |
| "loss": 1.887, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 0.6580234833659491, |
| "eval_loss": 1.7835739850997925, |
| "eval_runtime": 504.8373, |
| "eval_samples_per_second": 76.569, |
| "eval_steps_per_second": 12.763, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.03707408493294e-05, |
| "loss": 1.9408, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_accuracy": 0.6543795725857366, |
| "eval_loss": 1.770628809928894, |
| "eval_runtime": 504.8327, |
| "eval_samples_per_second": 76.57, |
| "eval_steps_per_second": 12.763, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.0227020563498495e-05, |
| "loss": 1.9019, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_accuracy": 0.6543934777237915, |
| "eval_loss": 1.7860642671585083, |
| "eval_runtime": 504.6493, |
| "eval_samples_per_second": 76.598, |
| "eval_steps_per_second": 12.767, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.0083300277667595e-05, |
| "loss": 1.8917, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_accuracy": 0.6570292214482958, |
| "eval_loss": 1.7824567556381226, |
| "eval_runtime": 505.2863, |
| "eval_samples_per_second": 76.501, |
| "eval_steps_per_second": 12.751, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.993957999183669e-05, |
| "loss": 1.9087, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_accuracy": 0.656976576204726, |
| "eval_loss": 1.7875770330429077, |
| "eval_runtime": 505.2585, |
| "eval_samples_per_second": 76.505, |
| "eval_steps_per_second": 12.752, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.979585970600578e-05, |
| "loss": 1.9381, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_accuracy": 0.6596124584025764, |
| "eval_loss": 1.7620325088500977, |
| "eval_runtime": 505.0138, |
| "eval_samples_per_second": 76.542, |
| "eval_steps_per_second": 12.758, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.965213942017488e-05, |
| "loss": 1.8765, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 0.6544805843400484, |
| "eval_loss": 1.7880455255508423, |
| "eval_runtime": 504.9261, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.9508419134343974e-05, |
| "loss": 1.8642, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_accuracy": 0.6564586643832797, |
| "eval_loss": 1.7799092531204224, |
| "eval_runtime": 504.9316, |
| "eval_samples_per_second": 76.555, |
| "eval_steps_per_second": 12.76, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.9364698848513074e-05, |
| "loss": 1.8982, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_accuracy": 0.6592074091805823, |
| "eval_loss": 1.7632508277893066, |
| "eval_runtime": 505.13, |
| "eval_samples_per_second": 76.525, |
| "eval_steps_per_second": 12.755, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.922097856268217e-05, |
| "loss": 1.8602, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 0.6603858914849792, |
| "eval_loss": 1.7626314163208008, |
| "eval_runtime": 505.3311, |
| "eval_samples_per_second": 76.494, |
| "eval_steps_per_second": 12.75, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.9077258276851266e-05, |
| "loss": 1.8654, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_accuracy": 0.6575902226245833, |
| "eval_loss": 1.7715989351272583, |
| "eval_runtime": 504.9731, |
| "eval_samples_per_second": 76.549, |
| "eval_steps_per_second": 12.759, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.893353799102036e-05, |
| "loss": 1.9229, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_accuracy": 0.6620079381107626, |
| "eval_loss": 1.7379710674285889, |
| "eval_runtime": 504.9541, |
| "eval_samples_per_second": 76.552, |
| "eval_steps_per_second": 12.76, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.878981770518945e-05, |
| "loss": 1.8788, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.6614539654350221, |
| "eval_loss": 1.7656759023666382, |
| "eval_runtime": 505.0073, |
| "eval_samples_per_second": 76.543, |
| "eval_steps_per_second": 12.758, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.864609741935855e-05, |
| "loss": 1.9227, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_accuracy": 0.6590935292083203, |
| "eval_loss": 1.753671646118164, |
| "eval_runtime": 504.8067, |
| "eval_samples_per_second": 76.574, |
| "eval_steps_per_second": 12.763, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.8502377133527646e-05, |
| "loss": 1.9366, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_accuracy": 0.6579416989164885, |
| "eval_loss": 1.7639555931091309, |
| "eval_runtime": 504.931, |
| "eval_samples_per_second": 76.555, |
| "eval_steps_per_second": 12.76, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.835865684769674e-05, |
| "loss": 1.8818, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_accuracy": 0.6609612561224868, |
| "eval_loss": 1.7601826190948486, |
| "eval_runtime": 504.7323, |
| "eval_samples_per_second": 76.585, |
| "eval_steps_per_second": 12.765, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.821493656186584e-05, |
| "loss": 1.8731, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_accuracy": 0.6598513855672998, |
| "eval_loss": 1.7653340101242065, |
| "eval_runtime": 504.9615, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.807121627603493e-05, |
| "loss": 1.8618, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 0.6622164598280119, |
| "eval_loss": 1.7503831386566162, |
| "eval_runtime": 515.2492, |
| "eval_samples_per_second": 75.022, |
| "eval_steps_per_second": 12.505, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7927495990204025e-05, |
| "loss": 1.8521, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 0.6620665499124343, |
| "eval_loss": 1.7471672296524048, |
| "eval_runtime": 504.6893, |
| "eval_samples_per_second": 76.592, |
| "eval_steps_per_second": 12.766, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.778377570437312e-05, |
| "loss": 1.8568, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_accuracy": 0.6629361773859987, |
| "eval_loss": 1.7437787055969238, |
| "eval_runtime": 505.0127, |
| "eval_samples_per_second": 76.543, |
| "eval_steps_per_second": 12.758, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.764005541854222e-05, |
| "loss": 1.9053, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_accuracy": 0.6615143904146251, |
| "eval_loss": 1.757638692855835, |
| "eval_runtime": 504.9279, |
| "eval_samples_per_second": 76.555, |
| "eval_steps_per_second": 12.76, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.749633513271131e-05, |
| "loss": 1.9065, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_accuracy": 0.6655632411067194, |
| "eval_loss": 1.7270922660827637, |
| "eval_runtime": 504.7992, |
| "eval_samples_per_second": 76.575, |
| "eval_steps_per_second": 12.763, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.7352614846880404e-05, |
| "loss": 1.8826, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_accuracy": 0.6638198948758107, |
| "eval_loss": 1.7360082864761353, |
| "eval_runtime": 504.9976, |
| "eval_samples_per_second": 76.545, |
| "eval_steps_per_second": 12.758, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.7208894561049504e-05, |
| "loss": 1.91, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_accuracy": 0.6624269564967081, |
| "eval_loss": 1.7586431503295898, |
| "eval_runtime": 505.7234, |
| "eval_samples_per_second": 76.435, |
| "eval_steps_per_second": 12.74, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.7065174275218604e-05, |
| "loss": 1.8536, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_accuracy": 0.6657290317274279, |
| "eval_loss": 1.7201447486877441, |
| "eval_runtime": 505.6221, |
| "eval_samples_per_second": 76.45, |
| "eval_steps_per_second": 12.743, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.69214539893877e-05, |
| "loss": 1.9, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_accuracy": 0.6655557935670973, |
| "eval_loss": 1.7227860689163208, |
| "eval_runtime": 506.5799, |
| "eval_samples_per_second": 76.306, |
| "eval_steps_per_second": 12.719, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.6777733703556796e-05, |
| "loss": 1.8249, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_accuracy": 0.6666817419333618, |
| "eval_loss": 1.711987018585205, |
| "eval_runtime": 504.8983, |
| "eval_samples_per_second": 76.56, |
| "eval_steps_per_second": 12.761, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.663401341772589e-05, |
| "loss": 1.8732, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_accuracy": 0.6656392553322649, |
| "eval_loss": 1.7155495882034302, |
| "eval_runtime": 505.2472, |
| "eval_samples_per_second": 76.507, |
| "eval_steps_per_second": 12.752, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.649029313189498e-05, |
| "loss": 1.9091, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 0.6634785586671013, |
| "eval_loss": 1.738441824913025, |
| "eval_runtime": 505.2565, |
| "eval_samples_per_second": 76.506, |
| "eval_steps_per_second": 12.752, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.6346572846064076e-05, |
| "loss": 1.9014, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_accuracy": 0.6651310249170258, |
| "eval_loss": 1.721003532409668, |
| "eval_runtime": 509.6123, |
| "eval_samples_per_second": 75.852, |
| "eval_steps_per_second": 12.643, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.6202852560233176e-05, |
| "loss": 1.8369, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_accuracy": 0.6663525364301911, |
| "eval_loss": 1.7259396314620972, |
| "eval_runtime": 506.3189, |
| "eval_samples_per_second": 76.345, |
| "eval_steps_per_second": 12.725, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.605913227440227e-05, |
| "loss": 1.8315, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_accuracy": 0.6673759128451905, |
| "eval_loss": 1.708840250968933, |
| "eval_runtime": 504.9236, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.591541198857136e-05, |
| "loss": 1.8541, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 0.6656735044712961, |
| "eval_loss": 1.730662226676941, |
| "eval_runtime": 504.7568, |
| "eval_samples_per_second": 76.581, |
| "eval_steps_per_second": 12.765, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.577169170274046e-05, |
| "loss": 1.8014, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_accuracy": 0.6668221020921883, |
| "eval_loss": 1.7203967571258545, |
| "eval_runtime": 507.0863, |
| "eval_samples_per_second": 76.23, |
| "eval_steps_per_second": 12.706, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.5627971416909555e-05, |
| "loss": 1.8425, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 0.6681489099136158, |
| "eval_loss": 1.715635895729065, |
| "eval_runtime": 505.1301, |
| "eval_samples_per_second": 76.525, |
| "eval_steps_per_second": 12.755, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.548425113107865e-05, |
| "loss": 1.8635, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_accuracy": 0.6711167899758878, |
| "eval_loss": 1.6936331987380981, |
| "eval_runtime": 505.423, |
| "eval_samples_per_second": 76.48, |
| "eval_steps_per_second": 12.748, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.534053084524775e-05, |
| "loss": 1.7903, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_accuracy": 0.6703802134329463, |
| "eval_loss": 1.6915478706359863, |
| "eval_runtime": 505.1761, |
| "eval_samples_per_second": 76.518, |
| "eval_steps_per_second": 12.754, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.519681055941684e-05, |
| "loss": 1.8496, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_accuracy": 0.6684161816424325, |
| "eval_loss": 1.7119927406311035, |
| "eval_runtime": 505.3906, |
| "eval_samples_per_second": 76.485, |
| "eval_steps_per_second": 12.749, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.5053090273585934e-05, |
| "loss": 1.8223, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 0.6696371134020619, |
| "eval_loss": 1.7081302404403687, |
| "eval_runtime": 505.2808, |
| "eval_samples_per_second": 76.502, |
| "eval_steps_per_second": 12.751, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.4909369987755034e-05, |
| "loss": 1.8038, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_accuracy": 0.669557564539054, |
| "eval_loss": 1.7036747932434082, |
| "eval_runtime": 505.1987, |
| "eval_samples_per_second": 76.514, |
| "eval_steps_per_second": 12.753, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.476564970192413e-05, |
| "loss": 1.8165, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_accuracy": 0.671407488024273, |
| "eval_loss": 1.7063789367675781, |
| "eval_runtime": 505.3126, |
| "eval_samples_per_second": 76.497, |
| "eval_steps_per_second": 12.751, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.462192941609323e-05, |
| "loss": 1.7896, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_accuracy": 0.6721173658616995, |
| "eval_loss": 1.6945536136627197, |
| "eval_runtime": 504.9744, |
| "eval_samples_per_second": 76.548, |
| "eval_steps_per_second": 12.759, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.447820913026232e-05, |
| "loss": 1.8216, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_accuracy": 0.6703369528529444, |
| "eval_loss": 1.6991063356399536, |
| "eval_runtime": 505.0938, |
| "eval_samples_per_second": 76.53, |
| "eval_steps_per_second": 12.756, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.433448884443142e-05, |
| "loss": 1.8367, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_accuracy": 0.6718564900998477, |
| "eval_loss": 1.699464201927185, |
| "eval_runtime": 504.9635, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.419076855860051e-05, |
| "loss": 1.7611, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_accuracy": 0.6728948136695765, |
| "eval_loss": 1.6895838975906372, |
| "eval_runtime": 504.943, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.4047048272769606e-05, |
| "loss": 1.8293, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_accuracy": 0.6719230436769119, |
| "eval_loss": 1.6957842111587524, |
| "eval_runtime": 505.0035, |
| "eval_samples_per_second": 76.544, |
| "eval_steps_per_second": 12.758, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.3903327986938706e-05, |
| "loss": 1.7817, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 0.6710064582029062, |
| "eval_loss": 1.6945245265960693, |
| "eval_runtime": 505.104, |
| "eval_samples_per_second": 76.529, |
| "eval_steps_per_second": 12.756, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.37596077011078e-05, |
| "loss": 1.7918, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_accuracy": 0.671061253590607, |
| "eval_loss": 1.6923878192901611, |
| "eval_runtime": 504.7188, |
| "eval_samples_per_second": 76.587, |
| "eval_steps_per_second": 12.766, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.361588741527689e-05, |
| "loss": 1.8128, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_accuracy": 0.6750276133797128, |
| "eval_loss": 1.6709179878234863, |
| "eval_runtime": 504.9, |
| "eval_samples_per_second": 76.56, |
| "eval_steps_per_second": 12.761, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.3472167129445985e-05, |
| "loss": 1.7915, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_accuracy": 0.6757300443550381, |
| "eval_loss": 1.6758147478103638, |
| "eval_runtime": 504.7734, |
| "eval_samples_per_second": 76.579, |
| "eval_steps_per_second": 12.764, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.3328446843615085e-05, |
| "loss": 1.7713, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.676173418972332, |
| "eval_loss": 1.6615269184112549, |
| "eval_runtime": 505.1324, |
| "eval_samples_per_second": 76.524, |
| "eval_steps_per_second": 12.755, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.318472655778418e-05, |
| "loss": 1.7724, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_accuracy": 0.6714590864278672, |
| "eval_loss": 1.7042878866195679, |
| "eval_runtime": 504.6509, |
| "eval_samples_per_second": 76.598, |
| "eval_steps_per_second": 12.767, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.304100627195327e-05, |
| "loss": 1.7625, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_accuracy": 0.67387649477909, |
| "eval_loss": 1.6903934478759766, |
| "eval_runtime": 504.9418, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.289728598612237e-05, |
| "loss": 1.7928, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_accuracy": 0.6752247629213022, |
| "eval_loss": 1.682010293006897, |
| "eval_runtime": 505.0915, |
| "eval_samples_per_second": 76.531, |
| "eval_steps_per_second": 12.756, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.2753565700291464e-05, |
| "loss": 1.767, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_accuracy": 0.6763652677851133, |
| "eval_loss": 1.668526291847229, |
| "eval_runtime": 504.9491, |
| "eval_samples_per_second": 76.552, |
| "eval_steps_per_second": 12.76, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.2609845414460564e-05, |
| "loss": 1.728, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_accuracy": 0.6749877247201442, |
| "eval_loss": 1.6819721460342407, |
| "eval_runtime": 505.054, |
| "eval_samples_per_second": 76.536, |
| "eval_steps_per_second": 12.757, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.246612512862966e-05, |
| "loss": 1.7841, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.05, |
| "eval_accuracy": 0.6743459409745602, |
| "eval_loss": 1.681199073791504, |
| "eval_runtime": 504.9374, |
| "eval_samples_per_second": 76.554, |
| "eval_steps_per_second": 12.76, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.232240484279876e-05, |
| "loss": 1.7962, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_accuracy": 0.6757155660804, |
| "eval_loss": 1.6789964437484741, |
| "eval_runtime": 505.0667, |
| "eval_samples_per_second": 76.534, |
| "eval_steps_per_second": 12.757, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.217868455696785e-05, |
| "loss": 1.7958, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_accuracy": 0.6753474225614481, |
| "eval_loss": 1.6794512271881104, |
| "eval_runtime": 505.0538, |
| "eval_samples_per_second": 76.536, |
| "eval_steps_per_second": 12.757, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.203496427113694e-05, |
| "loss": 1.7584, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_accuracy": 0.6770464499413978, |
| "eval_loss": 1.668652892112732, |
| "eval_runtime": 504.9672, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.189124398530604e-05, |
| "loss": 1.7608, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_accuracy": 0.6780699588477366, |
| "eval_loss": 1.6648616790771484, |
| "eval_runtime": 505.1193, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.1747523699475136e-05, |
| "loss": 1.7654, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.6763291808479001, |
| "eval_loss": 1.6693153381347656, |
| "eval_runtime": 504.9778, |
| "eval_samples_per_second": 76.548, |
| "eval_steps_per_second": 12.759, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.160380341364423e-05, |
| "loss": 1.7889, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.6750900749324951, |
| "eval_loss": 1.674703598022461, |
| "eval_runtime": 505.2609, |
| "eval_samples_per_second": 76.505, |
| "eval_steps_per_second": 12.752, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.146008312781333e-05, |
| "loss": 1.7524, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_accuracy": 0.6784246645920491, |
| "eval_loss": 1.6532648801803589, |
| "eval_runtime": 505.233, |
| "eval_samples_per_second": 76.509, |
| "eval_steps_per_second": 12.753, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.131636284198242e-05, |
| "loss": 1.7254, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_accuracy": 0.6786630591006618, |
| "eval_loss": 1.6566537618637085, |
| "eval_runtime": 504.9383, |
| "eval_samples_per_second": 76.554, |
| "eval_steps_per_second": 12.76, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.1172642556151515e-05, |
| "loss": 1.7332, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_accuracy": 0.6807642481702436, |
| "eval_loss": 1.6512647867202759, |
| "eval_runtime": 504.9241, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.1028922270320615e-05, |
| "loss": 1.7253, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_accuracy": 0.6796908456039793, |
| "eval_loss": 1.6632815599441528, |
| "eval_runtime": 504.9607, |
| "eval_samples_per_second": 76.551, |
| "eval_steps_per_second": 12.759, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.088520198448971e-05, |
| "loss": 1.7751, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_accuracy": 0.6804585687421044, |
| "eval_loss": 1.6311832666397095, |
| "eval_runtime": 505.0949, |
| "eval_samples_per_second": 76.53, |
| "eval_steps_per_second": 12.756, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.07414816986588e-05, |
| "loss": 1.7777, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_accuracy": 0.6787007914482475, |
| "eval_loss": 1.6626741886138916, |
| "eval_runtime": 504.8124, |
| "eval_samples_per_second": 76.573, |
| "eval_steps_per_second": 12.763, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.0597761412827894e-05, |
| "loss": 1.7276, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_accuracy": 0.6796160068499636, |
| "eval_loss": 1.6614458560943604, |
| "eval_runtime": 504.8244, |
| "eval_samples_per_second": 76.571, |
| "eval_steps_per_second": 12.763, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.0454041126996997e-05, |
| "loss": 1.728, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_accuracy": 0.6785516814524448, |
| "eval_loss": 1.6519767045974731, |
| "eval_runtime": 505.1447, |
| "eval_samples_per_second": 76.523, |
| "eval_steps_per_second": 12.755, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.031032084116609e-05, |
| "loss": 1.7441, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_accuracy": 0.6805815554877496, |
| "eval_loss": 1.6434807777404785, |
| "eval_runtime": 505.0057, |
| "eval_samples_per_second": 76.544, |
| "eval_steps_per_second": 12.758, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0166600555335183e-05, |
| "loss": 1.7479, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_accuracy": 0.6789060344366061, |
| "eval_loss": 1.6525135040283203, |
| "eval_runtime": 505.3587, |
| "eval_samples_per_second": 76.49, |
| "eval_steps_per_second": 12.749, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.0022880269504283e-05, |
| "loss": 1.7509, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_accuracy": 0.6798258991501043, |
| "eval_loss": 1.647918939590454, |
| "eval_runtime": 505.1501, |
| "eval_samples_per_second": 76.522, |
| "eval_steps_per_second": 12.755, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9879159983673376e-05, |
| "loss": 1.7206, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_accuracy": 0.6783461894477013, |
| "eval_loss": 1.6519067287445068, |
| "eval_runtime": 506.0559, |
| "eval_samples_per_second": 76.385, |
| "eval_steps_per_second": 12.732, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.973543969784247e-05, |
| "loss": 1.7673, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_accuracy": 0.6807932850559578, |
| "eval_loss": 1.6508548259735107, |
| "eval_runtime": 514.2083, |
| "eval_samples_per_second": 75.174, |
| "eval_steps_per_second": 12.53, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.959171941201157e-05, |
| "loss": 1.714, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_accuracy": 0.6812635429626028, |
| "eval_loss": 1.642662525177002, |
| "eval_runtime": 504.8701, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.9447999126180666e-05, |
| "loss": 1.7669, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_accuracy": 0.6794306930693069, |
| "eval_loss": 1.6571295261383057, |
| "eval_runtime": 504.891, |
| "eval_samples_per_second": 76.561, |
| "eval_steps_per_second": 12.761, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.930427884034976e-05, |
| "loss": 1.7507, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_accuracy": 0.6833037270962861, |
| "eval_loss": 1.636967420578003, |
| "eval_runtime": 504.5508, |
| "eval_samples_per_second": 76.613, |
| "eval_steps_per_second": 12.77, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.9160558554518852e-05, |
| "loss": 1.7362, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_accuracy": 0.6802713518548285, |
| "eval_loss": 1.649387240409851, |
| "eval_runtime": 505.0872, |
| "eval_samples_per_second": 76.531, |
| "eval_steps_per_second": 12.756, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.9016838268687952e-05, |
| "loss": 1.7424, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_accuracy": 0.6819498268045648, |
| "eval_loss": 1.638290286064148, |
| "eval_runtime": 505.0276, |
| "eval_samples_per_second": 76.54, |
| "eval_steps_per_second": 12.758, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.8873117982857045e-05, |
| "loss": 1.7091, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_accuracy": 0.683740636433462, |
| "eval_loss": 1.628255009651184, |
| "eval_runtime": 506.2374, |
| "eval_samples_per_second": 76.357, |
| "eval_steps_per_second": 12.727, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8729397697026138e-05, |
| "loss": 1.6795, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.6822740409554627, |
| "eval_loss": 1.639488935470581, |
| "eval_runtime": 505.1097, |
| "eval_samples_per_second": 76.528, |
| "eval_steps_per_second": 12.756, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8585677411195238e-05, |
| "loss": 1.7194, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_accuracy": 0.6824255431118128, |
| "eval_loss": 1.6366465091705322, |
| "eval_runtime": 505.027, |
| "eval_samples_per_second": 76.54, |
| "eval_steps_per_second": 12.758, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.844195712536433e-05, |
| "loss": 1.7048, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_accuracy": 0.6851581288834533, |
| "eval_loss": 1.6212780475616455, |
| "eval_runtime": 505.1215, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8298236839533427e-05, |
| "loss": 1.731, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_accuracy": 0.6864512401469193, |
| "eval_loss": 1.6137720346450806, |
| "eval_runtime": 505.2377, |
| "eval_samples_per_second": 76.509, |
| "eval_steps_per_second": 12.752, |
| "step": 75500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.8154516553702527e-05, |
| "loss": 1.6964, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_accuracy": 0.6843588288043702, |
| "eval_loss": 1.6282873153686523, |
| "eval_runtime": 505.1421, |
| "eval_samples_per_second": 76.523, |
| "eval_steps_per_second": 12.755, |
| "step": 76000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.801079626787162e-05, |
| "loss": 1.6727, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_accuracy": 0.6865967135220749, |
| "eval_loss": 1.6177432537078857, |
| "eval_runtime": 505.308, |
| "eval_samples_per_second": 76.498, |
| "eval_steps_per_second": 12.751, |
| "step": 76500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.7867075982040713e-05, |
| "loss": 1.709, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.33, |
| "eval_accuracy": 0.686547563000223, |
| "eval_loss": 1.605528712272644, |
| "eval_runtime": 504.7333, |
| "eval_samples_per_second": 76.585, |
| "eval_steps_per_second": 12.765, |
| "step": 77000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.7723355696209806e-05, |
| "loss": 1.6966, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_accuracy": 0.6849204263635915, |
| "eval_loss": 1.6172435283660889, |
| "eval_runtime": 504.9992, |
| "eval_samples_per_second": 76.545, |
| "eval_steps_per_second": 12.758, |
| "step": 77500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7579635410378906e-05, |
| "loss": 1.7162, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_accuracy": 0.6839215039577836, |
| "eval_loss": 1.6272331476211548, |
| "eval_runtime": 504.9102, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 78000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7435915124548e-05, |
| "loss": 1.7378, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_accuracy": 0.6864994685803267, |
| "eval_loss": 1.611964464187622, |
| "eval_runtime": 504.7676, |
| "eval_samples_per_second": 76.58, |
| "eval_steps_per_second": 12.764, |
| "step": 78500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.7292194838717096e-05, |
| "loss": 1.6832, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_accuracy": 0.6883578066607137, |
| "eval_loss": 1.6121339797973633, |
| "eval_runtime": 505.1895, |
| "eval_samples_per_second": 76.516, |
| "eval_steps_per_second": 12.754, |
| "step": 79000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.7148474552886192e-05, |
| "loss": 1.7429, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_accuracy": 0.6863302661684582, |
| "eval_loss": 1.6140283346176147, |
| "eval_runtime": 505.2667, |
| "eval_samples_per_second": 76.504, |
| "eval_steps_per_second": 12.752, |
| "step": 79500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.700475426705529e-05, |
| "loss": 1.7239, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_accuracy": 0.6874554305664904, |
| "eval_loss": 1.6063591241836548, |
| "eval_runtime": 505.1062, |
| "eval_samples_per_second": 76.528, |
| "eval_steps_per_second": 12.756, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.6861033981224382e-05, |
| "loss": 1.6906, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_accuracy": 0.6866043613707166, |
| "eval_loss": 1.618539810180664, |
| "eval_runtime": 504.7862, |
| "eval_samples_per_second": 76.577, |
| "eval_steps_per_second": 12.764, |
| "step": 80500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6717313695393482e-05, |
| "loss": 1.7063, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_accuracy": 0.6867114513614355, |
| "eval_loss": 1.6177887916564941, |
| "eval_runtime": 505.1379, |
| "eval_samples_per_second": 76.524, |
| "eval_steps_per_second": 12.755, |
| "step": 81000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6573593409562575e-05, |
| "loss": 1.6529, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_accuracy": 0.6881720872617729, |
| "eval_loss": 1.6108900308609009, |
| "eval_runtime": 504.7522, |
| "eval_samples_per_second": 76.582, |
| "eval_steps_per_second": 12.765, |
| "step": 81500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6429873123731668e-05, |
| "loss": 1.6786, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_accuracy": 0.6877889170792587, |
| "eval_loss": 1.6024373769760132, |
| "eval_runtime": 505.1343, |
| "eval_samples_per_second": 76.524, |
| "eval_steps_per_second": 12.755, |
| "step": 82000 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.628615283790076e-05, |
| "loss": 1.7013, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_accuracy": 0.6847550935142176, |
| "eval_loss": 1.6186041831970215, |
| "eval_runtime": 504.8484, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 82500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.614243255206986e-05, |
| "loss": 1.708, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_accuracy": 0.6894669962042859, |
| "eval_loss": 1.594452142715454, |
| "eval_runtime": 505.3279, |
| "eval_samples_per_second": 76.495, |
| "eval_steps_per_second": 12.75, |
| "step": 83000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.5998712266238957e-05, |
| "loss": 1.6841, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_accuracy": 0.6878174578614017, |
| "eval_loss": 1.6058648824691772, |
| "eval_runtime": 505.2779, |
| "eval_samples_per_second": 76.502, |
| "eval_steps_per_second": 12.751, |
| "step": 83500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.585499198040805e-05, |
| "loss": 1.702, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_accuracy": 0.689128775940149, |
| "eval_loss": 1.590348482131958, |
| "eval_runtime": 505.1393, |
| "eval_samples_per_second": 76.523, |
| "eval_steps_per_second": 12.755, |
| "step": 84000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.571127169457715e-05, |
| "loss": 1.682, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_accuracy": 0.688908355850576, |
| "eval_loss": 1.6047139167785645, |
| "eval_runtime": 505.1712, |
| "eval_samples_per_second": 76.519, |
| "eval_steps_per_second": 12.754, |
| "step": 84500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5567551408746243e-05, |
| "loss": 1.7345, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 0.6890339447120143, |
| "eval_loss": 1.6020911931991577, |
| "eval_runtime": 505.197, |
| "eval_samples_per_second": 76.515, |
| "eval_steps_per_second": 12.753, |
| "step": 85000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5423831122915336e-05, |
| "loss": 1.6603, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 0.6915587628865979, |
| "eval_loss": 1.5837475061416626, |
| "eval_runtime": 505.2715, |
| "eval_samples_per_second": 76.503, |
| "eval_steps_per_second": 12.752, |
| "step": 85500 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.5280110837084436e-05, |
| "loss": 1.6832, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_accuracy": 0.6890505833367687, |
| "eval_loss": 1.5918046236038208, |
| "eval_runtime": 505.266, |
| "eval_samples_per_second": 76.504, |
| "eval_steps_per_second": 12.752, |
| "step": 86000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.513639055125353e-05, |
| "loss": 1.6458, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_accuracy": 0.6931117330889456, |
| "eval_loss": 1.5845005512237549, |
| "eval_runtime": 504.8892, |
| "eval_samples_per_second": 76.561, |
| "eval_steps_per_second": 12.761, |
| "step": 86500 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.4992670265422622e-05, |
| "loss": 1.6892, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_accuracy": 0.6918331592324206, |
| "eval_loss": 1.5995681285858154, |
| "eval_runtime": 504.9467, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 87000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.4848949979591722e-05, |
| "loss": 1.6622, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_accuracy": 0.6911891665227794, |
| "eval_loss": 1.5737853050231934, |
| "eval_runtime": 505.2609, |
| "eval_samples_per_second": 76.505, |
| "eval_steps_per_second": 12.752, |
| "step": 87500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.470522969376082e-05, |
| "loss": 1.6945, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_accuracy": 0.6907878234624099, |
| "eval_loss": 1.5821518898010254, |
| "eval_runtime": 504.9007, |
| "eval_samples_per_second": 76.56, |
| "eval_steps_per_second": 12.761, |
| "step": 88000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.4561509407929912e-05, |
| "loss": 1.6775, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 0.6915599108105217, |
| "eval_loss": 1.5868152379989624, |
| "eval_runtime": 507.9367, |
| "eval_samples_per_second": 76.102, |
| "eval_steps_per_second": 12.685, |
| "step": 88500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.441778912209901e-05, |
| "loss": 1.6648, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_accuracy": 0.6926215684822759, |
| "eval_loss": 1.5802749395370483, |
| "eval_runtime": 505.1133, |
| "eval_samples_per_second": 76.527, |
| "eval_steps_per_second": 12.756, |
| "step": 89000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.42740688362681e-05, |
| "loss": 1.6999, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_accuracy": 0.691963530251813, |
| "eval_loss": 1.5864415168762207, |
| "eval_runtime": 504.9269, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 89500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4130348550437198e-05, |
| "loss": 1.663, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_accuracy": 0.6933203738936223, |
| "eval_loss": 1.5767111778259277, |
| "eval_runtime": 505.0692, |
| "eval_samples_per_second": 76.534, |
| "eval_steps_per_second": 12.757, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.3986628264606294e-05, |
| "loss": 1.6824, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_accuracy": 0.6924102239208152, |
| "eval_loss": 1.5771360397338867, |
| "eval_runtime": 506.0223, |
| "eval_samples_per_second": 76.39, |
| "eval_steps_per_second": 12.733, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.3842907978775387e-05, |
| "loss": 1.7296, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_accuracy": 0.6955193188340566, |
| "eval_loss": 1.5531344413757324, |
| "eval_runtime": 504.9026, |
| "eval_samples_per_second": 76.559, |
| "eval_steps_per_second": 12.761, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.3699187692944484e-05, |
| "loss": 1.6886, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_accuracy": 0.6942384647970158, |
| "eval_loss": 1.5708072185516357, |
| "eval_runtime": 504.6487, |
| "eval_samples_per_second": 76.598, |
| "eval_steps_per_second": 12.767, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.355546740711358e-05, |
| "loss": 1.6505, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_accuracy": 0.6933419036906101, |
| "eval_loss": 1.576134443283081, |
| "eval_runtime": 504.7326, |
| "eval_samples_per_second": 76.585, |
| "eval_steps_per_second": 12.765, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3411747121282677e-05, |
| "loss": 1.6249, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_accuracy": 0.6943414702385116, |
| "eval_loss": 1.568155288696289, |
| "eval_runtime": 504.819, |
| "eval_samples_per_second": 76.572, |
| "eval_steps_per_second": 12.763, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3268026835451773e-05, |
| "loss": 1.6444, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_accuracy": 0.6940347796835451, |
| "eval_loss": 1.5619697570800781, |
| "eval_runtime": 505.3126, |
| "eval_samples_per_second": 76.497, |
| "eval_steps_per_second": 12.751, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.3124306549620866e-05, |
| "loss": 1.6732, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_accuracy": 0.6952784568514526, |
| "eval_loss": 1.5603562593460083, |
| "eval_runtime": 505.2957, |
| "eval_samples_per_second": 76.5, |
| "eval_steps_per_second": 12.751, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.2980586263789963e-05, |
| "loss": 1.6216, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_accuracy": 0.6949359842853672, |
| "eval_loss": 1.565598964691162, |
| "eval_runtime": 504.9546, |
| "eval_samples_per_second": 76.551, |
| "eval_steps_per_second": 12.76, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2836865977959056e-05, |
| "loss": 1.6582, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_accuracy": 0.6937760191253453, |
| "eval_loss": 1.5781822204589844, |
| "eval_runtime": 504.8729, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2693145692128152e-05, |
| "loss": 1.6508, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_accuracy": 0.6950486418920586, |
| "eval_loss": 1.571102499961853, |
| "eval_runtime": 504.8466, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.254942540629725e-05, |
| "loss": 1.7233, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_accuracy": 0.695228203796665, |
| "eval_loss": 1.5533523559570312, |
| "eval_runtime": 505.0209, |
| "eval_samples_per_second": 76.541, |
| "eval_steps_per_second": 12.758, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2405705120466345e-05, |
| "loss": 1.6595, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 0.6961651429964184, |
| "eval_loss": 1.5645027160644531, |
| "eval_runtime": 505.5184, |
| "eval_samples_per_second": 76.466, |
| "eval_steps_per_second": 12.745, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2261984834635442e-05, |
| "loss": 1.675, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_accuracy": 0.6951619937951069, |
| "eval_loss": 1.5551990270614624, |
| "eval_runtime": 504.96, |
| "eval_samples_per_second": 76.551, |
| "eval_steps_per_second": 12.759, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2118264548804535e-05, |
| "loss": 1.6693, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_accuracy": 0.6955352805065834, |
| "eval_loss": 1.559889554977417, |
| "eval_runtime": 504.7338, |
| "eval_samples_per_second": 76.585, |
| "eval_steps_per_second": 12.765, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.197454426297363e-05, |
| "loss": 1.6683, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_accuracy": 0.6959633835629406, |
| "eval_loss": 1.5636415481567383, |
| "eval_runtime": 505.0424, |
| "eval_samples_per_second": 76.538, |
| "eval_steps_per_second": 12.757, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.1830823977142728e-05, |
| "loss": 1.629, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_accuracy": 0.6984619435963693, |
| "eval_loss": 1.5556976795196533, |
| "eval_runtime": 505.0451, |
| "eval_samples_per_second": 76.538, |
| "eval_steps_per_second": 12.757, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.168710369131182e-05, |
| "loss": 1.6397, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_accuracy": 0.6993880694125302, |
| "eval_loss": 1.5522592067718506, |
| "eval_runtime": 504.9429, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.1543383405480917e-05, |
| "loss": 1.6592, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 0.6966342933002339, |
| "eval_loss": 1.5551400184631348, |
| "eval_runtime": 504.9485, |
| "eval_samples_per_second": 76.552, |
| "eval_steps_per_second": 12.76, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1399663119650014e-05, |
| "loss": 1.6667, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_accuracy": 0.6980728580787916, |
| "eval_loss": 1.5401898622512817, |
| "eval_runtime": 504.874, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.125594283381911e-05, |
| "loss": 1.6814, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_accuracy": 0.6997914704418605, |
| "eval_loss": 1.5416996479034424, |
| "eval_runtime": 504.972, |
| "eval_samples_per_second": 76.549, |
| "eval_steps_per_second": 12.759, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.1112222547988207e-05, |
| "loss": 1.6148, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_accuracy": 0.6998273195982002, |
| "eval_loss": 1.5450518131256104, |
| "eval_runtime": 504.8704, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.09685022621573e-05, |
| "loss": 1.6434, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_accuracy": 0.6984844237017003, |
| "eval_loss": 1.543935775756836, |
| "eval_runtime": 505.0969, |
| "eval_samples_per_second": 76.53, |
| "eval_steps_per_second": 12.756, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0824781976326396e-05, |
| "loss": 1.6248, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_accuracy": 0.6980961799975275, |
| "eval_loss": 1.5435203313827515, |
| "eval_runtime": 504.9596, |
| "eval_samples_per_second": 76.551, |
| "eval_steps_per_second": 12.759, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.068106169049549e-05, |
| "loss": 1.618, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_accuracy": 0.6988481813281099, |
| "eval_loss": 1.5486124753952026, |
| "eval_runtime": 504.8619, |
| "eval_samples_per_second": 76.565, |
| "eval_steps_per_second": 12.762, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.0537341404664586e-05, |
| "loss": 1.6631, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_accuracy": 0.7006969816661729, |
| "eval_loss": 1.5370893478393555, |
| "eval_runtime": 505.0082, |
| "eval_samples_per_second": 76.543, |
| "eval_steps_per_second": 12.758, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.0393621118833682e-05, |
| "loss": 1.6398, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_accuracy": 0.7007735380707729, |
| "eval_loss": 1.5380626916885376, |
| "eval_runtime": 505.0441, |
| "eval_samples_per_second": 76.538, |
| "eval_steps_per_second": 12.757, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.024990083300278e-05, |
| "loss": 1.5985, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_accuracy": 0.7004679547384006, |
| "eval_loss": 1.532943606376648, |
| "eval_runtime": 504.9121, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.0106180547171875e-05, |
| "loss": 1.6232, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_accuracy": 0.6991788643974363, |
| "eval_loss": 1.542557954788208, |
| "eval_runtime": 504.7417, |
| "eval_samples_per_second": 76.584, |
| "eval_steps_per_second": 12.765, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.996246026134097e-05, |
| "loss": 1.5916, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_accuracy": 0.7015240937041495, |
| "eval_loss": 1.5383764505386353, |
| "eval_runtime": 504.8766, |
| "eval_samples_per_second": 76.563, |
| "eval_steps_per_second": 12.762, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9818739975510065e-05, |
| "loss": 1.6381, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_accuracy": 0.7009540601107302, |
| "eval_loss": 1.5326433181762695, |
| "eval_runtime": 504.9816, |
| "eval_samples_per_second": 76.547, |
| "eval_steps_per_second": 12.759, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.967501968967916e-05, |
| "loss": 1.649, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.82, |
| "eval_accuracy": 0.7017622688954387, |
| "eval_loss": 1.5390700101852417, |
| "eval_runtime": 506.9578, |
| "eval_samples_per_second": 76.249, |
| "eval_steps_per_second": 12.709, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.9531299403848255e-05, |
| "loss": 1.6159, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 0.7017851844525764, |
| "eval_loss": 1.5259411334991455, |
| "eval_runtime": 505.2078, |
| "eval_samples_per_second": 76.513, |
| "eval_steps_per_second": 12.753, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.938757911801735e-05, |
| "loss": 1.6491, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_accuracy": 0.7018004670461587, |
| "eval_loss": 1.5233443975448608, |
| "eval_runtime": 505.2163, |
| "eval_samples_per_second": 76.512, |
| "eval_steps_per_second": 12.753, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.9243858832186444e-05, |
| "loss": 1.6356, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_accuracy": 0.702718938758044, |
| "eval_loss": 1.5181509256362915, |
| "eval_runtime": 504.9505, |
| "eval_samples_per_second": 76.552, |
| "eval_steps_per_second": 12.76, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.910013854635554e-05, |
| "loss": 1.6216, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_accuracy": 0.7008744506365819, |
| "eval_loss": 1.5331019163131714, |
| "eval_runtime": 505.0379, |
| "eval_samples_per_second": 76.539, |
| "eval_steps_per_second": 12.757, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.895641826052464e-05, |
| "loss": 1.623, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_accuracy": 0.7014500866429856, |
| "eval_loss": 1.5305390357971191, |
| "eval_runtime": 504.8289, |
| "eval_samples_per_second": 76.57, |
| "eval_steps_per_second": 12.763, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.8812697974693733e-05, |
| "loss": 1.5611, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 0.7019720023345088, |
| "eval_loss": 1.5330239534378052, |
| "eval_runtime": 504.8761, |
| "eval_samples_per_second": 76.563, |
| "eval_steps_per_second": 12.762, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.866897768886283e-05, |
| "loss": 1.5784, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_accuracy": 0.7024345811918609, |
| "eval_loss": 1.5184859037399292, |
| "eval_runtime": 504.9097, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8525257403031923e-05, |
| "loss": 1.6062, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_accuracy": 0.7024881609370686, |
| "eval_loss": 1.522905945777893, |
| "eval_runtime": 505.0697, |
| "eval_samples_per_second": 76.534, |
| "eval_steps_per_second": 12.757, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.838153711720102e-05, |
| "loss": 1.6332, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_accuracy": 0.701047717501887, |
| "eval_loss": 1.5286990404129028, |
| "eval_runtime": 504.9809, |
| "eval_samples_per_second": 76.547, |
| "eval_steps_per_second": 12.759, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.8237816831370116e-05, |
| "loss": 1.6187, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_accuracy": 0.7042429027920003, |
| "eval_loss": 1.5118212699890137, |
| "eval_runtime": 504.8, |
| "eval_samples_per_second": 76.575, |
| "eval_steps_per_second": 12.763, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.809409654553921e-05, |
| "loss": 1.6815, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_accuracy": 0.7022455682831017, |
| "eval_loss": 1.5234098434448242, |
| "eval_runtime": 505.0894, |
| "eval_samples_per_second": 76.531, |
| "eval_steps_per_second": 12.756, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.7950376259708306e-05, |
| "loss": 1.6255, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_accuracy": 0.7030569488306809, |
| "eval_loss": 1.5167911052703857, |
| "eval_runtime": 505.4691, |
| "eval_samples_per_second": 76.474, |
| "eval_steps_per_second": 12.747, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7806655973877402e-05, |
| "loss": 1.6295, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_accuracy": 0.7052926958479571, |
| "eval_loss": 1.5069060325622559, |
| "eval_runtime": 505.3751, |
| "eval_samples_per_second": 76.488, |
| "eval_steps_per_second": 12.749, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.76629356880465e-05, |
| "loss": 1.595, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_accuracy": 0.703291867996299, |
| "eval_loss": 1.5169904232025146, |
| "eval_runtime": 504.966, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7519215402215595e-05, |
| "loss": 1.5757, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.95, |
| "eval_accuracy": 0.7063367233499402, |
| "eval_loss": 1.4981634616851807, |
| "eval_runtime": 504.9904, |
| "eval_samples_per_second": 76.546, |
| "eval_steps_per_second": 12.759, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7375495116384688e-05, |
| "loss": 1.5807, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_accuracy": 0.7059590446774121, |
| "eval_loss": 1.5014806985855103, |
| "eval_runtime": 505.3093, |
| "eval_samples_per_second": 76.498, |
| "eval_steps_per_second": 12.751, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7231774830553785e-05, |
| "loss": 1.5777, |
| "step": 114000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 0.7049637778374027, |
| "eval_loss": 1.5109049081802368, |
| "eval_runtime": 505.236, |
| "eval_samples_per_second": 76.509, |
| "eval_steps_per_second": 12.752, |
| "step": 114000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7088054544722878e-05, |
| "loss": 1.6062, |
| "step": 114500 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_accuracy": 0.7062876193760829, |
| "eval_loss": 1.506945013999939, |
| "eval_runtime": 505.1874, |
| "eval_samples_per_second": 76.516, |
| "eval_steps_per_second": 12.754, |
| "step": 114500 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.6944334258891974e-05, |
| "loss": 1.5888, |
| "step": 115000 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_accuracy": 0.7056501194671887, |
| "eval_loss": 1.5124071836471558, |
| "eval_runtime": 505.2744, |
| "eval_samples_per_second": 76.503, |
| "eval_steps_per_second": 12.751, |
| "step": 115000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.680061397306107e-05, |
| "loss": 1.6134, |
| "step": 115500 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_accuracy": 0.7066008230452675, |
| "eval_loss": 1.5099009275436401, |
| "eval_runtime": 505.2493, |
| "eval_samples_per_second": 76.507, |
| "eval_steps_per_second": 12.752, |
| "step": 115500 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.6656893687230167e-05, |
| "loss": 1.5746, |
| "step": 116000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.705594261450303, |
| "eval_loss": 1.506860375404358, |
| "eval_runtime": 505.1971, |
| "eval_samples_per_second": 76.515, |
| "eval_steps_per_second": 12.753, |
| "step": 116000 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6513173401399263e-05, |
| "loss": 1.5172, |
| "step": 116500 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_accuracy": 0.7052716776372842, |
| "eval_loss": 1.5075557231903076, |
| "eval_runtime": 504.9109, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 116500 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6369453115568357e-05, |
| "loss": 1.5778, |
| "step": 117000 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_accuracy": 0.7059466099459174, |
| "eval_loss": 1.508126139640808, |
| "eval_runtime": 504.8611, |
| "eval_samples_per_second": 76.566, |
| "eval_steps_per_second": 12.762, |
| "step": 117000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.6225732829737453e-05, |
| "loss": 1.5624, |
| "step": 117500 |
| }, |
| { |
| "epoch": 2.03, |
| "eval_accuracy": 0.7065924244982682, |
| "eval_loss": 1.5056620836257935, |
| "eval_runtime": 504.7598, |
| "eval_samples_per_second": 76.581, |
| "eval_steps_per_second": 12.764, |
| "step": 117500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.608201254390655e-05, |
| "loss": 1.5414, |
| "step": 118000 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_accuracy": 0.7066743572841134, |
| "eval_loss": 1.5125539302825928, |
| "eval_runtime": 504.8072, |
| "eval_samples_per_second": 76.574, |
| "eval_steps_per_second": 12.763, |
| "step": 118000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5938292258075643e-05, |
| "loss": 1.6024, |
| "step": 118500 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_accuracy": 0.7076791098023204, |
| "eval_loss": 1.4966239929199219, |
| "eval_runtime": 504.9377, |
| "eval_samples_per_second": 76.554, |
| "eval_steps_per_second": 12.76, |
| "step": 118500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.579457197224474e-05, |
| "loss": 1.5471, |
| "step": 119000 |
| }, |
| { |
| "epoch": 2.05, |
| "eval_accuracy": 0.7088341024818406, |
| "eval_loss": 1.4898276329040527, |
| "eval_runtime": 504.9068, |
| "eval_samples_per_second": 76.559, |
| "eval_steps_per_second": 12.761, |
| "step": 119000 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5650851686413832e-05, |
| "loss": 1.5654, |
| "step": 119500 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_accuracy": 0.70954067396437, |
| "eval_loss": 1.4860520362854004, |
| "eval_runtime": 504.9494, |
| "eval_samples_per_second": 76.552, |
| "eval_steps_per_second": 12.76, |
| "step": 119500 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.5507131400582932e-05, |
| "loss": 1.5847, |
| "step": 120000 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_accuracy": 0.7085845618684777, |
| "eval_loss": 1.4859682321548462, |
| "eval_runtime": 505.1913, |
| "eval_samples_per_second": 76.516, |
| "eval_steps_per_second": 12.754, |
| "step": 120000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.536341111475203e-05, |
| "loss": 1.545, |
| "step": 120500 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_accuracy": 0.7086799566969486, |
| "eval_loss": 1.4879974126815796, |
| "eval_runtime": 505.2124, |
| "eval_samples_per_second": 76.512, |
| "eval_steps_per_second": 12.753, |
| "step": 120500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5219690828921122e-05, |
| "loss": 1.5343, |
| "step": 121000 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_accuracy": 0.7089044894203476, |
| "eval_loss": 1.4901121854782104, |
| "eval_runtime": 505.2338, |
| "eval_samples_per_second": 76.509, |
| "eval_steps_per_second": 12.753, |
| "step": 121000 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.5075970543090218e-05, |
| "loss": 1.5411, |
| "step": 121500 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 0.709381160149452, |
| "eval_loss": 1.4883925914764404, |
| "eval_runtime": 505.0355, |
| "eval_samples_per_second": 76.539, |
| "eval_steps_per_second": 12.758, |
| "step": 121500 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.4932250257259311e-05, |
| "loss": 1.5547, |
| "step": 122000 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_accuracy": 0.711971749736287, |
| "eval_loss": 1.473547101020813, |
| "eval_runtime": 505.1236, |
| "eval_samples_per_second": 76.526, |
| "eval_steps_per_second": 12.755, |
| "step": 122000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4788529971428408e-05, |
| "loss": 1.4866, |
| "step": 122500 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_accuracy": 0.7098092699187059, |
| "eval_loss": 1.4946099519729614, |
| "eval_runtime": 504.4889, |
| "eval_samples_per_second": 76.622, |
| "eval_steps_per_second": 12.771, |
| "step": 122500 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4644809685597504e-05, |
| "loss": 1.5229, |
| "step": 123000 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_accuracy": 0.7096436187916816, |
| "eval_loss": 1.4902827739715576, |
| "eval_runtime": 504.8444, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 123000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4501089399766599e-05, |
| "loss": 1.5477, |
| "step": 123500 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_accuracy": 0.7104851587837282, |
| "eval_loss": 1.4803611040115356, |
| "eval_runtime": 509.3025, |
| "eval_samples_per_second": 75.898, |
| "eval_steps_per_second": 12.651, |
| "step": 123500 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.4357369113935695e-05, |
| "loss": 1.5529, |
| "step": 124000 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_accuracy": 0.7104633554799552, |
| "eval_loss": 1.4857234954833984, |
| "eval_runtime": 520.4787, |
| "eval_samples_per_second": 74.268, |
| "eval_steps_per_second": 12.379, |
| "step": 124000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4213648828104788e-05, |
| "loss": 1.5215, |
| "step": 124500 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_accuracy": 0.7099463648179529, |
| "eval_loss": 1.4857269525527954, |
| "eval_runtime": 505.7185, |
| "eval_samples_per_second": 76.436, |
| "eval_steps_per_second": 12.74, |
| "step": 124500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.4069928542273887e-05, |
| "loss": 1.4828, |
| "step": 125000 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_accuracy": 0.7106234965485086, |
| "eval_loss": 1.4739534854888916, |
| "eval_runtime": 505.3807, |
| "eval_samples_per_second": 76.487, |
| "eval_steps_per_second": 12.749, |
| "step": 125000 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3926208256442983e-05, |
| "loss": 1.5883, |
| "step": 125500 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_accuracy": 0.7115277840612314, |
| "eval_loss": 1.473445177078247, |
| "eval_runtime": 505.1268, |
| "eval_samples_per_second": 76.525, |
| "eval_steps_per_second": 12.755, |
| "step": 125500 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3782487970612076e-05, |
| "loss": 1.5156, |
| "step": 126000 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_accuracy": 0.7095255566227909, |
| "eval_loss": 1.4827450513839722, |
| "eval_runtime": 505.5723, |
| "eval_samples_per_second": 76.458, |
| "eval_steps_per_second": 12.744, |
| "step": 126000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3638767684781173e-05, |
| "loss": 1.5213, |
| "step": 126500 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_accuracy": 0.712029256788515, |
| "eval_loss": 1.4690048694610596, |
| "eval_runtime": 504.8111, |
| "eval_samples_per_second": 76.573, |
| "eval_steps_per_second": 12.763, |
| "step": 126500 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.3495047398950267e-05, |
| "loss": 1.4861, |
| "step": 127000 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_accuracy": 0.7123234620391885, |
| "eval_loss": 1.4674574136734009, |
| "eval_runtime": 505.4759, |
| "eval_samples_per_second": 76.472, |
| "eval_steps_per_second": 12.746, |
| "step": 127000 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.3351327113119364e-05, |
| "loss": 1.5009, |
| "step": 127500 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_accuracy": 0.7121283468404834, |
| "eval_loss": 1.4678421020507812, |
| "eval_runtime": 504.6214, |
| "eval_samples_per_second": 76.602, |
| "eval_steps_per_second": 12.768, |
| "step": 127500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.320760682728846e-05, |
| "loss": 1.5087, |
| "step": 128000 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_accuracy": 0.713161382739629, |
| "eval_loss": 1.4610421657562256, |
| "eval_runtime": 505.0126, |
| "eval_samples_per_second": 76.543, |
| "eval_steps_per_second": 12.758, |
| "step": 128000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.3063886541457553e-05, |
| "loss": 1.5054, |
| "step": 128500 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_accuracy": 0.7133309189082409, |
| "eval_loss": 1.4649887084960938, |
| "eval_runtime": 504.8831, |
| "eval_samples_per_second": 76.562, |
| "eval_steps_per_second": 12.761, |
| "step": 128500 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.292016625562665e-05, |
| "loss": 1.4892, |
| "step": 129000 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_accuracy": 0.7127896062836536, |
| "eval_loss": 1.466170310974121, |
| "eval_runtime": 504.939, |
| "eval_samples_per_second": 76.554, |
| "eval_steps_per_second": 12.76, |
| "step": 129000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.2776445969795745e-05, |
| "loss": 1.5192, |
| "step": 129500 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_accuracy": 0.7139719852935129, |
| "eval_loss": 1.4647654294967651, |
| "eval_runtime": 504.8547, |
| "eval_samples_per_second": 76.567, |
| "eval_steps_per_second": 12.762, |
| "step": 129500 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2632725683964841e-05, |
| "loss": 1.4956, |
| "step": 130000 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_accuracy": 0.7122000246133651, |
| "eval_loss": 1.4689513444900513, |
| "eval_runtime": 504.8432, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 130000 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.2489005398133936e-05, |
| "loss": 1.5119, |
| "step": 130500 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_accuracy": 0.7143438328299329, |
| "eval_loss": 1.4534658193588257, |
| "eval_runtime": 504.8073, |
| "eval_samples_per_second": 76.574, |
| "eval_steps_per_second": 12.763, |
| "step": 130500 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2345285112303032e-05, |
| "loss": 1.503, |
| "step": 131000 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_accuracy": 0.7134830305248999, |
| "eval_loss": 1.4606177806854248, |
| "eval_runtime": 504.9644, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 131000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2201564826472127e-05, |
| "loss": 1.4992, |
| "step": 131500 |
| }, |
| { |
| "epoch": 2.27, |
| "eval_accuracy": 0.7160677368607458, |
| "eval_loss": 1.458638072013855, |
| "eval_runtime": 506.1403, |
| "eval_samples_per_second": 76.372, |
| "eval_steps_per_second": 12.73, |
| "step": 131500 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.2057844540641224e-05, |
| "loss": 1.5276, |
| "step": 132000 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_accuracy": 0.7141355149782739, |
| "eval_loss": 1.4541469812393188, |
| "eval_runtime": 505.3203, |
| "eval_samples_per_second": 76.496, |
| "eval_steps_per_second": 12.75, |
| "step": 132000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1914124254810318e-05, |
| "loss": 1.5141, |
| "step": 132500 |
| }, |
| { |
| "epoch": 2.29, |
| "eval_accuracy": 0.7130751899483604, |
| "eval_loss": 1.4653090238571167, |
| "eval_runtime": 505.781, |
| "eval_samples_per_second": 76.426, |
| "eval_steps_per_second": 12.739, |
| "step": 132500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1770403968979415e-05, |
| "loss": 1.5113, |
| "step": 133000 |
| }, |
| { |
| "epoch": 2.29, |
| "eval_accuracy": 0.7153582127066929, |
| "eval_loss": 1.447860836982727, |
| "eval_runtime": 505.2634, |
| "eval_samples_per_second": 76.505, |
| "eval_steps_per_second": 12.752, |
| "step": 133000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.162668368314851e-05, |
| "loss": 1.5167, |
| "step": 133500 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_accuracy": 0.7174313590516194, |
| "eval_loss": 1.4457464218139648, |
| "eval_runtime": 505.5762, |
| "eval_samples_per_second": 76.457, |
| "eval_steps_per_second": 12.744, |
| "step": 133500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1482963397317604e-05, |
| "loss": 1.4885, |
| "step": 134000 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_accuracy": 0.7142763176827037, |
| "eval_loss": 1.4579509496688843, |
| "eval_runtime": 505.8302, |
| "eval_samples_per_second": 76.419, |
| "eval_steps_per_second": 12.737, |
| "step": 134000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.1339243111486701e-05, |
| "loss": 1.5393, |
| "step": 134500 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_accuracy": 0.7158960704953302, |
| "eval_loss": 1.444856882095337, |
| "eval_runtime": 506.4452, |
| "eval_samples_per_second": 76.326, |
| "eval_steps_per_second": 12.722, |
| "step": 134500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1195522825655796e-05, |
| "loss": 1.4911, |
| "step": 135000 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_accuracy": 0.7163415045657121, |
| "eval_loss": 1.4464455842971802, |
| "eval_runtime": 505.497, |
| "eval_samples_per_second": 76.469, |
| "eval_steps_per_second": 12.746, |
| "step": 135000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.1051802539824892e-05, |
| "loss": 1.5198, |
| "step": 135500 |
| }, |
| { |
| "epoch": 2.34, |
| "eval_accuracy": 0.7159453509560285, |
| "eval_loss": 1.4453022480010986, |
| "eval_runtime": 505.769, |
| "eval_samples_per_second": 76.428, |
| "eval_steps_per_second": 12.739, |
| "step": 135500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.0908082253993987e-05, |
| "loss": 1.4998, |
| "step": 136000 |
| }, |
| { |
| "epoch": 2.35, |
| "eval_accuracy": 0.7154473553109933, |
| "eval_loss": 1.447924017906189, |
| "eval_runtime": 505.5382, |
| "eval_samples_per_second": 76.463, |
| "eval_steps_per_second": 12.745, |
| "step": 136000 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.0764361968163082e-05, |
| "loss": 1.4882, |
| "step": 136500 |
| }, |
| { |
| "epoch": 2.35, |
| "eval_accuracy": 0.715215147290456, |
| "eval_loss": 1.4536397457122803, |
| "eval_runtime": 505.7441, |
| "eval_samples_per_second": 76.432, |
| "eval_steps_per_second": 12.74, |
| "step": 136500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0620641682332178e-05, |
| "loss": 1.4999, |
| "step": 137000 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_accuracy": 0.7163305139882888, |
| "eval_loss": 1.4394898414611816, |
| "eval_runtime": 508.4335, |
| "eval_samples_per_second": 76.028, |
| "eval_steps_per_second": 12.672, |
| "step": 137000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0476921396501275e-05, |
| "loss": 1.4776, |
| "step": 137500 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_accuracy": 0.7160342616644199, |
| "eval_loss": 1.4408942461013794, |
| "eval_runtime": 505.6385, |
| "eval_samples_per_second": 76.448, |
| "eval_steps_per_second": 12.742, |
| "step": 137500 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.033320111067037e-05, |
| "loss": 1.4862, |
| "step": 138000 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_accuracy": 0.7176822528219858, |
| "eval_loss": 1.4391921758651733, |
| "eval_runtime": 506.2686, |
| "eval_samples_per_second": 76.353, |
| "eval_steps_per_second": 12.726, |
| "step": 138000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0189480824839464e-05, |
| "loss": 1.4771, |
| "step": 138500 |
| }, |
| { |
| "epoch": 2.39, |
| "eval_accuracy": 0.7176454606081064, |
| "eval_loss": 1.452812910079956, |
| "eval_runtime": 505.5969, |
| "eval_samples_per_second": 76.454, |
| "eval_steps_per_second": 12.743, |
| "step": 138500 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.004576053900856e-05, |
| "loss": 1.5073, |
| "step": 139000 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_accuracy": 0.7177807712429609, |
| "eval_loss": 1.4350136518478394, |
| "eval_runtime": 505.9222, |
| "eval_samples_per_second": 76.405, |
| "eval_steps_per_second": 12.735, |
| "step": 139000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.902040253177657e-06, |
| "loss": 1.511, |
| "step": 139500 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_accuracy": 0.7190818122474611, |
| "eval_loss": 1.4352947473526, |
| "eval_runtime": 505.7139, |
| "eval_samples_per_second": 76.437, |
| "eval_steps_per_second": 12.74, |
| "step": 139500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.758319967346752e-06, |
| "loss": 1.4775, |
| "step": 140000 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_accuracy": 0.7172922766863388, |
| "eval_loss": 1.4344879388809204, |
| "eval_runtime": 505.722, |
| "eval_samples_per_second": 76.435, |
| "eval_steps_per_second": 12.74, |
| "step": 140000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.614599681515847e-06, |
| "loss": 1.4745, |
| "step": 140500 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_accuracy": 0.7194636963696369, |
| "eval_loss": 1.4287306070327759, |
| "eval_runtime": 505.8247, |
| "eval_samples_per_second": 76.42, |
| "eval_steps_per_second": 12.738, |
| "step": 140500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.470879395684941e-06, |
| "loss": 1.521, |
| "step": 141000 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_accuracy": 0.7193017054722198, |
| "eval_loss": 1.42353355884552, |
| "eval_runtime": 506.3881, |
| "eval_samples_per_second": 76.335, |
| "eval_steps_per_second": 12.723, |
| "step": 141000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.327159109854038e-06, |
| "loss": 1.4287, |
| "step": 141500 |
| }, |
| { |
| "epoch": 2.44, |
| "eval_accuracy": 0.7189707137467686, |
| "eval_loss": 1.4366601705551147, |
| "eval_runtime": 505.3678, |
| "eval_samples_per_second": 76.489, |
| "eval_steps_per_second": 12.749, |
| "step": 141500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.183438824023134e-06, |
| "loss": 1.4811, |
| "step": 142000 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_accuracy": 0.7186938977188504, |
| "eval_loss": 1.4298981428146362, |
| "eval_runtime": 506.0901, |
| "eval_samples_per_second": 76.38, |
| "eval_steps_per_second": 12.731, |
| "step": 142000 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.03971853819223e-06, |
| "loss": 1.4682, |
| "step": 142500 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_accuracy": 0.7198487994110787, |
| "eval_loss": 1.4298604726791382, |
| "eval_runtime": 505.9575, |
| "eval_samples_per_second": 76.4, |
| "eval_steps_per_second": 12.734, |
| "step": 142500 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.895998252361324e-06, |
| "loss": 1.4546, |
| "step": 143000 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_accuracy": 0.7190477365848842, |
| "eval_loss": 1.435451626777649, |
| "eval_runtime": 505.6754, |
| "eval_samples_per_second": 76.442, |
| "eval_steps_per_second": 12.741, |
| "step": 143000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.75227796653042e-06, |
| "loss": 1.4422, |
| "step": 143500 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_accuracy": 0.7204236305651021, |
| "eval_loss": 1.4251837730407715, |
| "eval_runtime": 507.1741, |
| "eval_samples_per_second": 76.216, |
| "eval_steps_per_second": 12.704, |
| "step": 143500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.608557680699515e-06, |
| "loss": 1.4905, |
| "step": 144000 |
| }, |
| { |
| "epoch": 2.48, |
| "eval_accuracy": 0.7187870629901548, |
| "eval_loss": 1.4323233366012573, |
| "eval_runtime": 505.671, |
| "eval_samples_per_second": 76.443, |
| "eval_steps_per_second": 12.741, |
| "step": 144000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.464837394868612e-06, |
| "loss": 1.4433, |
| "step": 144500 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_accuracy": 0.7200582663297973, |
| "eval_loss": 1.4275065660476685, |
| "eval_runtime": 511.8167, |
| "eval_samples_per_second": 75.525, |
| "eval_steps_per_second": 12.588, |
| "step": 144500 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.321117109037706e-06, |
| "loss": 1.5135, |
| "step": 145000 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_accuracy": 0.7204414796646927, |
| "eval_loss": 1.4193999767303467, |
| "eval_runtime": 505.6602, |
| "eval_samples_per_second": 76.445, |
| "eval_steps_per_second": 12.742, |
| "step": 145000 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.177396823206803e-06, |
| "loss": 1.4919, |
| "step": 145500 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_accuracy": 0.7213458347230629, |
| "eval_loss": 1.4182835817337036, |
| "eval_runtime": 505.9604, |
| "eval_samples_per_second": 76.399, |
| "eval_steps_per_second": 12.734, |
| "step": 145500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.033676537375898e-06, |
| "loss": 1.4585, |
| "step": 146000 |
| }, |
| { |
| "epoch": 2.52, |
| "eval_accuracy": 0.7202945875320451, |
| "eval_loss": 1.416925311088562, |
| "eval_runtime": 505.7207, |
| "eval_samples_per_second": 76.435, |
| "eval_steps_per_second": 12.74, |
| "step": 146000 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.889956251544993e-06, |
| "loss": 1.4742, |
| "step": 146500 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_accuracy": 0.7204507767318687, |
| "eval_loss": 1.4170012474060059, |
| "eval_runtime": 505.5941, |
| "eval_samples_per_second": 76.455, |
| "eval_steps_per_second": 12.743, |
| "step": 146500 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.746235965714089e-06, |
| "loss": 1.4832, |
| "step": 147000 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 0.7206863797293463, |
| "eval_loss": 1.4174823760986328, |
| "eval_runtime": 505.7, |
| "eval_samples_per_second": 76.439, |
| "eval_steps_per_second": 12.741, |
| "step": 147000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.602515679883185e-06, |
| "loss": 1.4453, |
| "step": 147500 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_accuracy": 0.7194231884177156, |
| "eval_loss": 1.428833246231079, |
| "eval_runtime": 505.2505, |
| "eval_samples_per_second": 76.507, |
| "eval_steps_per_second": 12.752, |
| "step": 147500 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.45879539405228e-06, |
| "loss": 1.4606, |
| "step": 148000 |
| }, |
| { |
| "epoch": 2.55, |
| "eval_accuracy": 0.7202078930271955, |
| "eval_loss": 1.4212771654129028, |
| "eval_runtime": 506.1452, |
| "eval_samples_per_second": 76.371, |
| "eval_steps_per_second": 12.73, |
| "step": 148000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.315075108221375e-06, |
| "loss": 1.4495, |
| "step": 148500 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_accuracy": 0.7219433311662127, |
| "eval_loss": 1.4107123613357544, |
| "eval_runtime": 504.9404, |
| "eval_samples_per_second": 76.554, |
| "eval_steps_per_second": 12.76, |
| "step": 148500 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.171354822390471e-06, |
| "loss": 1.4813, |
| "step": 149000 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_accuracy": 0.7245293409334687, |
| "eval_loss": 1.4001590013504028, |
| "eval_runtime": 505.3645, |
| "eval_samples_per_second": 76.489, |
| "eval_steps_per_second": 12.749, |
| "step": 149000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.027634536559567e-06, |
| "loss": 1.4637, |
| "step": 149500 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_accuracy": 0.7223724126545185, |
| "eval_loss": 1.4037506580352783, |
| "eval_runtime": 504.8736, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 149500 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.883914250728663e-06, |
| "loss": 1.4941, |
| "step": 150000 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_accuracy": 0.721680796773397, |
| "eval_loss": 1.4062496423721313, |
| "eval_runtime": 505.0122, |
| "eval_samples_per_second": 76.543, |
| "eval_steps_per_second": 12.758, |
| "step": 150000 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.7401939648977575e-06, |
| "loss": 1.4273, |
| "step": 150500 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_accuracy": 0.7224053739186476, |
| "eval_loss": 1.4148577451705933, |
| "eval_runtime": 504.9633, |
| "eval_samples_per_second": 76.55, |
| "eval_steps_per_second": 12.759, |
| "step": 150500 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.596473679066853e-06, |
| "loss": 1.4834, |
| "step": 151000 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_accuracy": 0.7204841475802902, |
| "eval_loss": 1.4197235107421875, |
| "eval_runtime": 505.2544, |
| "eval_samples_per_second": 76.506, |
| "eval_steps_per_second": 12.752, |
| "step": 151000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.452753393235948e-06, |
| "loss": 1.437, |
| "step": 151500 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_accuracy": 0.7232536192764332, |
| "eval_loss": 1.4054090976715088, |
| "eval_runtime": 504.8238, |
| "eval_samples_per_second": 76.571, |
| "eval_steps_per_second": 12.763, |
| "step": 151500 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.309033107405045e-06, |
| "loss": 1.4826, |
| "step": 152000 |
| }, |
| { |
| "epoch": 2.62, |
| "eval_accuracy": 0.7212208583903122, |
| "eval_loss": 1.409009575843811, |
| "eval_runtime": 504.8912, |
| "eval_samples_per_second": 76.561, |
| "eval_steps_per_second": 12.761, |
| "step": 152000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.16531282157414e-06, |
| "loss": 1.4358, |
| "step": 152500 |
| }, |
| { |
| "epoch": 2.63, |
| "eval_accuracy": 0.722482498100967, |
| "eval_loss": 1.4088752269744873, |
| "eval_runtime": 505.1094, |
| "eval_samples_per_second": 76.528, |
| "eval_steps_per_second": 12.756, |
| "step": 152500 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.021592535743236e-06, |
| "loss": 1.4966, |
| "step": 153000 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_accuracy": 0.7229703795723466, |
| "eval_loss": 1.4033186435699463, |
| "eval_runtime": 505.2686, |
| "eval_samples_per_second": 76.504, |
| "eval_steps_per_second": 12.752, |
| "step": 153000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.87787224991233e-06, |
| "loss": 1.4864, |
| "step": 153500 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_accuracy": 0.7256031141200021, |
| "eval_loss": 1.3902387619018555, |
| "eval_runtime": 505.1856, |
| "eval_samples_per_second": 76.516, |
| "eval_steps_per_second": 12.754, |
| "step": 153500 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.734151964081427e-06, |
| "loss": 1.4667, |
| "step": 154000 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_accuracy": 0.7234773743751645, |
| "eval_loss": 1.4088350534439087, |
| "eval_runtime": 505.3647, |
| "eval_samples_per_second": 76.489, |
| "eval_steps_per_second": 12.749, |
| "step": 154000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.590431678250522e-06, |
| "loss": 1.4685, |
| "step": 154500 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_accuracy": 0.7219259271471318, |
| "eval_loss": 1.4083491563796997, |
| "eval_runtime": 505.1615, |
| "eval_samples_per_second": 76.52, |
| "eval_steps_per_second": 12.754, |
| "step": 154500 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.446711392419618e-06, |
| "loss": 1.417, |
| "step": 155000 |
| }, |
| { |
| "epoch": 2.67, |
| "eval_accuracy": 0.7238082346860926, |
| "eval_loss": 1.395462155342102, |
| "eval_runtime": 505.4024, |
| "eval_samples_per_second": 76.484, |
| "eval_steps_per_second": 12.748, |
| "step": 155000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.302991106588713e-06, |
| "loss": 1.4659, |
| "step": 155500 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_accuracy": 0.7241877315903708, |
| "eval_loss": 1.40505850315094, |
| "eval_runtime": 505.1117, |
| "eval_samples_per_second": 76.528, |
| "eval_steps_per_second": 12.756, |
| "step": 155500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.1592708207578085e-06, |
| "loss": 1.4781, |
| "step": 156000 |
| }, |
| { |
| "epoch": 2.69, |
| "eval_accuracy": 0.7232966049141886, |
| "eval_loss": 1.4047409296035767, |
| "eval_runtime": 504.8733, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 156000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.015550534926904e-06, |
| "loss": 1.4421, |
| "step": 156500 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_accuracy": 0.7228116108410798, |
| "eval_loss": 1.4027156829833984, |
| "eval_runtime": 504.8455, |
| "eval_samples_per_second": 76.568, |
| "eval_steps_per_second": 12.762, |
| "step": 156500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.871830249096e-06, |
| "loss": 1.4452, |
| "step": 157000 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_accuracy": 0.723422951414816, |
| "eval_loss": 1.4014335870742798, |
| "eval_runtime": 504.9452, |
| "eval_samples_per_second": 76.553, |
| "eval_steps_per_second": 12.76, |
| "step": 157000 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.728109963265095e-06, |
| "loss": 1.4355, |
| "step": 157500 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 0.7252601695125072, |
| "eval_loss": 1.3960394859313965, |
| "eval_runtime": 504.724, |
| "eval_samples_per_second": 76.586, |
| "eval_steps_per_second": 12.765, |
| "step": 157500 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.584389677434191e-06, |
| "loss": 1.4017, |
| "step": 158000 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_accuracy": 0.7239494672629475, |
| "eval_loss": 1.4029176235198975, |
| "eval_runtime": 504.9116, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 158000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.440669391603286e-06, |
| "loss": 1.4471, |
| "step": 158500 |
| }, |
| { |
| "epoch": 2.73, |
| "eval_accuracy": 0.7256501114006889, |
| "eval_loss": 1.3967643976211548, |
| "eval_runtime": 505.3598, |
| "eval_samples_per_second": 76.49, |
| "eval_steps_per_second": 12.749, |
| "step": 158500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.296949105772382e-06, |
| "loss": 1.4219, |
| "step": 159000 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_accuracy": 0.7257657791292739, |
| "eval_loss": 1.3916139602661133, |
| "eval_runtime": 505.5902, |
| "eval_samples_per_second": 76.455, |
| "eval_steps_per_second": 12.744, |
| "step": 159000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.153228819941477e-06, |
| "loss": 1.4419, |
| "step": 159500 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_accuracy": 0.7262047322129083, |
| "eval_loss": 1.3859390020370483, |
| "eval_runtime": 505.5826, |
| "eval_samples_per_second": 76.456, |
| "eval_steps_per_second": 12.744, |
| "step": 159500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.0095085341105735e-06, |
| "loss": 1.4684, |
| "step": 160000 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_accuracy": 0.726700418044344, |
| "eval_loss": 1.3843731880187988, |
| "eval_runtime": 506.0989, |
| "eval_samples_per_second": 76.378, |
| "eval_steps_per_second": 12.731, |
| "step": 160000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.865788248279668e-06, |
| "loss": 1.4394, |
| "step": 160500 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_accuracy": 0.725073819405728, |
| "eval_loss": 1.391176462173462, |
| "eval_runtime": 504.9982, |
| "eval_samples_per_second": 76.545, |
| "eval_steps_per_second": 12.758, |
| "step": 160500 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.7220679624487635e-06, |
| "loss": 1.4004, |
| "step": 161000 |
| }, |
| { |
| "epoch": 2.78, |
| "eval_accuracy": 0.7258770483695473, |
| "eval_loss": 1.3909010887145996, |
| "eval_runtime": 504.8407, |
| "eval_samples_per_second": 76.569, |
| "eval_steps_per_second": 12.762, |
| "step": 161000 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.5783476766178596e-06, |
| "loss": 1.4411, |
| "step": 161500 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_accuracy": 0.7276005410983969, |
| "eval_loss": 1.3886851072311401, |
| "eval_runtime": 504.9231, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 161500 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.4346273907869548e-06, |
| "loss": 1.4375, |
| "step": 162000 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_accuracy": 0.7253618959153645, |
| "eval_loss": 1.39145827293396, |
| "eval_runtime": 504.9264, |
| "eval_samples_per_second": 76.556, |
| "eval_steps_per_second": 12.76, |
| "step": 162000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.290907104956051e-06, |
| "loss": 1.3995, |
| "step": 162500 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_accuracy": 0.7265949449764009, |
| "eval_loss": 1.3870223760604858, |
| "eval_runtime": 504.7658, |
| "eval_samples_per_second": 76.58, |
| "eval_steps_per_second": 12.764, |
| "step": 162500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.147186819125146e-06, |
| "loss": 1.4349, |
| "step": 163000 |
| }, |
| { |
| "epoch": 2.81, |
| "eval_accuracy": 0.7255884854486213, |
| "eval_loss": 1.3914169073104858, |
| "eval_runtime": 504.8132, |
| "eval_samples_per_second": 76.573, |
| "eval_steps_per_second": 12.763, |
| "step": 163000 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.0034665332942416e-06, |
| "loss": 1.4848, |
| "step": 163500 |
| }, |
| { |
| "epoch": 2.82, |
| "eval_accuracy": 0.726284268379846, |
| "eval_loss": 1.3807812929153442, |
| "eval_runtime": 504.8043, |
| "eval_samples_per_second": 76.574, |
| "eval_steps_per_second": 12.763, |
| "step": 163500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.859746247463337e-06, |
| "loss": 1.3954, |
| "step": 164000 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_accuracy": 0.7267120731862337, |
| "eval_loss": 1.3888977766036987, |
| "eval_runtime": 504.8064, |
| "eval_samples_per_second": 76.574, |
| "eval_steps_per_second": 12.763, |
| "step": 164000 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.7160259616324325e-06, |
| "loss": 1.4382, |
| "step": 164500 |
| }, |
| { |
| "epoch": 2.84, |
| "eval_accuracy": 0.7277240142632113, |
| "eval_loss": 1.3722596168518066, |
| "eval_runtime": 504.8748, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 164500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.572305675801528e-06, |
| "loss": 1.4367, |
| "step": 165000 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_accuracy": 0.7269612344997715, |
| "eval_loss": 1.379455327987671, |
| "eval_runtime": 504.8574, |
| "eval_samples_per_second": 76.566, |
| "eval_steps_per_second": 12.762, |
| "step": 165000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.4285853899706237e-06, |
| "loss": 1.4153, |
| "step": 165500 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_accuracy": 0.7279789329662812, |
| "eval_loss": 1.3802344799041748, |
| "eval_runtime": 504.9805, |
| "eval_samples_per_second": 76.548, |
| "eval_steps_per_second": 12.759, |
| "step": 165500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.2848651041397193e-06, |
| "loss": 1.4371, |
| "step": 166000 |
| }, |
| { |
| "epoch": 2.86, |
| "eval_accuracy": 0.7261079734628949, |
| "eval_loss": 1.3896615505218506, |
| "eval_runtime": 504.6306, |
| "eval_samples_per_second": 76.601, |
| "eval_steps_per_second": 12.768, |
| "step": 166000 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.1411448183088145e-06, |
| "loss": 1.4353, |
| "step": 166500 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_accuracy": 0.7273896695032653, |
| "eval_loss": 1.3873107433319092, |
| "eval_runtime": 504.8741, |
| "eval_samples_per_second": 76.564, |
| "eval_steps_per_second": 12.762, |
| "step": 166500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.99742453247791e-06, |
| "loss": 1.4723, |
| "step": 167000 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_accuracy": 0.7266772006057015, |
| "eval_loss": 1.3845970630645752, |
| "eval_runtime": 504.7421, |
| "eval_samples_per_second": 76.584, |
| "eval_steps_per_second": 12.765, |
| "step": 167000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.8537042466470058e-06, |
| "loss": 1.4015, |
| "step": 167500 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_accuracy": 0.7280572761776449, |
| "eval_loss": 1.3760839700698853, |
| "eval_runtime": 504.8626, |
| "eval_samples_per_second": 76.565, |
| "eval_steps_per_second": 12.762, |
| "step": 167500 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.7099839608161014e-06, |
| "loss": 1.4738, |
| "step": 168000 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_accuracy": 0.726927934116834, |
| "eval_loss": 1.3840312957763672, |
| "eval_runtime": 505.2058, |
| "eval_samples_per_second": 76.513, |
| "eval_steps_per_second": 12.753, |
| "step": 168000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.566263674985197e-06, |
| "loss": 1.4143, |
| "step": 168500 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_accuracy": 0.7292658889699849, |
| "eval_loss": 1.3763234615325928, |
| "eval_runtime": 505.0164, |
| "eval_samples_per_second": 76.542, |
| "eval_steps_per_second": 12.758, |
| "step": 168500 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.4225433891542924e-06, |
| "loss": 1.3964, |
| "step": 169000 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_accuracy": 0.7272445858768465, |
| "eval_loss": 1.381569504737854, |
| "eval_runtime": 504.7595, |
| "eval_samples_per_second": 76.581, |
| "eval_steps_per_second": 12.764, |
| "step": 169000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.2788231033233879e-06, |
| "loss": 1.4515, |
| "step": 169500 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_accuracy": 0.7290772443962044, |
| "eval_loss": 1.3675845861434937, |
| "eval_runtime": 504.8663, |
| "eval_samples_per_second": 76.565, |
| "eval_steps_per_second": 12.762, |
| "step": 169500 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.1351028174924835e-06, |
| "loss": 1.4396, |
| "step": 170000 |
| }, |
| { |
| "epoch": 2.93, |
| "eval_accuracy": 0.7302622633346382, |
| "eval_loss": 1.374088168144226, |
| "eval_runtime": 504.92, |
| "eval_samples_per_second": 76.557, |
| "eval_steps_per_second": 12.76, |
| "step": 170000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.91382531661579e-07, |
| "loss": 1.4024, |
| "step": 170500 |
| }, |
| { |
| "epoch": 2.94, |
| "eval_accuracy": 0.7294072935772237, |
| "eval_loss": 1.3755673170089722, |
| "eval_runtime": 504.9173, |
| "eval_samples_per_second": 76.557, |
| "eval_steps_per_second": 12.761, |
| "step": 170500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.476622458306745e-07, |
| "loss": 1.4604, |
| "step": 171000 |
| }, |
| { |
| "epoch": 2.95, |
| "eval_accuracy": 0.7277957717206553, |
| "eval_loss": 1.3840378522872925, |
| "eval_runtime": 504.995, |
| "eval_samples_per_second": 76.545, |
| "eval_steps_per_second": 12.759, |
| "step": 171000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.039419599997701e-07, |
| "loss": 1.474, |
| "step": 171500 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_accuracy": 0.7266752921364468, |
| "eval_loss": 1.3847219944000244, |
| "eval_runtime": 504.9123, |
| "eval_samples_per_second": 76.558, |
| "eval_steps_per_second": 12.761, |
| "step": 171500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.602216741688657e-07, |
| "loss": 1.4416, |
| "step": 172000 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_accuracy": 0.727821212507557, |
| "eval_loss": 1.3769787549972534, |
| "eval_runtime": 504.9052, |
| "eval_samples_per_second": 76.559, |
| "eval_steps_per_second": 12.761, |
| "step": 172000 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 4.165013883379611e-07, |
| "loss": 1.4342, |
| "step": 172500 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_accuracy": 0.729509105012374, |
| "eval_loss": 1.3689066171646118, |
| "eval_runtime": 504.8558, |
| "eval_samples_per_second": 76.566, |
| "eval_steps_per_second": 12.762, |
| "step": 172500 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.727811025070567e-07, |
| "loss": 1.4136, |
| "step": 173000 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_accuracy": 0.7274245222104783, |
| "eval_loss": 1.3741812705993652, |
| "eval_runtime": 508.6473, |
| "eval_samples_per_second": 75.996, |
| "eval_steps_per_second": 12.667, |
| "step": 173000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.2906081667615222e-07, |
| "loss": 1.412, |
| "step": 173500 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_accuracy": 0.7297507852367252, |
| "eval_loss": 1.3732599020004272, |
| "eval_runtime": 505.3141, |
| "eval_samples_per_second": 76.497, |
| "eval_steps_per_second": 12.75, |
| "step": 173500 |
| } |
| ], |
| "max_steps": 173949, |
| "num_train_epochs": 3, |
| "total_flos": 4.57513874276067e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|