{
  "best_metric": 61.73204279164543,
  "best_model_checkpoint": "outputs/bert-tiny/gd_seqz/mnli_21/checkpoint-69771",
  "epoch": 3.0,
  "global_step": 69771,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 1.9856673976293877e-05,
      "loss": 1.096,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9713347952587752e-05,
      "loss": 1.0505,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9570021928881627e-05,
      "loss": 1.0032,
      "step": 1500
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9426695905175502e-05,
      "loss": 0.988,
      "step": 2000
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.928336988146938e-05,
      "loss": 0.9801,
      "step": 2500
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9140043857763256e-05,
      "loss": 0.9722,
      "step": 3000
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.899671783405713e-05,
      "loss": 0.9591,
      "step": 3500
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8853391810351007e-05,
      "loss": 0.9524,
      "step": 4000
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8710065786644882e-05,
      "loss": 0.9486,
      "step": 4500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.856673976293876e-05,
      "loss": 0.9414,
      "step": 5000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8423413739232636e-05,
      "loss": 0.9409,
      "step": 5500
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.8280087715526507e-05,
      "loss": 0.9317,
      "step": 6000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8136761691820386e-05,
      "loss": 0.9281,
      "step": 6500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.799343566811426e-05,
      "loss": 0.9224,
      "step": 7000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.7850109644408136e-05,
      "loss": 0.917,
      "step": 7500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.770678362070201e-05,
      "loss": 0.9202,
      "step": 8000
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.7563457596995887e-05,
      "loss": 0.9124,
      "step": 8500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.7420131573289765e-05,
      "loss": 0.9116,
      "step": 9000
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.727680554958364e-05,
      "loss": 0.9044,
      "step": 9500
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.7133479525877516e-05,
      "loss": 0.9049,
      "step": 10000
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.699015350217139e-05,
      "loss": 0.8855,
      "step": 10500
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.6846827478465266e-05,
      "loss": 0.8977,
      "step": 11000
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.670350145475914e-05,
      "loss": 0.8943,
      "step": 11500
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.656017543105302e-05,
      "loss": 0.8939,
      "step": 12000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.641684940734689e-05,
      "loss": 0.8958,
      "step": 12500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.6273523383640767e-05,
      "loss": 0.8785,
      "step": 13000
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.6130197359934645e-05,
      "loss": 0.8787,
      "step": 13500
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.598687133622852e-05,
      "loss": 0.8742,
      "step": 14000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.5843545312522396e-05,
      "loss": 0.8725,
      "step": 14500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.570021928881627e-05,
      "loss": 0.8735,
      "step": 15000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5556893265110146e-05,
      "loss": 0.8771,
      "step": 15500
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5413567241404025e-05,
      "loss": 0.8684,
      "step": 16000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.52702412176979e-05,
      "loss": 0.8755,
      "step": 16500
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.5126915193991773e-05,
      "loss": 0.871,
      "step": 17000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.498358917028565e-05,
      "loss": 0.8695,
      "step": 17500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4840263146579526e-05,
      "loss": 0.8586,
      "step": 18000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.46969371228734e-05,
      "loss": 0.8567,
      "step": 18500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.4553611099167278e-05,
      "loss": 0.8578,
      "step": 19000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4410285075461153e-05,
      "loss": 0.8549,
      "step": 19500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.426695905175503e-05,
      "loss": 0.8492,
      "step": 20000
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.4123633028048905e-05,
      "loss": 0.8511,
      "step": 20500
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.3980307004342778e-05,
      "loss": 0.8457,
      "step": 21000
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3836980980636657e-05,
      "loss": 0.8395,
      "step": 21500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.369365495693053e-05,
      "loss": 0.8439,
      "step": 22000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.3550328933224406e-05,
      "loss": 0.8396,
      "step": 22500
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.3407002909518283e-05,
      "loss": 0.8333,
      "step": 23000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 58.430973000509425,
      "eval_combined_score": 58.430973000509425,
      "eval_loss": 0.9210277795791626,
      "eval_runtime": 5.4726,
      "eval_samples_per_second": 1793.494,
      "eval_steps_per_second": 7.126,
      "step": 23257
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3263676885812158e-05,
      "loss": 0.8316,
      "step": 23500
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3120350862106033e-05,
      "loss": 0.8371,
      "step": 24000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.297702483839991e-05,
      "loss": 0.8256,
      "step": 24500
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.2833698814693785e-05,
      "loss": 0.8267,
      "step": 25000
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.2690372790987662e-05,
      "loss": 0.8207,
      "step": 25500
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2547046767281537e-05,
      "loss": 0.8202,
      "step": 26000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.2403720743575412e-05,
      "loss": 0.8253,
      "step": 26500
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.2260394719869289e-05,
      "loss": 0.8129,
      "step": 27000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.2117068696163163e-05,
      "loss": 0.8134,
      "step": 27500
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.1973742672457038e-05,
      "loss": 0.8079,
      "step": 28000
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1830416648750915e-05,
      "loss": 0.7972,
      "step": 28500
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.168709062504479e-05,
      "loss": 0.8175,
      "step": 29000
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.1543764601338665e-05,
      "loss": 0.8103,
      "step": 29500
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1400438577632542e-05,
      "loss": 0.8058,
      "step": 30000
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.1257112553926417e-05,
      "loss": 0.7969,
      "step": 30500
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.1113786530220294e-05,
      "loss": 0.8145,
      "step": 31000
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.097046050651417e-05,
      "loss": 0.8072,
      "step": 31500
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.0827134482808044e-05,
      "loss": 0.7985,
      "step": 32000
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.0683808459101921e-05,
      "loss": 0.8077,
      "step": 32500
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.0540482435395797e-05,
      "loss": 0.7866,
      "step": 33000
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.039715641168967e-05,
      "loss": 0.7917,
      "step": 33500
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0253830387983547e-05,
      "loss": 0.803,
      "step": 34000
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0110504364277422e-05,
      "loss": 0.7915,
      "step": 34500
    },
    {
      "epoch": 1.5,
      "learning_rate": 9.967178340571299e-06,
      "loss": 0.796,
      "step": 35000
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.823852316865174e-06,
      "loss": 0.7903,
      "step": 35500
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.68052629315905e-06,
      "loss": 0.7946,
      "step": 36000
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.537200269452925e-06,
      "loss": 0.7922,
      "step": 36500
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.393874245746801e-06,
      "loss": 0.7843,
      "step": 37000
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.250548222040677e-06,
      "loss": 0.7828,
      "step": 37500
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.107222198334552e-06,
      "loss": 0.7869,
      "step": 38000
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.963896174628429e-06,
      "loss": 0.7852,
      "step": 38500
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.820570150922304e-06,
      "loss": 0.7818,
      "step": 39000
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.677244127216179e-06,
      "loss": 0.7865,
      "step": 39500
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.533918103510054e-06,
      "loss": 0.7753,
      "step": 40000
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.390592079803931e-06,
      "loss": 0.7828,
      "step": 40500
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.247266056097806e-06,
      "loss": 0.776,
      "step": 41000
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.103940032391682e-06,
      "loss": 0.7762,
      "step": 41500
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.960614008685558e-06,
      "loss": 0.7881,
      "step": 42000
    },
    {
      "epoch": 1.83,
      "learning_rate": 7.817287984979434e-06,
      "loss": 0.7836,
      "step": 42500
    },
    {
      "epoch": 1.85,
      "learning_rate": 7.673961961273309e-06,
      "loss": 0.7757,
      "step": 43000
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.530635937567185e-06,
      "loss": 0.7645,
      "step": 43500
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.38730991386106e-06,
      "loss": 0.7721,
      "step": 44000
    },
    {
      "epoch": 1.91,
      "learning_rate": 7.243983890154936e-06,
      "loss": 0.7725,
      "step": 44500
    },
    {
      "epoch": 1.93,
      "learning_rate": 7.100657866448811e-06,
      "loss": 0.7741,
      "step": 45000
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.957331842742687e-06,
      "loss": 0.7829,
      "step": 45500
    },
    {
      "epoch": 1.98,
      "learning_rate": 6.814005819036563e-06,
      "loss": 0.7746,
      "step": 46000
    },
    {
      "epoch": 2.0,
      "learning_rate": 6.670679795330439e-06,
      "loss": 0.7685,
      "step": 46500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 61.29393785022924,
      "eval_combined_score": 61.29393785022924,
      "eval_loss": 0.8761915564537048,
      "eval_runtime": 5.5218,
      "eval_samples_per_second": 1777.495,
      "eval_steps_per_second": 7.063,
      "step": 46514
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.527353771624314e-06,
      "loss": 0.7637,
      "step": 47000
    },
    {
      "epoch": 2.04,
      "learning_rate": 6.38402774791819e-06,
      "loss": 0.755,
      "step": 47500
    },
    {
      "epoch": 2.06,
      "learning_rate": 6.240701724212066e-06,
      "loss": 0.7487,
      "step": 48000
    },
    {
      "epoch": 2.09,
      "learning_rate": 6.097375700505941e-06,
      "loss": 0.7652,
      "step": 48500
    },
    {
      "epoch": 2.11,
      "learning_rate": 5.954049676799817e-06,
      "loss": 0.7661,
      "step": 49000
    },
    {
      "epoch": 2.13,
      "learning_rate": 5.810723653093693e-06,
      "loss": 0.748,
      "step": 49500
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.667397629387569e-06,
      "loss": 0.7651,
      "step": 50000
    },
    {
      "epoch": 2.17,
      "learning_rate": 5.5240716056814435e-06,
      "loss": 0.7529,
      "step": 50500
    },
    {
      "epoch": 2.19,
      "learning_rate": 5.3807455819753195e-06,
      "loss": 0.7579,
      "step": 51000
    },
    {
      "epoch": 2.21,
      "learning_rate": 5.237419558269196e-06,
      "loss": 0.756,
      "step": 51500
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.094093534563072e-06,
      "loss": 0.7665,
      "step": 52000
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.950767510856947e-06,
      "loss": 0.7613,
      "step": 52500
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.807441487150822e-06,
      "loss": 0.7516,
      "step": 53000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.664115463444698e-06,
      "loss": 0.7505,
      "step": 53500
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.520789439738574e-06,
      "loss": 0.7423,
      "step": 54000
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.377463416032449e-06,
      "loss": 0.7586,
      "step": 54500
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.234137392326325e-06,
      "loss": 0.7452,
      "step": 55000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.0908113686202005e-06,
      "loss": 0.7536,
      "step": 55500
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.9474853449140765e-06,
      "loss": 0.7465,
      "step": 56000
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.8041593212079517e-06,
      "loss": 0.7481,
      "step": 56500
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.6608332975018278e-06,
      "loss": 0.7473,
      "step": 57000
    },
    {
      "epoch": 2.47,
      "learning_rate": 3.5175072737957034e-06,
      "loss": 0.7572,
      "step": 57500
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.3741812500895794e-06,
      "loss": 0.7476,
      "step": 58000
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.2308552263834546e-06,
      "loss": 0.7564,
      "step": 58500
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.0875292026773302e-06,
      "loss": 0.7488,
      "step": 59000
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.9442031789712063e-06,
      "loss": 0.7495,
      "step": 59500
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.8008771552650815e-06,
      "loss": 0.7481,
      "step": 60000
    },
    {
      "epoch": 2.6,
      "learning_rate": 2.6575511315589575e-06,
      "loss": 0.7429,
      "step": 60500
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.5142251078528327e-06,
      "loss": 0.748,
      "step": 61000
    },
    {
      "epoch": 2.64,
      "learning_rate": 2.3708990841467087e-06,
      "loss": 0.7483,
      "step": 61500
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.2275730604405844e-06,
      "loss": 0.7496,
      "step": 62000
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.08424703673446e-06,
      "loss": 0.7451,
      "step": 62500
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.9409210130283356e-06,
      "loss": 0.7466,
      "step": 63000
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.7975949893222114e-06,
      "loss": 0.7425,
      "step": 63500
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.654268965616087e-06,
      "loss": 0.7525,
      "step": 64000
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.5109429419099629e-06,
      "loss": 0.7459,
      "step": 64500
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.3676169182038385e-06,
      "loss": 0.7492,
      "step": 65000
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.224290894497714e-06,
      "loss": 0.7443,
      "step": 65500
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.0809648707915897e-06,
      "loss": 0.7508,
      "step": 66000
    },
    {
      "epoch": 2.86,
      "learning_rate": 9.376388470854654e-07,
      "loss": 0.7421,
      "step": 66500
    },
    {
      "epoch": 2.88,
      "learning_rate": 7.94312823379341e-07,
      "loss": 0.7547,
      "step": 67000
    },
    {
      "epoch": 2.9,
      "learning_rate": 6.509867996732167e-07,
      "loss": 0.7373,
      "step": 67500
    },
    {
      "epoch": 2.92,
      "learning_rate": 5.076607759670924e-07,
      "loss": 0.7474,
      "step": 68000
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.6433475226096806e-07,
      "loss": 0.7405,
      "step": 68500
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.2100872855484373e-07,
      "loss": 0.7406,
      "step": 69000
    },
    {
      "epoch": 2.99,
      "learning_rate": 7.76827048487194e-08,
      "loss": 0.7424,
      "step": 69500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 61.73204279164543,
      "eval_combined_score": 61.73204279164543,
      "eval_loss": 0.8748815059661865,
      "eval_runtime": 5.4694,
      "eval_samples_per_second": 1794.527,
      "eval_steps_per_second": 7.131,
      "step": 69771
    },
    {
      "epoch": 3.0,
      "step": 69771,
      "total_flos": 1418687059046400.0,
      "train_loss": 0.8166523379133837,
      "train_runtime": 2409.4346,
      "train_samples_per_second": 926.607,
      "train_steps_per_second": 28.957
    }
  ],
  "max_steps": 69771,
  "num_train_epochs": 3,
  "total_flos": 1418687059046400.0,
  "trial_name": null,
  "trial_params": null
}