{
  "best_metric": 1.7163910865783691,
  "best_model_checkpoint": "finetuning/output/electra-base-finetuned_xe_ey_fae/checkpoint-19000",
  "epoch": 2.642433616911575,
  "eval_steps": 500,
  "global_step": 20500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 9.786027326630576e-06,
      "loss": 2.5359,
      "step": 500
    },
    {
      "epoch": 0.06,
      "eval_accuracy": 0.6227738650589344,
      "eval_loss": 2.0696377754211426,
      "eval_runtime": 35.9348,
      "eval_samples_per_second": 432.033,
      "eval_steps_per_second": 54.015,
      "step": 500
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.571195325255651e-06,
      "loss": 2.1807,
      "step": 1000
    },
    {
      "epoch": 0.13,
      "eval_accuracy": 0.6352025430222344,
      "eval_loss": 1.9677125215530396,
      "eval_runtime": 36.0518,
      "eval_samples_per_second": 430.631,
      "eval_steps_per_second": 53.839,
      "step": 1000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.356363323880726e-06,
      "loss": 2.1028,
      "step": 1500
    },
    {
      "epoch": 0.19,
      "eval_accuracy": 0.641511887420089,
      "eval_loss": 1.9191973209381104,
      "eval_runtime": 36.3057,
      "eval_samples_per_second": 427.619,
      "eval_steps_per_second": 53.463,
      "step": 1500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.141531322505801e-06,
      "loss": 2.0658,
      "step": 2000
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.6450855805600152,
      "eval_loss": 1.892332673072815,
      "eval_runtime": 36.0414,
      "eval_samples_per_second": 430.754,
      "eval_steps_per_second": 53.855,
      "step": 2000
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.926699321130876e-06,
      "loss": 2.0426,
      "step": 2500
    },
    {
      "epoch": 0.32,
      "eval_accuracy": 0.6478244526689617,
      "eval_loss": 1.8699322938919067,
      "eval_runtime": 36.0808,
      "eval_samples_per_second": 430.284,
      "eval_steps_per_second": 53.796,
      "step": 2500
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.71186731975595e-06,
      "loss": 2.0133,
      "step": 3000
    },
    {
      "epoch": 0.39,
      "eval_accuracy": 0.6489956025492812,
      "eval_loss": 1.8580025434494019,
      "eval_runtime": 36.4103,
      "eval_samples_per_second": 426.391,
      "eval_steps_per_second": 53.309,
      "step": 3000
    },
    {
      "epoch": 0.45,
      "learning_rate": 8.497464982383777e-06,
      "loss": 1.9978,
      "step": 3500
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.6506941121373793,
      "eval_loss": 1.8410626649856567,
      "eval_runtime": 36.1001,
      "eval_samples_per_second": 430.054,
      "eval_steps_per_second": 53.767,
      "step": 3500
    },
    {
      "epoch": 0.52,
      "learning_rate": 8.282632981008852e-06,
      "loss": 1.9862,
      "step": 4000
    },
    {
      "epoch": 0.52,
      "eval_accuracy": 0.6524157728010056,
      "eval_loss": 1.8297162055969238,
      "eval_runtime": 36.1019,
      "eval_samples_per_second": 430.032,
      "eval_steps_per_second": 53.764,
      "step": 4000
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.068230643636676e-06,
      "loss": 1.9745,
      "step": 4500
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.6545309828179512,
      "eval_loss": 1.8154131174087524,
      "eval_runtime": 36.0473,
      "eval_samples_per_second": 430.684,
      "eval_steps_per_second": 53.846,
      "step": 4500
    },
    {
      "epoch": 0.64,
      "learning_rate": 7.853398642261751e-06,
      "loss": 1.9606,
      "step": 5000
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.6556562172935413,
      "eval_loss": 1.8056122064590454,
      "eval_runtime": 36.2735,
      "eval_samples_per_second": 427.999,
      "eval_steps_per_second": 53.51,
      "step": 5000
    },
    {
      "epoch": 0.71,
      "learning_rate": 7.638996304889577e-06,
      "loss": 1.9486,
      "step": 5500
    },
    {
      "epoch": 0.71,
      "eval_accuracy": 0.6560147022088998,
      "eval_loss": 1.8032631874084473,
      "eval_runtime": 36.0743,
      "eval_samples_per_second": 430.362,
      "eval_steps_per_second": 53.806,
      "step": 5500
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.424164303514653e-06,
      "loss": 1.9416,
      "step": 6000
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.6580551701728226,
      "eval_loss": 1.7894020080566406,
      "eval_runtime": 36.1654,
      "eval_samples_per_second": 429.278,
      "eval_steps_per_second": 53.67,
      "step": 6000
    },
    {
      "epoch": 0.84,
      "learning_rate": 7.209332302139728e-06,
      "loss": 1.9279,
      "step": 6500
    },
    {
      "epoch": 0.84,
      "eval_accuracy": 0.658183904138693,
      "eval_loss": 1.7848395109176636,
      "eval_runtime": 36.1173,
      "eval_samples_per_second": 429.849,
      "eval_steps_per_second": 53.742,
      "step": 6500
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.9945003007648025e-06,
      "loss": 1.9196,
      "step": 7000
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.6592785508757635,
      "eval_loss": 1.7786365747451782,
      "eval_runtime": 36.2739,
      "eval_samples_per_second": 427.994,
      "eval_steps_per_second": 53.51,
      "step": 7000
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.779668299389877e-06,
      "loss": 1.9168,
      "step": 7500
    },
    {
      "epoch": 0.97,
      "eval_accuracy": 0.6591822827938671,
      "eval_loss": 1.7761502265930176,
      "eval_runtime": 36.6269,
      "eval_samples_per_second": 423.868,
      "eval_steps_per_second": 52.994,
      "step": 7500
    },
    {
      "epoch": 1.03,
      "learning_rate": 6.564836298014953e-06,
      "loss": 1.9123,
      "step": 8000
    },
    {
      "epoch": 1.03,
      "eval_accuracy": 0.6596853436378691,
      "eval_loss": 1.7743586301803589,
      "eval_runtime": 36.1389,
      "eval_samples_per_second": 429.592,
      "eval_steps_per_second": 53.709,
      "step": 8000
    },
    {
      "epoch": 1.1,
      "learning_rate": 6.350004296640028e-06,
      "loss": 1.8942,
      "step": 8500
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.6610733402069573,
      "eval_loss": 1.7624884843826294,
      "eval_runtime": 36.0335,
      "eval_samples_per_second": 430.849,
      "eval_steps_per_second": 53.867,
      "step": 8500
    },
    {
      "epoch": 1.16,
      "learning_rate": 6.135172295265103e-06,
      "loss": 1.9053,
      "step": 9000
    },
    {
      "epoch": 1.16,
      "eval_accuracy": 0.662326418448169,
      "eval_loss": 1.7575763463974,
      "eval_runtime": 36.357,
      "eval_samples_per_second": 427.016,
      "eval_steps_per_second": 53.387,
      "step": 9000
    },
    {
      "epoch": 1.22,
      "learning_rate": 5.9203402938901785e-06,
      "loss": 1.898,
      "step": 9500
    },
    {
      "epoch": 1.22,
      "eval_accuracy": 0.6620202516286527,
      "eval_loss": 1.758821725845337,
      "eval_runtime": 36.1788,
      "eval_samples_per_second": 429.118,
      "eval_steps_per_second": 53.65,
      "step": 9500
    },
    {
      "epoch": 1.29,
      "learning_rate": 5.705508292515254e-06,
      "loss": 1.8896,
      "step": 10000
    },
    {
      "epoch": 1.29,
      "eval_accuracy": 0.6625110635175566,
      "eval_loss": 1.7518248558044434,
      "eval_runtime": 36.1554,
      "eval_samples_per_second": 429.396,
      "eval_steps_per_second": 53.685,
      "step": 10000
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.490676291140329e-06,
      "loss": 1.8796,
      "step": 10500
    },
    {
      "epoch": 1.35,
      "eval_accuracy": 0.661861605044167,
      "eval_loss": 1.755669116973877,
      "eval_runtime": 36.1342,
      "eval_samples_per_second": 429.648,
      "eval_steps_per_second": 53.716,
      "step": 10500
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.275844289765404e-06,
      "loss": 1.8838,
      "step": 11000
    },
    {
      "epoch": 1.42,
      "eval_accuracy": 0.6628265417860324,
      "eval_loss": 1.7511305809020996,
      "eval_runtime": 36.0117,
      "eval_samples_per_second": 431.11,
      "eval_steps_per_second": 53.899,
      "step": 11000
    },
    {
      "epoch": 1.48,
      "learning_rate": 5.061441952393229e-06,
      "loss": 1.8869,
      "step": 11500
    },
    {
      "epoch": 1.48,
      "eval_accuracy": 0.6639589859082099,
      "eval_loss": 1.7436553239822388,
      "eval_runtime": 36.239,
      "eval_samples_per_second": 428.406,
      "eval_steps_per_second": 53.561,
      "step": 11500
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.846609951018304e-06,
      "loss": 1.8756,
      "step": 12000
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.6641049700653768,
      "eval_loss": 1.742509126663208,
      "eval_runtime": 36.1208,
      "eval_samples_per_second": 429.808,
      "eval_steps_per_second": 53.736,
      "step": 12000
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.631777949643379e-06,
      "loss": 1.8775,
      "step": 12500
    },
    {
      "epoch": 1.61,
      "eval_accuracy": 0.6640769398921977,
      "eval_loss": 1.7409285306930542,
      "eval_runtime": 36.1893,
      "eval_samples_per_second": 428.994,
      "eval_steps_per_second": 53.635,
      "step": 12500
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.416945948268455e-06,
      "loss": 1.8757,
      "step": 13000
    },
    {
      "epoch": 1.68,
      "eval_accuracy": 0.664925807451965,
      "eval_loss": 1.7372323274612427,
      "eval_runtime": 36.0287,
      "eval_samples_per_second": 430.906,
      "eval_steps_per_second": 53.874,
      "step": 13000
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.20254361089628e-06,
      "loss": 1.8616,
      "step": 13500
    },
    {
      "epoch": 1.74,
      "eval_accuracy": 0.6645522086560093,
      "eval_loss": 1.7387374639511108,
      "eval_runtime": 36.2158,
      "eval_samples_per_second": 428.68,
      "eval_steps_per_second": 53.595,
      "step": 13500
    },
    {
      "epoch": 1.8,
      "learning_rate": 3.987711609521355e-06,
      "loss": 1.8675,
      "step": 14000
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.6648440628084251,
      "eval_loss": 1.7335091829299927,
      "eval_runtime": 36.1693,
      "eval_samples_per_second": 429.231,
      "eval_steps_per_second": 53.664,
      "step": 14000
    },
    {
      "epoch": 1.87,
      "learning_rate": 3.7728796081464296e-06,
      "loss": 1.8725,
      "step": 14500
    },
    {
      "epoch": 1.87,
      "eval_accuracy": 0.6660341443052158,
      "eval_loss": 1.728769302368164,
      "eval_runtime": 36.1159,
      "eval_samples_per_second": 429.866,
      "eval_steps_per_second": 53.744,
      "step": 14500
    },
    {
      "epoch": 1.93,
      "learning_rate": 3.558047606771505e-06,
      "loss": 1.8678,
      "step": 15000
    },
    {
      "epoch": 1.93,
      "eval_accuracy": 0.66591998939469,
      "eval_loss": 1.730508804321289,
      "eval_runtime": 36.0446,
      "eval_samples_per_second": 430.716,
      "eval_steps_per_second": 53.85,
      "step": 15000
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.34321560539658e-06,
      "loss": 1.8611,
      "step": 15500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6666247127717294,
      "eval_loss": 1.7255862951278687,
      "eval_runtime": 36.3664,
      "eval_samples_per_second": 426.905,
      "eval_steps_per_second": 53.373,
      "step": 15500
    },
    {
      "epoch": 2.06,
      "learning_rate": 3.1288132680244054e-06,
      "loss": 1.853,
      "step": 16000
    },
    {
      "epoch": 2.06,
      "eval_accuracy": 0.6661196617167527,
      "eval_loss": 1.7286032438278198,
      "eval_runtime": 36.0258,
      "eval_samples_per_second": 430.941,
      "eval_steps_per_second": 53.878,
      "step": 16000
    },
    {
      "epoch": 2.13,
      "learning_rate": 2.9139812666494803e-06,
      "loss": 1.8487,
      "step": 16500
    },
    {
      "epoch": 2.13,
      "eval_accuracy": 0.6658712718524595,
      "eval_loss": 1.7284834384918213,
      "eval_runtime": 36.2843,
      "eval_samples_per_second": 427.871,
      "eval_steps_per_second": 53.494,
      "step": 16500
    },
    {
      "epoch": 2.19,
      "learning_rate": 2.6991492652745556e-06,
      "loss": 1.8543,
      "step": 17000
    },
    {
      "epoch": 2.19,
      "eval_accuracy": 0.666799617645458,
      "eval_loss": 1.7229472398757935,
      "eval_runtime": 36.1382,
      "eval_samples_per_second": 429.601,
      "eval_steps_per_second": 53.71,
      "step": 17000
    },
    {
      "epoch": 2.26,
      "learning_rate": 2.484317263899631e-06,
      "loss": 1.8519,
      "step": 17500
    },
    {
      "epoch": 2.26,
      "eval_accuracy": 0.6669869788832046,
      "eval_loss": 1.7240232229232788,
      "eval_runtime": 36.2928,
      "eval_samples_per_second": 427.771,
      "eval_steps_per_second": 53.482,
      "step": 17500
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.2694852625247057e-06,
      "loss": 1.851,
      "step": 18000
    },
    {
      "epoch": 2.32,
      "eval_accuracy": 0.6662479933850755,
      "eval_loss": 1.7275055646896362,
      "eval_runtime": 36.0625,
      "eval_samples_per_second": 430.502,
      "eval_steps_per_second": 53.823,
      "step": 18000
    },
    {
      "epoch": 2.38,
      "learning_rate": 2.054653261149781e-06,
      "loss": 1.8547,
      "step": 18500
    },
    {
      "epoch": 2.38,
      "eval_accuracy": 0.6672813284171724,
      "eval_loss": 1.7197449207305908,
      "eval_runtime": 36.3297,
      "eval_samples_per_second": 427.337,
      "eval_steps_per_second": 53.427,
      "step": 18500
    },
    {
      "epoch": 2.45,
      "learning_rate": 1.8398212597748563e-06,
      "loss": 1.8476,
      "step": 19000
    },
    {
      "epoch": 2.45,
      "eval_accuracy": 0.6674510477353122,
      "eval_loss": 1.7163910865783691,
      "eval_runtime": 36.0727,
      "eval_samples_per_second": 430.38,
      "eval_steps_per_second": 53.808,
      "step": 19000
    },
    {
      "epoch": 2.51,
      "learning_rate": 1.6249892583999314e-06,
      "loss": 1.8444,
      "step": 19500
    },
    {
      "epoch": 2.51,
      "eval_accuracy": 0.667601267022319,
      "eval_loss": 1.7213865518569946,
      "eval_runtime": 36.1639,
      "eval_samples_per_second": 429.296,
      "eval_steps_per_second": 53.672,
      "step": 19500
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.4101572570250067e-06,
      "loss": 1.8544,
      "step": 20000
    },
    {
      "epoch": 2.58,
      "eval_accuracy": 0.6668017942219797,
      "eval_loss": 1.7217011451721191,
      "eval_runtime": 36.3821,
      "eval_samples_per_second": 426.721,
      "eval_steps_per_second": 53.35,
      "step": 20000
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.1953252556500817e-06,
      "loss": 1.8491,
      "step": 20500
    },
    {
      "epoch": 2.64,
      "eval_accuracy": 0.6678062285854136,
      "eval_loss": 1.717513918876648,
      "eval_runtime": 36.0279,
      "eval_samples_per_second": 430.916,
      "eval_steps_per_second": 53.875,
      "step": 20500
    },
    {
      "epoch": 2.64,
      "step": 20500,
      "total_flos": 9980146705514496.0,
      "train_loss": 1.9300706578696647,
      "train_runtime": 3479.9405,
      "train_samples_per_second": 107.005,
      "train_steps_per_second": 6.688
    }
  ],
  "logging_steps": 500,
  "max_steps": 23274,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 9980146705514496.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}