| { | |
| "best_metric": 0.8369131635471003, | |
| "best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_nq.train.v7.0_nq.dev.v7.0_rlnq_title/checkpoint-76000", | |
| "epoch": 0.9999910574558462, | |
| "global_step": 83868, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.904755091334001e-05, | |
| "loss": 0.5261, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.7910401963792575, | |
| "eval_loss": 0.4569447934627533, | |
| "eval_runtime": 61.7181, | |
| "eval_samples_per_second": 211.219, | |
| "eval_steps_per_second": 26.41, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8093671006820245e-05, | |
| "loss": 0.4674, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.7984811291807303, | |
| "eval_loss": 0.42789292335510254, | |
| "eval_runtime": 62.0897, | |
| "eval_samples_per_second": 209.954, | |
| "eval_steps_per_second": 26.252, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.7140744980206995e-05, | |
| "loss": 0.4453, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.804311138386008, | |
| "eval_loss": 0.4111127257347107, | |
| "eval_runtime": 73.971, | |
| "eval_samples_per_second": 176.231, | |
| "eval_steps_per_second": 22.036, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.6186865073687225e-05, | |
| "loss": 0.4247, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.8105247008284749, | |
| "eval_loss": 0.4133109748363495, | |
| "eval_runtime": 67.9092, | |
| "eval_samples_per_second": 191.962, | |
| "eval_steps_per_second": 24.003, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.5233462107120716e-05, | |
| "loss": 0.4097, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.8149739183798711, | |
| "eval_loss": 0.411438524723053, | |
| "eval_runtime": 76.7711, | |
| "eval_samples_per_second": 169.803, | |
| "eval_steps_per_second": 21.232, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.4280059140554206e-05, | |
| "loss": 0.3915, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.8158944461491255, | |
| "eval_loss": 0.4367925524711609, | |
| "eval_runtime": 73.8165, | |
| "eval_samples_per_second": 176.6, | |
| "eval_steps_per_second": 22.082, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.3326179234034435e-05, | |
| "loss": 0.3783, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.8168916845658177, | |
| "eval_loss": 0.43372228741645813, | |
| "eval_runtime": 74.7748, | |
| "eval_samples_per_second": 174.337, | |
| "eval_steps_per_second": 21.799, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.2373014737444558e-05, | |
| "loss": 0.365, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.8242559067198527, | |
| "eval_loss": 0.40461644530296326, | |
| "eval_runtime": 77.9179, | |
| "eval_samples_per_second": 167.304, | |
| "eval_steps_per_second": 20.919, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.1419373300901418e-05, | |
| "loss": 0.3477, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.828628413623811, | |
| "eval_loss": 0.4262824058532715, | |
| "eval_runtime": 74.7614, | |
| "eval_samples_per_second": 174.368, | |
| "eval_steps_per_second": 21.803, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.0465731864358278e-05, | |
| "loss": 0.3341, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.8258668303160479, | |
| "eval_loss": 0.40725135803222656, | |
| "eval_runtime": 77.1145, | |
| "eval_samples_per_second": 169.047, | |
| "eval_steps_per_second": 21.137, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.51209042781514e-06, | |
| "loss": 0.3214, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.8284749923289353, | |
| "eval_loss": 0.4358045756816864, | |
| "eval_runtime": 79.2128, | |
| "eval_samples_per_second": 164.569, | |
| "eval_steps_per_second": 20.577, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.55868746124863e-06, | |
| "loss": 0.3112, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.8285517029763732, | |
| "eval_loss": 0.4215088188648224, | |
| "eval_runtime": 76.3765, | |
| "eval_samples_per_second": 170.681, | |
| "eval_steps_per_second": 21.342, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 7.6050460247054905e-06, | |
| "loss": 0.2996, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.8277845965019944, | |
| "eval_loss": 0.4198075234889984, | |
| "eval_runtime": 75.7616, | |
| "eval_samples_per_second": 172.066, | |
| "eval_steps_per_second": 21.515, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 6.651643058138981e-06, | |
| "loss": 0.2863, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.8310831543418227, | |
| "eval_loss": 0.4417212903499603, | |
| "eval_runtime": 80.0618, | |
| "eval_samples_per_second": 162.824, | |
| "eval_steps_per_second": 20.359, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 5.698240091572471e-06, | |
| "loss": 0.2739, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.8222614298864682, | |
| "eval_loss": 0.5145458579063416, | |
| "eval_runtime": 78.9172, | |
| "eval_samples_per_second": 165.186, | |
| "eval_steps_per_second": 20.655, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.744598655029332e-06, | |
| "loss": 0.2656, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.8265572261429887, | |
| "eval_loss": 0.5052666068077087, | |
| "eval_runtime": 77.2292, | |
| "eval_samples_per_second": 168.796, | |
| "eval_steps_per_second": 21.106, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.7911956884628224e-06, | |
| "loss": 0.2558, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.8353022399509051, | |
| "eval_loss": 0.47119611501693726, | |
| "eval_runtime": 79.1339, | |
| "eval_samples_per_second": 164.733, | |
| "eval_steps_per_second": 20.598, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.8377927218963137e-06, | |
| "loss": 0.2467, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.8312365756366984, | |
| "eval_loss": 0.5034319162368774, | |
| "eval_runtime": 82.6755, | |
| "eval_samples_per_second": 157.677, | |
| "eval_steps_per_second": 19.716, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.8839128153765443e-06, | |
| "loss": 0.2342, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.8369131635471003, | |
| "eval_loss": 0.4584212303161621, | |
| "eval_runtime": 81.5904, | |
| "eval_samples_per_second": 159.774, | |
| "eval_steps_per_second": 19.978, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 9.305098488100349e-07, | |
| "loss": 0.23, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.8341515802393372, | |
| "eval_loss": 0.49471235275268555, | |
| "eval_runtime": 87.9548, | |
| "eval_samples_per_second": 148.213, | |
| "eval_steps_per_second": 18.532, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 83868, | |
| "total_flos": 7.653345366712497e+17, | |
| "train_loss": 0.3355050985067308, | |
| "train_runtime": 56077.3175, | |
| "train_samples_per_second": 47.859, | |
| "train_steps_per_second": 1.496 | |
| } | |
| ], | |
| "max_steps": 83868, | |
| "num_train_epochs": 1, | |
| "total_flos": 7.653345366712497e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |