| { |
| "best_metric": 0.8369131635471003, |
| "best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_nq.train.v7.0_nq.dev.v7.0_rlnq_title/checkpoint-76000", |
| "epoch": 0.9999910574558462, |
| "global_step": 83868, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.904755091334001e-05, |
| "loss": 0.5261, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 0.7910401963792575, |
| "eval_loss": 0.4569447934627533, |
| "eval_runtime": 61.7181, |
| "eval_samples_per_second": 211.219, |
| "eval_steps_per_second": 26.41, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.8093671006820245e-05, |
| "loss": 0.4674, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_accuracy": 0.7984811291807303, |
| "eval_loss": 0.42789292335510254, |
| "eval_runtime": 62.0897, |
| "eval_samples_per_second": 209.954, |
| "eval_steps_per_second": 26.252, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.7140744980206995e-05, |
| "loss": 0.4453, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_accuracy": 0.804311138386008, |
| "eval_loss": 0.4111127257347107, |
| "eval_runtime": 73.971, |
| "eval_samples_per_second": 176.231, |
| "eval_steps_per_second": 22.036, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.6186865073687225e-05, |
| "loss": 0.4247, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_accuracy": 0.8105247008284749, |
| "eval_loss": 0.4133109748363495, |
| "eval_runtime": 67.9092, |
| "eval_samples_per_second": 191.962, |
| "eval_steps_per_second": 24.003, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.5233462107120716e-05, |
| "loss": 0.4097, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 0.8149739183798711, |
| "eval_loss": 0.411438524723053, |
| "eval_runtime": 76.7711, |
| "eval_samples_per_second": 169.803, |
| "eval_steps_per_second": 21.232, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.4280059140554206e-05, |
| "loss": 0.3915, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.8158944461491255, |
| "eval_loss": 0.4367925524711609, |
| "eval_runtime": 73.8165, |
| "eval_samples_per_second": 176.6, |
| "eval_steps_per_second": 22.082, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.3326179234034435e-05, |
| "loss": 0.3783, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.8168916845658177, |
| "eval_loss": 0.43372228741645813, |
| "eval_runtime": 74.7748, |
| "eval_samples_per_second": 174.337, |
| "eval_steps_per_second": 21.799, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.2373014737444558e-05, |
| "loss": 0.365, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_accuracy": 0.8242559067198527, |
| "eval_loss": 0.40461644530296326, |
| "eval_runtime": 77.9179, |
| "eval_samples_per_second": 167.304, |
| "eval_steps_per_second": 20.919, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.1419373300901418e-05, |
| "loss": 0.3477, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_accuracy": 0.828628413623811, |
| "eval_loss": 0.4262824058532715, |
| "eval_runtime": 74.7614, |
| "eval_samples_per_second": 174.368, |
| "eval_steps_per_second": 21.803, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.0465731864358278e-05, |
| "loss": 0.3341, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_accuracy": 0.8258668303160479, |
| "eval_loss": 0.40725135803222656, |
| "eval_runtime": 77.1145, |
| "eval_samples_per_second": 169.047, |
| "eval_steps_per_second": 21.137, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.51209042781514e-06, |
| "loss": 0.3214, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_accuracy": 0.8284749923289353, |
| "eval_loss": 0.4358045756816864, |
| "eval_runtime": 79.2128, |
| "eval_samples_per_second": 164.569, |
| "eval_steps_per_second": 20.577, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.55868746124863e-06, |
| "loss": 0.3112, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 0.8285517029763732, |
| "eval_loss": 0.4215088188648224, |
| "eval_runtime": 76.3765, |
| "eval_samples_per_second": 170.681, |
| "eval_steps_per_second": 21.342, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 7.6050460247054905e-06, |
| "loss": 0.2996, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_accuracy": 0.8277845965019944, |
| "eval_loss": 0.4198075234889984, |
| "eval_runtime": 75.7616, |
| "eval_samples_per_second": 172.066, |
| "eval_steps_per_second": 21.515, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 6.651643058138981e-06, |
| "loss": 0.2863, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_accuracy": 0.8310831543418227, |
| "eval_loss": 0.4417212903499603, |
| "eval_runtime": 80.0618, |
| "eval_samples_per_second": 162.824, |
| "eval_steps_per_second": 20.359, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.698240091572471e-06, |
| "loss": 0.2739, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_accuracy": 0.8222614298864682, |
| "eval_loss": 0.5145458579063416, |
| "eval_runtime": 78.9172, |
| "eval_samples_per_second": 165.186, |
| "eval_steps_per_second": 20.655, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.744598655029332e-06, |
| "loss": 0.2656, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_accuracy": 0.8265572261429887, |
| "eval_loss": 0.5052666068077087, |
| "eval_runtime": 77.2292, |
| "eval_samples_per_second": 168.796, |
| "eval_steps_per_second": 21.106, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.7911956884628224e-06, |
| "loss": 0.2558, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 0.8353022399509051, |
| "eval_loss": 0.47119611501693726, |
| "eval_runtime": 79.1339, |
| "eval_samples_per_second": 164.733, |
| "eval_steps_per_second": 20.598, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 2.8377927218963137e-06, |
| "loss": 0.2467, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 0.8312365756366984, |
| "eval_loss": 0.5034319162368774, |
| "eval_runtime": 82.6755, |
| "eval_samples_per_second": 157.677, |
| "eval_steps_per_second": 19.716, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 1.8839128153765443e-06, |
| "loss": 0.2342, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_accuracy": 0.8369131635471003, |
| "eval_loss": 0.4584212303161621, |
| "eval_runtime": 81.5904, |
| "eval_samples_per_second": 159.774, |
| "eval_steps_per_second": 19.978, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.305098488100349e-07, |
| "loss": 0.23, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_accuracy": 0.8341515802393372, |
| "eval_loss": 0.49471235275268555, |
| "eval_runtime": 87.9548, |
| "eval_samples_per_second": 148.213, |
| "eval_steps_per_second": 18.532, |
| "step": 80000 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 83868, |
| "total_flos": 7.653345366712497e+17, |
| "train_loss": 0.3355050985067308, |
| "train_runtime": 56077.3175, |
| "train_samples_per_second": 47.859, |
| "train_steps_per_second": 1.496 |
| } |
| ], |
| "max_steps": 83868, |
| "num_train_epochs": 1, |
| "total_flos": 7.653345366712497e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|