| { | |
| "best_metric": 0.8122448979591836, | |
| "best_model_checkpoint": "/hdd1/mujeen/retrieval_prf/output/labeler_multi.train.v4.1_nq.dev.v4.1_rlmulti_title_wq/checkpoint-300", | |
| "epoch": 1.0, | |
| "global_step": 3080, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.903896103896104e-05, | |
| "loss": 0.2844, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.7714285714285715, | |
| "eval_loss": 0.5584465861320496, | |
| "eval_runtime": 3.5617, | |
| "eval_samples_per_second": 137.576, | |
| "eval_steps_per_second": 17.408, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.8064935064935067e-05, | |
| "loss": 0.2673, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.8122448979591836, | |
| "eval_loss": 0.5982859134674072, | |
| "eval_runtime": 3.1189, | |
| "eval_samples_per_second": 157.106, | |
| "eval_steps_per_second": 19.879, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 1.7090909090909092e-05, | |
| "loss": 0.2653, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.8040816326530612, | |
| "eval_loss": 0.5716066956520081, | |
| "eval_runtime": 3.0903, | |
| "eval_samples_per_second": 158.56, | |
| "eval_steps_per_second": 20.063, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.6116883116883118e-05, | |
| "loss": 0.2453, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.7836734693877551, | |
| "eval_loss": 0.6890708208084106, | |
| "eval_runtime": 2.9422, | |
| "eval_samples_per_second": 166.541, | |
| "eval_steps_per_second": 21.073, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.514935064935065e-05, | |
| "loss": 0.2234, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.8, | |
| "eval_loss": 0.6545684337615967, | |
| "eval_runtime": 3.0903, | |
| "eval_samples_per_second": 158.559, | |
| "eval_steps_per_second": 20.063, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.4175324675324675e-05, | |
| "loss": 0.2117, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.7938775510204081, | |
| "eval_loss": 0.6074094772338867, | |
| "eval_runtime": 3.4718, | |
| "eval_samples_per_second": 141.137, | |
| "eval_steps_per_second": 17.858, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.3201298701298702e-05, | |
| "loss": 0.1867, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.7938775510204081, | |
| "eval_loss": 0.7075100541114807, | |
| "eval_runtime": 3.2481, | |
| "eval_samples_per_second": 150.857, | |
| "eval_steps_per_second": 19.088, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.2227272727272728e-05, | |
| "loss": 0.179, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.8040816326530612, | |
| "eval_loss": 0.6317723989486694, | |
| "eval_runtime": 3.0265, | |
| "eval_samples_per_second": 161.903, | |
| "eval_steps_per_second": 20.486, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.1253246753246754e-05, | |
| "loss": 0.1718, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.7836734693877551, | |
| "eval_loss": 0.7868985533714294, | |
| "eval_runtime": 3.0224, | |
| "eval_samples_per_second": 162.125, | |
| "eval_steps_per_second": 20.514, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.027922077922078e-05, | |
| "loss": 0.1713, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.7775510204081633, | |
| "eval_loss": 0.6507557034492493, | |
| "eval_runtime": 3.6832, | |
| "eval_samples_per_second": 133.036, | |
| "eval_steps_per_second": 16.833, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.318181818181819e-06, | |
| "loss": 0.1502, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.7836734693877551, | |
| "eval_loss": 0.738645613193512, | |
| "eval_runtime": 3.2984, | |
| "eval_samples_per_second": 148.555, | |
| "eval_steps_per_second": 18.797, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.344155844155845e-06, | |
| "loss": 0.1541, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.7877551020408163, | |
| "eval_loss": 0.8032358884811401, | |
| "eval_runtime": 3.6265, | |
| "eval_samples_per_second": 135.118, | |
| "eval_steps_per_second": 17.097, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 7.370129870129871e-06, | |
| "loss": 0.1358, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.7959183673469388, | |
| "eval_loss": 0.7314993739128113, | |
| "eval_runtime": 3.7106, | |
| "eval_samples_per_second": 132.054, | |
| "eval_steps_per_second": 16.709, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 6.3961038961038964e-06, | |
| "loss": 0.1251, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.7816326530612245, | |
| "eval_loss": 0.9869228601455688, | |
| "eval_runtime": 3.3927, | |
| "eval_samples_per_second": 144.427, | |
| "eval_steps_per_second": 18.274, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5.422077922077923e-06, | |
| "loss": 0.1244, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.7836734693877551, | |
| "eval_loss": 0.8370808362960815, | |
| "eval_runtime": 3.5148, | |
| "eval_samples_per_second": 139.41, | |
| "eval_steps_per_second": 17.64, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.448051948051948e-06, | |
| "loss": 0.1223, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.7816326530612245, | |
| "eval_loss": 0.833113968372345, | |
| "eval_runtime": 3.1172, | |
| "eval_samples_per_second": 157.192, | |
| "eval_steps_per_second": 19.89, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.474025974025974e-06, | |
| "loss": 0.096, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.7979591836734694, | |
| "eval_loss": 0.8976467847824097, | |
| "eval_runtime": 3.0168, | |
| "eval_samples_per_second": 162.426, | |
| "eval_steps_per_second": 20.552, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.1113, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.7755102040816326, | |
| "eval_loss": 0.96781325340271, | |
| "eval_runtime": 3.3837, | |
| "eval_samples_per_second": 144.814, | |
| "eval_steps_per_second": 18.323, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.525974025974026e-06, | |
| "loss": 0.094, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.7816326530612245, | |
| "eval_loss": 1.0002238750457764, | |
| "eval_runtime": 2.9984, | |
| "eval_samples_per_second": 163.421, | |
| "eval_steps_per_second": 20.678, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.51948051948052e-07, | |
| "loss": 0.1084, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.7836734693877551, | |
| "eval_loss": 1.0199074745178223, | |
| "eval_runtime": 3.3069, | |
| "eval_samples_per_second": 148.174, | |
| "eval_steps_per_second": 18.748, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3080, | |
| "total_flos": 2.867459611823923e+16, | |
| "train_loss": 0.16962762838834292, | |
| "train_runtime": 2524.7018, | |
| "train_samples_per_second": 39.038, | |
| "train_steps_per_second": 1.22 | |
| } | |
| ], | |
| "max_steps": 3080, | |
| "num_train_epochs": 1, | |
| "total_flos": 2.867459611823923e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |