| { | |
| "best_metric": 0.06226002424955368, | |
| "best_model_checkpoint": "training_output/reranker_focal_answer/checkpoint-26182", | |
| "epoch": 26.0, | |
| "global_step": 26182, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.950347567030785e-05, | |
| "loss": 0.0273, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.900695134061569e-05, | |
| "loss": 0.013, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.06812480837106705, | |
| "eval_runtime": 56.2831, | |
| "eval_samples_per_second": 556.508, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 4.8510427010923536e-05, | |
| "loss": 0.0109, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 4.801390268123138e-05, | |
| "loss": 0.0106, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.07386605441570282, | |
| "eval_runtime": 56.2854, | |
| "eval_samples_per_second": 556.486, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 4.7517378351539225e-05, | |
| "loss": 0.0095, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.702085402184707e-05, | |
| "loss": 0.0095, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.06689883023500443, | |
| "eval_runtime": 56.2997, | |
| "eval_samples_per_second": 556.344, | |
| "step": 3021 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 4.6524329692154915e-05, | |
| "loss": 0.0083, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.602780536246276e-05, | |
| "loss": 0.0085, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.06948165595531464, | |
| "eval_runtime": 56.3068, | |
| "eval_samples_per_second": 556.274, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 4.5531281032770604e-05, | |
| "loss": 0.0072, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 4.503475670307845e-05, | |
| "loss": 0.0073, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.06620071828365326, | |
| "eval_runtime": 56.3226, | |
| "eval_samples_per_second": 556.118, | |
| "step": 5035 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 4.453823237338629e-05, | |
| "loss": 0.0062, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 4.404170804369414e-05, | |
| "loss": 0.0064, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.06671101599931717, | |
| "eval_runtime": 56.334, | |
| "eval_samples_per_second": 556.005, | |
| "step": 6042 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 4.354518371400199e-05, | |
| "loss": 0.0054, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 4.3048659384309834e-05, | |
| "loss": 0.0056, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.06806950271129608, | |
| "eval_runtime": 56.3386, | |
| "eval_samples_per_second": 555.96, | |
| "step": 7049 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 4.255213505461768e-05, | |
| "loss": 0.0049, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 4.205561072492552e-05, | |
| "loss": 0.0049, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.06494650989770889, | |
| "eval_runtime": 56.3526, | |
| "eval_samples_per_second": 555.821, | |
| "step": 8056 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 4.155908639523337e-05, | |
| "loss": 0.0043, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 4.106256206554122e-05, | |
| "loss": 0.0044, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.06944319605827332, | |
| "eval_runtime": 56.3534, | |
| "eval_samples_per_second": 555.814, | |
| "step": 9063 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 4.0566037735849064e-05, | |
| "loss": 0.0038, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 4.006951340615691e-05, | |
| "loss": 0.0039, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.06448082625865936, | |
| "eval_runtime": 56.3612, | |
| "eval_samples_per_second": 555.737, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 3.957298907646475e-05, | |
| "loss": 0.0035, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 3.90764647467726e-05, | |
| "loss": 0.0035, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.06930028647184372, | |
| "eval_runtime": 56.3501, | |
| "eval_samples_per_second": 555.846, | |
| "step": 11077 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "learning_rate": 3.857994041708044e-05, | |
| "loss": 0.0031, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 11.92, | |
| "learning_rate": 3.8083416087388287e-05, | |
| "loss": 0.0032, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.06855376809835434, | |
| "eval_runtime": 56.3685, | |
| "eval_samples_per_second": 555.665, | |
| "step": 12084 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "learning_rate": 3.758689175769613e-05, | |
| "loss": 0.0028, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 12.91, | |
| "learning_rate": 3.7090367428003976e-05, | |
| "loss": 0.003, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.06912727653980255, | |
| "eval_runtime": 56.3545, | |
| "eval_samples_per_second": 555.803, | |
| "step": 13091 | |
| }, | |
| { | |
| "epoch": 13.41, | |
| "learning_rate": 3.659384309831182e-05, | |
| "loss": 0.0027, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 13.9, | |
| "learning_rate": 3.6097318768619665e-05, | |
| "loss": 0.0027, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.06820587813854218, | |
| "eval_runtime": 56.3582, | |
| "eval_samples_per_second": 555.767, | |
| "step": 14098 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 3.560079443892751e-05, | |
| "loss": 0.0025, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "learning_rate": 3.5104270109235354e-05, | |
| "loss": 0.0025, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.06939652562141418, | |
| "eval_runtime": 56.3589, | |
| "eval_samples_per_second": 555.76, | |
| "step": 15105 | |
| }, | |
| { | |
| "epoch": 15.39, | |
| "learning_rate": 3.46077457795432e-05, | |
| "loss": 0.0023, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 3.4111221449851043e-05, | |
| "loss": 0.0023, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.06309447437524796, | |
| "eval_runtime": 56.3564, | |
| "eval_samples_per_second": 555.784, | |
| "step": 16112 | |
| }, | |
| { | |
| "epoch": 16.39, | |
| "learning_rate": 3.361469712015889e-05, | |
| "loss": 0.0022, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "learning_rate": 3.311817279046673e-05, | |
| "loss": 0.0023, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.06566380709409714, | |
| "eval_runtime": 56.4037, | |
| "eval_samples_per_second": 555.318, | |
| "step": 17119 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 3.262164846077458e-05, | |
| "loss": 0.0021, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 17.87, | |
| "learning_rate": 3.212512413108242e-05, | |
| "loss": 0.002, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.06591986119747162, | |
| "eval_runtime": 56.3845, | |
| "eval_samples_per_second": 555.507, | |
| "step": 18126 | |
| }, | |
| { | |
| "epoch": 18.37, | |
| "learning_rate": 3.1628599801390267e-05, | |
| "loss": 0.002, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 3.113207547169811e-05, | |
| "loss": 0.002, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.0679154321551323, | |
| "eval_runtime": 56.3981, | |
| "eval_samples_per_second": 555.373, | |
| "step": 19133 | |
| }, | |
| { | |
| "epoch": 19.36, | |
| "learning_rate": 3.0635551142005956e-05, | |
| "loss": 0.0018, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 19.86, | |
| "learning_rate": 3.0139026812313804e-05, | |
| "loss": 0.0019, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.06612461805343628, | |
| "eval_runtime": 56.3877, | |
| "eval_samples_per_second": 555.476, | |
| "step": 20140 | |
| }, | |
| { | |
| "epoch": 20.36, | |
| "learning_rate": 2.964250248262165e-05, | |
| "loss": 0.0019, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 20.85, | |
| "learning_rate": 2.9145978152929493e-05, | |
| "loss": 0.0017, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.06515755504369736, | |
| "eval_runtime": 56.3934, | |
| "eval_samples_per_second": 555.42, | |
| "step": 21147 | |
| }, | |
| { | |
| "epoch": 21.35, | |
| "learning_rate": 2.8649453823237338e-05, | |
| "loss": 0.0017, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 21.85, | |
| "learning_rate": 2.8152929493545182e-05, | |
| "loss": 0.0018, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.062369268387556076, | |
| "eval_runtime": 56.3907, | |
| "eval_samples_per_second": 555.446, | |
| "step": 22154 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "learning_rate": 2.7656405163853027e-05, | |
| "loss": 0.0017, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 22.84, | |
| "learning_rate": 2.7159880834160878e-05, | |
| "loss": 0.0017, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.064911849796772, | |
| "eval_runtime": 56.3943, | |
| "eval_samples_per_second": 555.411, | |
| "step": 23161 | |
| }, | |
| { | |
| "epoch": 23.34, | |
| "learning_rate": 2.6663356504468723e-05, | |
| "loss": 0.0017, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "learning_rate": 2.6166832174776567e-05, | |
| "loss": 0.0016, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.06586817651987076, | |
| "eval_runtime": 56.3912, | |
| "eval_samples_per_second": 555.441, | |
| "step": 24168 | |
| }, | |
| { | |
| "epoch": 24.33, | |
| "learning_rate": 2.5670307845084412e-05, | |
| "loss": 0.0016, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 24.83, | |
| "learning_rate": 2.5173783515392257e-05, | |
| "loss": 0.0015, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.06666699051856995, | |
| "eval_runtime": 56.3936, | |
| "eval_samples_per_second": 555.418, | |
| "step": 25175 | |
| }, | |
| { | |
| "epoch": 25.32, | |
| "learning_rate": 2.46772591857001e-05, | |
| "loss": 0.0015, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 25.82, | |
| "learning_rate": 2.4180734856007946e-05, | |
| "loss": 0.0016, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.06226002424955368, | |
| "eval_runtime": 56.4017, | |
| "eval_samples_per_second": 555.338, | |
| "step": 26182 | |
| } | |
| ], | |
| "max_steps": 50350, | |
| "num_train_epochs": 50, | |
| "total_flos": 1.4079942379067136e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |