{ "best_metric": 0.45347079634666443, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__roberta-base/checkpoint-500", "epoch": 0.8576329331046312, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 16.4408, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 13.3709, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.3304258594150847, "eval_distillation_accuracy_factual": 0.85351462288353, "eval_distillation_f1_counterfactual": 0.243675431005225, "eval_distillation_f1_factual": 0.8405624277794843, "eval_groundtruth_accuracy_counterfactual": 0.3258081067213956, "eval_groundtruth_f1_counterfactual": 0.24573702955510618, "eval_groundtruth_f1_factual": 0.7133962491941664, "eval_icace_cosine": 0.5112652778625488, "eval_icace_l2": 0.8116109371185303, "eval_icace_normdiff": 0.5299729108810425, "eval_loss": 10.612378120422363, "eval_runtime": 12.8754, "eval_samples_per_second": 302.748, "eval_steps_per_second": 2.408, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 11.6048, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 11.1238, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 10.5122, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.5010261672652643, "eval_distillation_accuracy_factual": 0.8737814263724987, "eval_distillation_f1_counterfactual": 0.3809463454212182, "eval_distillation_f1_factual": 0.8520946306923461, "eval_groundtruth_accuracy_counterfactual": 0.47998973832734737, "eval_groundtruth_f1_counterfactual": 0.36220047443175507, "eval_groundtruth_f1_factual": 0.7127756432715844, "eval_icace_cosine": 0.46907880902290344, "eval_icace_l2": 0.6739468574523926, "eval_icace_normdiff": 0.44848179817199707, "eval_loss": 8.415631294250488, "eval_runtime": 12.9894, "eval_samples_per_second": 300.091, "eval_steps_per_second": 2.387, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 10.4539, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 10.4598, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.5100051308363264, "eval_distillation_accuracy_factual": 0.8648024628014367, "eval_distillation_f1_counterfactual": 0.4378381593277977, "eval_distillation_f1_factual": 0.8505368829186162, "eval_groundtruth_accuracy_counterfactual": 0.4979476654694715, "eval_groundtruth_f1_counterfactual": 0.42720537829821525, "eval_groundtruth_f1_factual": 0.6962013496902839, "eval_icace_cosine": 0.4693450629711151, "eval_icace_l2": 0.6681450009346008, "eval_icace_normdiff": 0.41851383447647095, "eval_loss": 8.433810234069824, "eval_runtime": 12.8826, "eval_samples_per_second": 302.578, "eval_steps_per_second": 2.406, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 10.219, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 10.1164, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 10.1494, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.48691636736788096, "eval_distillation_accuracy_factual": 0.8858388917393535, "eval_distillation_f1_counterfactual": 0.3500647816598059, "eval_distillation_f1_factual": 0.8801680477409537, "eval_groundtruth_accuracy_counterfactual": 0.46793227296049256, "eval_groundtruth_f1_counterfactual": 0.3413720392476469, "eval_groundtruth_f1_factual": 0.696679629826118, "eval_icace_cosine": 0.4742945730686188, "eval_icace_l2": 0.6967979073524475, "eval_icace_normdiff": 0.4476469159126282, "eval_loss": 8.772797584533691, "eval_runtime": 13.0849, "eval_samples_per_second": 297.901, "eval_steps_per_second": 2.369, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 9.9977, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 10.0073, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.43560800410466904, "eval_distillation_accuracy_factual": 0.8519753719856337, "eval_distillation_f1_counterfactual": 0.35530824798064053, "eval_distillation_f1_factual": 0.8365066372023451, "eval_groundtruth_accuracy_counterfactual": 0.43766033863519754, "eval_groundtruth_f1_counterfactual": 0.364318999243899, "eval_groundtruth_f1_factual": 0.6915401170991118, "eval_icace_cosine": 0.48830506205558777, "eval_icace_l2": 0.7442983388900757, "eval_icace_normdiff": 0.4745895266532898, "eval_loss": 9.400979042053223, "eval_runtime": 12.983, "eval_samples_per_second": 300.24, "eval_steps_per_second": 2.388, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 9.7738, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 9.9329, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 9.7779, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.5359158542842484, "eval_distillation_accuracy_factual": 0.8619805028219599, "eval_distillation_f1_counterfactual": 0.47616723466717426, "eval_distillation_f1_factual": 0.852650039087773, "eval_groundtruth_accuracy_counterfactual": 0.5010261672652643, "eval_groundtruth_f1_counterfactual": 0.44417532302390744, "eval_groundtruth_f1_factual": 0.7073800556776944, "eval_icace_cosine": 0.46865805983543396, "eval_icace_l2": 0.6562190651893616, "eval_icace_normdiff": 0.4228982925415039, "eval_loss": 8.363639831542969, "eval_runtime": 13.0389, "eval_samples_per_second": 298.951, "eval_steps_per_second": 2.377, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 9.6834, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 9.547, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.5025654181631606, "eval_distillation_accuracy_factual": 0.8855823499230374, "eval_distillation_f1_counterfactual": 0.4398602753096072, "eval_distillation_f1_factual": 0.8717503260210726, "eval_groundtruth_accuracy_counterfactual": 0.4989738327347358, "eval_groundtruth_f1_counterfactual": 0.44441757184034925, "eval_groundtruth_f1_factual": 0.7229789758781793, "eval_icace_cosine": 0.4650568664073944, "eval_icace_l2": 0.6811135411262512, "eval_icace_normdiff": 0.4423196017742157, "eval_loss": 8.341341972351074, "eval_runtime": 13.1584, "eval_samples_per_second": 296.238, "eval_steps_per_second": 2.356, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 9.4159, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 9.6264, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 9.2833, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.5495125705489995, "eval_distillation_accuracy_factual": 0.8494099538224731, "eval_distillation_f1_counterfactual": 0.48234074968553386, "eval_distillation_f1_factual": 0.834773556914336, "eval_groundtruth_accuracy_counterfactual": 0.5266803488968702, "eval_groundtruth_f1_counterfactual": 0.4613217521128874, "eval_groundtruth_f1_factual": 0.7080154702931097, "eval_icace_cosine": 0.4618784785270691, "eval_icace_l2": 0.6433539986610413, "eval_icace_normdiff": 0.40616923570632935, "eval_loss": 8.343696594238281, "eval_runtime": 15.6191, "eval_samples_per_second": 249.565, "eval_steps_per_second": 1.985, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 9.3389, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 9.2901, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.4989738327347358, "eval_distillation_accuracy_factual": 0.8601847101077476, "eval_distillation_f1_counterfactual": 0.33860612154049907, "eval_distillation_f1_factual": 0.8490636185423319, "eval_groundtruth_accuracy_counterfactual": 0.4892252437147255, "eval_groundtruth_f1_counterfactual": 0.33971729579938115, "eval_groundtruth_f1_factual": 0.7307035270822468, "eval_icace_cosine": 0.475065141916275, "eval_icace_l2": 0.6995478272438049, "eval_icace_normdiff": 0.440874308347702, "eval_loss": 8.762495994567871, "eval_runtime": 13.0872, "eval_samples_per_second": 297.848, "eval_steps_per_second": 2.369, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 9.3355, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 9.1845, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 9.2082, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.5823499230374551, "eval_distillation_accuracy_factual": 0.8514622883530015, "eval_distillation_f1_counterfactual": 0.49132364435744985, "eval_distillation_f1_factual": 0.8334607656933123, "eval_groundtruth_accuracy_counterfactual": 0.5482298614674191, "eval_groundtruth_f1_counterfactual": 0.46237812507137493, "eval_groundtruth_f1_factual": 0.7129235668323691, "eval_icace_cosine": 0.45347079634666443, "eval_icace_l2": 0.6185017824172974, "eval_icace_normdiff": 0.3950257897377014, "eval_loss": 8.00942611694336, "eval_runtime": 13.0722, "eval_samples_per_second": 298.191, "eval_steps_per_second": 2.371, "step": 500 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 4209890279424000.0, "trial_name": null, "trial_params": null }