{ "best_metric": 0.45797210931777954, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__bert-base-uncased/checkpoint-1000", "epoch": 1.7152658662092626, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 15.0971, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 10.8654, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.512827090815803, "eval_distillation_accuracy_factual": 0.8973832734735762, "eval_distillation_f1_counterfactual": 0.38283775595303926, "eval_distillation_f1_factual": 0.8849844553565298, "eval_groundtruth_accuracy_counterfactual": 0.47998973832734737, "eval_groundtruth_f1_counterfactual": 0.35799290350733415, "eval_groundtruth_f1_factual": 0.6974663941237289, "eval_icace_cosine": 0.4845265746116638, "eval_icace_l2": 0.6881101131439209, "eval_icace_normdiff": 0.4512614905834198, "eval_loss": 7.50442361831665, "eval_runtime": 22.2098, "eval_samples_per_second": 175.508, "eval_steps_per_second": 2.747, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 10.0023, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 9.8393, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 9.2084, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.534376603386352, "eval_distillation_accuracy_factual": 0.879168804515136, "eval_distillation_f1_counterfactual": 0.47020198824723397, "eval_distillation_f1_factual": 0.8636644497351712, "eval_groundtruth_accuracy_counterfactual": 0.521806054386865, "eval_groundtruth_f1_counterfactual": 0.46415698862260973, "eval_groundtruth_f1_factual": 0.6787169266269484, "eval_icace_cosine": 0.474692165851593, "eval_icace_l2": 0.6402959823608398, "eval_icace_normdiff": 0.41231343150138855, "eval_loss": 7.25449275970459, "eval_runtime": 22.2189, "eval_samples_per_second": 175.436, "eval_steps_per_second": 2.745, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 9.2547, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 9.2275, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.5028219599794767, "eval_distillation_accuracy_factual": 0.8245253976398152, "eval_distillation_f1_counterfactual": 0.41359201224181497, "eval_distillation_f1_factual": 0.8013559665708911, "eval_groundtruth_accuracy_counterfactual": 0.49204720369420213, "eval_groundtruth_f1_counterfactual": 0.4109951641517333, "eval_groundtruth_f1_factual": 0.6495764505483296, "eval_icace_cosine": 0.46765828132629395, "eval_icace_l2": 0.6432615518569946, "eval_icace_normdiff": 0.38947969675064087, "eval_loss": 7.776533126831055, "eval_runtime": 22.1488, "eval_samples_per_second": 175.991, "eval_steps_per_second": 2.754, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 8.9152, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 8.7736, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 8.8387, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.5177013853258081, "eval_distillation_accuracy_factual": 0.8614674191893279, "eval_distillation_f1_counterfactual": 0.41268689402882053, "eval_distillation_f1_factual": 0.8521590725130646, "eval_groundtruth_accuracy_counterfactual": 0.5030785017957927, "eval_groundtruth_f1_counterfactual": 0.40850979925255315, "eval_groundtruth_f1_factual": 0.6669518573115495, "eval_icace_cosine": 0.4691610634326935, "eval_icace_l2": 0.6340324282646179, "eval_icace_normdiff": 0.39094486832618713, "eval_loss": 7.458261013031006, "eval_runtime": 22.3381, "eval_samples_per_second": 174.5, "eval_steps_per_second": 2.731, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 8.772, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 8.7734, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.4671626475115444, "eval_distillation_accuracy_factual": 0.8671113391482812, "eval_distillation_f1_counterfactual": 0.38582609157213904, "eval_distillation_f1_factual": 0.8563293543368673, "eval_groundtruth_accuracy_counterfactual": 0.46151872755259105, "eval_groundtruth_f1_counterfactual": 0.39182214143822086, "eval_groundtruth_f1_factual": 0.6883791067687154, "eval_icace_cosine": 0.4946799874305725, "eval_icace_l2": 0.6939188241958618, "eval_icace_normdiff": 0.43225187063217163, "eval_loss": 8.251187324523926, "eval_runtime": 22.1682, "eval_samples_per_second": 175.838, "eval_steps_per_second": 2.752, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 8.5326, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 8.593, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 8.5207, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.5187275525910724, "eval_distillation_accuracy_factual": 0.8471010774756286, "eval_distillation_f1_counterfactual": 0.4809461537601309, "eval_distillation_f1_factual": 0.8415144400606737, "eval_groundtruth_accuracy_counterfactual": 0.5023088763468445, "eval_groundtruth_f1_counterfactual": 0.46717757401427457, "eval_groundtruth_f1_factual": 0.6817982025512722, "eval_icace_cosine": 0.48118895292282104, "eval_icace_l2": 0.6521334052085876, "eval_icace_normdiff": 0.4162002503871918, "eval_loss": 7.6342902183532715, "eval_runtime": 22.1084, "eval_samples_per_second": 176.313, "eval_steps_per_second": 2.759, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 8.3762, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 8.3251, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.5295023088763469, "eval_distillation_accuracy_factual": 0.8568496664956388, "eval_distillation_f1_counterfactual": 0.4641469168961435, "eval_distillation_f1_factual": 0.8494977579970042, "eval_groundtruth_accuracy_counterfactual": 0.5166752180605438, "eval_groundtruth_f1_counterfactual": 0.4567649352125045, "eval_groundtruth_f1_factual": 0.6694324318001126, "eval_icace_cosine": 0.46943578124046326, "eval_icace_l2": 0.6349437236785889, "eval_icace_normdiff": 0.39818716049194336, "eval_loss": 7.341701507568359, "eval_runtime": 22.1905, "eval_samples_per_second": 175.66, "eval_steps_per_second": 2.749, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 8.2535, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 8.3502, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 8.1628, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.5397639815289892, "eval_distillation_accuracy_factual": 0.8355566957414059, "eval_distillation_f1_counterfactual": 0.46033001340715457, "eval_distillation_f1_factual": 0.8292453735319636, "eval_groundtruth_accuracy_counterfactual": 0.5223191380194971, "eval_groundtruth_f1_counterfactual": 0.44829969536055286, "eval_groundtruth_f1_factual": 0.6718756022768833, "eval_icace_cosine": 0.460631787776947, "eval_icace_l2": 0.6126735210418701, "eval_icace_normdiff": 0.3810984492301941, "eval_loss": 7.215612411499023, "eval_runtime": 22.2088, "eval_samples_per_second": 175.516, "eval_steps_per_second": 2.747, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 8.1842, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 8.2209, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.5256541816316059, "eval_distillation_accuracy_factual": 0.8045151359671626, "eval_distillation_f1_counterfactual": 0.41138103040788077, "eval_distillation_f1_factual": 0.798165576043234, "eval_groundtruth_accuracy_counterfactual": 0.5248845561826577, "eval_groundtruth_f1_counterfactual": 0.4184296633356827, "eval_groundtruth_f1_factual": 0.66434863984353, "eval_icace_cosine": 0.47930216789245605, "eval_icace_l2": 0.6362362504005432, "eval_icace_normdiff": 0.4014618396759033, "eval_loss": 7.654078483581543, "eval_runtime": 22.2042, "eval_samples_per_second": 175.552, "eval_steps_per_second": 2.747, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 8.2117, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 8.246, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 8.1357, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.5497691123653156, "eval_distillation_accuracy_factual": 0.8237557721908672, "eval_distillation_f1_counterfactual": 0.45037064661810805, "eval_distillation_f1_factual": 0.8056668211328342, "eval_groundtruth_accuracy_counterfactual": 0.5402770651616213, "eval_groundtruth_f1_counterfactual": 0.44977490137963505, "eval_groundtruth_f1_factual": 0.6736698728564816, "eval_icace_cosine": 0.4668065309524536, "eval_icace_l2": 0.6126391887664795, "eval_icace_normdiff": 0.39072343707084656, "eval_loss": 7.208556175231934, "eval_runtime": 22.0856, "eval_samples_per_second": 176.495, "eval_steps_per_second": 2.762, "step": 500 }, { "epoch": 0.89, "learning_rate": 4.436741767764298e-05, "loss": 8.0972, "step": 520 }, { "epoch": 0.93, "learning_rate": 4.415077989601387e-05, "loss": 7.9764, "step": 540 }, { "epoch": 0.94, "eval_distillation_accuracy_counterfactual": 0.5413032324268856, "eval_distillation_accuracy_factual": 0.8263211903540277, "eval_distillation_f1_counterfactual": 0.4579261774469936, "eval_distillation_f1_factual": 0.8169241589118235, "eval_groundtruth_accuracy_counterfactual": 0.5338635197537198, "eval_groundtruth_f1_counterfactual": 0.45648415392539654, "eval_groundtruth_f1_factual": 0.6666874640710526, "eval_icace_cosine": 0.4673744738101959, "eval_icace_l2": 0.6225017309188843, "eval_icace_normdiff": 0.3859357535839081, "eval_loss": 7.50295877456665, "eval_runtime": 22.0967, "eval_samples_per_second": 176.406, "eval_steps_per_second": 2.761, "step": 550 }, { "epoch": 0.96, "learning_rate": 4.393414211438475e-05, "loss": 8.0277, "step": 560 }, { "epoch": 0.99, "learning_rate": 4.371750433275563e-05, "loss": 7.9231, "step": 580 }, { "epoch": 1.03, "learning_rate": 4.3500866551126516e-05, "loss": 8.1605, "step": 600 }, { "epoch": 1.03, "eval_distillation_accuracy_counterfactual": 0.538737814263725, "eval_distillation_accuracy_factual": 0.8424833247819394, "eval_distillation_f1_counterfactual": 0.4424763530876773, "eval_distillation_f1_factual": 0.8305147218419746, "eval_groundtruth_accuracy_counterfactual": 0.530528476141611, "eval_groundtruth_f1_counterfactual": 0.44459191212630245, "eval_groundtruth_f1_factual": 0.675893177241911, "eval_icace_cosine": 0.47036415338516235, "eval_icace_l2": 0.6303676962852478, "eval_icace_normdiff": 0.40152838826179504, "eval_loss": 7.446086406707764, "eval_runtime": 22.193, "eval_samples_per_second": 175.641, "eval_steps_per_second": 2.749, "step": 600 }, { "epoch": 1.06, "learning_rate": 4.3284228769497406e-05, "loss": 8.0369, "step": 620 }, { "epoch": 1.1, "learning_rate": 4.306759098786828e-05, "loss": 7.8522, "step": 640 }, { "epoch": 1.11, "eval_distillation_accuracy_counterfactual": 0.5325808106721396, "eval_distillation_accuracy_factual": 0.7945100051308364, "eval_distillation_f1_counterfactual": 0.45243712925320734, "eval_distillation_f1_factual": 0.7760743993737376, "eval_groundtruth_accuracy_counterfactual": 0.5248845561826577, "eval_groundtruth_f1_counterfactual": 0.4539872598778271, "eval_groundtruth_f1_factual": 0.6485178712630223, "eval_icace_cosine": 0.4656234085559845, "eval_icace_l2": 0.6243467330932617, "eval_icace_normdiff": 0.38494807481765747, "eval_loss": 7.678111553192139, "eval_runtime": 22.2461, "eval_samples_per_second": 175.222, "eval_steps_per_second": 2.742, "step": 650 }, { "epoch": 1.13, "learning_rate": 4.285095320623917e-05, "loss": 7.7876, "step": 660 }, { "epoch": 1.17, "learning_rate": 4.2634315424610055e-05, "loss": 7.8585, "step": 680 }, { "epoch": 1.2, "learning_rate": 4.241767764298094e-05, "loss": 7.9963, "step": 700 }, { "epoch": 1.2, "eval_distillation_accuracy_counterfactual": 0.5220625962031811, "eval_distillation_accuracy_factual": 0.8532580810672139, "eval_distillation_f1_counterfactual": 0.421394437601727, "eval_distillation_f1_factual": 0.843810107668179, "eval_groundtruth_accuracy_counterfactual": 0.513083632632119, "eval_groundtruth_f1_counterfactual": 0.41873912487772846, "eval_groundtruth_f1_factual": 0.6890304825574103, "eval_icace_cosine": 0.47455719113349915, "eval_icace_l2": 0.6358718276023865, "eval_icace_normdiff": 0.3861025869846344, "eval_loss": 7.682065010070801, "eval_runtime": 22.4604, "eval_samples_per_second": 173.55, "eval_steps_per_second": 2.716, "step": 700 }, { "epoch": 1.23, "learning_rate": 4.220103986135182e-05, "loss": 7.9938, "step": 720 }, { "epoch": 1.27, "learning_rate": 4.198440207972271e-05, "loss": 7.7139, "step": 740 }, { "epoch": 1.29, "eval_distillation_accuracy_counterfactual": 0.5405336069779374, "eval_distillation_accuracy_factual": 0.8509492047203694, "eval_distillation_f1_counterfactual": 0.47182248378312613, "eval_distillation_f1_factual": 0.8355752615379481, "eval_groundtruth_accuracy_counterfactual": 0.5269368907131863, "eval_groundtruth_f1_counterfactual": 0.4620646268597837, "eval_groundtruth_f1_factual": 0.6925217992694058, "eval_icace_cosine": 0.45927247405052185, "eval_icace_l2": 0.6215088367462158, "eval_icace_normdiff": 0.39000242948532104, "eval_loss": 7.237956523895264, "eval_runtime": 22.1166, "eval_samples_per_second": 176.248, "eval_steps_per_second": 2.758, "step": 750 }, { "epoch": 1.3, "learning_rate": 4.1767764298093586e-05, "loss": 7.8577, "step": 760 }, { "epoch": 1.34, "learning_rate": 4.1551126516464476e-05, "loss": 7.8288, "step": 780 }, { "epoch": 1.37, "learning_rate": 4.133448873483536e-05, "loss": 7.6519, "step": 800 }, { "epoch": 1.37, "eval_distillation_accuracy_counterfactual": 0.5279630579784504, "eval_distillation_accuracy_factual": 0.805797845048743, "eval_distillation_f1_counterfactual": 0.44398440406113127, "eval_distillation_f1_factual": 0.7889017139693454, "eval_groundtruth_accuracy_counterfactual": 0.5207798871216008, "eval_groundtruth_f1_counterfactual": 0.4413428310825916, "eval_groundtruth_f1_factual": 0.6594914555824491, "eval_icace_cosine": 0.47146424651145935, "eval_icace_l2": 0.6373152732849121, "eval_icace_normdiff": 0.40072518587112427, "eval_loss": 7.812716960906982, "eval_runtime": 22.1689, "eval_samples_per_second": 175.832, "eval_steps_per_second": 2.752, "step": 800 }, { "epoch": 1.41, "learning_rate": 4.111785095320624e-05, "loss": 7.8938, "step": 820 }, { "epoch": 1.44, "learning_rate": 4.0901213171577124e-05, "loss": 7.7746, "step": 840 }, { "epoch": 1.46, "eval_distillation_accuracy_counterfactual": 0.5243714725500257, "eval_distillation_accuracy_factual": 0.836326321190354, "eval_distillation_f1_counterfactual": 0.4465427270486524, "eval_distillation_f1_factual": 0.82730940599887, "eval_groundtruth_accuracy_counterfactual": 0.5179579271421242, "eval_groundtruth_f1_counterfactual": 0.4470807155891924, "eval_groundtruth_f1_factual": 0.6724916960045889, "eval_icace_cosine": 0.4680478870868683, "eval_icace_l2": 0.6270192861557007, "eval_icace_normdiff": 0.3876318037509918, "eval_loss": 7.512824535369873, "eval_runtime": 22.2529, "eval_samples_per_second": 175.168, "eval_steps_per_second": 2.741, "step": 850 }, { "epoch": 1.48, "learning_rate": 4.068457538994801e-05, "loss": 7.6544, "step": 860 }, { "epoch": 1.51, "learning_rate": 4.04679376083189e-05, "loss": 7.7708, "step": 880 }, { "epoch": 1.54, "learning_rate": 4.025129982668977e-05, "loss": 7.6759, "step": 900 }, { "epoch": 1.54, "eval_distillation_accuracy_counterfactual": 0.5194971780400205, "eval_distillation_accuracy_factual": 0.814520266803489, "eval_distillation_f1_counterfactual": 0.45570094642434356, "eval_distillation_f1_factual": 0.805867060477024, "eval_groundtruth_accuracy_counterfactual": 0.5007696254489482, "eval_groundtruth_f1_counterfactual": 0.43858879322504424, "eval_groundtruth_f1_factual": 0.657928782779729, "eval_icace_cosine": 0.4692392945289612, "eval_icace_l2": 0.6410536766052246, "eval_icace_normdiff": 0.4014453887939453, "eval_loss": 7.68869161605835, "eval_runtime": 22.1915, "eval_samples_per_second": 175.653, "eval_steps_per_second": 2.749, "step": 900 }, { "epoch": 1.58, "learning_rate": 4.003466204506066e-05, "loss": 7.8545, "step": 920 }, { "epoch": 1.61, "learning_rate": 3.9818024263431546e-05, "loss": 7.5503, "step": 940 }, { "epoch": 1.63, "eval_distillation_accuracy_counterfactual": 0.5402770651616213, "eval_distillation_accuracy_factual": 0.8211903540277066, "eval_distillation_f1_counterfactual": 0.453286607279591, "eval_distillation_f1_factual": 0.8132738069526344, "eval_groundtruth_accuracy_counterfactual": 0.530528476141611, "eval_groundtruth_f1_counterfactual": 0.4485755018961964, "eval_groundtruth_f1_factual": 0.6565776752764576, "eval_icace_cosine": 0.4694054126739502, "eval_icace_l2": 0.625873863697052, "eval_icace_normdiff": 0.39317741990089417, "eval_loss": 7.5347418785095215, "eval_runtime": 22.1059, "eval_samples_per_second": 176.333, "eval_steps_per_second": 2.759, "step": 950 }, { "epoch": 1.65, "learning_rate": 3.960138648180243e-05, "loss": 7.7698, "step": 960 }, { "epoch": 1.68, "learning_rate": 3.938474870017331e-05, "loss": 7.7985, "step": 980 }, { "epoch": 1.72, "learning_rate": 3.91681109185442e-05, "loss": 7.686, "step": 1000 }, { "epoch": 1.72, "eval_distillation_accuracy_counterfactual": 0.5420728578758338, "eval_distillation_accuracy_factual": 0.8396613648024628, "eval_distillation_f1_counterfactual": 0.456055619550645, "eval_distillation_f1_factual": 0.827631226039748, "eval_groundtruth_accuracy_counterfactual": 0.5292457670600308, "eval_groundtruth_f1_counterfactual": 0.45013798447650544, "eval_groundtruth_f1_factual": 0.6763146243844917, "eval_icace_cosine": 0.45797210931777954, "eval_icace_l2": 0.611527144908905, "eval_icace_normdiff": 0.37649333477020264, "eval_loss": 7.2710371017456055, "eval_runtime": 22.2409, "eval_samples_per_second": 175.262, "eval_steps_per_second": 2.743, "step": 1000 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 8474526459302400.0, "trial_name": null, "trial_params": null }