{ "best_metric": 0.44613537192344666, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__gpt2/checkpoint-2100", "epoch": 3.6020583190394513, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 9.7194, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 8.5461, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.4212416623909697, "eval_distillation_accuracy_factual": 0.8978963571062083, "eval_distillation_f1_counterfactual": 0.2795337994893706, "eval_distillation_f1_factual": 0.8820944086532225, "eval_groundtruth_accuracy_counterfactual": 0.3999486916367368, "eval_groundtruth_f1_counterfactual": 0.270038313797986, "eval_groundtruth_f1_factual": 0.6569760675646802, "eval_icace_cosine": 0.5092517733573914, "eval_icace_l2": 0.6819494366645813, "eval_icace_normdiff": 0.48499011993408203, "eval_loss": 7.299591541290283, "eval_runtime": 9.7793, "eval_samples_per_second": 398.598, "eval_steps_per_second": 6.238, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 8.3578, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 7.9482, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 7.2571, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.4720369420215495, "eval_distillation_accuracy_factual": 0.8304258594150846, "eval_distillation_f1_counterfactual": 0.29869558215395625, "eval_distillation_f1_factual": 0.8025416456465629, "eval_groundtruth_accuracy_counterfactual": 0.4404822986146742, "eval_groundtruth_f1_counterfactual": 0.2821387138469088, "eval_groundtruth_f1_factual": 0.6140475956813652, "eval_icace_cosine": 0.4916336238384247, "eval_icace_l2": 0.5953323245048523, "eval_icace_normdiff": 0.3702651858329773, "eval_loss": 5.76565408706665, "eval_runtime": 8.8197, "eval_samples_per_second": 441.964, "eval_steps_per_second": 6.916, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 6.9005, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 6.7914, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.48050282195997945, "eval_distillation_accuracy_factual": 0.9073884043099025, "eval_distillation_f1_counterfactual": 0.3236150494495036, "eval_distillation_f1_factual": 0.8927762880113139, "eval_groundtruth_accuracy_counterfactual": 0.4707542329399692, "eval_groundtruth_f1_counterfactual": 0.31791281566203955, "eval_groundtruth_f1_factual": 0.6750709743000188, "eval_icace_cosine": 0.4797770380973816, "eval_icace_l2": 0.5712994337081909, "eval_icace_normdiff": 0.33662039041519165, "eval_loss": 5.300200462341309, "eval_runtime": 10.9287, "eval_samples_per_second": 356.676, "eval_steps_per_second": 5.582, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 6.6777, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 6.4593, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 6.4382, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.47255002565418164, "eval_distillation_accuracy_factual": 0.8832734735761929, "eval_distillation_f1_counterfactual": 0.3601419344182958, "eval_distillation_f1_factual": 0.8671170830061152, "eval_groundtruth_accuracy_counterfactual": 0.4820420728578758, "eval_groundtruth_f1_counterfactual": 0.3702238764207906, "eval_groundtruth_f1_factual": 0.6827368954561245, "eval_icace_cosine": 0.480947345495224, "eval_icace_l2": 0.5894502401351929, "eval_icace_normdiff": 0.34918561577796936, "eval_loss": 5.608799457550049, "eval_runtime": 8.4944, "eval_samples_per_second": 458.888, "eval_steps_per_second": 7.181, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 6.2352, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 6.164, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.5069266290405336, "eval_distillation_accuracy_factual": 0.883530015392509, "eval_distillation_f1_counterfactual": 0.3645351770446803, "eval_distillation_f1_factual": 0.8684151036841502, "eval_groundtruth_accuracy_counterfactual": 0.5010261672652643, "eval_groundtruth_f1_counterfactual": 0.36288166576215647, "eval_groundtruth_f1_factual": 0.6687061485706542, "eval_icace_cosine": 0.4569728672504425, "eval_icace_l2": 0.5410994291305542, "eval_icace_normdiff": 0.31389445066452026, "eval_loss": 5.045320510864258, "eval_runtime": 8.4915, "eval_samples_per_second": 459.049, "eval_steps_per_second": 7.184, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 5.9891, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 6.2235, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 6.0128, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.5146228835300154, "eval_distillation_accuracy_factual": 0.8781426372498717, "eval_distillation_f1_counterfactual": 0.3995054954775597, "eval_distillation_f1_factual": 0.853581796631387, "eval_groundtruth_accuracy_counterfactual": 0.5035915854284249, "eval_groundtruth_f1_counterfactual": 0.392088799608495, "eval_groundtruth_f1_factual": 0.6721954364344527, "eval_icace_cosine": 0.4626784920692444, "eval_icace_l2": 0.5394507646560669, "eval_icace_normdiff": 0.31402334570884705, "eval_loss": 4.970682621002197, "eval_runtime": 8.5572, "eval_samples_per_second": 455.522, "eval_steps_per_second": 7.128, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 5.9942, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 6.0123, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.5325808106721396, "eval_distillation_accuracy_factual": 0.8519753719856337, "eval_distillation_f1_counterfactual": 0.41232256847404497, "eval_distillation_f1_factual": 0.8256119871844103, "eval_groundtruth_accuracy_counterfactual": 0.5205233453052848, "eval_groundtruth_f1_counterfactual": 0.40645293495090373, "eval_groundtruth_f1_factual": 0.6290200232431696, "eval_icace_cosine": 0.46614569425582886, "eval_icace_l2": 0.5392053723335266, "eval_icace_normdiff": 0.31089842319488525, "eval_loss": 5.0584893226623535, "eval_runtime": 13.8278, "eval_samples_per_second": 281.896, "eval_steps_per_second": 4.411, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 5.7378, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 5.9926, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 5.8241, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.5266803488968702, "eval_distillation_accuracy_factual": 0.8694202154951257, "eval_distillation_f1_counterfactual": 0.40407582352674, "eval_distillation_f1_factual": 0.8530546578727618, "eval_groundtruth_accuracy_counterfactual": 0.5100051308363264, "eval_groundtruth_f1_counterfactual": 0.39126532870837566, "eval_groundtruth_f1_factual": 0.6853852966812366, "eval_icace_cosine": 0.4609030485153198, "eval_icace_l2": 0.5407834053039551, "eval_icace_normdiff": 0.31197163462638855, "eval_loss": 5.077407360076904, "eval_runtime": 8.5264, "eval_samples_per_second": 457.166, "eval_steps_per_second": 7.154, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 5.6615, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 5.5946, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.5238583889173936, "eval_distillation_accuracy_factual": 0.87044638276039, "eval_distillation_f1_counterfactual": 0.43556823460795113, "eval_distillation_f1_factual": 0.8548518250854267, "eval_groundtruth_accuracy_counterfactual": 0.5161621344279117, "eval_groundtruth_f1_counterfactual": 0.4287539684802734, "eval_groundtruth_f1_factual": 0.6839637952427478, "eval_icace_cosine": 0.46770957112312317, "eval_icace_l2": 0.5423880815505981, "eval_icace_normdiff": 0.3162823021411896, "eval_loss": 5.0952630043029785, "eval_runtime": 13.5333, "eval_samples_per_second": 288.031, "eval_steps_per_second": 4.507, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 5.6457, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 5.7859, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 5.6094, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.521549512570549, "eval_distillation_accuracy_factual": 0.8473576192919446, "eval_distillation_f1_counterfactual": 0.42112500539619846, "eval_distillation_f1_factual": 0.8292976585290512, "eval_groundtruth_accuracy_counterfactual": 0.508722421754746, "eval_groundtruth_f1_counterfactual": 0.4072271850844321, "eval_groundtruth_f1_factual": 0.6740351741865853, "eval_icace_cosine": 0.4634177088737488, "eval_icace_l2": 0.5543035864830017, "eval_icace_normdiff": 0.3254902958869934, "eval_loss": 5.312262535095215, "eval_runtime": 8.4543, "eval_samples_per_second": 461.069, "eval_steps_per_second": 7.215, "step": 500 }, { "epoch": 0.89, "learning_rate": 4.436741767764298e-05, "loss": 5.5785, "step": 520 }, { "epoch": 0.93, "learning_rate": 4.415077989601387e-05, "loss": 5.5593, "step": 540 }, { "epoch": 0.94, "eval_distillation_accuracy_counterfactual": 0.5402770651616213, "eval_distillation_accuracy_factual": 0.8435094920472037, "eval_distillation_f1_counterfactual": 0.43515748935807014, "eval_distillation_f1_factual": 0.8240995838936376, "eval_groundtruth_accuracy_counterfactual": 0.5256541816316059, "eval_groundtruth_f1_counterfactual": 0.4205567101531645, "eval_groundtruth_f1_factual": 0.6657749588429954, "eval_icace_cosine": 0.4569862186908722, "eval_icace_l2": 0.5416843295097351, "eval_icace_normdiff": 0.31774964928627014, "eval_loss": 5.162288665771484, "eval_runtime": 13.3649, "eval_samples_per_second": 291.66, "eval_steps_per_second": 4.564, "step": 550 }, { "epoch": 0.96, "learning_rate": 4.393414211438475e-05, "loss": 5.4634, "step": 560 }, { "epoch": 0.99, "learning_rate": 4.371750433275563e-05, "loss": 5.5253, "step": 580 }, { "epoch": 1.03, "learning_rate": 4.3500866551126516e-05, "loss": 5.6152, "step": 600 }, { "epoch": 1.03, "eval_distillation_accuracy_counterfactual": 0.5608004104669061, "eval_distillation_accuracy_factual": 0.861723961005644, "eval_distillation_f1_counterfactual": 0.4447501209092303, "eval_distillation_f1_factual": 0.8419521920224236, "eval_groundtruth_accuracy_counterfactual": 0.5400205233453053, "eval_groundtruth_f1_counterfactual": 0.4277343931802212, "eval_groundtruth_f1_factual": 0.6685555092012573, "eval_icace_cosine": 0.4536830484867096, "eval_icace_l2": 0.5286034345626831, "eval_icace_normdiff": 0.3132805824279785, "eval_loss": 4.94181489944458, "eval_runtime": 8.6375, "eval_samples_per_second": 451.289, "eval_steps_per_second": 7.062, "step": 600 }, { "epoch": 1.06, "learning_rate": 4.3284228769497406e-05, "loss": 5.4663, "step": 620 }, { "epoch": 1.1, "learning_rate": 4.306759098786828e-05, "loss": 5.4687, "step": 640 }, { "epoch": 1.11, "eval_distillation_accuracy_counterfactual": 0.5310415597742432, "eval_distillation_accuracy_factual": 0.8571062083119548, "eval_distillation_f1_counterfactual": 0.42136058295206064, "eval_distillation_f1_factual": 0.829427902934218, "eval_groundtruth_accuracy_counterfactual": 0.5166752180605438, "eval_groundtruth_f1_counterfactual": 0.4122890718560407, "eval_groundtruth_f1_factual": 0.6676086326825417, "eval_icace_cosine": 0.456624835729599, "eval_icace_l2": 0.5346877574920654, "eval_icace_normdiff": 0.31808435916900635, "eval_loss": 4.98232889175415, "eval_runtime": 13.5322, "eval_samples_per_second": 288.054, "eval_steps_per_second": 4.508, "step": 650 }, { "epoch": 1.13, "learning_rate": 4.285095320623917e-05, "loss": 5.3452, "step": 660 }, { "epoch": 1.17, "learning_rate": 4.2634315424610055e-05, "loss": 5.3117, "step": 680 }, { "epoch": 1.2, "learning_rate": 4.241767764298094e-05, "loss": 5.3126, "step": 700 }, { "epoch": 1.2, "eval_distillation_accuracy_counterfactual": 0.5407901487942535, "eval_distillation_accuracy_factual": 0.8499230374551052, "eval_distillation_f1_counterfactual": 0.4274335785359253, "eval_distillation_f1_factual": 0.8317003860006039, "eval_groundtruth_accuracy_counterfactual": 0.5325808106721396, "eval_groundtruth_f1_counterfactual": 0.4229108777271807, "eval_groundtruth_f1_factual": 0.6817984059431582, "eval_icace_cosine": 0.45930054783821106, "eval_icace_l2": 0.5305303931236267, "eval_icace_normdiff": 0.30581900477409363, "eval_loss": 5.033056259155273, "eval_runtime": 8.5097, "eval_samples_per_second": 458.065, "eval_steps_per_second": 7.168, "step": 700 }, { "epoch": 1.23, "learning_rate": 4.220103986135182e-05, "loss": 5.2491, "step": 720 }, { "epoch": 1.27, "learning_rate": 4.198440207972271e-05, "loss": 5.3605, "step": 740 }, { "epoch": 1.29, "eval_distillation_accuracy_counterfactual": 0.5377116469984607, "eval_distillation_accuracy_factual": 0.8619805028219599, "eval_distillation_f1_counterfactual": 0.42855505894972856, "eval_distillation_f1_factual": 0.8447989566482331, "eval_groundtruth_accuracy_counterfactual": 0.5243714725500257, "eval_groundtruth_f1_counterfactual": 0.4197832634929354, "eval_groundtruth_f1_factual": 0.6888914410164863, "eval_icace_cosine": 0.45965054631233215, "eval_icace_l2": 0.5459038615226746, "eval_icace_normdiff": 0.3226074278354645, "eval_loss": 5.037441253662109, "eval_runtime": 13.1642, "eval_samples_per_second": 296.107, "eval_steps_per_second": 4.634, "step": 750 }, { "epoch": 1.3, "learning_rate": 4.1767764298093586e-05, "loss": 5.2295, "step": 760 }, { "epoch": 1.34, "learning_rate": 4.1551126516464476e-05, "loss": 5.239, "step": 780 }, { "epoch": 1.37, "learning_rate": 4.133448873483536e-05, "loss": 5.3073, "step": 800 }, { "epoch": 1.37, "eval_distillation_accuracy_counterfactual": 0.5415597742432017, "eval_distillation_accuracy_factual": 0.8599281682914315, "eval_distillation_f1_counterfactual": 0.41963605943221116, "eval_distillation_f1_factual": 0.8430915620375968, "eval_groundtruth_accuracy_counterfactual": 0.5274499743458184, "eval_groundtruth_f1_counterfactual": 0.4101686967489355, "eval_groundtruth_f1_factual": 0.6786457422232816, "eval_icace_cosine": 0.4616963565349579, "eval_icace_l2": 0.5364943742752075, "eval_icace_normdiff": 0.3089084029197693, "eval_loss": 5.033254146575928, "eval_runtime": 8.6076, "eval_samples_per_second": 452.858, "eval_steps_per_second": 7.087, "step": 800 }, { "epoch": 1.41, "learning_rate": 4.111785095320624e-05, "loss": 5.2177, "step": 820 }, { "epoch": 1.44, "learning_rate": 4.0901213171577124e-05, "loss": 5.3257, "step": 840 }, { "epoch": 1.46, "eval_distillation_accuracy_counterfactual": 0.543355566957414, "eval_distillation_accuracy_factual": 0.8509492047203694, "eval_distillation_f1_counterfactual": 0.4343061450729029, "eval_distillation_f1_factual": 0.8330255589809777, "eval_groundtruth_accuracy_counterfactual": 0.5351462288353002, "eval_groundtruth_f1_counterfactual": 0.42960347362494583, "eval_groundtruth_f1_factual": 0.6795622319151484, "eval_icace_cosine": 0.4549107849597931, "eval_icace_l2": 0.5301600098609924, "eval_icace_normdiff": 0.31678125262260437, "eval_loss": 4.906150817871094, "eval_runtime": 13.5043, "eval_samples_per_second": 288.649, "eval_steps_per_second": 4.517, "step": 850 }, { "epoch": 1.48, "learning_rate": 4.068457538994801e-05, "loss": 5.2967, "step": 860 }, { "epoch": 1.51, "learning_rate": 4.04679376083189e-05, "loss": 5.1851, "step": 880 }, { "epoch": 1.54, "learning_rate": 4.025129982668977e-05, "loss": 5.0388, "step": 900 }, { "epoch": 1.54, "eval_distillation_accuracy_counterfactual": 0.5377116469984607, "eval_distillation_accuracy_factual": 0.8545407901487942, "eval_distillation_f1_counterfactual": 0.43479793735286665, "eval_distillation_f1_factual": 0.833048646828978, "eval_groundtruth_accuracy_counterfactual": 0.5315546434068753, "eval_groundtruth_f1_counterfactual": 0.4344351894458695, "eval_groundtruth_f1_factual": 0.6827800644937321, "eval_icace_cosine": 0.4591978192329407, "eval_icace_l2": 0.5361349582672119, "eval_icace_normdiff": 0.31350481510162354, "eval_loss": 5.07242488861084, "eval_runtime": 8.6901, "eval_samples_per_second": 448.556, "eval_steps_per_second": 7.019, "step": 900 }, { "epoch": 1.58, "learning_rate": 4.003466204506066e-05, "loss": 5.1534, "step": 920 }, { "epoch": 1.61, "learning_rate": 3.9818024263431546e-05, "loss": 4.936, "step": 940 }, { "epoch": 1.63, "eval_distillation_accuracy_counterfactual": 0.5543868650590046, "eval_distillation_accuracy_factual": 0.8463314520266804, "eval_distillation_f1_counterfactual": 0.4624968238722985, "eval_distillation_f1_factual": 0.8265879438232977, "eval_groundtruth_accuracy_counterfactual": 0.5395074397126731, "eval_groundtruth_f1_counterfactual": 0.45300499369091884, "eval_groundtruth_f1_factual": 0.6899654625013621, "eval_icace_cosine": 0.45697498321533203, "eval_icace_l2": 0.5305233597755432, "eval_icace_normdiff": 0.31259921193122864, "eval_loss": 5.114608287811279, "eval_runtime": 13.2302, "eval_samples_per_second": 294.63, "eval_steps_per_second": 4.611, "step": 950 }, { "epoch": 1.65, "learning_rate": 3.960138648180243e-05, "loss": 5.1669, "step": 960 }, { "epoch": 1.68, "learning_rate": 3.938474870017331e-05, "loss": 5.0256, "step": 980 }, { "epoch": 1.72, "learning_rate": 3.91681109185442e-05, "loss": 5.0776, "step": 1000 }, { "epoch": 1.72, "eval_distillation_accuracy_counterfactual": 0.5400205233453053, "eval_distillation_accuracy_factual": 0.8442791174961519, "eval_distillation_f1_counterfactual": 0.41057517072906763, "eval_distillation_f1_factual": 0.8175198147586908, "eval_groundtruth_accuracy_counterfactual": 0.5336069779374037, "eval_groundtruth_f1_counterfactual": 0.4118030335207775, "eval_groundtruth_f1_factual": 0.6645296484209967, "eval_icace_cosine": 0.4537060856819153, "eval_icace_l2": 0.5299485921859741, "eval_icace_normdiff": 0.3117276728153229, "eval_loss": 5.096031665802002, "eval_runtime": 8.5478, "eval_samples_per_second": 456.026, "eval_steps_per_second": 7.136, "step": 1000 }, { "epoch": 1.75, "learning_rate": 3.895147313691508e-05, "loss": 5.1044, "step": 1020 }, { "epoch": 1.78, "learning_rate": 3.873483535528597e-05, "loss": 5.1824, "step": 1040 }, { "epoch": 1.8, "eval_distillation_accuracy_counterfactual": 0.5454079014879425, "eval_distillation_accuracy_factual": 0.8370959466393022, "eval_distillation_f1_counterfactual": 0.4514467111829311, "eval_distillation_f1_factual": 0.8189293913112244, "eval_groundtruth_accuracy_counterfactual": 0.539250897896357, "eval_groundtruth_f1_counterfactual": 0.44818728166213734, "eval_groundtruth_f1_factual": 0.6824355248442739, "eval_icace_cosine": 0.45461148023605347, "eval_icace_l2": 0.5214051604270935, "eval_icace_normdiff": 0.30551236867904663, "eval_loss": 4.964339733123779, "eval_runtime": 13.4181, "eval_samples_per_second": 290.503, "eval_steps_per_second": 4.546, "step": 1050 }, { "epoch": 1.82, "learning_rate": 3.851819757365685e-05, "loss": 5.2425, "step": 1060 }, { "epoch": 1.85, "learning_rate": 3.830155979202773e-05, "loss": 5.0704, "step": 1080 }, { "epoch": 1.89, "learning_rate": 3.8084922010398616e-05, "loss": 5.1106, "step": 1100 }, { "epoch": 1.89, "eval_distillation_accuracy_counterfactual": 0.5138532580810672, "eval_distillation_accuracy_factual": 0.8370959466393022, "eval_distillation_f1_counterfactual": 0.43106934062357655, "eval_distillation_f1_factual": 0.8165823528984962, "eval_groundtruth_accuracy_counterfactual": 0.5110312981015905, "eval_groundtruth_f1_counterfactual": 0.43076739498450695, "eval_groundtruth_f1_factual": 0.6854469504667037, "eval_icace_cosine": 0.46039697527885437, "eval_icace_l2": 0.5523171424865723, "eval_icace_normdiff": 0.3318041265010834, "eval_loss": 5.376918315887451, "eval_runtime": 9.0752, "eval_samples_per_second": 429.522, "eval_steps_per_second": 6.722, "step": 1100 }, { "epoch": 1.92, "learning_rate": 3.78682842287695e-05, "loss": 5.1747, "step": 1120 }, { "epoch": 1.96, "learning_rate": 3.765164644714038e-05, "loss": 4.9442, "step": 1140 }, { "epoch": 1.97, "eval_distillation_accuracy_counterfactual": 0.530015392508979, "eval_distillation_accuracy_factual": 0.8468445356593125, "eval_distillation_f1_counterfactual": 0.41923047344558784, "eval_distillation_f1_factual": 0.8209617109339709, "eval_groundtruth_accuracy_counterfactual": 0.5194971780400205, "eval_groundtruth_f1_counterfactual": 0.414573449663499, "eval_groundtruth_f1_factual": 0.6814846999962294, "eval_icace_cosine": 0.45492398738861084, "eval_icace_l2": 0.5379559397697449, "eval_icace_normdiff": 0.31172436475753784, "eval_loss": 5.110229969024658, "eval_runtime": 12.5288, "eval_samples_per_second": 311.123, "eval_steps_per_second": 4.869, "step": 1150 }, { "epoch": 1.99, "learning_rate": 3.7435008665511264e-05, "loss": 5.0781, "step": 1160 }, { "epoch": 2.02, "learning_rate": 3.7218370883882154e-05, "loss": 4.8214, "step": 1180 }, { "epoch": 2.06, "learning_rate": 3.700173310225303e-05, "loss": 5.1082, "step": 1200 }, { "epoch": 2.06, "eval_distillation_accuracy_counterfactual": 0.538481272447409, "eval_distillation_accuracy_factual": 0.8483837865572088, "eval_distillation_f1_counterfactual": 0.4406972082523029, "eval_distillation_f1_factual": 0.8301964182514345, "eval_groundtruth_accuracy_counterfactual": 0.5287326834273987, "eval_groundtruth_f1_counterfactual": 0.4366241037475905, "eval_groundtruth_f1_factual": 0.6916041568169767, "eval_icace_cosine": 0.45687490701675415, "eval_icace_l2": 0.5412150621414185, "eval_icace_normdiff": 0.31677863001823425, "eval_loss": 5.133824825286865, "eval_runtime": 8.5259, "eval_samples_per_second": 457.195, "eval_steps_per_second": 7.155, "step": 1200 }, { "epoch": 2.09, "learning_rate": 3.678509532062392e-05, "loss": 4.8366, "step": 1220 }, { "epoch": 2.13, "learning_rate": 3.65684575389948e-05, "loss": 4.8667, "step": 1240 }, { "epoch": 2.14, "eval_distillation_accuracy_counterfactual": 0.5451513596716264, "eval_distillation_accuracy_factual": 0.8471010774756286, "eval_distillation_f1_counterfactual": 0.44516629001487945, "eval_distillation_f1_factual": 0.8300653148799452, "eval_groundtruth_accuracy_counterfactual": 0.5361723961005644, "eval_groundtruth_f1_counterfactual": 0.4403045959585604, "eval_groundtruth_f1_factual": 0.6972167033055753, "eval_icace_cosine": 0.4484976530075073, "eval_icace_l2": 0.5191196203231812, "eval_icace_normdiff": 0.30620598793029785, "eval_loss": 4.914973258972168, "eval_runtime": 13.2047, "eval_samples_per_second": 295.198, "eval_steps_per_second": 4.62, "step": 1250 }, { "epoch": 2.16, "learning_rate": 3.6351819757365686e-05, "loss": 5.0023, "step": 1260 }, { "epoch": 2.2, "learning_rate": 3.613518197573657e-05, "loss": 4.9106, "step": 1280 }, { "epoch": 2.23, "learning_rate": 3.591854419410746e-05, "loss": 4.9292, "step": 1300 }, { "epoch": 2.23, "eval_distillation_accuracy_counterfactual": 0.5479733196511031, "eval_distillation_accuracy_factual": 0.8619805028219599, "eval_distillation_f1_counterfactual": 0.44649902049251783, "eval_distillation_f1_factual": 0.8397438246889717, "eval_groundtruth_accuracy_counterfactual": 0.5436121087737301, "eval_groundtruth_f1_counterfactual": 0.4469947847104564, "eval_groundtruth_f1_factual": 0.6790740044432162, "eval_icace_cosine": 0.4501597285270691, "eval_icace_l2": 0.5136202573776245, "eval_icace_normdiff": 0.29706600308418274, "eval_loss": 4.897469520568848, "eval_runtime": 8.5156, "eval_samples_per_second": 457.75, "eval_steps_per_second": 7.163, "step": 1300 }, { "epoch": 2.26, "learning_rate": 3.5701906412478334e-05, "loss": 4.9032, "step": 1320 }, { "epoch": 2.3, "learning_rate": 3.5485268630849224e-05, "loss": 4.8905, "step": 1340 }, { "epoch": 2.32, "eval_distillation_accuracy_counterfactual": 0.5318111852231914, "eval_distillation_accuracy_factual": 0.8540277065161621, "eval_distillation_f1_counterfactual": 0.44265369921666026, "eval_distillation_f1_factual": 0.8360842662987222, "eval_groundtruth_accuracy_counterfactual": 0.5266803488968702, "eval_groundtruth_f1_counterfactual": 0.4413916588252841, "eval_groundtruth_f1_factual": 0.6766870153023606, "eval_icace_cosine": 0.4583234190940857, "eval_icace_l2": 0.5422101616859436, "eval_icace_normdiff": 0.32523736357688904, "eval_loss": 5.066246032714844, "eval_runtime": 13.1912, "eval_samples_per_second": 295.5, "eval_steps_per_second": 4.624, "step": 1350 }, { "epoch": 2.33, "learning_rate": 3.526863084922011e-05, "loss": 4.8723, "step": 1360 }, { "epoch": 2.37, "learning_rate": 3.505199306759099e-05, "loss": 4.8684, "step": 1380 }, { "epoch": 2.4, "learning_rate": 3.483535528596187e-05, "loss": 4.8838, "step": 1400 }, { "epoch": 2.4, "eval_distillation_accuracy_counterfactual": 0.5428424833247819, "eval_distillation_accuracy_factual": 0.8468445356593125, "eval_distillation_f1_counterfactual": 0.4218024392943229, "eval_distillation_f1_factual": 0.8302460548095396, "eval_groundtruth_accuracy_counterfactual": 0.5374551051821447, "eval_groundtruth_f1_counterfactual": 0.4210443279497268, "eval_groundtruth_f1_factual": 0.6863076976423126, "eval_icace_cosine": 0.45339435338974, "eval_icace_l2": 0.5222477316856384, "eval_icace_normdiff": 0.29324910044670105, "eval_loss": 4.967700958251953, "eval_runtime": 8.6663, "eval_samples_per_second": 449.789, "eval_steps_per_second": 7.039, "step": 1400 }, { "epoch": 2.44, "learning_rate": 3.461871750433276e-05, "loss": 4.9797, "step": 1420 }, { "epoch": 2.47, "learning_rate": 3.440207972270364e-05, "loss": 4.7387, "step": 1440 }, { "epoch": 2.49, "eval_distillation_accuracy_counterfactual": 0.5407901487942535, "eval_distillation_accuracy_factual": 0.8609543355566958, "eval_distillation_f1_counterfactual": 0.4316270282168784, "eval_distillation_f1_factual": 0.8433610743824644, "eval_groundtruth_accuracy_counterfactual": 0.5323242688558235, "eval_groundtruth_f1_counterfactual": 0.42891639415690835, "eval_groundtruth_f1_factual": 0.6812657459890642, "eval_icace_cosine": 0.45285436511039734, "eval_icace_l2": 0.5242744088172913, "eval_icace_normdiff": 0.2969839870929718, "eval_loss": 4.971090793609619, "eval_runtime": 8.4714, "eval_samples_per_second": 460.137, "eval_steps_per_second": 7.201, "step": 1450 }, { "epoch": 2.5, "learning_rate": 3.418544194107453e-05, "loss": 4.9854, "step": 1460 }, { "epoch": 2.54, "learning_rate": 3.396880415944541e-05, "loss": 4.9463, "step": 1480 }, { "epoch": 2.57, "learning_rate": 3.3752166377816294e-05, "loss": 4.8231, "step": 1500 }, { "epoch": 2.57, "eval_distillation_accuracy_counterfactual": 0.5348896870189841, "eval_distillation_accuracy_factual": 0.8365828630066701, "eval_distillation_f1_counterfactual": 0.4412310147597913, "eval_distillation_f1_factual": 0.8178683138413986, "eval_groundtruth_accuracy_counterfactual": 0.530015392508979, "eval_groundtruth_f1_counterfactual": 0.44062130454891085, "eval_groundtruth_f1_factual": 0.6915266725506644, "eval_icace_cosine": 0.45718541741371155, "eval_icace_l2": 0.5440450310707092, "eval_icace_normdiff": 0.3275880515575409, "eval_loss": 5.153608322143555, "eval_runtime": 8.5873, "eval_samples_per_second": 453.926, "eval_steps_per_second": 7.104, "step": 1500 }, { "epoch": 2.61, "learning_rate": 3.353552859618718e-05, "loss": 4.8916, "step": 1520 }, { "epoch": 2.64, "learning_rate": 3.331889081455806e-05, "loss": 4.8635, "step": 1540 }, { "epoch": 2.66, "eval_distillation_accuracy_counterfactual": 0.5513083632632119, "eval_distillation_accuracy_factual": 0.8573627501282709, "eval_distillation_f1_counterfactual": 0.44609171920181473, "eval_distillation_f1_factual": 0.8375044413424142, "eval_groundtruth_accuracy_counterfactual": 0.5428424833247819, "eval_groundtruth_f1_counterfactual": 0.4433319000026114, "eval_groundtruth_f1_factual": 0.6913476363231361, "eval_icace_cosine": 0.4538714587688446, "eval_icace_l2": 0.5147866010665894, "eval_icace_normdiff": 0.28974854946136475, "eval_loss": 4.9417548179626465, "eval_runtime": 8.5448, "eval_samples_per_second": 456.183, "eval_steps_per_second": 7.139, "step": 1550 }, { "epoch": 2.68, "learning_rate": 3.310225303292894e-05, "loss": 4.7195, "step": 1560 }, { "epoch": 2.71, "learning_rate": 3.2885615251299825e-05, "loss": 4.8921, "step": 1580 }, { "epoch": 2.74, "learning_rate": 3.2668977469670715e-05, "loss": 4.659, "step": 1600 }, { "epoch": 2.74, "eval_distillation_accuracy_counterfactual": 0.5464340687532068, "eval_distillation_accuracy_factual": 0.8512057465366855, "eval_distillation_f1_counterfactual": 0.4474569537385958, "eval_distillation_f1_factual": 0.8348714122309786, "eval_groundtruth_accuracy_counterfactual": 0.543099025141098, "eval_groundtruth_f1_counterfactual": 0.4496126854336094, "eval_groundtruth_f1_factual": 0.685952702077513, "eval_icace_cosine": 0.45106443762779236, "eval_icace_l2": 0.5234705805778503, "eval_icace_normdiff": 0.30465561151504517, "eval_loss": 4.918430805206299, "eval_runtime": 8.5119, "eval_samples_per_second": 457.949, "eval_steps_per_second": 7.166, "step": 1600 }, { "epoch": 2.78, "learning_rate": 3.245233968804159e-05, "loss": 4.8637, "step": 1620 }, { "epoch": 2.81, "learning_rate": 3.223570190641248e-05, "loss": 4.7494, "step": 1640 }, { "epoch": 2.83, "eval_distillation_accuracy_counterfactual": 0.530015392508979, "eval_distillation_accuracy_factual": 0.8558234992303746, "eval_distillation_f1_counterfactual": 0.43629277707813435, "eval_distillation_f1_factual": 0.8417596638128584, "eval_groundtruth_accuracy_counterfactual": 0.5256541816316059, "eval_groundtruth_f1_counterfactual": 0.4355238167978441, "eval_groundtruth_f1_factual": 0.6901540356179046, "eval_icace_cosine": 0.45806506276130676, "eval_icace_l2": 0.5422117710113525, "eval_icace_normdiff": 0.3198961317539215, "eval_loss": 5.055630683898926, "eval_runtime": 8.4909, "eval_samples_per_second": 459.08, "eval_steps_per_second": 7.184, "step": 1650 }, { "epoch": 2.85, "learning_rate": 3.2019064124783364e-05, "loss": 4.9948, "step": 1660 }, { "epoch": 2.88, "learning_rate": 3.1802426343154247e-05, "loss": 4.8173, "step": 1680 }, { "epoch": 2.92, "learning_rate": 3.158578856152513e-05, "loss": 4.992, "step": 1700 }, { "epoch": 2.92, "eval_distillation_accuracy_counterfactual": 0.5425859415084658, "eval_distillation_accuracy_factual": 0.85351462288353, "eval_distillation_f1_counterfactual": 0.4447236146388141, "eval_distillation_f1_factual": 0.8351101653639337, "eval_groundtruth_accuracy_counterfactual": 0.5397639815289892, "eval_groundtruth_f1_counterfactual": 0.44798180339075505, "eval_groundtruth_f1_factual": 0.6778238731391417, "eval_icace_cosine": 0.45538121461868286, "eval_icace_l2": 0.5322200655937195, "eval_icace_normdiff": 0.31347331404685974, "eval_loss": 4.9646124839782715, "eval_runtime": 8.526, "eval_samples_per_second": 457.191, "eval_steps_per_second": 7.155, "step": 1700 }, { "epoch": 2.95, "learning_rate": 3.136915077989602e-05, "loss": 4.9492, "step": 1720 }, { "epoch": 2.98, "learning_rate": 3.1152512998266895e-05, "loss": 4.7878, "step": 1740 }, { "epoch": 3.0, "eval_distillation_accuracy_counterfactual": 0.5320677270395074, "eval_distillation_accuracy_factual": 0.8496664956387892, "eval_distillation_f1_counterfactual": 0.4458143361123156, "eval_distillation_f1_factual": 0.834744839305625, "eval_groundtruth_accuracy_counterfactual": 0.5295023088763469, "eval_groundtruth_f1_counterfactual": 0.4474449904485411, "eval_groundtruth_f1_factual": 0.6845588782361415, "eval_icace_cosine": 0.4579332768917084, "eval_icace_l2": 0.5446497201919556, "eval_icace_normdiff": 0.31855684518814087, "eval_loss": 5.185755252838135, "eval_runtime": 8.4459, "eval_samples_per_second": 461.525, "eval_steps_per_second": 7.222, "step": 1750 }, { "epoch": 3.02, "learning_rate": 3.0935875216637785e-05, "loss": 4.8111, "step": 1760 }, { "epoch": 3.05, "learning_rate": 3.071923743500867e-05, "loss": 4.6818, "step": 1780 }, { "epoch": 3.09, "learning_rate": 3.0502599653379547e-05, "loss": 4.7687, "step": 1800 }, { "epoch": 3.09, "eval_distillation_accuracy_counterfactual": 0.5536172396100565, "eval_distillation_accuracy_factual": 0.8612108773730118, "eval_distillation_f1_counterfactual": 0.45069397828871016, "eval_distillation_f1_factual": 0.8386466173302395, "eval_groundtruth_accuracy_counterfactual": 0.5423293996921498, "eval_groundtruth_f1_counterfactual": 0.4442055475871095, "eval_groundtruth_f1_factual": 0.6860856434140972, "eval_icace_cosine": 0.4503948390483856, "eval_icace_l2": 0.5204057097434998, "eval_icace_normdiff": 0.30468884110450745, "eval_loss": 4.862071514129639, "eval_runtime": 8.5156, "eval_samples_per_second": 457.751, "eval_steps_per_second": 7.163, "step": 1800 }, { "epoch": 3.12, "learning_rate": 3.0285961871750434e-05, "loss": 4.7456, "step": 1820 }, { "epoch": 3.16, "learning_rate": 3.006932409012132e-05, "loss": 4.8854, "step": 1840 }, { "epoch": 3.17, "eval_distillation_accuracy_counterfactual": 0.5543868650590046, "eval_distillation_accuracy_factual": 0.8624935864545921, "eval_distillation_f1_counterfactual": 0.45637182291257544, "eval_distillation_f1_factual": 0.8486332957487601, "eval_groundtruth_accuracy_counterfactual": 0.5533606977937404, "eval_groundtruth_f1_counterfactual": 0.4612555911529408, "eval_groundtruth_f1_factual": 0.6815381521049796, "eval_icace_cosine": 0.45295801758766174, "eval_icace_l2": 0.5143481492996216, "eval_icace_normdiff": 0.3000515103340149, "eval_loss": 4.850634574890137, "eval_runtime": 10.44, "eval_samples_per_second": 373.372, "eval_steps_per_second": 5.843, "step": 1850 }, { "epoch": 3.19, "learning_rate": 2.98526863084922e-05, "loss": 4.6583, "step": 1860 }, { "epoch": 3.22, "learning_rate": 2.9636048526863086e-05, "loss": 4.722, "step": 1880 }, { "epoch": 3.26, "learning_rate": 2.9419410745233972e-05, "loss": 4.7166, "step": 1900 }, { "epoch": 3.26, "eval_distillation_accuracy_counterfactual": 0.521806054386865, "eval_distillation_accuracy_factual": 0.8386351975371986, "eval_distillation_f1_counterfactual": 0.4298153628320489, "eval_distillation_f1_factual": 0.8168198911546973, "eval_groundtruth_accuracy_counterfactual": 0.5261672652642381, "eval_groundtruth_f1_counterfactual": 0.43622988773989146, "eval_groundtruth_f1_factual": 0.6731503713772962, "eval_icace_cosine": 0.46492505073547363, "eval_icace_l2": 0.5486338138580322, "eval_icace_normdiff": 0.3204009532928467, "eval_loss": 5.321374893188477, "eval_runtime": 8.8584, "eval_samples_per_second": 440.036, "eval_steps_per_second": 6.886, "step": 1900 }, { "epoch": 3.29, "learning_rate": 2.920277296360485e-05, "loss": 4.7496, "step": 1920 }, { "epoch": 3.33, "learning_rate": 2.8986135181975738e-05, "loss": 4.859, "step": 1940 }, { "epoch": 3.34, "eval_distillation_accuracy_counterfactual": 0.5543868650590046, "eval_distillation_accuracy_factual": 0.8412006157003592, "eval_distillation_f1_counterfactual": 0.4550611014400025, "eval_distillation_f1_factual": 0.8236540467948268, "eval_groundtruth_accuracy_counterfactual": 0.547460236018471, "eval_groundtruth_f1_counterfactual": 0.45334216208035627, "eval_groundtruth_f1_factual": 0.6897532601098797, "eval_icace_cosine": 0.4536947011947632, "eval_icace_l2": 0.5238730907440186, "eval_icace_normdiff": 0.30295780301094055, "eval_loss": 4.9707112312316895, "eval_runtime": 13.3675, "eval_samples_per_second": 291.602, "eval_steps_per_second": 4.563, "step": 1950 }, { "epoch": 3.36, "learning_rate": 2.8769497400346624e-05, "loss": 4.7329, "step": 1960 }, { "epoch": 3.4, "learning_rate": 2.8552859618717503e-05, "loss": 4.9083, "step": 1980 }, { "epoch": 3.43, "learning_rate": 2.833622183708839e-05, "loss": 4.7854, "step": 2000 }, { "epoch": 3.43, "eval_distillation_accuracy_counterfactual": 0.5379681888147768, "eval_distillation_accuracy_factual": 0.8517188301693176, "eval_distillation_f1_counterfactual": 0.44136242677706106, "eval_distillation_f1_factual": 0.8329787432505646, "eval_groundtruth_accuracy_counterfactual": 0.5369420215495125, "eval_groundtruth_f1_counterfactual": 0.4465284034353478, "eval_groundtruth_f1_factual": 0.682809704153051, "eval_icace_cosine": 0.45627400279045105, "eval_icace_l2": 0.5298829078674316, "eval_icace_normdiff": 0.3082645535469055, "eval_loss": 5.060492992401123, "eval_runtime": 8.7373, "eval_samples_per_second": 446.134, "eval_steps_per_second": 6.982, "step": 2000 }, { "epoch": 3.46, "learning_rate": 2.8119584055459276e-05, "loss": 4.6758, "step": 2020 }, { "epoch": 3.5, "learning_rate": 2.7902946273830156e-05, "loss": 4.5728, "step": 2040 }, { "epoch": 3.52, "eval_distillation_accuracy_counterfactual": 0.5551564905079528, "eval_distillation_accuracy_factual": 0.8596716264751154, "eval_distillation_f1_counterfactual": 0.4609374331427321, "eval_distillation_f1_factual": 0.8450713527551953, "eval_groundtruth_accuracy_counterfactual": 0.5479733196511031, "eval_groundtruth_f1_counterfactual": 0.45960494557331416, "eval_groundtruth_f1_factual": 0.6888964775148831, "eval_icace_cosine": 0.46091988682746887, "eval_icace_l2": 0.5246635675430298, "eval_icace_normdiff": 0.3013245463371277, "eval_loss": 4.980366230010986, "eval_runtime": 13.5743, "eval_samples_per_second": 287.16, "eval_steps_per_second": 4.494, "step": 2050 }, { "epoch": 3.53, "learning_rate": 2.7686308492201042e-05, "loss": 4.6722, "step": 2060 }, { "epoch": 3.57, "learning_rate": 2.7469670710571928e-05, "loss": 4.5946, "step": 2080 }, { "epoch": 3.6, "learning_rate": 2.7253032928942808e-05, "loss": 4.7488, "step": 2100 }, { "epoch": 3.6, "eval_distillation_accuracy_counterfactual": 0.5507952796305798, "eval_distillation_accuracy_factual": 0.8530015392508979, "eval_distillation_f1_counterfactual": 0.4423387911912668, "eval_distillation_f1_factual": 0.836190452372467, "eval_groundtruth_accuracy_counterfactual": 0.5405336069779374, "eval_groundtruth_f1_counterfactual": 0.43700640203504726, "eval_groundtruth_f1_factual": 0.6881924736914886, "eval_icace_cosine": 0.44613537192344666, "eval_icace_l2": 0.5100580453872681, "eval_icace_normdiff": 0.2910933494567871, "eval_loss": 4.805124759674072, "eval_runtime": 8.5385, "eval_samples_per_second": 456.523, "eval_steps_per_second": 7.144, "step": 2100 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 1.76716722432384e+16, "trial_name": null, "trial_params": null }