{ "best_metric": 0.4664005935192108, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__roberta-base/checkpoint-2400", "epoch": 4.11663807890223, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 15.2337, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 12.7446, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.4099538224730631, "eval_distillation_accuracy_factual": 0.8694202154951257, "eval_distillation_f1_counterfactual": 0.2767586064145223, "eval_distillation_f1_factual": 0.855037145008629, "eval_groundtruth_accuracy_counterfactual": 0.4055926115956901, "eval_groundtruth_f1_counterfactual": 0.2794447729623582, "eval_groundtruth_f1_factual": 0.7140520716279329, "eval_icace_cosine": 0.5027520656585693, "eval_icace_l2": 0.7894455790519714, "eval_icace_normdiff": 0.5822004675865173, "eval_loss": 10.286812782287598, "eval_runtime": 12.8965, "eval_samples_per_second": 302.252, "eval_steps_per_second": 2.404, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 12.0353, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 10.8775, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 10.4023, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.5100051308363264, "eval_distillation_accuracy_factual": 0.8696767573114418, "eval_distillation_f1_counterfactual": 0.4037905431000315, "eval_distillation_f1_factual": 0.8559902744972321, "eval_groundtruth_accuracy_counterfactual": 0.4787070292457671, "eval_groundtruth_f1_counterfactual": 0.37533207411941716, "eval_groundtruth_f1_factual": 0.7208539604782886, "eval_icace_cosine": 0.4815896153450012, "eval_icace_l2": 0.6899080872535706, "eval_icace_normdiff": 0.4302704632282257, "eval_loss": 8.579986572265625, "eval_runtime": 12.9731, "eval_samples_per_second": 300.469, "eval_steps_per_second": 2.39, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 10.1421, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 9.9736, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.5123140071831709, "eval_distillation_accuracy_factual": 0.8655720882503848, "eval_distillation_f1_counterfactual": 0.39453533423432713, "eval_distillation_f1_factual": 0.8497690853832852, "eval_groundtruth_accuracy_counterfactual": 0.5025654181631606, "eval_groundtruth_f1_counterfactual": 0.39291773068394925, "eval_groundtruth_f1_factual": 0.7016197509963444, "eval_icace_cosine": 0.4719853699207306, "eval_icace_l2": 0.6965569853782654, "eval_icace_normdiff": 0.45266422629356384, "eval_loss": 8.405228614807129, "eval_runtime": 12.9355, "eval_samples_per_second": 301.342, "eval_steps_per_second": 2.397, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 9.9668, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 9.6499, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 9.7115, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.4807593637762955, "eval_distillation_accuracy_factual": 0.8599281682914315, "eval_distillation_f1_counterfactual": 0.37412255609926637, "eval_distillation_f1_factual": 0.8424763100922966, "eval_groundtruth_accuracy_counterfactual": 0.45407901487942537, "eval_groundtruth_f1_counterfactual": 0.3550761421793228, "eval_groundtruth_f1_factual": 0.7263728871661477, "eval_icace_cosine": 0.47865259647369385, "eval_icace_l2": 0.7145634889602661, "eval_icace_normdiff": 0.4607016146183014, "eval_loss": 8.778631210327148, "eval_runtime": 13.5302, "eval_samples_per_second": 288.096, "eval_steps_per_second": 2.291, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 9.6127, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 9.643, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.4989738327347358, "eval_distillation_accuracy_factual": 0.8589020010261673, "eval_distillation_f1_counterfactual": 0.4148903629100139, "eval_distillation_f1_factual": 0.8451672844026795, "eval_groundtruth_accuracy_counterfactual": 0.48460749102103645, "eval_groundtruth_f1_counterfactual": 0.4101913073497503, "eval_groundtruth_f1_factual": 0.7027318362324826, "eval_icace_cosine": 0.4739187955856323, "eval_icace_l2": 0.6788182854652405, "eval_icace_normdiff": 0.4215588867664337, "eval_loss": 8.473577499389648, "eval_runtime": 12.9445, "eval_samples_per_second": 301.132, "eval_steps_per_second": 2.395, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 9.4834, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 9.5528, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 9.4911, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.48024628014366344, "eval_distillation_accuracy_factual": 0.8560800410466907, "eval_distillation_f1_counterfactual": 0.38759062114466497, "eval_distillation_f1_factual": 0.8388069726052654, "eval_groundtruth_accuracy_counterfactual": 0.4681888147768086, "eval_groundtruth_f1_counterfactual": 0.3863531311941365, "eval_groundtruth_f1_factual": 0.7022853957903076, "eval_icace_cosine": 0.4793255627155304, "eval_icace_l2": 0.6844598054885864, "eval_icace_normdiff": 0.41838327050209045, "eval_loss": 8.66569709777832, "eval_runtime": 12.9451, "eval_samples_per_second": 301.117, "eval_steps_per_second": 2.395, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 9.2569, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 9.2215, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.4979476654694715, "eval_distillation_accuracy_factual": 0.861723961005644, "eval_distillation_f1_counterfactual": 0.4084180941769576, "eval_distillation_f1_factual": 0.8476491763825083, "eval_groundtruth_accuracy_counterfactual": 0.4976911236531555, "eval_groundtruth_f1_counterfactual": 0.41199726943728454, "eval_groundtruth_f1_factual": 0.7017647747639904, "eval_icace_cosine": 0.48205703496932983, "eval_icace_l2": 0.6776481866836548, "eval_icace_normdiff": 0.42241370677948, "eval_loss": 8.44431209564209, "eval_runtime": 14.7727, "eval_samples_per_second": 263.866, "eval_steps_per_second": 2.098, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 9.0972, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 9.1708, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 8.8806, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.5318111852231914, "eval_distillation_accuracy_factual": 0.8532580810672139, "eval_distillation_f1_counterfactual": 0.45627188162049775, "eval_distillation_f1_factual": 0.8433141363137382, "eval_groundtruth_accuracy_counterfactual": 0.5146228835300154, "eval_groundtruth_f1_counterfactual": 0.4506516610322439, "eval_groundtruth_f1_factual": 0.703544919476949, "eval_icace_cosine": 0.47363877296447754, "eval_icace_l2": 0.6601594090461731, "eval_icace_normdiff": 0.4189549684524536, "eval_loss": 8.39233684539795, "eval_runtime": 13.1803, "eval_samples_per_second": 295.745, "eval_steps_per_second": 2.352, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 9.0014, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 8.7768, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.5248845561826577, "eval_distillation_accuracy_factual": 0.8471010774756286, "eval_distillation_f1_counterfactual": 0.4373197309012829, "eval_distillation_f1_factual": 0.8365706913229692, "eval_groundtruth_accuracy_counterfactual": 0.5112878399179066, "eval_groundtruth_f1_counterfactual": 0.4332310317337549, "eval_groundtruth_f1_factual": 0.706420324783281, "eval_icace_cosine": 0.47051629424095154, "eval_icace_l2": 0.6678978204727173, "eval_icace_normdiff": 0.42113974690437317, "eval_loss": 8.4581298828125, "eval_runtime": 12.9621, "eval_samples_per_second": 300.723, "eval_steps_per_second": 2.392, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 8.8678, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 8.9234, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 8.8491, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.5269368907131863, "eval_distillation_accuracy_factual": 0.8450487429451, "eval_distillation_f1_counterfactual": 0.44482702743850444, "eval_distillation_f1_factual": 0.8303570404668654, "eval_groundtruth_accuracy_counterfactual": 0.517188301693176, "eval_groundtruth_f1_counterfactual": 0.44803544206517765, "eval_groundtruth_f1_factual": 0.7101076620278356, "eval_icace_cosine": 0.4709581732749939, "eval_icace_l2": 0.6744429469108582, "eval_icace_normdiff": 0.4238233268260956, "eval_loss": 8.559771537780762, "eval_runtime": 12.9505, "eval_samples_per_second": 300.991, "eval_steps_per_second": 2.394, "step": 500 }, { "epoch": 0.89, "learning_rate": 4.436741767764298e-05, "loss": 8.9139, "step": 520 }, { "epoch": 0.93, "learning_rate": 4.415077989601387e-05, "loss": 8.9491, "step": 540 }, { "epoch": 0.94, "eval_distillation_accuracy_counterfactual": 0.508722421754746, "eval_distillation_accuracy_factual": 0.8270908158029758, "eval_distillation_f1_counterfactual": 0.4457012957498576, "eval_distillation_f1_factual": 0.8156237227220284, "eval_groundtruth_accuracy_counterfactual": 0.4987172909184197, "eval_groundtruth_f1_counterfactual": 0.4444355597987831, "eval_groundtruth_f1_factual": 0.704491382874623, "eval_icace_cosine": 0.4708389639854431, "eval_icace_l2": 0.6740121245384216, "eval_icace_normdiff": 0.4291449189186096, "eval_loss": 8.710935592651367, "eval_runtime": 13.1041, "eval_samples_per_second": 297.465, "eval_steps_per_second": 2.366, "step": 550 }, { "epoch": 0.96, "learning_rate": 4.393414211438475e-05, "loss": 8.6501, "step": 560 }, { "epoch": 0.99, "learning_rate": 4.371750433275563e-05, "loss": 8.8115, "step": 580 }, { "epoch": 1.03, "learning_rate": 4.3500866551126516e-05, "loss": 8.5726, "step": 600 }, { "epoch": 1.03, "eval_distillation_accuracy_counterfactual": 0.5061570035915854, "eval_distillation_accuracy_factual": 0.8486403283735249, "eval_distillation_f1_counterfactual": 0.41610676926533285, "eval_distillation_f1_factual": 0.8310444475559301, "eval_groundtruth_accuracy_counterfactual": 0.4905079527963058, "eval_groundtruth_f1_counterfactual": 0.4145735924063139, "eval_groundtruth_f1_factual": 0.7092515264551655, "eval_icace_cosine": 0.4776078760623932, "eval_icace_l2": 0.6925671100616455, "eval_icace_normdiff": 0.42703282833099365, "eval_loss": 8.773487091064453, "eval_runtime": 13.0705, "eval_samples_per_second": 298.228, "eval_steps_per_second": 2.372, "step": 600 }, { "epoch": 1.06, "learning_rate": 4.3284228769497406e-05, "loss": 8.7449, "step": 620 }, { "epoch": 1.1, "learning_rate": 4.306759098786828e-05, "loss": 8.5654, "step": 640 }, { "epoch": 1.11, "eval_distillation_accuracy_counterfactual": 0.5148794253463315, "eval_distillation_accuracy_factual": 0.8314520266803489, "eval_distillation_f1_counterfactual": 0.4350161586459501, "eval_distillation_f1_factual": 0.8207900808818138, "eval_groundtruth_accuracy_counterfactual": 0.513340174448435, "eval_groundtruth_f1_counterfactual": 0.4424165935664268, "eval_groundtruth_f1_factual": 0.6858850991781995, "eval_icace_cosine": 0.4748988747596741, "eval_icace_l2": 0.6583500504493713, "eval_icace_normdiff": 0.4023895561695099, "eval_loss": 8.626461029052734, "eval_runtime": 13.1109, "eval_samples_per_second": 297.31, "eval_steps_per_second": 2.364, "step": 650 }, { "epoch": 1.13, "learning_rate": 4.285095320623917e-05, "loss": 8.5985, "step": 660 }, { "epoch": 1.17, "learning_rate": 4.2634315424610055e-05, "loss": 8.9032, "step": 680 }, { "epoch": 1.2, "learning_rate": 4.241767764298094e-05, "loss": 8.5234, "step": 700 }, { "epoch": 1.2, "eval_distillation_accuracy_counterfactual": 0.5266803488968702, "eval_distillation_accuracy_factual": 0.8347870702924577, "eval_distillation_f1_counterfactual": 0.44418104449377827, "eval_distillation_f1_factual": 0.8166622315451695, "eval_groundtruth_accuracy_counterfactual": 0.512570548999487, "eval_groundtruth_f1_counterfactual": 0.4399064148597627, "eval_groundtruth_f1_factual": 0.7003621420252235, "eval_icace_cosine": 0.4755347669124603, "eval_icace_l2": 0.6643328666687012, "eval_icace_normdiff": 0.4135916829109192, "eval_loss": 8.528804779052734, "eval_runtime": 12.9782, "eval_samples_per_second": 300.35, "eval_steps_per_second": 2.389, "step": 700 }, { "epoch": 1.23, "learning_rate": 4.220103986135182e-05, "loss": 8.3629, "step": 720 }, { "epoch": 1.27, "learning_rate": 4.198440207972271e-05, "loss": 8.6834, "step": 740 }, { "epoch": 1.29, "eval_distillation_accuracy_counterfactual": 0.5253976398152899, "eval_distillation_accuracy_factual": 0.8499230374551052, "eval_distillation_f1_counterfactual": 0.4481280211692343, "eval_distillation_f1_factual": 0.8427632254580978, "eval_groundtruth_accuracy_counterfactual": 0.521292970754233, "eval_groundtruth_f1_counterfactual": 0.4535188471820635, "eval_groundtruth_f1_factual": 0.7050025057202923, "eval_icace_cosine": 0.478320837020874, "eval_icace_l2": 0.6507540345191956, "eval_icace_normdiff": 0.4042801558971405, "eval_loss": 8.547685623168945, "eval_runtime": 13.0628, "eval_samples_per_second": 298.406, "eval_steps_per_second": 2.373, "step": 750 }, { "epoch": 1.3, "learning_rate": 4.1767764298093586e-05, "loss": 8.373, "step": 760 }, { "epoch": 1.34, "learning_rate": 4.1551126516464476e-05, "loss": 8.545, "step": 780 }, { "epoch": 1.37, "learning_rate": 4.133448873483536e-05, "loss": 8.3301, "step": 800 }, { "epoch": 1.37, "eval_distillation_accuracy_counterfactual": 0.5246280143663418, "eval_distillation_accuracy_factual": 0.857875833760903, "eval_distillation_f1_counterfactual": 0.43678271154246406, "eval_distillation_f1_factual": 0.8404426089149801, "eval_groundtruth_accuracy_counterfactual": 0.5177013853258081, "eval_groundtruth_f1_counterfactual": 0.4372037073522039, "eval_groundtruth_f1_factual": 0.7164819803959707, "eval_icace_cosine": 0.4767405092716217, "eval_icace_l2": 0.6505340933799744, "eval_icace_normdiff": 0.3892482817173004, "eval_loss": 8.437895774841309, "eval_runtime": 12.9829, "eval_samples_per_second": 300.242, "eval_steps_per_second": 2.388, "step": 800 }, { "epoch": 1.41, "learning_rate": 4.111785095320624e-05, "loss": 8.5471, "step": 820 }, { "epoch": 1.44, "learning_rate": 4.0901213171577124e-05, "loss": 8.5899, "step": 840 }, { "epoch": 1.46, "eval_distillation_accuracy_counterfactual": 0.5389943560800411, "eval_distillation_accuracy_factual": 0.8668547973319651, "eval_distillation_f1_counterfactual": 0.43508558636235506, "eval_distillation_f1_factual": 0.8544181051951583, "eval_groundtruth_accuracy_counterfactual": 0.5287326834273987, "eval_groundtruth_f1_counterfactual": 0.4313681541912551, "eval_groundtruth_f1_factual": 0.7021443551588085, "eval_icace_cosine": 0.47022899985313416, "eval_icace_l2": 0.6471868753433228, "eval_icace_normdiff": 0.3974311053752899, "eval_loss": 8.187715530395508, "eval_runtime": 12.9448, "eval_samples_per_second": 301.125, "eval_steps_per_second": 2.395, "step": 850 }, { "epoch": 1.48, "learning_rate": 4.068457538994801e-05, "loss": 8.4412, "step": 860 }, { "epoch": 1.51, "learning_rate": 4.04679376083189e-05, "loss": 8.5992, "step": 880 }, { "epoch": 1.54, "learning_rate": 4.025129982668977e-05, "loss": 8.5057, "step": 900 }, { "epoch": 1.54, "eval_distillation_accuracy_counterfactual": 0.5338635197537198, "eval_distillation_accuracy_factual": 0.8494099538224731, "eval_distillation_f1_counterfactual": 0.4538940008145664, "eval_distillation_f1_factual": 0.8393372080212285, "eval_groundtruth_accuracy_counterfactual": 0.521549512570549, "eval_groundtruth_f1_counterfactual": 0.44602586407123895, "eval_groundtruth_f1_factual": 0.7062665719976977, "eval_icace_cosine": 0.47390881180763245, "eval_icace_l2": 0.6635516881942749, "eval_icace_normdiff": 0.41677233576774597, "eval_loss": 8.466891288757324, "eval_runtime": 12.9586, "eval_samples_per_second": 300.803, "eval_steps_per_second": 2.392, "step": 900 }, { "epoch": 1.58, "learning_rate": 4.003466204506066e-05, "loss": 8.5256, "step": 920 }, { "epoch": 1.61, "learning_rate": 3.9818024263431546e-05, "loss": 8.4663, "step": 940 }, { "epoch": 1.63, "eval_distillation_accuracy_counterfactual": 0.539250897896357, "eval_distillation_accuracy_factual": 0.8429964084145716, "eval_distillation_f1_counterfactual": 0.4552348813290624, "eval_distillation_f1_factual": 0.8279955804250164, "eval_groundtruth_accuracy_counterfactual": 0.5248845561826577, "eval_groundtruth_f1_counterfactual": 0.44541029004084864, "eval_groundtruth_f1_factual": 0.7174847360382134, "eval_icace_cosine": 0.47177237272262573, "eval_icace_l2": 0.6441071629524231, "eval_icace_normdiff": 0.3987690806388855, "eval_loss": 8.24371337890625, "eval_runtime": 13.1576, "eval_samples_per_second": 296.255, "eval_steps_per_second": 2.356, "step": 950 }, { "epoch": 1.65, "learning_rate": 3.960138648180243e-05, "loss": 8.4244, "step": 960 }, { "epoch": 1.68, "learning_rate": 3.938474870017331e-05, "loss": 8.5347, "step": 980 }, { "epoch": 1.72, "learning_rate": 3.91681109185442e-05, "loss": 8.5181, "step": 1000 }, { "epoch": 1.72, "eval_distillation_accuracy_counterfactual": 0.5523345305284761, "eval_distillation_accuracy_factual": 0.8568496664956388, "eval_distillation_f1_counterfactual": 0.43192107651721756, "eval_distillation_f1_factual": 0.8446400408995643, "eval_groundtruth_accuracy_counterfactual": 0.5310415597742432, "eval_groundtruth_f1_counterfactual": 0.4221211937230137, "eval_groundtruth_f1_factual": 0.7176363988738272, "eval_icace_cosine": 0.4691796898841858, "eval_icace_l2": 0.6469516158103943, "eval_icace_normdiff": 0.40605321526527405, "eval_loss": 8.24507999420166, "eval_runtime": 12.9758, "eval_samples_per_second": 300.405, "eval_steps_per_second": 2.389, "step": 1000 }, { "epoch": 1.75, "learning_rate": 3.895147313691508e-05, "loss": 8.4384, "step": 1020 }, { "epoch": 1.78, "learning_rate": 3.873483535528597e-05, "loss": 8.4464, "step": 1040 }, { "epoch": 1.8, "eval_distillation_accuracy_counterfactual": 0.5341200615700359, "eval_distillation_accuracy_factual": 0.8471010774756286, "eval_distillation_f1_counterfactual": 0.44797373858638706, "eval_distillation_f1_factual": 0.8352221547782517, "eval_groundtruth_accuracy_counterfactual": 0.5187275525910724, "eval_groundtruth_f1_counterfactual": 0.438959174201451, "eval_groundtruth_f1_factual": 0.7045395682269071, "eval_icace_cosine": 0.4699629247188568, "eval_icace_l2": 0.650560200214386, "eval_icace_normdiff": 0.4021012485027313, "eval_loss": 8.46190357208252, "eval_runtime": 12.9778, "eval_samples_per_second": 300.359, "eval_steps_per_second": 2.389, "step": 1050 }, { "epoch": 1.82, "learning_rate": 3.851819757365685e-05, "loss": 8.3977, "step": 1060 }, { "epoch": 1.85, "learning_rate": 3.830155979202773e-05, "loss": 8.4469, "step": 1080 }, { "epoch": 1.89, "learning_rate": 3.8084922010398616e-05, "loss": 8.4589, "step": 1100 }, { "epoch": 1.89, "eval_distillation_accuracy_counterfactual": 0.538481272447409, "eval_distillation_accuracy_factual": 0.853771164699846, "eval_distillation_f1_counterfactual": 0.4574351380443254, "eval_distillation_f1_factual": 0.8413133626862945, "eval_groundtruth_accuracy_counterfactual": 0.5338635197537198, "eval_groundtruth_f1_counterfactual": 0.4618955956329911, "eval_groundtruth_f1_factual": 0.712001520958305, "eval_icace_cosine": 0.47186800837516785, "eval_icace_l2": 0.6550236940383911, "eval_icace_normdiff": 0.4062390625476837, "eval_loss": 8.270210266113281, "eval_runtime": 13.0076, "eval_samples_per_second": 299.671, "eval_steps_per_second": 2.383, "step": 1100 }, { "epoch": 1.92, "learning_rate": 3.78682842287695e-05, "loss": 8.2726, "step": 1120 }, { "epoch": 1.96, "learning_rate": 3.765164644714038e-05, "loss": 8.543, "step": 1140 }, { "epoch": 1.97, "eval_distillation_accuracy_counterfactual": 0.5454079014879425, "eval_distillation_accuracy_factual": 0.8565931246793227, "eval_distillation_f1_counterfactual": 0.4358856901648176, "eval_distillation_f1_factual": 0.8450511347625259, "eval_groundtruth_accuracy_counterfactual": 0.5369420215495125, "eval_groundtruth_f1_counterfactual": 0.4328763616035284, "eval_groundtruth_f1_factual": 0.7122842482342289, "eval_icace_cosine": 0.470956027507782, "eval_icace_l2": 0.6414260864257812, "eval_icace_normdiff": 0.3959381878376007, "eval_loss": 8.278718948364258, "eval_runtime": 13.0747, "eval_samples_per_second": 298.133, "eval_steps_per_second": 2.371, "step": 1150 }, { "epoch": 1.99, "learning_rate": 3.7435008665511264e-05, "loss": 8.5847, "step": 1160 }, { "epoch": 2.02, "learning_rate": 3.7218370883882154e-05, "loss": 8.495, "step": 1180 }, { "epoch": 2.06, "learning_rate": 3.700173310225303e-05, "loss": 8.2151, "step": 1200 }, { "epoch": 2.06, "eval_distillation_accuracy_counterfactual": 0.5323242688558235, "eval_distillation_accuracy_factual": 0.844792201128784, "eval_distillation_f1_counterfactual": 0.4578721834749787, "eval_distillation_f1_factual": 0.8335573328949983, "eval_groundtruth_accuracy_counterfactual": 0.5205233453052848, "eval_groundtruth_f1_counterfactual": 0.4529105959701748, "eval_groundtruth_f1_factual": 0.6982997202664809, "eval_icace_cosine": 0.4770417809486389, "eval_icace_l2": 0.664779782295227, "eval_icace_normdiff": 0.4089834690093994, "eval_loss": 8.583990097045898, "eval_runtime": 12.9637, "eval_samples_per_second": 300.686, "eval_steps_per_second": 2.391, "step": 1200 }, { "epoch": 2.09, "learning_rate": 3.678509532062392e-05, "loss": 8.2576, "step": 1220 }, { "epoch": 2.13, "learning_rate": 3.65684575389948e-05, "loss": 8.3456, "step": 1240 }, { "epoch": 2.14, "eval_distillation_accuracy_counterfactual": 0.5210364289379169, "eval_distillation_accuracy_factual": 0.8414571575166753, "eval_distillation_f1_counterfactual": 0.44541850110843295, "eval_distillation_f1_factual": 0.8216034713991617, "eval_groundtruth_accuracy_counterfactual": 0.5228322216521293, "eval_groundtruth_f1_counterfactual": 0.4520848884145957, "eval_groundtruth_f1_factual": 0.7136759478326009, "eval_icace_cosine": 0.48024165630340576, "eval_icace_l2": 0.6682620048522949, "eval_icace_normdiff": 0.41649964451789856, "eval_loss": 8.683144569396973, "eval_runtime": 12.9156, "eval_samples_per_second": 301.806, "eval_steps_per_second": 2.4, "step": 1250 }, { "epoch": 2.16, "learning_rate": 3.6351819757365686e-05, "loss": 8.2687, "step": 1260 }, { "epoch": 2.2, "learning_rate": 3.613518197573657e-05, "loss": 8.343, "step": 1280 }, { "epoch": 2.23, "learning_rate": 3.591854419410746e-05, "loss": 8.2615, "step": 1300 }, { "epoch": 2.23, "eval_distillation_accuracy_counterfactual": 0.5210364289379169, "eval_distillation_accuracy_factual": 0.8376090302719343, "eval_distillation_f1_counterfactual": 0.4470542011154386, "eval_distillation_f1_factual": 0.8243930760094701, "eval_groundtruth_accuracy_counterfactual": 0.5094920472036942, "eval_groundtruth_f1_counterfactual": 0.4424943238928763, "eval_groundtruth_f1_factual": 0.7180425551041923, "eval_icace_cosine": 0.4772956669330597, "eval_icace_l2": 0.675339937210083, "eval_icace_normdiff": 0.4207656979560852, "eval_loss": 8.777983665466309, "eval_runtime": 12.95, "eval_samples_per_second": 301.004, "eval_steps_per_second": 2.394, "step": 1300 }, { "epoch": 2.26, "learning_rate": 3.5701906412478334e-05, "loss": 8.3164, "step": 1320 }, { "epoch": 2.3, "learning_rate": 3.5485268630849224e-05, "loss": 8.2886, "step": 1340 }, { "epoch": 2.32, "eval_distillation_accuracy_counterfactual": 0.5233453052847614, "eval_distillation_accuracy_factual": 0.8563365828630066, "eval_distillation_f1_counterfactual": 0.4352218638479065, "eval_distillation_f1_factual": 0.842128331673918, "eval_groundtruth_accuracy_counterfactual": 0.504617752693689, "eval_groundtruth_f1_counterfactual": 0.42672300871162205, "eval_groundtruth_f1_factual": 0.7038431621141455, "eval_icace_cosine": 0.47232314944267273, "eval_icace_l2": 0.6612510681152344, "eval_icace_normdiff": 0.4119323790073395, "eval_loss": 8.546701431274414, "eval_runtime": 12.8815, "eval_samples_per_second": 302.605, "eval_steps_per_second": 2.407, "step": 1350 }, { "epoch": 2.33, "learning_rate": 3.526863084922011e-05, "loss": 8.0323, "step": 1360 }, { "epoch": 2.37, "learning_rate": 3.505199306759099e-05, "loss": 8.2412, "step": 1380 }, { "epoch": 2.4, "learning_rate": 3.483535528596187e-05, "loss": 8.2755, "step": 1400 }, { "epoch": 2.4, "eval_distillation_accuracy_counterfactual": 0.5279630579784504, "eval_distillation_accuracy_factual": 0.8512057465366855, "eval_distillation_f1_counterfactual": 0.42885545419249443, "eval_distillation_f1_factual": 0.8336172051549792, "eval_groundtruth_accuracy_counterfactual": 0.5110312981015905, "eval_groundtruth_f1_counterfactual": 0.4182726548657759, "eval_groundtruth_f1_factual": 0.7081817865697444, "eval_icace_cosine": 0.47038188576698303, "eval_icace_l2": 0.6541587114334106, "eval_icace_normdiff": 0.4046129286289215, "eval_loss": 8.557218551635742, "eval_runtime": 12.9809, "eval_samples_per_second": 300.286, "eval_steps_per_second": 2.388, "step": 1400 }, { "epoch": 2.44, "learning_rate": 3.461871750433276e-05, "loss": 8.1159, "step": 1420 }, { "epoch": 2.47, "learning_rate": 3.440207972270364e-05, "loss": 8.2512, "step": 1440 }, { "epoch": 2.49, "eval_distillation_accuracy_counterfactual": 0.5048742945100051, "eval_distillation_accuracy_factual": 0.8442791174961519, "eval_distillation_f1_counterfactual": 0.4471986183427893, "eval_distillation_f1_factual": 0.8328539372383206, "eval_groundtruth_accuracy_counterfactual": 0.5061570035915854, "eval_groundtruth_f1_counterfactual": 0.45459231049993376, "eval_groundtruth_f1_factual": 0.7243435837992515, "eval_icace_cosine": 0.47215959429740906, "eval_icace_l2": 0.6676862239837646, "eval_icace_normdiff": 0.4189312160015106, "eval_loss": 8.632984161376953, "eval_runtime": 12.9685, "eval_samples_per_second": 300.574, "eval_steps_per_second": 2.39, "step": 1450 }, { "epoch": 2.5, "learning_rate": 3.418544194107453e-05, "loss": 8.4124, "step": 1460 }, { "epoch": 2.54, "learning_rate": 3.396880415944541e-05, "loss": 8.2396, "step": 1480 }, { "epoch": 2.57, "learning_rate": 3.3752166377816294e-05, "loss": 8.1802, "step": 1500 }, { "epoch": 2.57, "eval_distillation_accuracy_counterfactual": 0.5266803488968702, "eval_distillation_accuracy_factual": 0.8424833247819394, "eval_distillation_f1_counterfactual": 0.4296539407490461, "eval_distillation_f1_factual": 0.8232924538803463, "eval_groundtruth_accuracy_counterfactual": 0.521292970754233, "eval_groundtruth_f1_counterfactual": 0.4329058498324728, "eval_groundtruth_f1_factual": 0.693798726302169, "eval_icace_cosine": 0.47156795859336853, "eval_icace_l2": 0.6467342972755432, "eval_icace_normdiff": 0.39208564162254333, "eval_loss": 8.581865310668945, "eval_runtime": 12.9632, "eval_samples_per_second": 300.697, "eval_steps_per_second": 2.391, "step": 1500 }, { "epoch": 2.61, "learning_rate": 3.353552859618718e-05, "loss": 8.0483, "step": 1520 }, { "epoch": 2.64, "learning_rate": 3.331889081455806e-05, "loss": 8.3776, "step": 1540 }, { "epoch": 2.66, "eval_distillation_accuracy_counterfactual": 0.5395074397126731, "eval_distillation_accuracy_factual": 0.8624935864545921, "eval_distillation_f1_counterfactual": 0.4487826083438827, "eval_distillation_f1_factual": 0.8447571530576526, "eval_groundtruth_accuracy_counterfactual": 0.5233453052847614, "eval_groundtruth_f1_counterfactual": 0.44381794658084023, "eval_groundtruth_f1_factual": 0.7228365951086511, "eval_icace_cosine": 0.467881977558136, "eval_icace_l2": 0.6500933766365051, "eval_icace_normdiff": 0.4116222858428955, "eval_loss": 8.251863479614258, "eval_runtime": 13.0173, "eval_samples_per_second": 299.447, "eval_steps_per_second": 2.381, "step": 1550 }, { "epoch": 2.68, "learning_rate": 3.310225303292894e-05, "loss": 8.3191, "step": 1560 }, { "epoch": 2.71, "learning_rate": 3.2885615251299825e-05, "loss": 8.3499, "step": 1580 }, { "epoch": 2.74, "learning_rate": 3.2668977469670715e-05, "loss": 8.2759, "step": 1600 }, { "epoch": 2.74, "eval_distillation_accuracy_counterfactual": 0.5264238070805541, "eval_distillation_accuracy_factual": 0.8381221139045665, "eval_distillation_f1_counterfactual": 0.4640025505726335, "eval_distillation_f1_factual": 0.8226839825314822, "eval_groundtruth_accuracy_counterfactual": 0.5112878399179066, "eval_groundtruth_f1_counterfactual": 0.4572176402176623, "eval_groundtruth_f1_factual": 0.7122677529021709, "eval_icace_cosine": 0.46834951639175415, "eval_icace_l2": 0.6554086208343506, "eval_icace_normdiff": 0.4115924835205078, "eval_loss": 8.593127250671387, "eval_runtime": 12.9442, "eval_samples_per_second": 301.138, "eval_steps_per_second": 2.395, "step": 1600 }, { "epoch": 2.78, "learning_rate": 3.245233968804159e-05, "loss": 8.2134, "step": 1620 }, { "epoch": 2.81, "learning_rate": 3.223570190641248e-05, "loss": 8.2762, "step": 1640 }, { "epoch": 2.83, "eval_distillation_accuracy_counterfactual": 0.5274499743458184, "eval_distillation_accuracy_factual": 0.8532580810672139, "eval_distillation_f1_counterfactual": 0.4582506359486283, "eval_distillation_f1_factual": 0.8369130644925307, "eval_groundtruth_accuracy_counterfactual": 0.5105182144689584, "eval_groundtruth_f1_counterfactual": 0.4492512316522279, "eval_groundtruth_f1_factual": 0.7137344968653884, "eval_icace_cosine": 0.4689863324165344, "eval_icace_l2": 0.6501835584640503, "eval_icace_normdiff": 0.40152183175086975, "eval_loss": 8.484435081481934, "eval_runtime": 12.9458, "eval_samples_per_second": 301.102, "eval_steps_per_second": 2.395, "step": 1650 }, { "epoch": 2.85, "learning_rate": 3.2019064124783364e-05, "loss": 8.3236, "step": 1660 }, { "epoch": 2.88, "learning_rate": 3.1802426343154247e-05, "loss": 8.1251, "step": 1680 }, { "epoch": 2.92, "learning_rate": 3.158578856152513e-05, "loss": 8.3658, "step": 1700 }, { "epoch": 2.92, "eval_distillation_accuracy_counterfactual": 0.5236018471010775, "eval_distillation_accuracy_factual": 0.8442791174961519, "eval_distillation_f1_counterfactual": 0.4500677919517996, "eval_distillation_f1_factual": 0.8294190656695536, "eval_groundtruth_accuracy_counterfactual": 0.5161621344279117, "eval_groundtruth_f1_counterfactual": 0.44921994678493393, "eval_groundtruth_f1_factual": 0.7157702918851219, "eval_icace_cosine": 0.47242775559425354, "eval_icace_l2": 0.6650809049606323, "eval_icace_normdiff": 0.4171872138977051, "eval_loss": 8.604122161865234, "eval_runtime": 12.9963, "eval_samples_per_second": 299.932, "eval_steps_per_second": 2.385, "step": 1700 }, { "epoch": 2.95, "learning_rate": 3.136915077989602e-05, "loss": 8.1394, "step": 1720 }, { "epoch": 2.98, "learning_rate": 3.1152512998266895e-05, "loss": 8.1326, "step": 1740 }, { "epoch": 3.0, "eval_distillation_accuracy_counterfactual": 0.5328373524884557, "eval_distillation_accuracy_factual": 0.8496664956387892, "eval_distillation_f1_counterfactual": 0.466763064256023, "eval_distillation_f1_factual": 0.8351501069683767, "eval_groundtruth_accuracy_counterfactual": 0.5230887634684454, "eval_groundtruth_f1_counterfactual": 0.46309383057125386, "eval_groundtruth_f1_factual": 0.7189084641368721, "eval_icace_cosine": 0.47281646728515625, "eval_icace_l2": 0.6447048783302307, "eval_icace_normdiff": 0.3949274718761444, "eval_loss": 8.471474647521973, "eval_runtime": 12.9383, "eval_samples_per_second": 301.276, "eval_steps_per_second": 2.396, "step": 1750 }, { "epoch": 3.02, "learning_rate": 3.0935875216637785e-05, "loss": 8.0867, "step": 1760 }, { "epoch": 3.05, "learning_rate": 3.071923743500867e-05, "loss": 8.2012, "step": 1780 }, { "epoch": 3.09, "learning_rate": 3.0502599653379547e-05, "loss": 8.1176, "step": 1800 }, { "epoch": 3.09, "eval_distillation_accuracy_counterfactual": 0.5320677270395074, "eval_distillation_accuracy_factual": 0.8473576192919446, "eval_distillation_f1_counterfactual": 0.44372130842528623, "eval_distillation_f1_factual": 0.8338491634681494, "eval_groundtruth_accuracy_counterfactual": 0.5076962544894817, "eval_groundtruth_f1_counterfactual": 0.4271300744187679, "eval_groundtruth_f1_factual": 0.7089616826609815, "eval_icace_cosine": 0.4722249507904053, "eval_icace_l2": 0.65628582239151, "eval_icace_normdiff": 0.40942007303237915, "eval_loss": 8.602928161621094, "eval_runtime": 12.9577, "eval_samples_per_second": 300.825, "eval_steps_per_second": 2.392, "step": 1800 }, { "epoch": 3.12, "learning_rate": 3.0285961871750434e-05, "loss": 8.1985, "step": 1820 }, { "epoch": 3.16, "learning_rate": 3.006932409012132e-05, "loss": 8.2358, "step": 1840 }, { "epoch": 3.17, "eval_distillation_accuracy_counterfactual": 0.5179579271421242, "eval_distillation_accuracy_factual": 0.8463314520266804, "eval_distillation_f1_counterfactual": 0.4547140810789615, "eval_distillation_f1_factual": 0.8318522122913702, "eval_groundtruth_accuracy_counterfactual": 0.5179579271421242, "eval_groundtruth_f1_counterfactual": 0.46111342311742015, "eval_groundtruth_f1_factual": 0.702200862113018, "eval_icace_cosine": 0.47784164547920227, "eval_icace_l2": 0.6577954292297363, "eval_icace_normdiff": 0.40519601106643677, "eval_loss": 8.596319198608398, "eval_runtime": 13.0549, "eval_samples_per_second": 298.584, "eval_steps_per_second": 2.375, "step": 1850 }, { "epoch": 3.19, "learning_rate": 2.98526863084922e-05, "loss": 8.1927, "step": 1860 }, { "epoch": 3.22, "learning_rate": 2.9636048526863086e-05, "loss": 8.0759, "step": 1880 }, { "epoch": 3.26, "learning_rate": 2.9419410745233972e-05, "loss": 8.0023, "step": 1900 }, { "epoch": 3.26, "eval_distillation_accuracy_counterfactual": 0.5410466906105695, "eval_distillation_accuracy_factual": 0.8442791174961519, "eval_distillation_f1_counterfactual": 0.4708691905840775, "eval_distillation_f1_factual": 0.8314060332226945, "eval_groundtruth_accuracy_counterfactual": 0.5287326834273987, "eval_groundtruth_f1_counterfactual": 0.4662134522268956, "eval_groundtruth_f1_factual": 0.7036825171946501, "eval_icace_cosine": 0.46827077865600586, "eval_icace_l2": 0.6418067812919617, "eval_icace_normdiff": 0.39047762751579285, "eval_loss": 8.510624885559082, "eval_runtime": 12.9429, "eval_samples_per_second": 301.169, "eval_steps_per_second": 2.395, "step": 1900 }, { "epoch": 3.29, "learning_rate": 2.920277296360485e-05, "loss": 8.0164, "step": 1920 }, { "epoch": 3.33, "learning_rate": 2.8986135181975738e-05, "loss": 8.0343, "step": 1940 }, { "epoch": 3.34, "eval_distillation_accuracy_counterfactual": 0.5356593124679323, "eval_distillation_accuracy_factual": 0.8594150846587993, "eval_distillation_f1_counterfactual": 0.4646747781692526, "eval_distillation_f1_factual": 0.8458748080305407, "eval_groundtruth_accuracy_counterfactual": 0.5230887634684454, "eval_groundtruth_f1_counterfactual": 0.4587518415854972, "eval_groundtruth_f1_factual": 0.725017307995671, "eval_icace_cosine": 0.46998363733291626, "eval_icace_l2": 0.6633685231208801, "eval_icace_normdiff": 0.410533607006073, "eval_loss": 8.549391746520996, "eval_runtime": 13.0331, "eval_samples_per_second": 299.085, "eval_steps_per_second": 2.379, "step": 1950 }, { "epoch": 3.36, "learning_rate": 2.8769497400346624e-05, "loss": 8.1109, "step": 1960 }, { "epoch": 3.4, "learning_rate": 2.8552859618717503e-05, "loss": 7.8661, "step": 1980 }, { "epoch": 3.43, "learning_rate": 2.833622183708839e-05, "loss": 7.9558, "step": 2000 }, { "epoch": 3.43, "eval_distillation_accuracy_counterfactual": 0.5338635197537198, "eval_distillation_accuracy_factual": 0.8483837865572088, "eval_distillation_f1_counterfactual": 0.46569718446185726, "eval_distillation_f1_factual": 0.8346114011854626, "eval_groundtruth_accuracy_counterfactual": 0.5269368907131863, "eval_groundtruth_f1_counterfactual": 0.46415122006349785, "eval_groundtruth_f1_factual": 0.7156362232258703, "eval_icace_cosine": 0.46725159883499146, "eval_icace_l2": 0.660638153553009, "eval_icace_normdiff": 0.40762051939964294, "eval_loss": 8.534335136413574, "eval_runtime": 12.9678, "eval_samples_per_second": 300.591, "eval_steps_per_second": 2.391, "step": 2000 }, { "epoch": 3.46, "learning_rate": 2.8119584055459276e-05, "loss": 8.0163, "step": 2020 }, { "epoch": 3.5, "learning_rate": 2.7902946273830156e-05, "loss": 8.1543, "step": 2040 }, { "epoch": 3.52, "eval_distillation_accuracy_counterfactual": 0.5184710107747563, "eval_distillation_accuracy_factual": 0.8417136993329912, "eval_distillation_f1_counterfactual": 0.45048032767829593, "eval_distillation_f1_factual": 0.8277337474547333, "eval_groundtruth_accuracy_counterfactual": 0.5187275525910724, "eval_groundtruth_f1_counterfactual": 0.4579942611753098, "eval_groundtruth_f1_factual": 0.7262909345250802, "eval_icace_cosine": 0.47337350249290466, "eval_icace_l2": 0.6622626781463623, "eval_icace_normdiff": 0.40650057792663574, "eval_loss": 8.670907020568848, "eval_runtime": 12.913, "eval_samples_per_second": 301.867, "eval_steps_per_second": 2.401, "step": 2050 }, { "epoch": 3.53, "learning_rate": 2.7686308492201042e-05, "loss": 8.0555, "step": 2060 }, { "epoch": 3.57, "learning_rate": 2.7469670710571928e-05, "loss": 8.003, "step": 2080 }, { "epoch": 3.6, "learning_rate": 2.7253032928942808e-05, "loss": 8.1222, "step": 2100 }, { "epoch": 3.6, "eval_distillation_accuracy_counterfactual": 0.538481272447409, "eval_distillation_accuracy_factual": 0.8324781939456132, "eval_distillation_f1_counterfactual": 0.45321151762235823, "eval_distillation_f1_factual": 0.816404323283801, "eval_groundtruth_accuracy_counterfactual": 0.5256541816316059, "eval_groundtruth_f1_counterfactual": 0.4475943574688602, "eval_groundtruth_f1_factual": 0.7218836713261483, "eval_icace_cosine": 0.47021886706352234, "eval_icace_l2": 0.6517447233200073, "eval_icace_normdiff": 0.403333842754364, "eval_loss": 8.577171325683594, "eval_runtime": 12.9514, "eval_samples_per_second": 300.97, "eval_steps_per_second": 2.394, "step": 2100 }, { "epoch": 3.64, "learning_rate": 2.7036395147313694e-05, "loss": 8.2765, "step": 2120 }, { "epoch": 3.67, "learning_rate": 2.6819757365684577e-05, "loss": 8.0517, "step": 2140 }, { "epoch": 3.69, "eval_distillation_accuracy_counterfactual": 0.5318111852231914, "eval_distillation_accuracy_factual": 0.8437660338635198, "eval_distillation_f1_counterfactual": 0.4762348812215591, "eval_distillation_f1_factual": 0.8278687320161431, "eval_groundtruth_accuracy_counterfactual": 0.5200102616726526, "eval_groundtruth_f1_counterfactual": 0.47065260912789525, "eval_groundtruth_f1_factual": 0.7123545192728437, "eval_icace_cosine": 0.47268736362457275, "eval_icace_l2": 0.6466090083122253, "eval_icace_normdiff": 0.4019717872142792, "eval_loss": 8.487833023071289, "eval_runtime": 12.9963, "eval_samples_per_second": 299.933, "eval_steps_per_second": 2.385, "step": 2150 }, { "epoch": 3.7, "learning_rate": 2.660311958405546e-05, "loss": 8.1498, "step": 2160 }, { "epoch": 3.74, "learning_rate": 2.6386481802426343e-05, "loss": 8.1688, "step": 2180 }, { "epoch": 3.77, "learning_rate": 2.616984402079723e-05, "loss": 7.9705, "step": 2200 }, { "epoch": 3.77, "eval_distillation_accuracy_counterfactual": 0.5377116469984607, "eval_distillation_accuracy_factual": 0.8240123140071832, "eval_distillation_f1_counterfactual": 0.4741425223222236, "eval_distillation_f1_factual": 0.8055011349134608, "eval_groundtruth_accuracy_counterfactual": 0.5269368907131863, "eval_groundtruth_f1_counterfactual": 0.4693711263304287, "eval_groundtruth_f1_factual": 0.7106558328588921, "eval_icace_cosine": 0.4721924662590027, "eval_icace_l2": 0.6477442383766174, "eval_icace_normdiff": 0.39685875177383423, "eval_loss": 8.761940956115723, "eval_runtime": 12.9582, "eval_samples_per_second": 300.813, "eval_steps_per_second": 2.392, "step": 2200 }, { "epoch": 3.81, "learning_rate": 2.595320623916811e-05, "loss": 7.943, "step": 2220 }, { "epoch": 3.84, "learning_rate": 2.5736568457538995e-05, "loss": 8.1214, "step": 2240 }, { "epoch": 3.86, "eval_distillation_accuracy_counterfactual": 0.5348896870189841, "eval_distillation_accuracy_factual": 0.8435094920472037, "eval_distillation_f1_counterfactual": 0.46585115605288197, "eval_distillation_f1_factual": 0.8294546068203583, "eval_groundtruth_accuracy_counterfactual": 0.5207798871216008, "eval_groundtruth_f1_counterfactual": 0.45831312719861905, "eval_groundtruth_f1_factual": 0.7120266943001898, "eval_icace_cosine": 0.4707246422767639, "eval_icace_l2": 0.6443170309066772, "eval_icace_normdiff": 0.3977286219596863, "eval_loss": 8.511881828308105, "eval_runtime": 13.0295, "eval_samples_per_second": 299.167, "eval_steps_per_second": 2.379, "step": 2250 }, { "epoch": 3.88, "learning_rate": 2.551993067590988e-05, "loss": 7.963, "step": 2260 }, { "epoch": 3.91, "learning_rate": 2.530329289428076e-05, "loss": 7.9637, "step": 2280 }, { "epoch": 3.95, "learning_rate": 2.5086655112651647e-05, "loss": 8.1591, "step": 2300 }, { "epoch": 3.95, "eval_distillation_accuracy_counterfactual": 0.5092355053873782, "eval_distillation_accuracy_factual": 0.8486403283735249, "eval_distillation_f1_counterfactual": 0.42695574999404207, "eval_distillation_f1_factual": 0.8334085301941027, "eval_groundtruth_accuracy_counterfactual": 0.5025654181631606, "eval_groundtruth_f1_counterfactual": 0.4290408562754823, "eval_groundtruth_f1_factual": 0.7225390626347794, "eval_icace_cosine": 0.4755522310733795, "eval_icace_l2": 0.6715899109840393, "eval_icace_normdiff": 0.42764294147491455, "eval_loss": 8.715620994567871, "eval_runtime": 12.9596, "eval_samples_per_second": 300.781, "eval_steps_per_second": 2.392, "step": 2300 }, { "epoch": 3.98, "learning_rate": 2.4870017331022533e-05, "loss": 8.0792, "step": 2320 }, { "epoch": 4.01, "learning_rate": 2.4653379549393416e-05, "loss": 7.8229, "step": 2340 }, { "epoch": 4.03, "eval_distillation_accuracy_counterfactual": 0.5531041559774243, "eval_distillation_accuracy_factual": 0.8340174448435095, "eval_distillation_f1_counterfactual": 0.4729251628273044, "eval_distillation_f1_factual": 0.8182553778403516, "eval_groundtruth_accuracy_counterfactual": 0.5364289379168804, "eval_groundtruth_f1_counterfactual": 0.4660967256300985, "eval_groundtruth_f1_factual": 0.7131629444896277, "eval_icace_cosine": 0.47335678339004517, "eval_icace_l2": 0.6358506679534912, "eval_icace_normdiff": 0.38684460520744324, "eval_loss": 8.613683700561523, "eval_runtime": 12.9404, "eval_samples_per_second": 301.226, "eval_steps_per_second": 2.396, "step": 2350 }, { "epoch": 4.05, "learning_rate": 2.44367417677643e-05, "loss": 8.022, "step": 2360 }, { "epoch": 4.08, "learning_rate": 2.4220103986135185e-05, "loss": 7.8912, "step": 2380 }, { "epoch": 4.12, "learning_rate": 2.4003466204506068e-05, "loss": 8.0281, "step": 2400 }, { "epoch": 4.12, "eval_distillation_accuracy_counterfactual": 0.5456644433042586, "eval_distillation_accuracy_factual": 0.848896870189841, "eval_distillation_f1_counterfactual": 0.4776857954065169, "eval_distillation_f1_factual": 0.8326616872239038, "eval_groundtruth_accuracy_counterfactual": 0.5297588506926629, "eval_groundtruth_f1_counterfactual": 0.46905822427531607, "eval_groundtruth_f1_factual": 0.7201395082994183, "eval_icace_cosine": 0.4664005935192108, "eval_icace_l2": 0.6372502446174622, "eval_icace_normdiff": 0.39846205711364746, "eval_loss": 8.315762519836426, "eval_runtime": 13.0359, "eval_samples_per_second": 299.021, "eval_steps_per_second": 2.378, "step": 2400 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 2.0194317434112e+16, "trial_name": null, "trial_params": null }