| { | |
| "best_metric": 0.4664005935192108, | |
| "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__roberta-base/checkpoint-2400", | |
| "epoch": 4.11663807890223, | |
| "global_step": 2400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9783362218370885e-05, | |
| "loss": 15.2337, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.956672443674177e-05, | |
| "loss": 12.7446, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_distillation_accuracy_counterfactual": 0.4099538224730631, | |
| "eval_distillation_accuracy_factual": 0.8694202154951257, | |
| "eval_distillation_f1_counterfactual": 0.2767586064145223, | |
| "eval_distillation_f1_factual": 0.855037145008629, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4055926115956901, | |
| "eval_groundtruth_f1_counterfactual": 0.2794447729623582, | |
| "eval_groundtruth_f1_factual": 0.7140520716279329, | |
| "eval_icace_cosine": 0.5027520656585693, | |
| "eval_icace_l2": 0.7894455790519714, | |
| "eval_icace_normdiff": 0.5822004675865173, | |
| "eval_loss": 10.286812782287598, | |
| "eval_runtime": 12.8965, | |
| "eval_samples_per_second": 302.252, | |
| "eval_steps_per_second": 2.404, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.935008665511265e-05, | |
| "loss": 12.0353, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.913344887348354e-05, | |
| "loss": 10.8775, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.891681109185442e-05, | |
| "loss": 10.4023, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_distillation_accuracy_counterfactual": 0.5100051308363264, | |
| "eval_distillation_accuracy_factual": 0.8696767573114418, | |
| "eval_distillation_f1_counterfactual": 0.4037905431000315, | |
| "eval_distillation_f1_factual": 0.8559902744972321, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4787070292457671, | |
| "eval_groundtruth_f1_counterfactual": 0.37533207411941716, | |
| "eval_groundtruth_f1_factual": 0.7208539604782886, | |
| "eval_icace_cosine": 0.4815896153450012, | |
| "eval_icace_l2": 0.6899080872535706, | |
| "eval_icace_normdiff": 0.4302704632282257, | |
| "eval_loss": 8.579986572265625, | |
| "eval_runtime": 12.9731, | |
| "eval_samples_per_second": 300.469, | |
| "eval_steps_per_second": 2.39, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.8700173310225307e-05, | |
| "loss": 10.1421, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.848353552859619e-05, | |
| "loss": 9.9736, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_distillation_accuracy_counterfactual": 0.5123140071831709, | |
| "eval_distillation_accuracy_factual": 0.8655720882503848, | |
| "eval_distillation_f1_counterfactual": 0.39453533423432713, | |
| "eval_distillation_f1_factual": 0.8497690853832852, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5025654181631606, | |
| "eval_groundtruth_f1_counterfactual": 0.39291773068394925, | |
| "eval_groundtruth_f1_factual": 0.7016197509963444, | |
| "eval_icace_cosine": 0.4719853699207306, | |
| "eval_icace_l2": 0.6965569853782654, | |
| "eval_icace_normdiff": 0.45266422629356384, | |
| "eval_loss": 8.405228614807129, | |
| "eval_runtime": 12.9355, | |
| "eval_samples_per_second": 301.342, | |
| "eval_steps_per_second": 2.397, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.826689774696707e-05, | |
| "loss": 9.9668, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8050259965337955e-05, | |
| "loss": 9.6499, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.7833622183708845e-05, | |
| "loss": 9.7115, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_distillation_accuracy_counterfactual": 0.4807593637762955, | |
| "eval_distillation_accuracy_factual": 0.8599281682914315, | |
| "eval_distillation_f1_counterfactual": 0.37412255609926637, | |
| "eval_distillation_f1_factual": 0.8424763100922966, | |
| "eval_groundtruth_accuracy_counterfactual": 0.45407901487942537, | |
| "eval_groundtruth_f1_counterfactual": 0.3550761421793228, | |
| "eval_groundtruth_f1_factual": 0.7263728871661477, | |
| "eval_icace_cosine": 0.47865259647369385, | |
| "eval_icace_l2": 0.7145634889602661, | |
| "eval_icace_normdiff": 0.4607016146183014, | |
| "eval_loss": 8.778631210327148, | |
| "eval_runtime": 13.5302, | |
| "eval_samples_per_second": 288.096, | |
| "eval_steps_per_second": 2.291, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.761698440207972e-05, | |
| "loss": 9.6127, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.740034662045061e-05, | |
| "loss": 9.643, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_distillation_accuracy_counterfactual": 0.4989738327347358, | |
| "eval_distillation_accuracy_factual": 0.8589020010261673, | |
| "eval_distillation_f1_counterfactual": 0.4148903629100139, | |
| "eval_distillation_f1_factual": 0.8451672844026795, | |
| "eval_groundtruth_accuracy_counterfactual": 0.48460749102103645, | |
| "eval_groundtruth_f1_counterfactual": 0.4101913073497503, | |
| "eval_groundtruth_f1_factual": 0.7027318362324826, | |
| "eval_icace_cosine": 0.4739187955856323, | |
| "eval_icace_l2": 0.6788182854652405, | |
| "eval_icace_normdiff": 0.4215588867664337, | |
| "eval_loss": 8.473577499389648, | |
| "eval_runtime": 12.9445, | |
| "eval_samples_per_second": 301.132, | |
| "eval_steps_per_second": 2.395, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.7183708838821494e-05, | |
| "loss": 9.4834, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.6967071057192376e-05, | |
| "loss": 9.5528, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.675043327556326e-05, | |
| "loss": 9.4911, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_distillation_accuracy_counterfactual": 0.48024628014366344, | |
| "eval_distillation_accuracy_factual": 0.8560800410466907, | |
| "eval_distillation_f1_counterfactual": 0.38759062114466497, | |
| "eval_distillation_f1_factual": 0.8388069726052654, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4681888147768086, | |
| "eval_groundtruth_f1_counterfactual": 0.3863531311941365, | |
| "eval_groundtruth_f1_factual": 0.7022853957903076, | |
| "eval_icace_cosine": 0.4793255627155304, | |
| "eval_icace_l2": 0.6844598054885864, | |
| "eval_icace_normdiff": 0.41838327050209045, | |
| "eval_loss": 8.66569709777832, | |
| "eval_runtime": 12.9451, | |
| "eval_samples_per_second": 301.117, | |
| "eval_steps_per_second": 2.395, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.653379549393415e-05, | |
| "loss": 9.2569, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.6317157712305025e-05, | |
| "loss": 9.2215, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_distillation_accuracy_counterfactual": 0.4979476654694715, | |
| "eval_distillation_accuracy_factual": 0.861723961005644, | |
| "eval_distillation_f1_counterfactual": 0.4084180941769576, | |
| "eval_distillation_f1_factual": 0.8476491763825083, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4976911236531555, | |
| "eval_groundtruth_f1_counterfactual": 0.41199726943728454, | |
| "eval_groundtruth_f1_factual": 0.7017647747639904, | |
| "eval_icace_cosine": 0.48205703496932983, | |
| "eval_icace_l2": 0.6776481866836548, | |
| "eval_icace_normdiff": 0.42241370677948, | |
| "eval_loss": 8.44431209564209, | |
| "eval_runtime": 14.7727, | |
| "eval_samples_per_second": 263.866, | |
| "eval_steps_per_second": 2.098, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.6100519930675915e-05, | |
| "loss": 9.0972, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.58838821490468e-05, | |
| "loss": 9.1708, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.566724436741768e-05, | |
| "loss": 8.8806, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_distillation_accuracy_counterfactual": 0.5318111852231914, | |
| "eval_distillation_accuracy_factual": 0.8532580810672139, | |
| "eval_distillation_f1_counterfactual": 0.45627188162049775, | |
| "eval_distillation_f1_factual": 0.8433141363137382, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5146228835300154, | |
| "eval_groundtruth_f1_counterfactual": 0.4506516610322439, | |
| "eval_groundtruth_f1_factual": 0.703544919476949, | |
| "eval_icace_cosine": 0.47363877296447754, | |
| "eval_icace_l2": 0.6601594090461731, | |
| "eval_icace_normdiff": 0.4189549684524536, | |
| "eval_loss": 8.39233684539795, | |
| "eval_runtime": 13.1803, | |
| "eval_samples_per_second": 295.745, | |
| "eval_steps_per_second": 2.352, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.5450606585788563e-05, | |
| "loss": 9.0014, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.5233968804159446e-05, | |
| "loss": 8.7768, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_distillation_accuracy_counterfactual": 0.5248845561826577, | |
| "eval_distillation_accuracy_factual": 0.8471010774756286, | |
| "eval_distillation_f1_counterfactual": 0.4373197309012829, | |
| "eval_distillation_f1_factual": 0.8365706913229692, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5112878399179066, | |
| "eval_groundtruth_f1_counterfactual": 0.4332310317337549, | |
| "eval_groundtruth_f1_factual": 0.706420324783281, | |
| "eval_icace_cosine": 0.47051629424095154, | |
| "eval_icace_l2": 0.6678978204727173, | |
| "eval_icace_normdiff": 0.42113974690437317, | |
| "eval_loss": 8.4581298828125, | |
| "eval_runtime": 12.9621, | |
| "eval_samples_per_second": 300.723, | |
| "eval_steps_per_second": 2.392, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.501733102253033e-05, | |
| "loss": 8.8678, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.480069324090121e-05, | |
| "loss": 8.9234, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.45840554592721e-05, | |
| "loss": 8.8491, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_distillation_accuracy_counterfactual": 0.5269368907131863, | |
| "eval_distillation_accuracy_factual": 0.8450487429451, | |
| "eval_distillation_f1_counterfactual": 0.44482702743850444, | |
| "eval_distillation_f1_factual": 0.8303570404668654, | |
| "eval_groundtruth_accuracy_counterfactual": 0.517188301693176, | |
| "eval_groundtruth_f1_counterfactual": 0.44803544206517765, | |
| "eval_groundtruth_f1_factual": 0.7101076620278356, | |
| "eval_icace_cosine": 0.4709581732749939, | |
| "eval_icace_l2": 0.6744429469108582, | |
| "eval_icace_normdiff": 0.4238233268260956, | |
| "eval_loss": 8.559771537780762, | |
| "eval_runtime": 12.9505, | |
| "eval_samples_per_second": 300.991, | |
| "eval_steps_per_second": 2.394, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.436741767764298e-05, | |
| "loss": 8.9139, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.415077989601387e-05, | |
| "loss": 8.9491, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_distillation_accuracy_counterfactual": 0.508722421754746, | |
| "eval_distillation_accuracy_factual": 0.8270908158029758, | |
| "eval_distillation_f1_counterfactual": 0.4457012957498576, | |
| "eval_distillation_f1_factual": 0.8156237227220284, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4987172909184197, | |
| "eval_groundtruth_f1_counterfactual": 0.4444355597987831, | |
| "eval_groundtruth_f1_factual": 0.704491382874623, | |
| "eval_icace_cosine": 0.4708389639854431, | |
| "eval_icace_l2": 0.6740121245384216, | |
| "eval_icace_normdiff": 0.4291449189186096, | |
| "eval_loss": 8.710935592651367, | |
| "eval_runtime": 13.1041, | |
| "eval_samples_per_second": 297.465, | |
| "eval_steps_per_second": 2.366, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.393414211438475e-05, | |
| "loss": 8.6501, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.371750433275563e-05, | |
| "loss": 8.8115, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.3500866551126516e-05, | |
| "loss": 8.5726, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_distillation_accuracy_counterfactual": 0.5061570035915854, | |
| "eval_distillation_accuracy_factual": 0.8486403283735249, | |
| "eval_distillation_f1_counterfactual": 0.41610676926533285, | |
| "eval_distillation_f1_factual": 0.8310444475559301, | |
| "eval_groundtruth_accuracy_counterfactual": 0.4905079527963058, | |
| "eval_groundtruth_f1_counterfactual": 0.4145735924063139, | |
| "eval_groundtruth_f1_factual": 0.7092515264551655, | |
| "eval_icace_cosine": 0.4776078760623932, | |
| "eval_icace_l2": 0.6925671100616455, | |
| "eval_icace_normdiff": 0.42703282833099365, | |
| "eval_loss": 8.773487091064453, | |
| "eval_runtime": 13.0705, | |
| "eval_samples_per_second": 298.228, | |
| "eval_steps_per_second": 2.372, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.3284228769497406e-05, | |
| "loss": 8.7449, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.306759098786828e-05, | |
| "loss": 8.5654, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_distillation_accuracy_counterfactual": 0.5148794253463315, | |
| "eval_distillation_accuracy_factual": 0.8314520266803489, | |
| "eval_distillation_f1_counterfactual": 0.4350161586459501, | |
| "eval_distillation_f1_factual": 0.8207900808818138, | |
| "eval_groundtruth_accuracy_counterfactual": 0.513340174448435, | |
| "eval_groundtruth_f1_counterfactual": 0.4424165935664268, | |
| "eval_groundtruth_f1_factual": 0.6858850991781995, | |
| "eval_icace_cosine": 0.4748988747596741, | |
| "eval_icace_l2": 0.6583500504493713, | |
| "eval_icace_normdiff": 0.4023895561695099, | |
| "eval_loss": 8.626461029052734, | |
| "eval_runtime": 13.1109, | |
| "eval_samples_per_second": 297.31, | |
| "eval_steps_per_second": 2.364, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 4.285095320623917e-05, | |
| "loss": 8.5985, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.2634315424610055e-05, | |
| "loss": 8.9032, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.241767764298094e-05, | |
| "loss": 8.5234, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_distillation_accuracy_counterfactual": 0.5266803488968702, | |
| "eval_distillation_accuracy_factual": 0.8347870702924577, | |
| "eval_distillation_f1_counterfactual": 0.44418104449377827, | |
| "eval_distillation_f1_factual": 0.8166622315451695, | |
| "eval_groundtruth_accuracy_counterfactual": 0.512570548999487, | |
| "eval_groundtruth_f1_counterfactual": 0.4399064148597627, | |
| "eval_groundtruth_f1_factual": 0.7003621420252235, | |
| "eval_icace_cosine": 0.4755347669124603, | |
| "eval_icace_l2": 0.6643328666687012, | |
| "eval_icace_normdiff": 0.4135916829109192, | |
| "eval_loss": 8.528804779052734, | |
| "eval_runtime": 12.9782, | |
| "eval_samples_per_second": 300.35, | |
| "eval_steps_per_second": 2.389, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.220103986135182e-05, | |
| "loss": 8.3629, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.198440207972271e-05, | |
| "loss": 8.6834, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_distillation_accuracy_counterfactual": 0.5253976398152899, | |
| "eval_distillation_accuracy_factual": 0.8499230374551052, | |
| "eval_distillation_f1_counterfactual": 0.4481280211692343, | |
| "eval_distillation_f1_factual": 0.8427632254580978, | |
| "eval_groundtruth_accuracy_counterfactual": 0.521292970754233, | |
| "eval_groundtruth_f1_counterfactual": 0.4535188471820635, | |
| "eval_groundtruth_f1_factual": 0.7050025057202923, | |
| "eval_icace_cosine": 0.478320837020874, | |
| "eval_icace_l2": 0.6507540345191956, | |
| "eval_icace_normdiff": 0.4042801558971405, | |
| "eval_loss": 8.547685623168945, | |
| "eval_runtime": 13.0628, | |
| "eval_samples_per_second": 298.406, | |
| "eval_steps_per_second": 2.373, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.1767764298093586e-05, | |
| "loss": 8.373, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.1551126516464476e-05, | |
| "loss": 8.545, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.133448873483536e-05, | |
| "loss": 8.3301, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_distillation_accuracy_counterfactual": 0.5246280143663418, | |
| "eval_distillation_accuracy_factual": 0.857875833760903, | |
| "eval_distillation_f1_counterfactual": 0.43678271154246406, | |
| "eval_distillation_f1_factual": 0.8404426089149801, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5177013853258081, | |
| "eval_groundtruth_f1_counterfactual": 0.4372037073522039, | |
| "eval_groundtruth_f1_factual": 0.7164819803959707, | |
| "eval_icace_cosine": 0.4767405092716217, | |
| "eval_icace_l2": 0.6505340933799744, | |
| "eval_icace_normdiff": 0.3892482817173004, | |
| "eval_loss": 8.437895774841309, | |
| "eval_runtime": 12.9829, | |
| "eval_samples_per_second": 300.242, | |
| "eval_steps_per_second": 2.388, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.111785095320624e-05, | |
| "loss": 8.5471, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.0901213171577124e-05, | |
| "loss": 8.5899, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_distillation_accuracy_counterfactual": 0.5389943560800411, | |
| "eval_distillation_accuracy_factual": 0.8668547973319651, | |
| "eval_distillation_f1_counterfactual": 0.43508558636235506, | |
| "eval_distillation_f1_factual": 0.8544181051951583, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5287326834273987, | |
| "eval_groundtruth_f1_counterfactual": 0.4313681541912551, | |
| "eval_groundtruth_f1_factual": 0.7021443551588085, | |
| "eval_icace_cosine": 0.47022899985313416, | |
| "eval_icace_l2": 0.6471868753433228, | |
| "eval_icace_normdiff": 0.3974311053752899, | |
| "eval_loss": 8.187715530395508, | |
| "eval_runtime": 12.9448, | |
| "eval_samples_per_second": 301.125, | |
| "eval_steps_per_second": 2.395, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 4.068457538994801e-05, | |
| "loss": 8.4412, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.04679376083189e-05, | |
| "loss": 8.5992, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.025129982668977e-05, | |
| "loss": 8.5057, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_distillation_accuracy_counterfactual": 0.5338635197537198, | |
| "eval_distillation_accuracy_factual": 0.8494099538224731, | |
| "eval_distillation_f1_counterfactual": 0.4538940008145664, | |
| "eval_distillation_f1_factual": 0.8393372080212285, | |
| "eval_groundtruth_accuracy_counterfactual": 0.521549512570549, | |
| "eval_groundtruth_f1_counterfactual": 0.44602586407123895, | |
| "eval_groundtruth_f1_factual": 0.7062665719976977, | |
| "eval_icace_cosine": 0.47390881180763245, | |
| "eval_icace_l2": 0.6635516881942749, | |
| "eval_icace_normdiff": 0.41677233576774597, | |
| "eval_loss": 8.466891288757324, | |
| "eval_runtime": 12.9586, | |
| "eval_samples_per_second": 300.803, | |
| "eval_steps_per_second": 2.392, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 4.003466204506066e-05, | |
| "loss": 8.5256, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.9818024263431546e-05, | |
| "loss": 8.4663, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_distillation_accuracy_counterfactual": 0.539250897896357, | |
| "eval_distillation_accuracy_factual": 0.8429964084145716, | |
| "eval_distillation_f1_counterfactual": 0.4552348813290624, | |
| "eval_distillation_f1_factual": 0.8279955804250164, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5248845561826577, | |
| "eval_groundtruth_f1_counterfactual": 0.44541029004084864, | |
| "eval_groundtruth_f1_factual": 0.7174847360382134, | |
| "eval_icace_cosine": 0.47177237272262573, | |
| "eval_icace_l2": 0.6441071629524231, | |
| "eval_icace_normdiff": 0.3987690806388855, | |
| "eval_loss": 8.24371337890625, | |
| "eval_runtime": 13.1576, | |
| "eval_samples_per_second": 296.255, | |
| "eval_steps_per_second": 2.356, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.960138648180243e-05, | |
| "loss": 8.4244, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.938474870017331e-05, | |
| "loss": 8.5347, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.91681109185442e-05, | |
| "loss": 8.5181, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_distillation_accuracy_counterfactual": 0.5523345305284761, | |
| "eval_distillation_accuracy_factual": 0.8568496664956388, | |
| "eval_distillation_f1_counterfactual": 0.43192107651721756, | |
| "eval_distillation_f1_factual": 0.8446400408995643, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5310415597742432, | |
| "eval_groundtruth_f1_counterfactual": 0.4221211937230137, | |
| "eval_groundtruth_f1_factual": 0.7176363988738272, | |
| "eval_icace_cosine": 0.4691796898841858, | |
| "eval_icace_l2": 0.6469516158103943, | |
| "eval_icace_normdiff": 0.40605321526527405, | |
| "eval_loss": 8.24507999420166, | |
| "eval_runtime": 12.9758, | |
| "eval_samples_per_second": 300.405, | |
| "eval_steps_per_second": 2.389, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.895147313691508e-05, | |
| "loss": 8.4384, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.873483535528597e-05, | |
| "loss": 8.4464, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_distillation_accuracy_counterfactual": 0.5341200615700359, | |
| "eval_distillation_accuracy_factual": 0.8471010774756286, | |
| "eval_distillation_f1_counterfactual": 0.44797373858638706, | |
| "eval_distillation_f1_factual": 0.8352221547782517, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5187275525910724, | |
| "eval_groundtruth_f1_counterfactual": 0.438959174201451, | |
| "eval_groundtruth_f1_factual": 0.7045395682269071, | |
| "eval_icace_cosine": 0.4699629247188568, | |
| "eval_icace_l2": 0.650560200214386, | |
| "eval_icace_normdiff": 0.4021012485027313, | |
| "eval_loss": 8.46190357208252, | |
| "eval_runtime": 12.9778, | |
| "eval_samples_per_second": 300.359, | |
| "eval_steps_per_second": 2.389, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.851819757365685e-05, | |
| "loss": 8.3977, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.830155979202773e-05, | |
| "loss": 8.4469, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.8084922010398616e-05, | |
| "loss": 8.4589, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_distillation_accuracy_counterfactual": 0.538481272447409, | |
| "eval_distillation_accuracy_factual": 0.853771164699846, | |
| "eval_distillation_f1_counterfactual": 0.4574351380443254, | |
| "eval_distillation_f1_factual": 0.8413133626862945, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5338635197537198, | |
| "eval_groundtruth_f1_counterfactual": 0.4618955956329911, | |
| "eval_groundtruth_f1_factual": 0.712001520958305, | |
| "eval_icace_cosine": 0.47186800837516785, | |
| "eval_icace_l2": 0.6550236940383911, | |
| "eval_icace_normdiff": 0.4062390625476837, | |
| "eval_loss": 8.270210266113281, | |
| "eval_runtime": 13.0076, | |
| "eval_samples_per_second": 299.671, | |
| "eval_steps_per_second": 2.383, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.78682842287695e-05, | |
| "loss": 8.2726, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.765164644714038e-05, | |
| "loss": 8.543, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_distillation_accuracy_counterfactual": 0.5454079014879425, | |
| "eval_distillation_accuracy_factual": 0.8565931246793227, | |
| "eval_distillation_f1_counterfactual": 0.4358856901648176, | |
| "eval_distillation_f1_factual": 0.8450511347625259, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5369420215495125, | |
| "eval_groundtruth_f1_counterfactual": 0.4328763616035284, | |
| "eval_groundtruth_f1_factual": 0.7122842482342289, | |
| "eval_icace_cosine": 0.470956027507782, | |
| "eval_icace_l2": 0.6414260864257812, | |
| "eval_icace_normdiff": 0.3959381878376007, | |
| "eval_loss": 8.278718948364258, | |
| "eval_runtime": 13.0747, | |
| "eval_samples_per_second": 298.133, | |
| "eval_steps_per_second": 2.371, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.7435008665511264e-05, | |
| "loss": 8.5847, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.7218370883882154e-05, | |
| "loss": 8.495, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 3.700173310225303e-05, | |
| "loss": 8.2151, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_distillation_accuracy_counterfactual": 0.5323242688558235, | |
| "eval_distillation_accuracy_factual": 0.844792201128784, | |
| "eval_distillation_f1_counterfactual": 0.4578721834749787, | |
| "eval_distillation_f1_factual": 0.8335573328949983, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5205233453052848, | |
| "eval_groundtruth_f1_counterfactual": 0.4529105959701748, | |
| "eval_groundtruth_f1_factual": 0.6982997202664809, | |
| "eval_icace_cosine": 0.4770417809486389, | |
| "eval_icace_l2": 0.664779782295227, | |
| "eval_icace_normdiff": 0.4089834690093994, | |
| "eval_loss": 8.583990097045898, | |
| "eval_runtime": 12.9637, | |
| "eval_samples_per_second": 300.686, | |
| "eval_steps_per_second": 2.391, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 3.678509532062392e-05, | |
| "loss": 8.2576, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.65684575389948e-05, | |
| "loss": 8.3456, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_distillation_accuracy_counterfactual": 0.5210364289379169, | |
| "eval_distillation_accuracy_factual": 0.8414571575166753, | |
| "eval_distillation_f1_counterfactual": 0.44541850110843295, | |
| "eval_distillation_f1_factual": 0.8216034713991617, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5228322216521293, | |
| "eval_groundtruth_f1_counterfactual": 0.4520848884145957, | |
| "eval_groundtruth_f1_factual": 0.7136759478326009, | |
| "eval_icace_cosine": 0.48024165630340576, | |
| "eval_icace_l2": 0.6682620048522949, | |
| "eval_icace_normdiff": 0.41649964451789856, | |
| "eval_loss": 8.683144569396973, | |
| "eval_runtime": 12.9156, | |
| "eval_samples_per_second": 301.806, | |
| "eval_steps_per_second": 2.4, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.6351819757365686e-05, | |
| "loss": 8.2687, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.613518197573657e-05, | |
| "loss": 8.343, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 3.591854419410746e-05, | |
| "loss": 8.2615, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_distillation_accuracy_counterfactual": 0.5210364289379169, | |
| "eval_distillation_accuracy_factual": 0.8376090302719343, | |
| "eval_distillation_f1_counterfactual": 0.4470542011154386, | |
| "eval_distillation_f1_factual": 0.8243930760094701, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5094920472036942, | |
| "eval_groundtruth_f1_counterfactual": 0.4424943238928763, | |
| "eval_groundtruth_f1_factual": 0.7180425551041923, | |
| "eval_icace_cosine": 0.4772956669330597, | |
| "eval_icace_l2": 0.675339937210083, | |
| "eval_icace_normdiff": 0.4207656979560852, | |
| "eval_loss": 8.777983665466309, | |
| "eval_runtime": 12.95, | |
| "eval_samples_per_second": 301.004, | |
| "eval_steps_per_second": 2.394, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 3.5701906412478334e-05, | |
| "loss": 8.3164, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.5485268630849224e-05, | |
| "loss": 8.2886, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_distillation_accuracy_counterfactual": 0.5233453052847614, | |
| "eval_distillation_accuracy_factual": 0.8563365828630066, | |
| "eval_distillation_f1_counterfactual": 0.4352218638479065, | |
| "eval_distillation_f1_factual": 0.842128331673918, | |
| "eval_groundtruth_accuracy_counterfactual": 0.504617752693689, | |
| "eval_groundtruth_f1_counterfactual": 0.42672300871162205, | |
| "eval_groundtruth_f1_factual": 0.7038431621141455, | |
| "eval_icace_cosine": 0.47232314944267273, | |
| "eval_icace_l2": 0.6612510681152344, | |
| "eval_icace_normdiff": 0.4119323790073395, | |
| "eval_loss": 8.546701431274414, | |
| "eval_runtime": 12.8815, | |
| "eval_samples_per_second": 302.605, | |
| "eval_steps_per_second": 2.407, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 3.526863084922011e-05, | |
| "loss": 8.0323, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 3.505199306759099e-05, | |
| "loss": 8.2412, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.483535528596187e-05, | |
| "loss": 8.2755, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_distillation_accuracy_counterfactual": 0.5279630579784504, | |
| "eval_distillation_accuracy_factual": 0.8512057465366855, | |
| "eval_distillation_f1_counterfactual": 0.42885545419249443, | |
| "eval_distillation_f1_factual": 0.8336172051549792, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5110312981015905, | |
| "eval_groundtruth_f1_counterfactual": 0.4182726548657759, | |
| "eval_groundtruth_f1_factual": 0.7081817865697444, | |
| "eval_icace_cosine": 0.47038188576698303, | |
| "eval_icace_l2": 0.6541587114334106, | |
| "eval_icace_normdiff": 0.4046129286289215, | |
| "eval_loss": 8.557218551635742, | |
| "eval_runtime": 12.9809, | |
| "eval_samples_per_second": 300.286, | |
| "eval_steps_per_second": 2.388, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.461871750433276e-05, | |
| "loss": 8.1159, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 3.440207972270364e-05, | |
| "loss": 8.2512, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_distillation_accuracy_counterfactual": 0.5048742945100051, | |
| "eval_distillation_accuracy_factual": 0.8442791174961519, | |
| "eval_distillation_f1_counterfactual": 0.4471986183427893, | |
| "eval_distillation_f1_factual": 0.8328539372383206, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5061570035915854, | |
| "eval_groundtruth_f1_counterfactual": 0.45459231049993376, | |
| "eval_groundtruth_f1_factual": 0.7243435837992515, | |
| "eval_icace_cosine": 0.47215959429740906, | |
| "eval_icace_l2": 0.6676862239837646, | |
| "eval_icace_normdiff": 0.4189312160015106, | |
| "eval_loss": 8.632984161376953, | |
| "eval_runtime": 12.9685, | |
| "eval_samples_per_second": 300.574, | |
| "eval_steps_per_second": 2.39, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.418544194107453e-05, | |
| "loss": 8.4124, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.396880415944541e-05, | |
| "loss": 8.2396, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 3.3752166377816294e-05, | |
| "loss": 8.1802, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_distillation_accuracy_counterfactual": 0.5266803488968702, | |
| "eval_distillation_accuracy_factual": 0.8424833247819394, | |
| "eval_distillation_f1_counterfactual": 0.4296539407490461, | |
| "eval_distillation_f1_factual": 0.8232924538803463, | |
| "eval_groundtruth_accuracy_counterfactual": 0.521292970754233, | |
| "eval_groundtruth_f1_counterfactual": 0.4329058498324728, | |
| "eval_groundtruth_f1_factual": 0.693798726302169, | |
| "eval_icace_cosine": 0.47156795859336853, | |
| "eval_icace_l2": 0.6467342972755432, | |
| "eval_icace_normdiff": 0.39208564162254333, | |
| "eval_loss": 8.581865310668945, | |
| "eval_runtime": 12.9632, | |
| "eval_samples_per_second": 300.697, | |
| "eval_steps_per_second": 2.391, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.353552859618718e-05, | |
| "loss": 8.0483, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.331889081455806e-05, | |
| "loss": 8.3776, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_distillation_accuracy_counterfactual": 0.5395074397126731, | |
| "eval_distillation_accuracy_factual": 0.8624935864545921, | |
| "eval_distillation_f1_counterfactual": 0.4487826083438827, | |
| "eval_distillation_f1_factual": 0.8447571530576526, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5233453052847614, | |
| "eval_groundtruth_f1_counterfactual": 0.44381794658084023, | |
| "eval_groundtruth_f1_factual": 0.7228365951086511, | |
| "eval_icace_cosine": 0.467881977558136, | |
| "eval_icace_l2": 0.6500933766365051, | |
| "eval_icace_normdiff": 0.4116222858428955, | |
| "eval_loss": 8.251863479614258, | |
| "eval_runtime": 13.0173, | |
| "eval_samples_per_second": 299.447, | |
| "eval_steps_per_second": 2.381, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 3.310225303292894e-05, | |
| "loss": 8.3191, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.2885615251299825e-05, | |
| "loss": 8.3499, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 3.2668977469670715e-05, | |
| "loss": 8.2759, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_distillation_accuracy_counterfactual": 0.5264238070805541, | |
| "eval_distillation_accuracy_factual": 0.8381221139045665, | |
| "eval_distillation_f1_counterfactual": 0.4640025505726335, | |
| "eval_distillation_f1_factual": 0.8226839825314822, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5112878399179066, | |
| "eval_groundtruth_f1_counterfactual": 0.4572176402176623, | |
| "eval_groundtruth_f1_factual": 0.7122677529021709, | |
| "eval_icace_cosine": 0.46834951639175415, | |
| "eval_icace_l2": 0.6554086208343506, | |
| "eval_icace_normdiff": 0.4115924835205078, | |
| "eval_loss": 8.593127250671387, | |
| "eval_runtime": 12.9442, | |
| "eval_samples_per_second": 301.138, | |
| "eval_steps_per_second": 2.395, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.245233968804159e-05, | |
| "loss": 8.2134, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.223570190641248e-05, | |
| "loss": 8.2762, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_distillation_accuracy_counterfactual": 0.5274499743458184, | |
| "eval_distillation_accuracy_factual": 0.8532580810672139, | |
| "eval_distillation_f1_counterfactual": 0.4582506359486283, | |
| "eval_distillation_f1_factual": 0.8369130644925307, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5105182144689584, | |
| "eval_groundtruth_f1_counterfactual": 0.4492512316522279, | |
| "eval_groundtruth_f1_factual": 0.7137344968653884, | |
| "eval_icace_cosine": 0.4689863324165344, | |
| "eval_icace_l2": 0.6501835584640503, | |
| "eval_icace_normdiff": 0.40152183175086975, | |
| "eval_loss": 8.484435081481934, | |
| "eval_runtime": 12.9458, | |
| "eval_samples_per_second": 301.102, | |
| "eval_steps_per_second": 2.395, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 3.2019064124783364e-05, | |
| "loss": 8.3236, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.1802426343154247e-05, | |
| "loss": 8.1251, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 3.158578856152513e-05, | |
| "loss": 8.3658, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_distillation_accuracy_counterfactual": 0.5236018471010775, | |
| "eval_distillation_accuracy_factual": 0.8442791174961519, | |
| "eval_distillation_f1_counterfactual": 0.4500677919517996, | |
| "eval_distillation_f1_factual": 0.8294190656695536, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5161621344279117, | |
| "eval_groundtruth_f1_counterfactual": 0.44921994678493393, | |
| "eval_groundtruth_f1_factual": 0.7157702918851219, | |
| "eval_icace_cosine": 0.47242775559425354, | |
| "eval_icace_l2": 0.6650809049606323, | |
| "eval_icace_normdiff": 0.4171872138977051, | |
| "eval_loss": 8.604122161865234, | |
| "eval_runtime": 12.9963, | |
| "eval_samples_per_second": 299.932, | |
| "eval_steps_per_second": 2.385, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.136915077989602e-05, | |
| "loss": 8.1394, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.1152512998266895e-05, | |
| "loss": 8.1326, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_distillation_accuracy_counterfactual": 0.5328373524884557, | |
| "eval_distillation_accuracy_factual": 0.8496664956387892, | |
| "eval_distillation_f1_counterfactual": 0.466763064256023, | |
| "eval_distillation_f1_factual": 0.8351501069683767, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5230887634684454, | |
| "eval_groundtruth_f1_counterfactual": 0.46309383057125386, | |
| "eval_groundtruth_f1_factual": 0.7189084641368721, | |
| "eval_icace_cosine": 0.47281646728515625, | |
| "eval_icace_l2": 0.6447048783302307, | |
| "eval_icace_normdiff": 0.3949274718761444, | |
| "eval_loss": 8.471474647521973, | |
| "eval_runtime": 12.9383, | |
| "eval_samples_per_second": 301.276, | |
| "eval_steps_per_second": 2.396, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 3.0935875216637785e-05, | |
| "loss": 8.0867, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 3.071923743500867e-05, | |
| "loss": 8.2012, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.0502599653379547e-05, | |
| "loss": 8.1176, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "eval_distillation_accuracy_counterfactual": 0.5320677270395074, | |
| "eval_distillation_accuracy_factual": 0.8473576192919446, | |
| "eval_distillation_f1_counterfactual": 0.44372130842528623, | |
| "eval_distillation_f1_factual": 0.8338491634681494, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5076962544894817, | |
| "eval_groundtruth_f1_counterfactual": 0.4271300744187679, | |
| "eval_groundtruth_f1_factual": 0.7089616826609815, | |
| "eval_icace_cosine": 0.4722249507904053, | |
| "eval_icace_l2": 0.65628582239151, | |
| "eval_icace_normdiff": 0.40942007303237915, | |
| "eval_loss": 8.602928161621094, | |
| "eval_runtime": 12.9577, | |
| "eval_samples_per_second": 300.825, | |
| "eval_steps_per_second": 2.392, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 3.0285961871750434e-05, | |
| "loss": 8.1985, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 3.006932409012132e-05, | |
| "loss": 8.2358, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "eval_distillation_accuracy_counterfactual": 0.5179579271421242, | |
| "eval_distillation_accuracy_factual": 0.8463314520266804, | |
| "eval_distillation_f1_counterfactual": 0.4547140810789615, | |
| "eval_distillation_f1_factual": 0.8318522122913702, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5179579271421242, | |
| "eval_groundtruth_f1_counterfactual": 0.46111342311742015, | |
| "eval_groundtruth_f1_factual": 0.702200862113018, | |
| "eval_icace_cosine": 0.47784164547920227, | |
| "eval_icace_l2": 0.6577954292297363, | |
| "eval_icace_normdiff": 0.40519601106643677, | |
| "eval_loss": 8.596319198608398, | |
| "eval_runtime": 13.0549, | |
| "eval_samples_per_second": 298.584, | |
| "eval_steps_per_second": 2.375, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 2.98526863084922e-05, | |
| "loss": 8.1927, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 2.9636048526863086e-05, | |
| "loss": 8.0759, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 2.9419410745233972e-05, | |
| "loss": 8.0023, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "eval_distillation_accuracy_counterfactual": 0.5410466906105695, | |
| "eval_distillation_accuracy_factual": 0.8442791174961519, | |
| "eval_distillation_f1_counterfactual": 0.4708691905840775, | |
| "eval_distillation_f1_factual": 0.8314060332226945, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5287326834273987, | |
| "eval_groundtruth_f1_counterfactual": 0.4662134522268956, | |
| "eval_groundtruth_f1_factual": 0.7036825171946501, | |
| "eval_icace_cosine": 0.46827077865600586, | |
| "eval_icace_l2": 0.6418067812919617, | |
| "eval_icace_normdiff": 0.39047762751579285, | |
| "eval_loss": 8.510624885559082, | |
| "eval_runtime": 12.9429, | |
| "eval_samples_per_second": 301.169, | |
| "eval_steps_per_second": 2.395, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.920277296360485e-05, | |
| "loss": 8.0164, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 2.8986135181975738e-05, | |
| "loss": 8.0343, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "eval_distillation_accuracy_counterfactual": 0.5356593124679323, | |
| "eval_distillation_accuracy_factual": 0.8594150846587993, | |
| "eval_distillation_f1_counterfactual": 0.4646747781692526, | |
| "eval_distillation_f1_factual": 0.8458748080305407, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5230887634684454, | |
| "eval_groundtruth_f1_counterfactual": 0.4587518415854972, | |
| "eval_groundtruth_f1_factual": 0.725017307995671, | |
| "eval_icace_cosine": 0.46998363733291626, | |
| "eval_icace_l2": 0.6633685231208801, | |
| "eval_icace_normdiff": 0.410533607006073, | |
| "eval_loss": 8.549391746520996, | |
| "eval_runtime": 13.0331, | |
| "eval_samples_per_second": 299.085, | |
| "eval_steps_per_second": 2.379, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.8769497400346624e-05, | |
| "loss": 8.1109, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 2.8552859618717503e-05, | |
| "loss": 7.8661, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 2.833622183708839e-05, | |
| "loss": 7.9558, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "eval_distillation_accuracy_counterfactual": 0.5338635197537198, | |
| "eval_distillation_accuracy_factual": 0.8483837865572088, | |
| "eval_distillation_f1_counterfactual": 0.46569718446185726, | |
| "eval_distillation_f1_factual": 0.8346114011854626, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5269368907131863, | |
| "eval_groundtruth_f1_counterfactual": 0.46415122006349785, | |
| "eval_groundtruth_f1_factual": 0.7156362232258703, | |
| "eval_icace_cosine": 0.46725159883499146, | |
| "eval_icace_l2": 0.660638153553009, | |
| "eval_icace_normdiff": 0.40762051939964294, | |
| "eval_loss": 8.534335136413574, | |
| "eval_runtime": 12.9678, | |
| "eval_samples_per_second": 300.591, | |
| "eval_steps_per_second": 2.391, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 2.8119584055459276e-05, | |
| "loss": 8.0163, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 2.7902946273830156e-05, | |
| "loss": 8.1543, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "eval_distillation_accuracy_counterfactual": 0.5184710107747563, | |
| "eval_distillation_accuracy_factual": 0.8417136993329912, | |
| "eval_distillation_f1_counterfactual": 0.45048032767829593, | |
| "eval_distillation_f1_factual": 0.8277337474547333, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5187275525910724, | |
| "eval_groundtruth_f1_counterfactual": 0.4579942611753098, | |
| "eval_groundtruth_f1_factual": 0.7262909345250802, | |
| "eval_icace_cosine": 0.47337350249290466, | |
| "eval_icace_l2": 0.6622626781463623, | |
| "eval_icace_normdiff": 0.40650057792663574, | |
| "eval_loss": 8.670907020568848, | |
| "eval_runtime": 12.913, | |
| "eval_samples_per_second": 301.867, | |
| "eval_steps_per_second": 2.401, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 2.7686308492201042e-05, | |
| "loss": 8.0555, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 2.7469670710571928e-05, | |
| "loss": 8.003, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.7253032928942808e-05, | |
| "loss": 8.1222, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_distillation_accuracy_counterfactual": 0.538481272447409, | |
| "eval_distillation_accuracy_factual": 0.8324781939456132, | |
| "eval_distillation_f1_counterfactual": 0.45321151762235823, | |
| "eval_distillation_f1_factual": 0.816404323283801, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5256541816316059, | |
| "eval_groundtruth_f1_counterfactual": 0.4475943574688602, | |
| "eval_groundtruth_f1_factual": 0.7218836713261483, | |
| "eval_icace_cosine": 0.47021886706352234, | |
| "eval_icace_l2": 0.6517447233200073, | |
| "eval_icace_normdiff": 0.403333842754364, | |
| "eval_loss": 8.577171325683594, | |
| "eval_runtime": 12.9514, | |
| "eval_samples_per_second": 300.97, | |
| "eval_steps_per_second": 2.394, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 2.7036395147313694e-05, | |
| "loss": 8.2765, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 2.6819757365684577e-05, | |
| "loss": 8.0517, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "eval_distillation_accuracy_counterfactual": 0.5318111852231914, | |
| "eval_distillation_accuracy_factual": 0.8437660338635198, | |
| "eval_distillation_f1_counterfactual": 0.4762348812215591, | |
| "eval_distillation_f1_factual": 0.8278687320161431, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5200102616726526, | |
| "eval_groundtruth_f1_counterfactual": 0.47065260912789525, | |
| "eval_groundtruth_f1_factual": 0.7123545192728437, | |
| "eval_icace_cosine": 0.47268736362457275, | |
| "eval_icace_l2": 0.6466090083122253, | |
| "eval_icace_normdiff": 0.4019717872142792, | |
| "eval_loss": 8.487833023071289, | |
| "eval_runtime": 12.9963, | |
| "eval_samples_per_second": 299.933, | |
| "eval_steps_per_second": 2.385, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 2.660311958405546e-05, | |
| "loss": 8.1498, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 2.6386481802426343e-05, | |
| "loss": 8.1688, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 2.616984402079723e-05, | |
| "loss": 7.9705, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "eval_distillation_accuracy_counterfactual": 0.5377116469984607, | |
| "eval_distillation_accuracy_factual": 0.8240123140071832, | |
| "eval_distillation_f1_counterfactual": 0.4741425223222236, | |
| "eval_distillation_f1_factual": 0.8055011349134608, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5269368907131863, | |
| "eval_groundtruth_f1_counterfactual": 0.4693711263304287, | |
| "eval_groundtruth_f1_factual": 0.7106558328588921, | |
| "eval_icace_cosine": 0.4721924662590027, | |
| "eval_icace_l2": 0.6477442383766174, | |
| "eval_icace_normdiff": 0.39685875177383423, | |
| "eval_loss": 8.761940956115723, | |
| "eval_runtime": 12.9582, | |
| "eval_samples_per_second": 300.813, | |
| "eval_steps_per_second": 2.392, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 2.595320623916811e-05, | |
| "loss": 7.943, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 2.5736568457538995e-05, | |
| "loss": 8.1214, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "eval_distillation_accuracy_counterfactual": 0.5348896870189841, | |
| "eval_distillation_accuracy_factual": 0.8435094920472037, | |
| "eval_distillation_f1_counterfactual": 0.46585115605288197, | |
| "eval_distillation_f1_factual": 0.8294546068203583, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5207798871216008, | |
| "eval_groundtruth_f1_counterfactual": 0.45831312719861905, | |
| "eval_groundtruth_f1_factual": 0.7120266943001898, | |
| "eval_icace_cosine": 0.4707246422767639, | |
| "eval_icace_l2": 0.6443170309066772, | |
| "eval_icace_normdiff": 0.3977286219596863, | |
| "eval_loss": 8.511881828308105, | |
| "eval_runtime": 13.0295, | |
| "eval_samples_per_second": 299.167, | |
| "eval_steps_per_second": 2.379, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 2.551993067590988e-05, | |
| "loss": 7.963, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 2.530329289428076e-05, | |
| "loss": 7.9637, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 2.5086655112651647e-05, | |
| "loss": 8.1591, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "eval_distillation_accuracy_counterfactual": 0.5092355053873782, | |
| "eval_distillation_accuracy_factual": 0.8486403283735249, | |
| "eval_distillation_f1_counterfactual": 0.42695574999404207, | |
| "eval_distillation_f1_factual": 0.8334085301941027, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5025654181631606, | |
| "eval_groundtruth_f1_counterfactual": 0.4290408562754823, | |
| "eval_groundtruth_f1_factual": 0.7225390626347794, | |
| "eval_icace_cosine": 0.4755522310733795, | |
| "eval_icace_l2": 0.6715899109840393, | |
| "eval_icace_normdiff": 0.42764294147491455, | |
| "eval_loss": 8.715620994567871, | |
| "eval_runtime": 12.9596, | |
| "eval_samples_per_second": 300.781, | |
| "eval_steps_per_second": 2.392, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 2.4870017331022533e-05, | |
| "loss": 8.0792, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 2.4653379549393416e-05, | |
| "loss": 7.8229, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_distillation_accuracy_counterfactual": 0.5531041559774243, | |
| "eval_distillation_accuracy_factual": 0.8340174448435095, | |
| "eval_distillation_f1_counterfactual": 0.4729251628273044, | |
| "eval_distillation_f1_factual": 0.8182553778403516, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5364289379168804, | |
| "eval_groundtruth_f1_counterfactual": 0.4660967256300985, | |
| "eval_groundtruth_f1_factual": 0.7131629444896277, | |
| "eval_icace_cosine": 0.47335678339004517, | |
| "eval_icace_l2": 0.6358506679534912, | |
| "eval_icace_normdiff": 0.38684460520744324, | |
| "eval_loss": 8.613683700561523, | |
| "eval_runtime": 12.9404, | |
| "eval_samples_per_second": 301.226, | |
| "eval_steps_per_second": 2.396, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 2.44367417677643e-05, | |
| "loss": 8.022, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 2.4220103986135185e-05, | |
| "loss": 7.8912, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 2.4003466204506068e-05, | |
| "loss": 8.0281, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "eval_distillation_accuracy_counterfactual": 0.5456644433042586, | |
| "eval_distillation_accuracy_factual": 0.848896870189841, | |
| "eval_distillation_f1_counterfactual": 0.4776857954065169, | |
| "eval_distillation_f1_factual": 0.8326616872239038, | |
| "eval_groundtruth_accuracy_counterfactual": 0.5297588506926629, | |
| "eval_groundtruth_f1_counterfactual": 0.46905822427531607, | |
| "eval_groundtruth_f1_factual": 0.7201395082994183, | |
| "eval_icace_cosine": 0.4664005935192108, | |
| "eval_icace_l2": 0.6372502446174622, | |
| "eval_icace_normdiff": 0.39846205711364746, | |
| "eval_loss": 8.315762519836426, | |
| "eval_runtime": 13.0359, | |
| "eval_samples_per_second": 299.021, | |
| "eval_steps_per_second": 2.378, | |
| "step": 2400 | |
| } | |
| ], | |
| "max_steps": 4616, | |
| "num_train_epochs": 8, | |
| "total_flos": 2.0194317434112e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |