KarelDO's picture
commit files to HF hub
04d8e6b
{
"best_metric": 0.4664005935192108,
"best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__roberta-base/checkpoint-2400",
"epoch": 4.11663807890223,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.9783362218370885e-05,
"loss": 15.2337,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 4.956672443674177e-05,
"loss": 12.7446,
"step": 40
},
{
"epoch": 0.09,
"eval_distillation_accuracy_counterfactual": 0.4099538224730631,
"eval_distillation_accuracy_factual": 0.8694202154951257,
"eval_distillation_f1_counterfactual": 0.2767586064145223,
"eval_distillation_f1_factual": 0.855037145008629,
"eval_groundtruth_accuracy_counterfactual": 0.4055926115956901,
"eval_groundtruth_f1_counterfactual": 0.2794447729623582,
"eval_groundtruth_f1_factual": 0.7140520716279329,
"eval_icace_cosine": 0.5027520656585693,
"eval_icace_l2": 0.7894455790519714,
"eval_icace_normdiff": 0.5822004675865173,
"eval_loss": 10.286812782287598,
"eval_runtime": 12.8965,
"eval_samples_per_second": 302.252,
"eval_steps_per_second": 2.404,
"step": 50
},
{
"epoch": 0.1,
"learning_rate": 4.935008665511265e-05,
"loss": 12.0353,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 4.913344887348354e-05,
"loss": 10.8775,
"step": 80
},
{
"epoch": 0.17,
"learning_rate": 4.891681109185442e-05,
"loss": 10.4023,
"step": 100
},
{
"epoch": 0.17,
"eval_distillation_accuracy_counterfactual": 0.5100051308363264,
"eval_distillation_accuracy_factual": 0.8696767573114418,
"eval_distillation_f1_counterfactual": 0.4037905431000315,
"eval_distillation_f1_factual": 0.8559902744972321,
"eval_groundtruth_accuracy_counterfactual": 0.4787070292457671,
"eval_groundtruth_f1_counterfactual": 0.37533207411941716,
"eval_groundtruth_f1_factual": 0.7208539604782886,
"eval_icace_cosine": 0.4815896153450012,
"eval_icace_l2": 0.6899080872535706,
"eval_icace_normdiff": 0.4302704632282257,
"eval_loss": 8.579986572265625,
"eval_runtime": 12.9731,
"eval_samples_per_second": 300.469,
"eval_steps_per_second": 2.39,
"step": 100
},
{
"epoch": 0.21,
"learning_rate": 4.8700173310225307e-05,
"loss": 10.1421,
"step": 120
},
{
"epoch": 0.24,
"learning_rate": 4.848353552859619e-05,
"loss": 9.9736,
"step": 140
},
{
"epoch": 0.26,
"eval_distillation_accuracy_counterfactual": 0.5123140071831709,
"eval_distillation_accuracy_factual": 0.8655720882503848,
"eval_distillation_f1_counterfactual": 0.39453533423432713,
"eval_distillation_f1_factual": 0.8497690853832852,
"eval_groundtruth_accuracy_counterfactual": 0.5025654181631606,
"eval_groundtruth_f1_counterfactual": 0.39291773068394925,
"eval_groundtruth_f1_factual": 0.7016197509963444,
"eval_icace_cosine": 0.4719853699207306,
"eval_icace_l2": 0.6965569853782654,
"eval_icace_normdiff": 0.45266422629356384,
"eval_loss": 8.405228614807129,
"eval_runtime": 12.9355,
"eval_samples_per_second": 301.342,
"eval_steps_per_second": 2.397,
"step": 150
},
{
"epoch": 0.27,
"learning_rate": 4.826689774696707e-05,
"loss": 9.9668,
"step": 160
},
{
"epoch": 0.31,
"learning_rate": 4.8050259965337955e-05,
"loss": 9.6499,
"step": 180
},
{
"epoch": 0.34,
"learning_rate": 4.7833622183708845e-05,
"loss": 9.7115,
"step": 200
},
{
"epoch": 0.34,
"eval_distillation_accuracy_counterfactual": 0.4807593637762955,
"eval_distillation_accuracy_factual": 0.8599281682914315,
"eval_distillation_f1_counterfactual": 0.37412255609926637,
"eval_distillation_f1_factual": 0.8424763100922966,
"eval_groundtruth_accuracy_counterfactual": 0.45407901487942537,
"eval_groundtruth_f1_counterfactual": 0.3550761421793228,
"eval_groundtruth_f1_factual": 0.7263728871661477,
"eval_icace_cosine": 0.47865259647369385,
"eval_icace_l2": 0.7145634889602661,
"eval_icace_normdiff": 0.4607016146183014,
"eval_loss": 8.778631210327148,
"eval_runtime": 13.5302,
"eval_samples_per_second": 288.096,
"eval_steps_per_second": 2.291,
"step": 200
},
{
"epoch": 0.38,
"learning_rate": 4.761698440207972e-05,
"loss": 9.6127,
"step": 220
},
{
"epoch": 0.41,
"learning_rate": 4.740034662045061e-05,
"loss": 9.643,
"step": 240
},
{
"epoch": 0.43,
"eval_distillation_accuracy_counterfactual": 0.4989738327347358,
"eval_distillation_accuracy_factual": 0.8589020010261673,
"eval_distillation_f1_counterfactual": 0.4148903629100139,
"eval_distillation_f1_factual": 0.8451672844026795,
"eval_groundtruth_accuracy_counterfactual": 0.48460749102103645,
"eval_groundtruth_f1_counterfactual": 0.4101913073497503,
"eval_groundtruth_f1_factual": 0.7027318362324826,
"eval_icace_cosine": 0.4739187955856323,
"eval_icace_l2": 0.6788182854652405,
"eval_icace_normdiff": 0.4215588867664337,
"eval_loss": 8.473577499389648,
"eval_runtime": 12.9445,
"eval_samples_per_second": 301.132,
"eval_steps_per_second": 2.395,
"step": 250
},
{
"epoch": 0.45,
"learning_rate": 4.7183708838821494e-05,
"loss": 9.4834,
"step": 260
},
{
"epoch": 0.48,
"learning_rate": 4.6967071057192376e-05,
"loss": 9.5528,
"step": 280
},
{
"epoch": 0.51,
"learning_rate": 4.675043327556326e-05,
"loss": 9.4911,
"step": 300
},
{
"epoch": 0.51,
"eval_distillation_accuracy_counterfactual": 0.48024628014366344,
"eval_distillation_accuracy_factual": 0.8560800410466907,
"eval_distillation_f1_counterfactual": 0.38759062114466497,
"eval_distillation_f1_factual": 0.8388069726052654,
"eval_groundtruth_accuracy_counterfactual": 0.4681888147768086,
"eval_groundtruth_f1_counterfactual": 0.3863531311941365,
"eval_groundtruth_f1_factual": 0.7022853957903076,
"eval_icace_cosine": 0.4793255627155304,
"eval_icace_l2": 0.6844598054885864,
"eval_icace_normdiff": 0.41838327050209045,
"eval_loss": 8.66569709777832,
"eval_runtime": 12.9451,
"eval_samples_per_second": 301.117,
"eval_steps_per_second": 2.395,
"step": 300
},
{
"epoch": 0.55,
"learning_rate": 4.653379549393415e-05,
"loss": 9.2569,
"step": 320
},
{
"epoch": 0.58,
"learning_rate": 4.6317157712305025e-05,
"loss": 9.2215,
"step": 340
},
{
"epoch": 0.6,
"eval_distillation_accuracy_counterfactual": 0.4979476654694715,
"eval_distillation_accuracy_factual": 0.861723961005644,
"eval_distillation_f1_counterfactual": 0.4084180941769576,
"eval_distillation_f1_factual": 0.8476491763825083,
"eval_groundtruth_accuracy_counterfactual": 0.4976911236531555,
"eval_groundtruth_f1_counterfactual": 0.41199726943728454,
"eval_groundtruth_f1_factual": 0.7017647747639904,
"eval_icace_cosine": 0.48205703496932983,
"eval_icace_l2": 0.6776481866836548,
"eval_icace_normdiff": 0.42241370677948,
"eval_loss": 8.44431209564209,
"eval_runtime": 14.7727,
"eval_samples_per_second": 263.866,
"eval_steps_per_second": 2.098,
"step": 350
},
{
"epoch": 0.62,
"learning_rate": 4.6100519930675915e-05,
"loss": 9.0972,
"step": 360
},
{
"epoch": 0.65,
"learning_rate": 4.58838821490468e-05,
"loss": 9.1708,
"step": 380
},
{
"epoch": 0.69,
"learning_rate": 4.566724436741768e-05,
"loss": 8.8806,
"step": 400
},
{
"epoch": 0.69,
"eval_distillation_accuracy_counterfactual": 0.5318111852231914,
"eval_distillation_accuracy_factual": 0.8532580810672139,
"eval_distillation_f1_counterfactual": 0.45627188162049775,
"eval_distillation_f1_factual": 0.8433141363137382,
"eval_groundtruth_accuracy_counterfactual": 0.5146228835300154,
"eval_groundtruth_f1_counterfactual": 0.4506516610322439,
"eval_groundtruth_f1_factual": 0.703544919476949,
"eval_icace_cosine": 0.47363877296447754,
"eval_icace_l2": 0.6601594090461731,
"eval_icace_normdiff": 0.4189549684524536,
"eval_loss": 8.39233684539795,
"eval_runtime": 13.1803,
"eval_samples_per_second": 295.745,
"eval_steps_per_second": 2.352,
"step": 400
},
{
"epoch": 0.72,
"learning_rate": 4.5450606585788563e-05,
"loss": 9.0014,
"step": 420
},
{
"epoch": 0.75,
"learning_rate": 4.5233968804159446e-05,
"loss": 8.7768,
"step": 440
},
{
"epoch": 0.77,
"eval_distillation_accuracy_counterfactual": 0.5248845561826577,
"eval_distillation_accuracy_factual": 0.8471010774756286,
"eval_distillation_f1_counterfactual": 0.4373197309012829,
"eval_distillation_f1_factual": 0.8365706913229692,
"eval_groundtruth_accuracy_counterfactual": 0.5112878399179066,
"eval_groundtruth_f1_counterfactual": 0.4332310317337549,
"eval_groundtruth_f1_factual": 0.706420324783281,
"eval_icace_cosine": 0.47051629424095154,
"eval_icace_l2": 0.6678978204727173,
"eval_icace_normdiff": 0.42113974690437317,
"eval_loss": 8.4581298828125,
"eval_runtime": 12.9621,
"eval_samples_per_second": 300.723,
"eval_steps_per_second": 2.392,
"step": 450
},
{
"epoch": 0.79,
"learning_rate": 4.501733102253033e-05,
"loss": 8.8678,
"step": 460
},
{
"epoch": 0.82,
"learning_rate": 4.480069324090121e-05,
"loss": 8.9234,
"step": 480
},
{
"epoch": 0.86,
"learning_rate": 4.45840554592721e-05,
"loss": 8.8491,
"step": 500
},
{
"epoch": 0.86,
"eval_distillation_accuracy_counterfactual": 0.5269368907131863,
"eval_distillation_accuracy_factual": 0.8450487429451,
"eval_distillation_f1_counterfactual": 0.44482702743850444,
"eval_distillation_f1_factual": 0.8303570404668654,
"eval_groundtruth_accuracy_counterfactual": 0.517188301693176,
"eval_groundtruth_f1_counterfactual": 0.44803544206517765,
"eval_groundtruth_f1_factual": 0.7101076620278356,
"eval_icace_cosine": 0.4709581732749939,
"eval_icace_l2": 0.6744429469108582,
"eval_icace_normdiff": 0.4238233268260956,
"eval_loss": 8.559771537780762,
"eval_runtime": 12.9505,
"eval_samples_per_second": 300.991,
"eval_steps_per_second": 2.394,
"step": 500
},
{
"epoch": 0.89,
"learning_rate": 4.436741767764298e-05,
"loss": 8.9139,
"step": 520
},
{
"epoch": 0.93,
"learning_rate": 4.415077989601387e-05,
"loss": 8.9491,
"step": 540
},
{
"epoch": 0.94,
"eval_distillation_accuracy_counterfactual": 0.508722421754746,
"eval_distillation_accuracy_factual": 0.8270908158029758,
"eval_distillation_f1_counterfactual": 0.4457012957498576,
"eval_distillation_f1_factual": 0.8156237227220284,
"eval_groundtruth_accuracy_counterfactual": 0.4987172909184197,
"eval_groundtruth_f1_counterfactual": 0.4444355597987831,
"eval_groundtruth_f1_factual": 0.704491382874623,
"eval_icace_cosine": 0.4708389639854431,
"eval_icace_l2": 0.6740121245384216,
"eval_icace_normdiff": 0.4291449189186096,
"eval_loss": 8.710935592651367,
"eval_runtime": 13.1041,
"eval_samples_per_second": 297.465,
"eval_steps_per_second": 2.366,
"step": 550
},
{
"epoch": 0.96,
"learning_rate": 4.393414211438475e-05,
"loss": 8.6501,
"step": 560
},
{
"epoch": 0.99,
"learning_rate": 4.371750433275563e-05,
"loss": 8.8115,
"step": 580
},
{
"epoch": 1.03,
"learning_rate": 4.3500866551126516e-05,
"loss": 8.5726,
"step": 600
},
{
"epoch": 1.03,
"eval_distillation_accuracy_counterfactual": 0.5061570035915854,
"eval_distillation_accuracy_factual": 0.8486403283735249,
"eval_distillation_f1_counterfactual": 0.41610676926533285,
"eval_distillation_f1_factual": 0.8310444475559301,
"eval_groundtruth_accuracy_counterfactual": 0.4905079527963058,
"eval_groundtruth_f1_counterfactual": 0.4145735924063139,
"eval_groundtruth_f1_factual": 0.7092515264551655,
"eval_icace_cosine": 0.4776078760623932,
"eval_icace_l2": 0.6925671100616455,
"eval_icace_normdiff": 0.42703282833099365,
"eval_loss": 8.773487091064453,
"eval_runtime": 13.0705,
"eval_samples_per_second": 298.228,
"eval_steps_per_second": 2.372,
"step": 600
},
{
"epoch": 1.06,
"learning_rate": 4.3284228769497406e-05,
"loss": 8.7449,
"step": 620
},
{
"epoch": 1.1,
"learning_rate": 4.306759098786828e-05,
"loss": 8.5654,
"step": 640
},
{
"epoch": 1.11,
"eval_distillation_accuracy_counterfactual": 0.5148794253463315,
"eval_distillation_accuracy_factual": 0.8314520266803489,
"eval_distillation_f1_counterfactual": 0.4350161586459501,
"eval_distillation_f1_factual": 0.8207900808818138,
"eval_groundtruth_accuracy_counterfactual": 0.513340174448435,
"eval_groundtruth_f1_counterfactual": 0.4424165935664268,
"eval_groundtruth_f1_factual": 0.6858850991781995,
"eval_icace_cosine": 0.4748988747596741,
"eval_icace_l2": 0.6583500504493713,
"eval_icace_normdiff": 0.4023895561695099,
"eval_loss": 8.626461029052734,
"eval_runtime": 13.1109,
"eval_samples_per_second": 297.31,
"eval_steps_per_second": 2.364,
"step": 650
},
{
"epoch": 1.13,
"learning_rate": 4.285095320623917e-05,
"loss": 8.5985,
"step": 660
},
{
"epoch": 1.17,
"learning_rate": 4.2634315424610055e-05,
"loss": 8.9032,
"step": 680
},
{
"epoch": 1.2,
"learning_rate": 4.241767764298094e-05,
"loss": 8.5234,
"step": 700
},
{
"epoch": 1.2,
"eval_distillation_accuracy_counterfactual": 0.5266803488968702,
"eval_distillation_accuracy_factual": 0.8347870702924577,
"eval_distillation_f1_counterfactual": 0.44418104449377827,
"eval_distillation_f1_factual": 0.8166622315451695,
"eval_groundtruth_accuracy_counterfactual": 0.512570548999487,
"eval_groundtruth_f1_counterfactual": 0.4399064148597627,
"eval_groundtruth_f1_factual": 0.7003621420252235,
"eval_icace_cosine": 0.4755347669124603,
"eval_icace_l2": 0.6643328666687012,
"eval_icace_normdiff": 0.4135916829109192,
"eval_loss": 8.528804779052734,
"eval_runtime": 12.9782,
"eval_samples_per_second": 300.35,
"eval_steps_per_second": 2.389,
"step": 700
},
{
"epoch": 1.23,
"learning_rate": 4.220103986135182e-05,
"loss": 8.3629,
"step": 720
},
{
"epoch": 1.27,
"learning_rate": 4.198440207972271e-05,
"loss": 8.6834,
"step": 740
},
{
"epoch": 1.29,
"eval_distillation_accuracy_counterfactual": 0.5253976398152899,
"eval_distillation_accuracy_factual": 0.8499230374551052,
"eval_distillation_f1_counterfactual": 0.4481280211692343,
"eval_distillation_f1_factual": 0.8427632254580978,
"eval_groundtruth_accuracy_counterfactual": 0.521292970754233,
"eval_groundtruth_f1_counterfactual": 0.4535188471820635,
"eval_groundtruth_f1_factual": 0.7050025057202923,
"eval_icace_cosine": 0.478320837020874,
"eval_icace_l2": 0.6507540345191956,
"eval_icace_normdiff": 0.4042801558971405,
"eval_loss": 8.547685623168945,
"eval_runtime": 13.0628,
"eval_samples_per_second": 298.406,
"eval_steps_per_second": 2.373,
"step": 750
},
{
"epoch": 1.3,
"learning_rate": 4.1767764298093586e-05,
"loss": 8.373,
"step": 760
},
{
"epoch": 1.34,
"learning_rate": 4.1551126516464476e-05,
"loss": 8.545,
"step": 780
},
{
"epoch": 1.37,
"learning_rate": 4.133448873483536e-05,
"loss": 8.3301,
"step": 800
},
{
"epoch": 1.37,
"eval_distillation_accuracy_counterfactual": 0.5246280143663418,
"eval_distillation_accuracy_factual": 0.857875833760903,
"eval_distillation_f1_counterfactual": 0.43678271154246406,
"eval_distillation_f1_factual": 0.8404426089149801,
"eval_groundtruth_accuracy_counterfactual": 0.5177013853258081,
"eval_groundtruth_f1_counterfactual": 0.4372037073522039,
"eval_groundtruth_f1_factual": 0.7164819803959707,
"eval_icace_cosine": 0.4767405092716217,
"eval_icace_l2": 0.6505340933799744,
"eval_icace_normdiff": 0.3892482817173004,
"eval_loss": 8.437895774841309,
"eval_runtime": 12.9829,
"eval_samples_per_second": 300.242,
"eval_steps_per_second": 2.388,
"step": 800
},
{
"epoch": 1.41,
"learning_rate": 4.111785095320624e-05,
"loss": 8.5471,
"step": 820
},
{
"epoch": 1.44,
"learning_rate": 4.0901213171577124e-05,
"loss": 8.5899,
"step": 840
},
{
"epoch": 1.46,
"eval_distillation_accuracy_counterfactual": 0.5389943560800411,
"eval_distillation_accuracy_factual": 0.8668547973319651,
"eval_distillation_f1_counterfactual": 0.43508558636235506,
"eval_distillation_f1_factual": 0.8544181051951583,
"eval_groundtruth_accuracy_counterfactual": 0.5287326834273987,
"eval_groundtruth_f1_counterfactual": 0.4313681541912551,
"eval_groundtruth_f1_factual": 0.7021443551588085,
"eval_icace_cosine": 0.47022899985313416,
"eval_icace_l2": 0.6471868753433228,
"eval_icace_normdiff": 0.3974311053752899,
"eval_loss": 8.187715530395508,
"eval_runtime": 12.9448,
"eval_samples_per_second": 301.125,
"eval_steps_per_second": 2.395,
"step": 850
},
{
"epoch": 1.48,
"learning_rate": 4.068457538994801e-05,
"loss": 8.4412,
"step": 860
},
{
"epoch": 1.51,
"learning_rate": 4.04679376083189e-05,
"loss": 8.5992,
"step": 880
},
{
"epoch": 1.54,
"learning_rate": 4.025129982668977e-05,
"loss": 8.5057,
"step": 900
},
{
"epoch": 1.54,
"eval_distillation_accuracy_counterfactual": 0.5338635197537198,
"eval_distillation_accuracy_factual": 0.8494099538224731,
"eval_distillation_f1_counterfactual": 0.4538940008145664,
"eval_distillation_f1_factual": 0.8393372080212285,
"eval_groundtruth_accuracy_counterfactual": 0.521549512570549,
"eval_groundtruth_f1_counterfactual": 0.44602586407123895,
"eval_groundtruth_f1_factual": 0.7062665719976977,
"eval_icace_cosine": 0.47390881180763245,
"eval_icace_l2": 0.6635516881942749,
"eval_icace_normdiff": 0.41677233576774597,
"eval_loss": 8.466891288757324,
"eval_runtime": 12.9586,
"eval_samples_per_second": 300.803,
"eval_steps_per_second": 2.392,
"step": 900
},
{
"epoch": 1.58,
"learning_rate": 4.003466204506066e-05,
"loss": 8.5256,
"step": 920
},
{
"epoch": 1.61,
"learning_rate": 3.9818024263431546e-05,
"loss": 8.4663,
"step": 940
},
{
"epoch": 1.63,
"eval_distillation_accuracy_counterfactual": 0.539250897896357,
"eval_distillation_accuracy_factual": 0.8429964084145716,
"eval_distillation_f1_counterfactual": 0.4552348813290624,
"eval_distillation_f1_factual": 0.8279955804250164,
"eval_groundtruth_accuracy_counterfactual": 0.5248845561826577,
"eval_groundtruth_f1_counterfactual": 0.44541029004084864,
"eval_groundtruth_f1_factual": 0.7174847360382134,
"eval_icace_cosine": 0.47177237272262573,
"eval_icace_l2": 0.6441071629524231,
"eval_icace_normdiff": 0.3987690806388855,
"eval_loss": 8.24371337890625,
"eval_runtime": 13.1576,
"eval_samples_per_second": 296.255,
"eval_steps_per_second": 2.356,
"step": 950
},
{
"epoch": 1.65,
"learning_rate": 3.960138648180243e-05,
"loss": 8.4244,
"step": 960
},
{
"epoch": 1.68,
"learning_rate": 3.938474870017331e-05,
"loss": 8.5347,
"step": 980
},
{
"epoch": 1.72,
"learning_rate": 3.91681109185442e-05,
"loss": 8.5181,
"step": 1000
},
{
"epoch": 1.72,
"eval_distillation_accuracy_counterfactual": 0.5523345305284761,
"eval_distillation_accuracy_factual": 0.8568496664956388,
"eval_distillation_f1_counterfactual": 0.43192107651721756,
"eval_distillation_f1_factual": 0.8446400408995643,
"eval_groundtruth_accuracy_counterfactual": 0.5310415597742432,
"eval_groundtruth_f1_counterfactual": 0.4221211937230137,
"eval_groundtruth_f1_factual": 0.7176363988738272,
"eval_icace_cosine": 0.4691796898841858,
"eval_icace_l2": 0.6469516158103943,
"eval_icace_normdiff": 0.40605321526527405,
"eval_loss": 8.24507999420166,
"eval_runtime": 12.9758,
"eval_samples_per_second": 300.405,
"eval_steps_per_second": 2.389,
"step": 1000
},
{
"epoch": 1.75,
"learning_rate": 3.895147313691508e-05,
"loss": 8.4384,
"step": 1020
},
{
"epoch": 1.78,
"learning_rate": 3.873483535528597e-05,
"loss": 8.4464,
"step": 1040
},
{
"epoch": 1.8,
"eval_distillation_accuracy_counterfactual": 0.5341200615700359,
"eval_distillation_accuracy_factual": 0.8471010774756286,
"eval_distillation_f1_counterfactual": 0.44797373858638706,
"eval_distillation_f1_factual": 0.8352221547782517,
"eval_groundtruth_accuracy_counterfactual": 0.5187275525910724,
"eval_groundtruth_f1_counterfactual": 0.438959174201451,
"eval_groundtruth_f1_factual": 0.7045395682269071,
"eval_icace_cosine": 0.4699629247188568,
"eval_icace_l2": 0.650560200214386,
"eval_icace_normdiff": 0.4021012485027313,
"eval_loss": 8.46190357208252,
"eval_runtime": 12.9778,
"eval_samples_per_second": 300.359,
"eval_steps_per_second": 2.389,
"step": 1050
},
{
"epoch": 1.82,
"learning_rate": 3.851819757365685e-05,
"loss": 8.3977,
"step": 1060
},
{
"epoch": 1.85,
"learning_rate": 3.830155979202773e-05,
"loss": 8.4469,
"step": 1080
},
{
"epoch": 1.89,
"learning_rate": 3.8084922010398616e-05,
"loss": 8.4589,
"step": 1100
},
{
"epoch": 1.89,
"eval_distillation_accuracy_counterfactual": 0.538481272447409,
"eval_distillation_accuracy_factual": 0.853771164699846,
"eval_distillation_f1_counterfactual": 0.4574351380443254,
"eval_distillation_f1_factual": 0.8413133626862945,
"eval_groundtruth_accuracy_counterfactual": 0.5338635197537198,
"eval_groundtruth_f1_counterfactual": 0.4618955956329911,
"eval_groundtruth_f1_factual": 0.712001520958305,
"eval_icace_cosine": 0.47186800837516785,
"eval_icace_l2": 0.6550236940383911,
"eval_icace_normdiff": 0.4062390625476837,
"eval_loss": 8.270210266113281,
"eval_runtime": 13.0076,
"eval_samples_per_second": 299.671,
"eval_steps_per_second": 2.383,
"step": 1100
},
{
"epoch": 1.92,
"learning_rate": 3.78682842287695e-05,
"loss": 8.2726,
"step": 1120
},
{
"epoch": 1.96,
"learning_rate": 3.765164644714038e-05,
"loss": 8.543,
"step": 1140
},
{
"epoch": 1.97,
"eval_distillation_accuracy_counterfactual": 0.5454079014879425,
"eval_distillation_accuracy_factual": 0.8565931246793227,
"eval_distillation_f1_counterfactual": 0.4358856901648176,
"eval_distillation_f1_factual": 0.8450511347625259,
"eval_groundtruth_accuracy_counterfactual": 0.5369420215495125,
"eval_groundtruth_f1_counterfactual": 0.4328763616035284,
"eval_groundtruth_f1_factual": 0.7122842482342289,
"eval_icace_cosine": 0.470956027507782,
"eval_icace_l2": 0.6414260864257812,
"eval_icace_normdiff": 0.3959381878376007,
"eval_loss": 8.278718948364258,
"eval_runtime": 13.0747,
"eval_samples_per_second": 298.133,
"eval_steps_per_second": 2.371,
"step": 1150
},
{
"epoch": 1.99,
"learning_rate": 3.7435008665511264e-05,
"loss": 8.5847,
"step": 1160
},
{
"epoch": 2.02,
"learning_rate": 3.7218370883882154e-05,
"loss": 8.495,
"step": 1180
},
{
"epoch": 2.06,
"learning_rate": 3.700173310225303e-05,
"loss": 8.2151,
"step": 1200
},
{
"epoch": 2.06,
"eval_distillation_accuracy_counterfactual": 0.5323242688558235,
"eval_distillation_accuracy_factual": 0.844792201128784,
"eval_distillation_f1_counterfactual": 0.4578721834749787,
"eval_distillation_f1_factual": 0.8335573328949983,
"eval_groundtruth_accuracy_counterfactual": 0.5205233453052848,
"eval_groundtruth_f1_counterfactual": 0.4529105959701748,
"eval_groundtruth_f1_factual": 0.6982997202664809,
"eval_icace_cosine": 0.4770417809486389,
"eval_icace_l2": 0.664779782295227,
"eval_icace_normdiff": 0.4089834690093994,
"eval_loss": 8.583990097045898,
"eval_runtime": 12.9637,
"eval_samples_per_second": 300.686,
"eval_steps_per_second": 2.391,
"step": 1200
},
{
"epoch": 2.09,
"learning_rate": 3.678509532062392e-05,
"loss": 8.2576,
"step": 1220
},
{
"epoch": 2.13,
"learning_rate": 3.65684575389948e-05,
"loss": 8.3456,
"step": 1240
},
{
"epoch": 2.14,
"eval_distillation_accuracy_counterfactual": 0.5210364289379169,
"eval_distillation_accuracy_factual": 0.8414571575166753,
"eval_distillation_f1_counterfactual": 0.44541850110843295,
"eval_distillation_f1_factual": 0.8216034713991617,
"eval_groundtruth_accuracy_counterfactual": 0.5228322216521293,
"eval_groundtruth_f1_counterfactual": 0.4520848884145957,
"eval_groundtruth_f1_factual": 0.7136759478326009,
"eval_icace_cosine": 0.48024165630340576,
"eval_icace_l2": 0.6682620048522949,
"eval_icace_normdiff": 0.41649964451789856,
"eval_loss": 8.683144569396973,
"eval_runtime": 12.9156,
"eval_samples_per_second": 301.806,
"eval_steps_per_second": 2.4,
"step": 1250
},
{
"epoch": 2.16,
"learning_rate": 3.6351819757365686e-05,
"loss": 8.2687,
"step": 1260
},
{
"epoch": 2.2,
"learning_rate": 3.613518197573657e-05,
"loss": 8.343,
"step": 1280
},
{
"epoch": 2.23,
"learning_rate": 3.591854419410746e-05,
"loss": 8.2615,
"step": 1300
},
{
"epoch": 2.23,
"eval_distillation_accuracy_counterfactual": 0.5210364289379169,
"eval_distillation_accuracy_factual": 0.8376090302719343,
"eval_distillation_f1_counterfactual": 0.4470542011154386,
"eval_distillation_f1_factual": 0.8243930760094701,
"eval_groundtruth_accuracy_counterfactual": 0.5094920472036942,
"eval_groundtruth_f1_counterfactual": 0.4424943238928763,
"eval_groundtruth_f1_factual": 0.7180425551041923,
"eval_icace_cosine": 0.4772956669330597,
"eval_icace_l2": 0.675339937210083,
"eval_icace_normdiff": 0.4207656979560852,
"eval_loss": 8.777983665466309,
"eval_runtime": 12.95,
"eval_samples_per_second": 301.004,
"eval_steps_per_second": 2.394,
"step": 1300
},
{
"epoch": 2.26,
"learning_rate": 3.5701906412478334e-05,
"loss": 8.3164,
"step": 1320
},
{
"epoch": 2.3,
"learning_rate": 3.5485268630849224e-05,
"loss": 8.2886,
"step": 1340
},
{
"epoch": 2.32,
"eval_distillation_accuracy_counterfactual": 0.5233453052847614,
"eval_distillation_accuracy_factual": 0.8563365828630066,
"eval_distillation_f1_counterfactual": 0.4352218638479065,
"eval_distillation_f1_factual": 0.842128331673918,
"eval_groundtruth_accuracy_counterfactual": 0.504617752693689,
"eval_groundtruth_f1_counterfactual": 0.42672300871162205,
"eval_groundtruth_f1_factual": 0.7038431621141455,
"eval_icace_cosine": 0.47232314944267273,
"eval_icace_l2": 0.6612510681152344,
"eval_icace_normdiff": 0.4119323790073395,
"eval_loss": 8.546701431274414,
"eval_runtime": 12.8815,
"eval_samples_per_second": 302.605,
"eval_steps_per_second": 2.407,
"step": 1350
},
{
"epoch": 2.33,
"learning_rate": 3.526863084922011e-05,
"loss": 8.0323,
"step": 1360
},
{
"epoch": 2.37,
"learning_rate": 3.505199306759099e-05,
"loss": 8.2412,
"step": 1380
},
{
"epoch": 2.4,
"learning_rate": 3.483535528596187e-05,
"loss": 8.2755,
"step": 1400
},
{
"epoch": 2.4,
"eval_distillation_accuracy_counterfactual": 0.5279630579784504,
"eval_distillation_accuracy_factual": 0.8512057465366855,
"eval_distillation_f1_counterfactual": 0.42885545419249443,
"eval_distillation_f1_factual": 0.8336172051549792,
"eval_groundtruth_accuracy_counterfactual": 0.5110312981015905,
"eval_groundtruth_f1_counterfactual": 0.4182726548657759,
"eval_groundtruth_f1_factual": 0.7081817865697444,
"eval_icace_cosine": 0.47038188576698303,
"eval_icace_l2": 0.6541587114334106,
"eval_icace_normdiff": 0.4046129286289215,
"eval_loss": 8.557218551635742,
"eval_runtime": 12.9809,
"eval_samples_per_second": 300.286,
"eval_steps_per_second": 2.388,
"step": 1400
},
{
"epoch": 2.44,
"learning_rate": 3.461871750433276e-05,
"loss": 8.1159,
"step": 1420
},
{
"epoch": 2.47,
"learning_rate": 3.440207972270364e-05,
"loss": 8.2512,
"step": 1440
},
{
"epoch": 2.49,
"eval_distillation_accuracy_counterfactual": 0.5048742945100051,
"eval_distillation_accuracy_factual": 0.8442791174961519,
"eval_distillation_f1_counterfactual": 0.4471986183427893,
"eval_distillation_f1_factual": 0.8328539372383206,
"eval_groundtruth_accuracy_counterfactual": 0.5061570035915854,
"eval_groundtruth_f1_counterfactual": 0.45459231049993376,
"eval_groundtruth_f1_factual": 0.7243435837992515,
"eval_icace_cosine": 0.47215959429740906,
"eval_icace_l2": 0.6676862239837646,
"eval_icace_normdiff": 0.4189312160015106,
"eval_loss": 8.632984161376953,
"eval_runtime": 12.9685,
"eval_samples_per_second": 300.574,
"eval_steps_per_second": 2.39,
"step": 1450
},
{
"epoch": 2.5,
"learning_rate": 3.418544194107453e-05,
"loss": 8.4124,
"step": 1460
},
{
"epoch": 2.54,
"learning_rate": 3.396880415944541e-05,
"loss": 8.2396,
"step": 1480
},
{
"epoch": 2.57,
"learning_rate": 3.3752166377816294e-05,
"loss": 8.1802,
"step": 1500
},
{
"epoch": 2.57,
"eval_distillation_accuracy_counterfactual": 0.5266803488968702,
"eval_distillation_accuracy_factual": 0.8424833247819394,
"eval_distillation_f1_counterfactual": 0.4296539407490461,
"eval_distillation_f1_factual": 0.8232924538803463,
"eval_groundtruth_accuracy_counterfactual": 0.521292970754233,
"eval_groundtruth_f1_counterfactual": 0.4329058498324728,
"eval_groundtruth_f1_factual": 0.693798726302169,
"eval_icace_cosine": 0.47156795859336853,
"eval_icace_l2": 0.6467342972755432,
"eval_icace_normdiff": 0.39208564162254333,
"eval_loss": 8.581865310668945,
"eval_runtime": 12.9632,
"eval_samples_per_second": 300.697,
"eval_steps_per_second": 2.391,
"step": 1500
},
{
"epoch": 2.61,
"learning_rate": 3.353552859618718e-05,
"loss": 8.0483,
"step": 1520
},
{
"epoch": 2.64,
"learning_rate": 3.331889081455806e-05,
"loss": 8.3776,
"step": 1540
},
{
"epoch": 2.66,
"eval_distillation_accuracy_counterfactual": 0.5395074397126731,
"eval_distillation_accuracy_factual": 0.8624935864545921,
"eval_distillation_f1_counterfactual": 0.4487826083438827,
"eval_distillation_f1_factual": 0.8447571530576526,
"eval_groundtruth_accuracy_counterfactual": 0.5233453052847614,
"eval_groundtruth_f1_counterfactual": 0.44381794658084023,
"eval_groundtruth_f1_factual": 0.7228365951086511,
"eval_icace_cosine": 0.467881977558136,
"eval_icace_l2": 0.6500933766365051,
"eval_icace_normdiff": 0.4116222858428955,
"eval_loss": 8.251863479614258,
"eval_runtime": 13.0173,
"eval_samples_per_second": 299.447,
"eval_steps_per_second": 2.381,
"step": 1550
},
{
"epoch": 2.68,
"learning_rate": 3.310225303292894e-05,
"loss": 8.3191,
"step": 1560
},
{
"epoch": 2.71,
"learning_rate": 3.2885615251299825e-05,
"loss": 8.3499,
"step": 1580
},
{
"epoch": 2.74,
"learning_rate": 3.2668977469670715e-05,
"loss": 8.2759,
"step": 1600
},
{
"epoch": 2.74,
"eval_distillation_accuracy_counterfactual": 0.5264238070805541,
"eval_distillation_accuracy_factual": 0.8381221139045665,
"eval_distillation_f1_counterfactual": 0.4640025505726335,
"eval_distillation_f1_factual": 0.8226839825314822,
"eval_groundtruth_accuracy_counterfactual": 0.5112878399179066,
"eval_groundtruth_f1_counterfactual": 0.4572176402176623,
"eval_groundtruth_f1_factual": 0.7122677529021709,
"eval_icace_cosine": 0.46834951639175415,
"eval_icace_l2": 0.6554086208343506,
"eval_icace_normdiff": 0.4115924835205078,
"eval_loss": 8.593127250671387,
"eval_runtime": 12.9442,
"eval_samples_per_second": 301.138,
"eval_steps_per_second": 2.395,
"step": 1600
},
{
"epoch": 2.78,
"learning_rate": 3.245233968804159e-05,
"loss": 8.2134,
"step": 1620
},
{
"epoch": 2.81,
"learning_rate": 3.223570190641248e-05,
"loss": 8.2762,
"step": 1640
},
{
"epoch": 2.83,
"eval_distillation_accuracy_counterfactual": 0.5274499743458184,
"eval_distillation_accuracy_factual": 0.8532580810672139,
"eval_distillation_f1_counterfactual": 0.4582506359486283,
"eval_distillation_f1_factual": 0.8369130644925307,
"eval_groundtruth_accuracy_counterfactual": 0.5105182144689584,
"eval_groundtruth_f1_counterfactual": 0.4492512316522279,
"eval_groundtruth_f1_factual": 0.7137344968653884,
"eval_icace_cosine": 0.4689863324165344,
"eval_icace_l2": 0.6501835584640503,
"eval_icace_normdiff": 0.40152183175086975,
"eval_loss": 8.484435081481934,
"eval_runtime": 12.9458,
"eval_samples_per_second": 301.102,
"eval_steps_per_second": 2.395,
"step": 1650
},
{
"epoch": 2.85,
"learning_rate": 3.2019064124783364e-05,
"loss": 8.3236,
"step": 1660
},
{
"epoch": 2.88,
"learning_rate": 3.1802426343154247e-05,
"loss": 8.1251,
"step": 1680
},
{
"epoch": 2.92,
"learning_rate": 3.158578856152513e-05,
"loss": 8.3658,
"step": 1700
},
{
"epoch": 2.92,
"eval_distillation_accuracy_counterfactual": 0.5236018471010775,
"eval_distillation_accuracy_factual": 0.8442791174961519,
"eval_distillation_f1_counterfactual": 0.4500677919517996,
"eval_distillation_f1_factual": 0.8294190656695536,
"eval_groundtruth_accuracy_counterfactual": 0.5161621344279117,
"eval_groundtruth_f1_counterfactual": 0.44921994678493393,
"eval_groundtruth_f1_factual": 0.7157702918851219,
"eval_icace_cosine": 0.47242775559425354,
"eval_icace_l2": 0.6650809049606323,
"eval_icace_normdiff": 0.4171872138977051,
"eval_loss": 8.604122161865234,
"eval_runtime": 12.9963,
"eval_samples_per_second": 299.932,
"eval_steps_per_second": 2.385,
"step": 1700
},
{
"epoch": 2.95,
"learning_rate": 3.136915077989602e-05,
"loss": 8.1394,
"step": 1720
},
{
"epoch": 2.98,
"learning_rate": 3.1152512998266895e-05,
"loss": 8.1326,
"step": 1740
},
{
"epoch": 3.0,
"eval_distillation_accuracy_counterfactual": 0.5328373524884557,
"eval_distillation_accuracy_factual": 0.8496664956387892,
"eval_distillation_f1_counterfactual": 0.466763064256023,
"eval_distillation_f1_factual": 0.8351501069683767,
"eval_groundtruth_accuracy_counterfactual": 0.5230887634684454,
"eval_groundtruth_f1_counterfactual": 0.46309383057125386,
"eval_groundtruth_f1_factual": 0.7189084641368721,
"eval_icace_cosine": 0.47281646728515625,
"eval_icace_l2": 0.6447048783302307,
"eval_icace_normdiff": 0.3949274718761444,
"eval_loss": 8.471474647521973,
"eval_runtime": 12.9383,
"eval_samples_per_second": 301.276,
"eval_steps_per_second": 2.396,
"step": 1750
},
{
"epoch": 3.02,
"learning_rate": 3.0935875216637785e-05,
"loss": 8.0867,
"step": 1760
},
{
"epoch": 3.05,
"learning_rate": 3.071923743500867e-05,
"loss": 8.2012,
"step": 1780
},
{
"epoch": 3.09,
"learning_rate": 3.0502599653379547e-05,
"loss": 8.1176,
"step": 1800
},
{
"epoch": 3.09,
"eval_distillation_accuracy_counterfactual": 0.5320677270395074,
"eval_distillation_accuracy_factual": 0.8473576192919446,
"eval_distillation_f1_counterfactual": 0.44372130842528623,
"eval_distillation_f1_factual": 0.8338491634681494,
"eval_groundtruth_accuracy_counterfactual": 0.5076962544894817,
"eval_groundtruth_f1_counterfactual": 0.4271300744187679,
"eval_groundtruth_f1_factual": 0.7089616826609815,
"eval_icace_cosine": 0.4722249507904053,
"eval_icace_l2": 0.65628582239151,
"eval_icace_normdiff": 0.40942007303237915,
"eval_loss": 8.602928161621094,
"eval_runtime": 12.9577,
"eval_samples_per_second": 300.825,
"eval_steps_per_second": 2.392,
"step": 1800
},
{
"epoch": 3.12,
"learning_rate": 3.0285961871750434e-05,
"loss": 8.1985,
"step": 1820
},
{
"epoch": 3.16,
"learning_rate": 3.006932409012132e-05,
"loss": 8.2358,
"step": 1840
},
{
"epoch": 3.17,
"eval_distillation_accuracy_counterfactual": 0.5179579271421242,
"eval_distillation_accuracy_factual": 0.8463314520266804,
"eval_distillation_f1_counterfactual": 0.4547140810789615,
"eval_distillation_f1_factual": 0.8318522122913702,
"eval_groundtruth_accuracy_counterfactual": 0.5179579271421242,
"eval_groundtruth_f1_counterfactual": 0.46111342311742015,
"eval_groundtruth_f1_factual": 0.702200862113018,
"eval_icace_cosine": 0.47784164547920227,
"eval_icace_l2": 0.6577954292297363,
"eval_icace_normdiff": 0.40519601106643677,
"eval_loss": 8.596319198608398,
"eval_runtime": 13.0549,
"eval_samples_per_second": 298.584,
"eval_steps_per_second": 2.375,
"step": 1850
},
{
"epoch": 3.19,
"learning_rate": 2.98526863084922e-05,
"loss": 8.1927,
"step": 1860
},
{
"epoch": 3.22,
"learning_rate": 2.9636048526863086e-05,
"loss": 8.0759,
"step": 1880
},
{
"epoch": 3.26,
"learning_rate": 2.9419410745233972e-05,
"loss": 8.0023,
"step": 1900
},
{
"epoch": 3.26,
"eval_distillation_accuracy_counterfactual": 0.5410466906105695,
"eval_distillation_accuracy_factual": 0.8442791174961519,
"eval_distillation_f1_counterfactual": 0.4708691905840775,
"eval_distillation_f1_factual": 0.8314060332226945,
"eval_groundtruth_accuracy_counterfactual": 0.5287326834273987,
"eval_groundtruth_f1_counterfactual": 0.4662134522268956,
"eval_groundtruth_f1_factual": 0.7036825171946501,
"eval_icace_cosine": 0.46827077865600586,
"eval_icace_l2": 0.6418067812919617,
"eval_icace_normdiff": 0.39047762751579285,
"eval_loss": 8.510624885559082,
"eval_runtime": 12.9429,
"eval_samples_per_second": 301.169,
"eval_steps_per_second": 2.395,
"step": 1900
},
{
"epoch": 3.29,
"learning_rate": 2.920277296360485e-05,
"loss": 8.0164,
"step": 1920
},
{
"epoch": 3.33,
"learning_rate": 2.8986135181975738e-05,
"loss": 8.0343,
"step": 1940
},
{
"epoch": 3.34,
"eval_distillation_accuracy_counterfactual": 0.5356593124679323,
"eval_distillation_accuracy_factual": 0.8594150846587993,
"eval_distillation_f1_counterfactual": 0.4646747781692526,
"eval_distillation_f1_factual": 0.8458748080305407,
"eval_groundtruth_accuracy_counterfactual": 0.5230887634684454,
"eval_groundtruth_f1_counterfactual": 0.4587518415854972,
"eval_groundtruth_f1_factual": 0.725017307995671,
"eval_icace_cosine": 0.46998363733291626,
"eval_icace_l2": 0.6633685231208801,
"eval_icace_normdiff": 0.410533607006073,
"eval_loss": 8.549391746520996,
"eval_runtime": 13.0331,
"eval_samples_per_second": 299.085,
"eval_steps_per_second": 2.379,
"step": 1950
},
{
"epoch": 3.36,
"learning_rate": 2.8769497400346624e-05,
"loss": 8.1109,
"step": 1960
},
{
"epoch": 3.4,
"learning_rate": 2.8552859618717503e-05,
"loss": 7.8661,
"step": 1980
},
{
"epoch": 3.43,
"learning_rate": 2.833622183708839e-05,
"loss": 7.9558,
"step": 2000
},
{
"epoch": 3.43,
"eval_distillation_accuracy_counterfactual": 0.5338635197537198,
"eval_distillation_accuracy_factual": 0.8483837865572088,
"eval_distillation_f1_counterfactual": 0.46569718446185726,
"eval_distillation_f1_factual": 0.8346114011854626,
"eval_groundtruth_accuracy_counterfactual": 0.5269368907131863,
"eval_groundtruth_f1_counterfactual": 0.46415122006349785,
"eval_groundtruth_f1_factual": 0.7156362232258703,
"eval_icace_cosine": 0.46725159883499146,
"eval_icace_l2": 0.660638153553009,
"eval_icace_normdiff": 0.40762051939964294,
"eval_loss": 8.534335136413574,
"eval_runtime": 12.9678,
"eval_samples_per_second": 300.591,
"eval_steps_per_second": 2.391,
"step": 2000
},
{
"epoch": 3.46,
"learning_rate": 2.8119584055459276e-05,
"loss": 8.0163,
"step": 2020
},
{
"epoch": 3.5,
"learning_rate": 2.7902946273830156e-05,
"loss": 8.1543,
"step": 2040
},
{
"epoch": 3.52,
"eval_distillation_accuracy_counterfactual": 0.5184710107747563,
"eval_distillation_accuracy_factual": 0.8417136993329912,
"eval_distillation_f1_counterfactual": 0.45048032767829593,
"eval_distillation_f1_factual": 0.8277337474547333,
"eval_groundtruth_accuracy_counterfactual": 0.5187275525910724,
"eval_groundtruth_f1_counterfactual": 0.4579942611753098,
"eval_groundtruth_f1_factual": 0.7262909345250802,
"eval_icace_cosine": 0.47337350249290466,
"eval_icace_l2": 0.6622626781463623,
"eval_icace_normdiff": 0.40650057792663574,
"eval_loss": 8.670907020568848,
"eval_runtime": 12.913,
"eval_samples_per_second": 301.867,
"eval_steps_per_second": 2.401,
"step": 2050
},
{
"epoch": 3.53,
"learning_rate": 2.7686308492201042e-05,
"loss": 8.0555,
"step": 2060
},
{
"epoch": 3.57,
"learning_rate": 2.7469670710571928e-05,
"loss": 8.003,
"step": 2080
},
{
"epoch": 3.6,
"learning_rate": 2.7253032928942808e-05,
"loss": 8.1222,
"step": 2100
},
{
"epoch": 3.6,
"eval_distillation_accuracy_counterfactual": 0.538481272447409,
"eval_distillation_accuracy_factual": 0.8324781939456132,
"eval_distillation_f1_counterfactual": 0.45321151762235823,
"eval_distillation_f1_factual": 0.816404323283801,
"eval_groundtruth_accuracy_counterfactual": 0.5256541816316059,
"eval_groundtruth_f1_counterfactual": 0.4475943574688602,
"eval_groundtruth_f1_factual": 0.7218836713261483,
"eval_icace_cosine": 0.47021886706352234,
"eval_icace_l2": 0.6517447233200073,
"eval_icace_normdiff": 0.403333842754364,
"eval_loss": 8.577171325683594,
"eval_runtime": 12.9514,
"eval_samples_per_second": 300.97,
"eval_steps_per_second": 2.394,
"step": 2100
},
{
"epoch": 3.64,
"learning_rate": 2.7036395147313694e-05,
"loss": 8.2765,
"step": 2120
},
{
"epoch": 3.67,
"learning_rate": 2.6819757365684577e-05,
"loss": 8.0517,
"step": 2140
},
{
"epoch": 3.69,
"eval_distillation_accuracy_counterfactual": 0.5318111852231914,
"eval_distillation_accuracy_factual": 0.8437660338635198,
"eval_distillation_f1_counterfactual": 0.4762348812215591,
"eval_distillation_f1_factual": 0.8278687320161431,
"eval_groundtruth_accuracy_counterfactual": 0.5200102616726526,
"eval_groundtruth_f1_counterfactual": 0.47065260912789525,
"eval_groundtruth_f1_factual": 0.7123545192728437,
"eval_icace_cosine": 0.47268736362457275,
"eval_icace_l2": 0.6466090083122253,
"eval_icace_normdiff": 0.4019717872142792,
"eval_loss": 8.487833023071289,
"eval_runtime": 12.9963,
"eval_samples_per_second": 299.933,
"eval_steps_per_second": 2.385,
"step": 2150
},
{
"epoch": 3.7,
"learning_rate": 2.660311958405546e-05,
"loss": 8.1498,
"step": 2160
},
{
"epoch": 3.74,
"learning_rate": 2.6386481802426343e-05,
"loss": 8.1688,
"step": 2180
},
{
"epoch": 3.77,
"learning_rate": 2.616984402079723e-05,
"loss": 7.9705,
"step": 2200
},
{
"epoch": 3.77,
"eval_distillation_accuracy_counterfactual": 0.5377116469984607,
"eval_distillation_accuracy_factual": 0.8240123140071832,
"eval_distillation_f1_counterfactual": 0.4741425223222236,
"eval_distillation_f1_factual": 0.8055011349134608,
"eval_groundtruth_accuracy_counterfactual": 0.5269368907131863,
"eval_groundtruth_f1_counterfactual": 0.4693711263304287,
"eval_groundtruth_f1_factual": 0.7106558328588921,
"eval_icace_cosine": 0.4721924662590027,
"eval_icace_l2": 0.6477442383766174,
"eval_icace_normdiff": 0.39685875177383423,
"eval_loss": 8.761940956115723,
"eval_runtime": 12.9582,
"eval_samples_per_second": 300.813,
"eval_steps_per_second": 2.392,
"step": 2200
},
{
"epoch": 3.81,
"learning_rate": 2.595320623916811e-05,
"loss": 7.943,
"step": 2220
},
{
"epoch": 3.84,
"learning_rate": 2.5736568457538995e-05,
"loss": 8.1214,
"step": 2240
},
{
"epoch": 3.86,
"eval_distillation_accuracy_counterfactual": 0.5348896870189841,
"eval_distillation_accuracy_factual": 0.8435094920472037,
"eval_distillation_f1_counterfactual": 0.46585115605288197,
"eval_distillation_f1_factual": 0.8294546068203583,
"eval_groundtruth_accuracy_counterfactual": 0.5207798871216008,
"eval_groundtruth_f1_counterfactual": 0.45831312719861905,
"eval_groundtruth_f1_factual": 0.7120266943001898,
"eval_icace_cosine": 0.4707246422767639,
"eval_icace_l2": 0.6443170309066772,
"eval_icace_normdiff": 0.3977286219596863,
"eval_loss": 8.511881828308105,
"eval_runtime": 13.0295,
"eval_samples_per_second": 299.167,
"eval_steps_per_second": 2.379,
"step": 2250
},
{
"epoch": 3.88,
"learning_rate": 2.551993067590988e-05,
"loss": 7.963,
"step": 2260
},
{
"epoch": 3.91,
"learning_rate": 2.530329289428076e-05,
"loss": 7.9637,
"step": 2280
},
{
"epoch": 3.95,
"learning_rate": 2.5086655112651647e-05,
"loss": 8.1591,
"step": 2300
},
{
"epoch": 3.95,
"eval_distillation_accuracy_counterfactual": 0.5092355053873782,
"eval_distillation_accuracy_factual": 0.8486403283735249,
"eval_distillation_f1_counterfactual": 0.42695574999404207,
"eval_distillation_f1_factual": 0.8334085301941027,
"eval_groundtruth_accuracy_counterfactual": 0.5025654181631606,
"eval_groundtruth_f1_counterfactual": 0.4290408562754823,
"eval_groundtruth_f1_factual": 0.7225390626347794,
"eval_icace_cosine": 0.4755522310733795,
"eval_icace_l2": 0.6715899109840393,
"eval_icace_normdiff": 0.42764294147491455,
"eval_loss": 8.715620994567871,
"eval_runtime": 12.9596,
"eval_samples_per_second": 300.781,
"eval_steps_per_second": 2.392,
"step": 2300
},
{
"epoch": 3.98,
"learning_rate": 2.4870017331022533e-05,
"loss": 8.0792,
"step": 2320
},
{
"epoch": 4.01,
"learning_rate": 2.4653379549393416e-05,
"loss": 7.8229,
"step": 2340
},
{
"epoch": 4.03,
"eval_distillation_accuracy_counterfactual": 0.5531041559774243,
"eval_distillation_accuracy_factual": 0.8340174448435095,
"eval_distillation_f1_counterfactual": 0.4729251628273044,
"eval_distillation_f1_factual": 0.8182553778403516,
"eval_groundtruth_accuracy_counterfactual": 0.5364289379168804,
"eval_groundtruth_f1_counterfactual": 0.4660967256300985,
"eval_groundtruth_f1_factual": 0.7131629444896277,
"eval_icace_cosine": 0.47335678339004517,
"eval_icace_l2": 0.6358506679534912,
"eval_icace_normdiff": 0.38684460520744324,
"eval_loss": 8.613683700561523,
"eval_runtime": 12.9404,
"eval_samples_per_second": 301.226,
"eval_steps_per_second": 2.396,
"step": 2350
},
{
"epoch": 4.05,
"learning_rate": 2.44367417677643e-05,
"loss": 8.022,
"step": 2360
},
{
"epoch": 4.08,
"learning_rate": 2.4220103986135185e-05,
"loss": 7.8912,
"step": 2380
},
{
"epoch": 4.12,
"learning_rate": 2.4003466204506068e-05,
"loss": 8.0281,
"step": 2400
},
{
"epoch": 4.12,
"eval_distillation_accuracy_counterfactual": 0.5456644433042586,
"eval_distillation_accuracy_factual": 0.848896870189841,
"eval_distillation_f1_counterfactual": 0.4776857954065169,
"eval_distillation_f1_factual": 0.8326616872239038,
"eval_groundtruth_accuracy_counterfactual": 0.5297588506926629,
"eval_groundtruth_f1_counterfactual": 0.46905822427531607,
"eval_groundtruth_f1_factual": 0.7201395082994183,
"eval_icace_cosine": 0.4664005935192108,
"eval_icace_l2": 0.6372502446174622,
"eval_icace_normdiff": 0.39846205711364746,
"eval_loss": 8.315762519836426,
"eval_runtime": 13.0359,
"eval_samples_per_second": 299.021,
"eval_steps_per_second": 2.378,
"step": 2400
}
],
"max_steps": 4616,
"num_train_epochs": 8,
"total_flos": 2.0194317434112e+16,
"trial_name": null,
"trial_params": null
}