cpm.in.gpt2.approximate.seed66 / trainer_state.json
KarelDO's picture
commit files to HF hub
d7592d5
{
"best_metric": 0.44613537192344666,
"best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__gpt2/checkpoint-2100",
"epoch": 3.6020583190394513,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.9783362218370885e-05,
"loss": 9.7194,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 4.956672443674177e-05,
"loss": 8.5461,
"step": 40
},
{
"epoch": 0.09,
"eval_distillation_accuracy_counterfactual": 0.4212416623909697,
"eval_distillation_accuracy_factual": 0.8978963571062083,
"eval_distillation_f1_counterfactual": 0.2795337994893706,
"eval_distillation_f1_factual": 0.8820944086532225,
"eval_groundtruth_accuracy_counterfactual": 0.3999486916367368,
"eval_groundtruth_f1_counterfactual": 0.270038313797986,
"eval_groundtruth_f1_factual": 0.6569760675646802,
"eval_icace_cosine": 0.5092517733573914,
"eval_icace_l2": 0.6819494366645813,
"eval_icace_normdiff": 0.48499011993408203,
"eval_loss": 7.299591541290283,
"eval_runtime": 9.7793,
"eval_samples_per_second": 398.598,
"eval_steps_per_second": 6.238,
"step": 50
},
{
"epoch": 0.1,
"learning_rate": 4.935008665511265e-05,
"loss": 8.3578,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 4.913344887348354e-05,
"loss": 7.9482,
"step": 80
},
{
"epoch": 0.17,
"learning_rate": 4.891681109185442e-05,
"loss": 7.2571,
"step": 100
},
{
"epoch": 0.17,
"eval_distillation_accuracy_counterfactual": 0.4720369420215495,
"eval_distillation_accuracy_factual": 0.8304258594150846,
"eval_distillation_f1_counterfactual": 0.29869558215395625,
"eval_distillation_f1_factual": 0.8025416456465629,
"eval_groundtruth_accuracy_counterfactual": 0.4404822986146742,
"eval_groundtruth_f1_counterfactual": 0.2821387138469088,
"eval_groundtruth_f1_factual": 0.6140475956813652,
"eval_icace_cosine": 0.4916336238384247,
"eval_icace_l2": 0.5953323245048523,
"eval_icace_normdiff": 0.3702651858329773,
"eval_loss": 5.76565408706665,
"eval_runtime": 8.8197,
"eval_samples_per_second": 441.964,
"eval_steps_per_second": 6.916,
"step": 100
},
{
"epoch": 0.21,
"learning_rate": 4.8700173310225307e-05,
"loss": 6.9005,
"step": 120
},
{
"epoch": 0.24,
"learning_rate": 4.848353552859619e-05,
"loss": 6.7914,
"step": 140
},
{
"epoch": 0.26,
"eval_distillation_accuracy_counterfactual": 0.48050282195997945,
"eval_distillation_accuracy_factual": 0.9073884043099025,
"eval_distillation_f1_counterfactual": 0.3236150494495036,
"eval_distillation_f1_factual": 0.8927762880113139,
"eval_groundtruth_accuracy_counterfactual": 0.4707542329399692,
"eval_groundtruth_f1_counterfactual": 0.31791281566203955,
"eval_groundtruth_f1_factual": 0.6750709743000188,
"eval_icace_cosine": 0.4797770380973816,
"eval_icace_l2": 0.5712994337081909,
"eval_icace_normdiff": 0.33662039041519165,
"eval_loss": 5.300200462341309,
"eval_runtime": 10.9287,
"eval_samples_per_second": 356.676,
"eval_steps_per_second": 5.582,
"step": 150
},
{
"epoch": 0.27,
"learning_rate": 4.826689774696707e-05,
"loss": 6.6777,
"step": 160
},
{
"epoch": 0.31,
"learning_rate": 4.8050259965337955e-05,
"loss": 6.4593,
"step": 180
},
{
"epoch": 0.34,
"learning_rate": 4.7833622183708845e-05,
"loss": 6.4382,
"step": 200
},
{
"epoch": 0.34,
"eval_distillation_accuracy_counterfactual": 0.47255002565418164,
"eval_distillation_accuracy_factual": 0.8832734735761929,
"eval_distillation_f1_counterfactual": 0.3601419344182958,
"eval_distillation_f1_factual": 0.8671170830061152,
"eval_groundtruth_accuracy_counterfactual": 0.4820420728578758,
"eval_groundtruth_f1_counterfactual": 0.3702238764207906,
"eval_groundtruth_f1_factual": 0.6827368954561245,
"eval_icace_cosine": 0.480947345495224,
"eval_icace_l2": 0.5894502401351929,
"eval_icace_normdiff": 0.34918561577796936,
"eval_loss": 5.608799457550049,
"eval_runtime": 8.4944,
"eval_samples_per_second": 458.888,
"eval_steps_per_second": 7.181,
"step": 200
},
{
"epoch": 0.38,
"learning_rate": 4.761698440207972e-05,
"loss": 6.2352,
"step": 220
},
{
"epoch": 0.41,
"learning_rate": 4.740034662045061e-05,
"loss": 6.164,
"step": 240
},
{
"epoch": 0.43,
"eval_distillation_accuracy_counterfactual": 0.5069266290405336,
"eval_distillation_accuracy_factual": 0.883530015392509,
"eval_distillation_f1_counterfactual": 0.3645351770446803,
"eval_distillation_f1_factual": 0.8684151036841502,
"eval_groundtruth_accuracy_counterfactual": 0.5010261672652643,
"eval_groundtruth_f1_counterfactual": 0.36288166576215647,
"eval_groundtruth_f1_factual": 0.6687061485706542,
"eval_icace_cosine": 0.4569728672504425,
"eval_icace_l2": 0.5410994291305542,
"eval_icace_normdiff": 0.31389445066452026,
"eval_loss": 5.045320510864258,
"eval_runtime": 8.4915,
"eval_samples_per_second": 459.049,
"eval_steps_per_second": 7.184,
"step": 250
},
{
"epoch": 0.45,
"learning_rate": 4.7183708838821494e-05,
"loss": 5.9891,
"step": 260
},
{
"epoch": 0.48,
"learning_rate": 4.6967071057192376e-05,
"loss": 6.2235,
"step": 280
},
{
"epoch": 0.51,
"learning_rate": 4.675043327556326e-05,
"loss": 6.0128,
"step": 300
},
{
"epoch": 0.51,
"eval_distillation_accuracy_counterfactual": 0.5146228835300154,
"eval_distillation_accuracy_factual": 0.8781426372498717,
"eval_distillation_f1_counterfactual": 0.3995054954775597,
"eval_distillation_f1_factual": 0.853581796631387,
"eval_groundtruth_accuracy_counterfactual": 0.5035915854284249,
"eval_groundtruth_f1_counterfactual": 0.392088799608495,
"eval_groundtruth_f1_factual": 0.6721954364344527,
"eval_icace_cosine": 0.4626784920692444,
"eval_icace_l2": 0.5394507646560669,
"eval_icace_normdiff": 0.31402334570884705,
"eval_loss": 4.970682621002197,
"eval_runtime": 8.5572,
"eval_samples_per_second": 455.522,
"eval_steps_per_second": 7.128,
"step": 300
},
{
"epoch": 0.55,
"learning_rate": 4.653379549393415e-05,
"loss": 5.9942,
"step": 320
},
{
"epoch": 0.58,
"learning_rate": 4.6317157712305025e-05,
"loss": 6.0123,
"step": 340
},
{
"epoch": 0.6,
"eval_distillation_accuracy_counterfactual": 0.5325808106721396,
"eval_distillation_accuracy_factual": 0.8519753719856337,
"eval_distillation_f1_counterfactual": 0.41232256847404497,
"eval_distillation_f1_factual": 0.8256119871844103,
"eval_groundtruth_accuracy_counterfactual": 0.5205233453052848,
"eval_groundtruth_f1_counterfactual": 0.40645293495090373,
"eval_groundtruth_f1_factual": 0.6290200232431696,
"eval_icace_cosine": 0.46614569425582886,
"eval_icace_l2": 0.5392053723335266,
"eval_icace_normdiff": 0.31089842319488525,
"eval_loss": 5.0584893226623535,
"eval_runtime": 13.8278,
"eval_samples_per_second": 281.896,
"eval_steps_per_second": 4.411,
"step": 350
},
{
"epoch": 0.62,
"learning_rate": 4.6100519930675915e-05,
"loss": 5.7378,
"step": 360
},
{
"epoch": 0.65,
"learning_rate": 4.58838821490468e-05,
"loss": 5.9926,
"step": 380
},
{
"epoch": 0.69,
"learning_rate": 4.566724436741768e-05,
"loss": 5.8241,
"step": 400
},
{
"epoch": 0.69,
"eval_distillation_accuracy_counterfactual": 0.5266803488968702,
"eval_distillation_accuracy_factual": 0.8694202154951257,
"eval_distillation_f1_counterfactual": 0.40407582352674,
"eval_distillation_f1_factual": 0.8530546578727618,
"eval_groundtruth_accuracy_counterfactual": 0.5100051308363264,
"eval_groundtruth_f1_counterfactual": 0.39126532870837566,
"eval_groundtruth_f1_factual": 0.6853852966812366,
"eval_icace_cosine": 0.4609030485153198,
"eval_icace_l2": 0.5407834053039551,
"eval_icace_normdiff": 0.31197163462638855,
"eval_loss": 5.077407360076904,
"eval_runtime": 8.5264,
"eval_samples_per_second": 457.166,
"eval_steps_per_second": 7.154,
"step": 400
},
{
"epoch": 0.72,
"learning_rate": 4.5450606585788563e-05,
"loss": 5.6615,
"step": 420
},
{
"epoch": 0.75,
"learning_rate": 4.5233968804159446e-05,
"loss": 5.5946,
"step": 440
},
{
"epoch": 0.77,
"eval_distillation_accuracy_counterfactual": 0.5238583889173936,
"eval_distillation_accuracy_factual": 0.87044638276039,
"eval_distillation_f1_counterfactual": 0.43556823460795113,
"eval_distillation_f1_factual": 0.8548518250854267,
"eval_groundtruth_accuracy_counterfactual": 0.5161621344279117,
"eval_groundtruth_f1_counterfactual": 0.4287539684802734,
"eval_groundtruth_f1_factual": 0.6839637952427478,
"eval_icace_cosine": 0.46770957112312317,
"eval_icace_l2": 0.5423880815505981,
"eval_icace_normdiff": 0.3162823021411896,
"eval_loss": 5.0952630043029785,
"eval_runtime": 13.5333,
"eval_samples_per_second": 288.031,
"eval_steps_per_second": 4.507,
"step": 450
},
{
"epoch": 0.79,
"learning_rate": 4.501733102253033e-05,
"loss": 5.6457,
"step": 460
},
{
"epoch": 0.82,
"learning_rate": 4.480069324090121e-05,
"loss": 5.7859,
"step": 480
},
{
"epoch": 0.86,
"learning_rate": 4.45840554592721e-05,
"loss": 5.6094,
"step": 500
},
{
"epoch": 0.86,
"eval_distillation_accuracy_counterfactual": 0.521549512570549,
"eval_distillation_accuracy_factual": 0.8473576192919446,
"eval_distillation_f1_counterfactual": 0.42112500539619846,
"eval_distillation_f1_factual": 0.8292976585290512,
"eval_groundtruth_accuracy_counterfactual": 0.508722421754746,
"eval_groundtruth_f1_counterfactual": 0.4072271850844321,
"eval_groundtruth_f1_factual": 0.6740351741865853,
"eval_icace_cosine": 0.4634177088737488,
"eval_icace_l2": 0.5543035864830017,
"eval_icace_normdiff": 0.3254902958869934,
"eval_loss": 5.312262535095215,
"eval_runtime": 8.4543,
"eval_samples_per_second": 461.069,
"eval_steps_per_second": 7.215,
"step": 500
},
{
"epoch": 0.89,
"learning_rate": 4.436741767764298e-05,
"loss": 5.5785,
"step": 520
},
{
"epoch": 0.93,
"learning_rate": 4.415077989601387e-05,
"loss": 5.5593,
"step": 540
},
{
"epoch": 0.94,
"eval_distillation_accuracy_counterfactual": 0.5402770651616213,
"eval_distillation_accuracy_factual": 0.8435094920472037,
"eval_distillation_f1_counterfactual": 0.43515748935807014,
"eval_distillation_f1_factual": 0.8240995838936376,
"eval_groundtruth_accuracy_counterfactual": 0.5256541816316059,
"eval_groundtruth_f1_counterfactual": 0.4205567101531645,
"eval_groundtruth_f1_factual": 0.6657749588429954,
"eval_icace_cosine": 0.4569862186908722,
"eval_icace_l2": 0.5416843295097351,
"eval_icace_normdiff": 0.31774964928627014,
"eval_loss": 5.162288665771484,
"eval_runtime": 13.3649,
"eval_samples_per_second": 291.66,
"eval_steps_per_second": 4.564,
"step": 550
},
{
"epoch": 0.96,
"learning_rate": 4.393414211438475e-05,
"loss": 5.4634,
"step": 560
},
{
"epoch": 0.99,
"learning_rate": 4.371750433275563e-05,
"loss": 5.5253,
"step": 580
},
{
"epoch": 1.03,
"learning_rate": 4.3500866551126516e-05,
"loss": 5.6152,
"step": 600
},
{
"epoch": 1.03,
"eval_distillation_accuracy_counterfactual": 0.5608004104669061,
"eval_distillation_accuracy_factual": 0.861723961005644,
"eval_distillation_f1_counterfactual": 0.4447501209092303,
"eval_distillation_f1_factual": 0.8419521920224236,
"eval_groundtruth_accuracy_counterfactual": 0.5400205233453053,
"eval_groundtruth_f1_counterfactual": 0.4277343931802212,
"eval_groundtruth_f1_factual": 0.6685555092012573,
"eval_icace_cosine": 0.4536830484867096,
"eval_icace_l2": 0.5286034345626831,
"eval_icace_normdiff": 0.3132805824279785,
"eval_loss": 4.94181489944458,
"eval_runtime": 8.6375,
"eval_samples_per_second": 451.289,
"eval_steps_per_second": 7.062,
"step": 600
},
{
"epoch": 1.06,
"learning_rate": 4.3284228769497406e-05,
"loss": 5.4663,
"step": 620
},
{
"epoch": 1.1,
"learning_rate": 4.306759098786828e-05,
"loss": 5.4687,
"step": 640
},
{
"epoch": 1.11,
"eval_distillation_accuracy_counterfactual": 0.5310415597742432,
"eval_distillation_accuracy_factual": 0.8571062083119548,
"eval_distillation_f1_counterfactual": 0.42136058295206064,
"eval_distillation_f1_factual": 0.829427902934218,
"eval_groundtruth_accuracy_counterfactual": 0.5166752180605438,
"eval_groundtruth_f1_counterfactual": 0.4122890718560407,
"eval_groundtruth_f1_factual": 0.6676086326825417,
"eval_icace_cosine": 0.456624835729599,
"eval_icace_l2": 0.5346877574920654,
"eval_icace_normdiff": 0.31808435916900635,
"eval_loss": 4.98232889175415,
"eval_runtime": 13.5322,
"eval_samples_per_second": 288.054,
"eval_steps_per_second": 4.508,
"step": 650
},
{
"epoch": 1.13,
"learning_rate": 4.285095320623917e-05,
"loss": 5.3452,
"step": 660
},
{
"epoch": 1.17,
"learning_rate": 4.2634315424610055e-05,
"loss": 5.3117,
"step": 680
},
{
"epoch": 1.2,
"learning_rate": 4.241767764298094e-05,
"loss": 5.3126,
"step": 700
},
{
"epoch": 1.2,
"eval_distillation_accuracy_counterfactual": 0.5407901487942535,
"eval_distillation_accuracy_factual": 0.8499230374551052,
"eval_distillation_f1_counterfactual": 0.4274335785359253,
"eval_distillation_f1_factual": 0.8317003860006039,
"eval_groundtruth_accuracy_counterfactual": 0.5325808106721396,
"eval_groundtruth_f1_counterfactual": 0.4229108777271807,
"eval_groundtruth_f1_factual": 0.6817984059431582,
"eval_icace_cosine": 0.45930054783821106,
"eval_icace_l2": 0.5305303931236267,
"eval_icace_normdiff": 0.30581900477409363,
"eval_loss": 5.033056259155273,
"eval_runtime": 8.5097,
"eval_samples_per_second": 458.065,
"eval_steps_per_second": 7.168,
"step": 700
},
{
"epoch": 1.23,
"learning_rate": 4.220103986135182e-05,
"loss": 5.2491,
"step": 720
},
{
"epoch": 1.27,
"learning_rate": 4.198440207972271e-05,
"loss": 5.3605,
"step": 740
},
{
"epoch": 1.29,
"eval_distillation_accuracy_counterfactual": 0.5377116469984607,
"eval_distillation_accuracy_factual": 0.8619805028219599,
"eval_distillation_f1_counterfactual": 0.42855505894972856,
"eval_distillation_f1_factual": 0.8447989566482331,
"eval_groundtruth_accuracy_counterfactual": 0.5243714725500257,
"eval_groundtruth_f1_counterfactual": 0.4197832634929354,
"eval_groundtruth_f1_factual": 0.6888914410164863,
"eval_icace_cosine": 0.45965054631233215,
"eval_icace_l2": 0.5459038615226746,
"eval_icace_normdiff": 0.3226074278354645,
"eval_loss": 5.037441253662109,
"eval_runtime": 13.1642,
"eval_samples_per_second": 296.107,
"eval_steps_per_second": 4.634,
"step": 750
},
{
"epoch": 1.3,
"learning_rate": 4.1767764298093586e-05,
"loss": 5.2295,
"step": 760
},
{
"epoch": 1.34,
"learning_rate": 4.1551126516464476e-05,
"loss": 5.239,
"step": 780
},
{
"epoch": 1.37,
"learning_rate": 4.133448873483536e-05,
"loss": 5.3073,
"step": 800
},
{
"epoch": 1.37,
"eval_distillation_accuracy_counterfactual": 0.5415597742432017,
"eval_distillation_accuracy_factual": 0.8599281682914315,
"eval_distillation_f1_counterfactual": 0.41963605943221116,
"eval_distillation_f1_factual": 0.8430915620375968,
"eval_groundtruth_accuracy_counterfactual": 0.5274499743458184,
"eval_groundtruth_f1_counterfactual": 0.4101686967489355,
"eval_groundtruth_f1_factual": 0.6786457422232816,
"eval_icace_cosine": 0.4616963565349579,
"eval_icace_l2": 0.5364943742752075,
"eval_icace_normdiff": 0.3089084029197693,
"eval_loss": 5.033254146575928,
"eval_runtime": 8.6076,
"eval_samples_per_second": 452.858,
"eval_steps_per_second": 7.087,
"step": 800
},
{
"epoch": 1.41,
"learning_rate": 4.111785095320624e-05,
"loss": 5.2177,
"step": 820
},
{
"epoch": 1.44,
"learning_rate": 4.0901213171577124e-05,
"loss": 5.3257,
"step": 840
},
{
"epoch": 1.46,
"eval_distillation_accuracy_counterfactual": 0.543355566957414,
"eval_distillation_accuracy_factual": 0.8509492047203694,
"eval_distillation_f1_counterfactual": 0.4343061450729029,
"eval_distillation_f1_factual": 0.8330255589809777,
"eval_groundtruth_accuracy_counterfactual": 0.5351462288353002,
"eval_groundtruth_f1_counterfactual": 0.42960347362494583,
"eval_groundtruth_f1_factual": 0.6795622319151484,
"eval_icace_cosine": 0.4549107849597931,
"eval_icace_l2": 0.5301600098609924,
"eval_icace_normdiff": 0.31678125262260437,
"eval_loss": 4.906150817871094,
"eval_runtime": 13.5043,
"eval_samples_per_second": 288.649,
"eval_steps_per_second": 4.517,
"step": 850
},
{
"epoch": 1.48,
"learning_rate": 4.068457538994801e-05,
"loss": 5.2967,
"step": 860
},
{
"epoch": 1.51,
"learning_rate": 4.04679376083189e-05,
"loss": 5.1851,
"step": 880
},
{
"epoch": 1.54,
"learning_rate": 4.025129982668977e-05,
"loss": 5.0388,
"step": 900
},
{
"epoch": 1.54,
"eval_distillation_accuracy_counterfactual": 0.5377116469984607,
"eval_distillation_accuracy_factual": 0.8545407901487942,
"eval_distillation_f1_counterfactual": 0.43479793735286665,
"eval_distillation_f1_factual": 0.833048646828978,
"eval_groundtruth_accuracy_counterfactual": 0.5315546434068753,
"eval_groundtruth_f1_counterfactual": 0.4344351894458695,
"eval_groundtruth_f1_factual": 0.6827800644937321,
"eval_icace_cosine": 0.4591978192329407,
"eval_icace_l2": 0.5361349582672119,
"eval_icace_normdiff": 0.31350481510162354,
"eval_loss": 5.07242488861084,
"eval_runtime": 8.6901,
"eval_samples_per_second": 448.556,
"eval_steps_per_second": 7.019,
"step": 900
},
{
"epoch": 1.58,
"learning_rate": 4.003466204506066e-05,
"loss": 5.1534,
"step": 920
},
{
"epoch": 1.61,
"learning_rate": 3.9818024263431546e-05,
"loss": 4.936,
"step": 940
},
{
"epoch": 1.63,
"eval_distillation_accuracy_counterfactual": 0.5543868650590046,
"eval_distillation_accuracy_factual": 0.8463314520266804,
"eval_distillation_f1_counterfactual": 0.4624968238722985,
"eval_distillation_f1_factual": 0.8265879438232977,
"eval_groundtruth_accuracy_counterfactual": 0.5395074397126731,
"eval_groundtruth_f1_counterfactual": 0.45300499369091884,
"eval_groundtruth_f1_factual": 0.6899654625013621,
"eval_icace_cosine": 0.45697498321533203,
"eval_icace_l2": 0.5305233597755432,
"eval_icace_normdiff": 0.31259921193122864,
"eval_loss": 5.114608287811279,
"eval_runtime": 13.2302,
"eval_samples_per_second": 294.63,
"eval_steps_per_second": 4.611,
"step": 950
},
{
"epoch": 1.65,
"learning_rate": 3.960138648180243e-05,
"loss": 5.1669,
"step": 960
},
{
"epoch": 1.68,
"learning_rate": 3.938474870017331e-05,
"loss": 5.0256,
"step": 980
},
{
"epoch": 1.72,
"learning_rate": 3.91681109185442e-05,
"loss": 5.0776,
"step": 1000
},
{
"epoch": 1.72,
"eval_distillation_accuracy_counterfactual": 0.5400205233453053,
"eval_distillation_accuracy_factual": 0.8442791174961519,
"eval_distillation_f1_counterfactual": 0.41057517072906763,
"eval_distillation_f1_factual": 0.8175198147586908,
"eval_groundtruth_accuracy_counterfactual": 0.5336069779374037,
"eval_groundtruth_f1_counterfactual": 0.4118030335207775,
"eval_groundtruth_f1_factual": 0.6645296484209967,
"eval_icace_cosine": 0.4537060856819153,
"eval_icace_l2": 0.5299485921859741,
"eval_icace_normdiff": 0.3117276728153229,
"eval_loss": 5.096031665802002,
"eval_runtime": 8.5478,
"eval_samples_per_second": 456.026,
"eval_steps_per_second": 7.136,
"step": 1000
},
{
"epoch": 1.75,
"learning_rate": 3.895147313691508e-05,
"loss": 5.1044,
"step": 1020
},
{
"epoch": 1.78,
"learning_rate": 3.873483535528597e-05,
"loss": 5.1824,
"step": 1040
},
{
"epoch": 1.8,
"eval_distillation_accuracy_counterfactual": 0.5454079014879425,
"eval_distillation_accuracy_factual": 0.8370959466393022,
"eval_distillation_f1_counterfactual": 0.4514467111829311,
"eval_distillation_f1_factual": 0.8189293913112244,
"eval_groundtruth_accuracy_counterfactual": 0.539250897896357,
"eval_groundtruth_f1_counterfactual": 0.44818728166213734,
"eval_groundtruth_f1_factual": 0.6824355248442739,
"eval_icace_cosine": 0.45461148023605347,
"eval_icace_l2": 0.5214051604270935,
"eval_icace_normdiff": 0.30551236867904663,
"eval_loss": 4.964339733123779,
"eval_runtime": 13.4181,
"eval_samples_per_second": 290.503,
"eval_steps_per_second": 4.546,
"step": 1050
},
{
"epoch": 1.82,
"learning_rate": 3.851819757365685e-05,
"loss": 5.2425,
"step": 1060
},
{
"epoch": 1.85,
"learning_rate": 3.830155979202773e-05,
"loss": 5.0704,
"step": 1080
},
{
"epoch": 1.89,
"learning_rate": 3.8084922010398616e-05,
"loss": 5.1106,
"step": 1100
},
{
"epoch": 1.89,
"eval_distillation_accuracy_counterfactual": 0.5138532580810672,
"eval_distillation_accuracy_factual": 0.8370959466393022,
"eval_distillation_f1_counterfactual": 0.43106934062357655,
"eval_distillation_f1_factual": 0.8165823528984962,
"eval_groundtruth_accuracy_counterfactual": 0.5110312981015905,
"eval_groundtruth_f1_counterfactual": 0.43076739498450695,
"eval_groundtruth_f1_factual": 0.6854469504667037,
"eval_icace_cosine": 0.46039697527885437,
"eval_icace_l2": 0.5523171424865723,
"eval_icace_normdiff": 0.3318041265010834,
"eval_loss": 5.376918315887451,
"eval_runtime": 9.0752,
"eval_samples_per_second": 429.522,
"eval_steps_per_second": 6.722,
"step": 1100
},
{
"epoch": 1.92,
"learning_rate": 3.78682842287695e-05,
"loss": 5.1747,
"step": 1120
},
{
"epoch": 1.96,
"learning_rate": 3.765164644714038e-05,
"loss": 4.9442,
"step": 1140
},
{
"epoch": 1.97,
"eval_distillation_accuracy_counterfactual": 0.530015392508979,
"eval_distillation_accuracy_factual": 0.8468445356593125,
"eval_distillation_f1_counterfactual": 0.41923047344558784,
"eval_distillation_f1_factual": 0.8209617109339709,
"eval_groundtruth_accuracy_counterfactual": 0.5194971780400205,
"eval_groundtruth_f1_counterfactual": 0.414573449663499,
"eval_groundtruth_f1_factual": 0.6814846999962294,
"eval_icace_cosine": 0.45492398738861084,
"eval_icace_l2": 0.5379559397697449,
"eval_icace_normdiff": 0.31172436475753784,
"eval_loss": 5.110229969024658,
"eval_runtime": 12.5288,
"eval_samples_per_second": 311.123,
"eval_steps_per_second": 4.869,
"step": 1150
},
{
"epoch": 1.99,
"learning_rate": 3.7435008665511264e-05,
"loss": 5.0781,
"step": 1160
},
{
"epoch": 2.02,
"learning_rate": 3.7218370883882154e-05,
"loss": 4.8214,
"step": 1180
},
{
"epoch": 2.06,
"learning_rate": 3.700173310225303e-05,
"loss": 5.1082,
"step": 1200
},
{
"epoch": 2.06,
"eval_distillation_accuracy_counterfactual": 0.538481272447409,
"eval_distillation_accuracy_factual": 0.8483837865572088,
"eval_distillation_f1_counterfactual": 0.4406972082523029,
"eval_distillation_f1_factual": 0.8301964182514345,
"eval_groundtruth_accuracy_counterfactual": 0.5287326834273987,
"eval_groundtruth_f1_counterfactual": 0.4366241037475905,
"eval_groundtruth_f1_factual": 0.6916041568169767,
"eval_icace_cosine": 0.45687490701675415,
"eval_icace_l2": 0.5412150621414185,
"eval_icace_normdiff": 0.31677863001823425,
"eval_loss": 5.133824825286865,
"eval_runtime": 8.5259,
"eval_samples_per_second": 457.195,
"eval_steps_per_second": 7.155,
"step": 1200
},
{
"epoch": 2.09,
"learning_rate": 3.678509532062392e-05,
"loss": 4.8366,
"step": 1220
},
{
"epoch": 2.13,
"learning_rate": 3.65684575389948e-05,
"loss": 4.8667,
"step": 1240
},
{
"epoch": 2.14,
"eval_distillation_accuracy_counterfactual": 0.5451513596716264,
"eval_distillation_accuracy_factual": 0.8471010774756286,
"eval_distillation_f1_counterfactual": 0.44516629001487945,
"eval_distillation_f1_factual": 0.8300653148799452,
"eval_groundtruth_accuracy_counterfactual": 0.5361723961005644,
"eval_groundtruth_f1_counterfactual": 0.4403045959585604,
"eval_groundtruth_f1_factual": 0.6972167033055753,
"eval_icace_cosine": 0.4484976530075073,
"eval_icace_l2": 0.5191196203231812,
"eval_icace_normdiff": 0.30620598793029785,
"eval_loss": 4.914973258972168,
"eval_runtime": 13.2047,
"eval_samples_per_second": 295.198,
"eval_steps_per_second": 4.62,
"step": 1250
},
{
"epoch": 2.16,
"learning_rate": 3.6351819757365686e-05,
"loss": 5.0023,
"step": 1260
},
{
"epoch": 2.2,
"learning_rate": 3.613518197573657e-05,
"loss": 4.9106,
"step": 1280
},
{
"epoch": 2.23,
"learning_rate": 3.591854419410746e-05,
"loss": 4.9292,
"step": 1300
},
{
"epoch": 2.23,
"eval_distillation_accuracy_counterfactual": 0.5479733196511031,
"eval_distillation_accuracy_factual": 0.8619805028219599,
"eval_distillation_f1_counterfactual": 0.44649902049251783,
"eval_distillation_f1_factual": 0.8397438246889717,
"eval_groundtruth_accuracy_counterfactual": 0.5436121087737301,
"eval_groundtruth_f1_counterfactual": 0.4469947847104564,
"eval_groundtruth_f1_factual": 0.6790740044432162,
"eval_icace_cosine": 0.4501597285270691,
"eval_icace_l2": 0.5136202573776245,
"eval_icace_normdiff": 0.29706600308418274,
"eval_loss": 4.897469520568848,
"eval_runtime": 8.5156,
"eval_samples_per_second": 457.75,
"eval_steps_per_second": 7.163,
"step": 1300
},
{
"epoch": 2.26,
"learning_rate": 3.5701906412478334e-05,
"loss": 4.9032,
"step": 1320
},
{
"epoch": 2.3,
"learning_rate": 3.5485268630849224e-05,
"loss": 4.8905,
"step": 1340
},
{
"epoch": 2.32,
"eval_distillation_accuracy_counterfactual": 0.5318111852231914,
"eval_distillation_accuracy_factual": 0.8540277065161621,
"eval_distillation_f1_counterfactual": 0.44265369921666026,
"eval_distillation_f1_factual": 0.8360842662987222,
"eval_groundtruth_accuracy_counterfactual": 0.5266803488968702,
"eval_groundtruth_f1_counterfactual": 0.4413916588252841,
"eval_groundtruth_f1_factual": 0.6766870153023606,
"eval_icace_cosine": 0.4583234190940857,
"eval_icace_l2": 0.5422101616859436,
"eval_icace_normdiff": 0.32523736357688904,
"eval_loss": 5.066246032714844,
"eval_runtime": 13.1912,
"eval_samples_per_second": 295.5,
"eval_steps_per_second": 4.624,
"step": 1350
},
{
"epoch": 2.33,
"learning_rate": 3.526863084922011e-05,
"loss": 4.8723,
"step": 1360
},
{
"epoch": 2.37,
"learning_rate": 3.505199306759099e-05,
"loss": 4.8684,
"step": 1380
},
{
"epoch": 2.4,
"learning_rate": 3.483535528596187e-05,
"loss": 4.8838,
"step": 1400
},
{
"epoch": 2.4,
"eval_distillation_accuracy_counterfactual": 0.5428424833247819,
"eval_distillation_accuracy_factual": 0.8468445356593125,
"eval_distillation_f1_counterfactual": 0.4218024392943229,
"eval_distillation_f1_factual": 0.8302460548095396,
"eval_groundtruth_accuracy_counterfactual": 0.5374551051821447,
"eval_groundtruth_f1_counterfactual": 0.4210443279497268,
"eval_groundtruth_f1_factual": 0.6863076976423126,
"eval_icace_cosine": 0.45339435338974,
"eval_icace_l2": 0.5222477316856384,
"eval_icace_normdiff": 0.29324910044670105,
"eval_loss": 4.967700958251953,
"eval_runtime": 8.6663,
"eval_samples_per_second": 449.789,
"eval_steps_per_second": 7.039,
"step": 1400
},
{
"epoch": 2.44,
"learning_rate": 3.461871750433276e-05,
"loss": 4.9797,
"step": 1420
},
{
"epoch": 2.47,
"learning_rate": 3.440207972270364e-05,
"loss": 4.7387,
"step": 1440
},
{
"epoch": 2.49,
"eval_distillation_accuracy_counterfactual": 0.5407901487942535,
"eval_distillation_accuracy_factual": 0.8609543355566958,
"eval_distillation_f1_counterfactual": 0.4316270282168784,
"eval_distillation_f1_factual": 0.8433610743824644,
"eval_groundtruth_accuracy_counterfactual": 0.5323242688558235,
"eval_groundtruth_f1_counterfactual": 0.42891639415690835,
"eval_groundtruth_f1_factual": 0.6812657459890642,
"eval_icace_cosine": 0.45285436511039734,
"eval_icace_l2": 0.5242744088172913,
"eval_icace_normdiff": 0.2969839870929718,
"eval_loss": 4.971090793609619,
"eval_runtime": 8.4714,
"eval_samples_per_second": 460.137,
"eval_steps_per_second": 7.201,
"step": 1450
},
{
"epoch": 2.5,
"learning_rate": 3.418544194107453e-05,
"loss": 4.9854,
"step": 1460
},
{
"epoch": 2.54,
"learning_rate": 3.396880415944541e-05,
"loss": 4.9463,
"step": 1480
},
{
"epoch": 2.57,
"learning_rate": 3.3752166377816294e-05,
"loss": 4.8231,
"step": 1500
},
{
"epoch": 2.57,
"eval_distillation_accuracy_counterfactual": 0.5348896870189841,
"eval_distillation_accuracy_factual": 0.8365828630066701,
"eval_distillation_f1_counterfactual": 0.4412310147597913,
"eval_distillation_f1_factual": 0.8178683138413986,
"eval_groundtruth_accuracy_counterfactual": 0.530015392508979,
"eval_groundtruth_f1_counterfactual": 0.44062130454891085,
"eval_groundtruth_f1_factual": 0.6915266725506644,
"eval_icace_cosine": 0.45718541741371155,
"eval_icace_l2": 0.5440450310707092,
"eval_icace_normdiff": 0.3275880515575409,
"eval_loss": 5.153608322143555,
"eval_runtime": 8.5873,
"eval_samples_per_second": 453.926,
"eval_steps_per_second": 7.104,
"step": 1500
},
{
"epoch": 2.61,
"learning_rate": 3.353552859618718e-05,
"loss": 4.8916,
"step": 1520
},
{
"epoch": 2.64,
"learning_rate": 3.331889081455806e-05,
"loss": 4.8635,
"step": 1540
},
{
"epoch": 2.66,
"eval_distillation_accuracy_counterfactual": 0.5513083632632119,
"eval_distillation_accuracy_factual": 0.8573627501282709,
"eval_distillation_f1_counterfactual": 0.44609171920181473,
"eval_distillation_f1_factual": 0.8375044413424142,
"eval_groundtruth_accuracy_counterfactual": 0.5428424833247819,
"eval_groundtruth_f1_counterfactual": 0.4433319000026114,
"eval_groundtruth_f1_factual": 0.6913476363231361,
"eval_icace_cosine": 0.4538714587688446,
"eval_icace_l2": 0.5147866010665894,
"eval_icace_normdiff": 0.28974854946136475,
"eval_loss": 4.9417548179626465,
"eval_runtime": 8.5448,
"eval_samples_per_second": 456.183,
"eval_steps_per_second": 7.139,
"step": 1550
},
{
"epoch": 2.68,
"learning_rate": 3.310225303292894e-05,
"loss": 4.7195,
"step": 1560
},
{
"epoch": 2.71,
"learning_rate": 3.2885615251299825e-05,
"loss": 4.8921,
"step": 1580
},
{
"epoch": 2.74,
"learning_rate": 3.2668977469670715e-05,
"loss": 4.659,
"step": 1600
},
{
"epoch": 2.74,
"eval_distillation_accuracy_counterfactual": 0.5464340687532068,
"eval_distillation_accuracy_factual": 0.8512057465366855,
"eval_distillation_f1_counterfactual": 0.4474569537385958,
"eval_distillation_f1_factual": 0.8348714122309786,
"eval_groundtruth_accuracy_counterfactual": 0.543099025141098,
"eval_groundtruth_f1_counterfactual": 0.4496126854336094,
"eval_groundtruth_f1_factual": 0.685952702077513,
"eval_icace_cosine": 0.45106443762779236,
"eval_icace_l2": 0.5234705805778503,
"eval_icace_normdiff": 0.30465561151504517,
"eval_loss": 4.918430805206299,
"eval_runtime": 8.5119,
"eval_samples_per_second": 457.949,
"eval_steps_per_second": 7.166,
"step": 1600
},
{
"epoch": 2.78,
"learning_rate": 3.245233968804159e-05,
"loss": 4.8637,
"step": 1620
},
{
"epoch": 2.81,
"learning_rate": 3.223570190641248e-05,
"loss": 4.7494,
"step": 1640
},
{
"epoch": 2.83,
"eval_distillation_accuracy_counterfactual": 0.530015392508979,
"eval_distillation_accuracy_factual": 0.8558234992303746,
"eval_distillation_f1_counterfactual": 0.43629277707813435,
"eval_distillation_f1_factual": 0.8417596638128584,
"eval_groundtruth_accuracy_counterfactual": 0.5256541816316059,
"eval_groundtruth_f1_counterfactual": 0.4355238167978441,
"eval_groundtruth_f1_factual": 0.6901540356179046,
"eval_icace_cosine": 0.45806506276130676,
"eval_icace_l2": 0.5422117710113525,
"eval_icace_normdiff": 0.3198961317539215,
"eval_loss": 5.055630683898926,
"eval_runtime": 8.4909,
"eval_samples_per_second": 459.08,
"eval_steps_per_second": 7.184,
"step": 1650
},
{
"epoch": 2.85,
"learning_rate": 3.2019064124783364e-05,
"loss": 4.9948,
"step": 1660
},
{
"epoch": 2.88,
"learning_rate": 3.1802426343154247e-05,
"loss": 4.8173,
"step": 1680
},
{
"epoch": 2.92,
"learning_rate": 3.158578856152513e-05,
"loss": 4.992,
"step": 1700
},
{
"epoch": 2.92,
"eval_distillation_accuracy_counterfactual": 0.5425859415084658,
"eval_distillation_accuracy_factual": 0.85351462288353,
"eval_distillation_f1_counterfactual": 0.4447236146388141,
"eval_distillation_f1_factual": 0.8351101653639337,
"eval_groundtruth_accuracy_counterfactual": 0.5397639815289892,
"eval_groundtruth_f1_counterfactual": 0.44798180339075505,
"eval_groundtruth_f1_factual": 0.6778238731391417,
"eval_icace_cosine": 0.45538121461868286,
"eval_icace_l2": 0.5322200655937195,
"eval_icace_normdiff": 0.31347331404685974,
"eval_loss": 4.9646124839782715,
"eval_runtime": 8.526,
"eval_samples_per_second": 457.191,
"eval_steps_per_second": 7.155,
"step": 1700
},
{
"epoch": 2.95,
"learning_rate": 3.136915077989602e-05,
"loss": 4.9492,
"step": 1720
},
{
"epoch": 2.98,
"learning_rate": 3.1152512998266895e-05,
"loss": 4.7878,
"step": 1740
},
{
"epoch": 3.0,
"eval_distillation_accuracy_counterfactual": 0.5320677270395074,
"eval_distillation_accuracy_factual": 0.8496664956387892,
"eval_distillation_f1_counterfactual": 0.4458143361123156,
"eval_distillation_f1_factual": 0.834744839305625,
"eval_groundtruth_accuracy_counterfactual": 0.5295023088763469,
"eval_groundtruth_f1_counterfactual": 0.4474449904485411,
"eval_groundtruth_f1_factual": 0.6845588782361415,
"eval_icace_cosine": 0.4579332768917084,
"eval_icace_l2": 0.5446497201919556,
"eval_icace_normdiff": 0.31855684518814087,
"eval_loss": 5.185755252838135,
"eval_runtime": 8.4459,
"eval_samples_per_second": 461.525,
"eval_steps_per_second": 7.222,
"step": 1750
},
{
"epoch": 3.02,
"learning_rate": 3.0935875216637785e-05,
"loss": 4.8111,
"step": 1760
},
{
"epoch": 3.05,
"learning_rate": 3.071923743500867e-05,
"loss": 4.6818,
"step": 1780
},
{
"epoch": 3.09,
"learning_rate": 3.0502599653379547e-05,
"loss": 4.7687,
"step": 1800
},
{
"epoch": 3.09,
"eval_distillation_accuracy_counterfactual": 0.5536172396100565,
"eval_distillation_accuracy_factual": 0.8612108773730118,
"eval_distillation_f1_counterfactual": 0.45069397828871016,
"eval_distillation_f1_factual": 0.8386466173302395,
"eval_groundtruth_accuracy_counterfactual": 0.5423293996921498,
"eval_groundtruth_f1_counterfactual": 0.4442055475871095,
"eval_groundtruth_f1_factual": 0.6860856434140972,
"eval_icace_cosine": 0.4503948390483856,
"eval_icace_l2": 0.5204057097434998,
"eval_icace_normdiff": 0.30468884110450745,
"eval_loss": 4.862071514129639,
"eval_runtime": 8.5156,
"eval_samples_per_second": 457.751,
"eval_steps_per_second": 7.163,
"step": 1800
},
{
"epoch": 3.12,
"learning_rate": 3.0285961871750434e-05,
"loss": 4.7456,
"step": 1820
},
{
"epoch": 3.16,
"learning_rate": 3.006932409012132e-05,
"loss": 4.8854,
"step": 1840
},
{
"epoch": 3.17,
"eval_distillation_accuracy_counterfactual": 0.5543868650590046,
"eval_distillation_accuracy_factual": 0.8624935864545921,
"eval_distillation_f1_counterfactual": 0.45637182291257544,
"eval_distillation_f1_factual": 0.8486332957487601,
"eval_groundtruth_accuracy_counterfactual": 0.5533606977937404,
"eval_groundtruth_f1_counterfactual": 0.4612555911529408,
"eval_groundtruth_f1_factual": 0.6815381521049796,
"eval_icace_cosine": 0.45295801758766174,
"eval_icace_l2": 0.5143481492996216,
"eval_icace_normdiff": 0.3000515103340149,
"eval_loss": 4.850634574890137,
"eval_runtime": 10.44,
"eval_samples_per_second": 373.372,
"eval_steps_per_second": 5.843,
"step": 1850
},
{
"epoch": 3.19,
"learning_rate": 2.98526863084922e-05,
"loss": 4.6583,
"step": 1860
},
{
"epoch": 3.22,
"learning_rate": 2.9636048526863086e-05,
"loss": 4.722,
"step": 1880
},
{
"epoch": 3.26,
"learning_rate": 2.9419410745233972e-05,
"loss": 4.7166,
"step": 1900
},
{
"epoch": 3.26,
"eval_distillation_accuracy_counterfactual": 0.521806054386865,
"eval_distillation_accuracy_factual": 0.8386351975371986,
"eval_distillation_f1_counterfactual": 0.4298153628320489,
"eval_distillation_f1_factual": 0.8168198911546973,
"eval_groundtruth_accuracy_counterfactual": 0.5261672652642381,
"eval_groundtruth_f1_counterfactual": 0.43622988773989146,
"eval_groundtruth_f1_factual": 0.6731503713772962,
"eval_icace_cosine": 0.46492505073547363,
"eval_icace_l2": 0.5486338138580322,
"eval_icace_normdiff": 0.3204009532928467,
"eval_loss": 5.321374893188477,
"eval_runtime": 8.8584,
"eval_samples_per_second": 440.036,
"eval_steps_per_second": 6.886,
"step": 1900
},
{
"epoch": 3.29,
"learning_rate": 2.920277296360485e-05,
"loss": 4.7496,
"step": 1920
},
{
"epoch": 3.33,
"learning_rate": 2.8986135181975738e-05,
"loss": 4.859,
"step": 1940
},
{
"epoch": 3.34,
"eval_distillation_accuracy_counterfactual": 0.5543868650590046,
"eval_distillation_accuracy_factual": 0.8412006157003592,
"eval_distillation_f1_counterfactual": 0.4550611014400025,
"eval_distillation_f1_factual": 0.8236540467948268,
"eval_groundtruth_accuracy_counterfactual": 0.547460236018471,
"eval_groundtruth_f1_counterfactual": 0.45334216208035627,
"eval_groundtruth_f1_factual": 0.6897532601098797,
"eval_icace_cosine": 0.4536947011947632,
"eval_icace_l2": 0.5238730907440186,
"eval_icace_normdiff": 0.30295780301094055,
"eval_loss": 4.9707112312316895,
"eval_runtime": 13.3675,
"eval_samples_per_second": 291.602,
"eval_steps_per_second": 4.563,
"step": 1950
},
{
"epoch": 3.36,
"learning_rate": 2.8769497400346624e-05,
"loss": 4.7329,
"step": 1960
},
{
"epoch": 3.4,
"learning_rate": 2.8552859618717503e-05,
"loss": 4.9083,
"step": 1980
},
{
"epoch": 3.43,
"learning_rate": 2.833622183708839e-05,
"loss": 4.7854,
"step": 2000
},
{
"epoch": 3.43,
"eval_distillation_accuracy_counterfactual": 0.5379681888147768,
"eval_distillation_accuracy_factual": 0.8517188301693176,
"eval_distillation_f1_counterfactual": 0.44136242677706106,
"eval_distillation_f1_factual": 0.8329787432505646,
"eval_groundtruth_accuracy_counterfactual": 0.5369420215495125,
"eval_groundtruth_f1_counterfactual": 0.4465284034353478,
"eval_groundtruth_f1_factual": 0.682809704153051,
"eval_icace_cosine": 0.45627400279045105,
"eval_icace_l2": 0.5298829078674316,
"eval_icace_normdiff": 0.3082645535469055,
"eval_loss": 5.060492992401123,
"eval_runtime": 8.7373,
"eval_samples_per_second": 446.134,
"eval_steps_per_second": 6.982,
"step": 2000
},
{
"epoch": 3.46,
"learning_rate": 2.8119584055459276e-05,
"loss": 4.6758,
"step": 2020
},
{
"epoch": 3.5,
"learning_rate": 2.7902946273830156e-05,
"loss": 4.5728,
"step": 2040
},
{
"epoch": 3.52,
"eval_distillation_accuracy_counterfactual": 0.5551564905079528,
"eval_distillation_accuracy_factual": 0.8596716264751154,
"eval_distillation_f1_counterfactual": 0.4609374331427321,
"eval_distillation_f1_factual": 0.8450713527551953,
"eval_groundtruth_accuracy_counterfactual": 0.5479733196511031,
"eval_groundtruth_f1_counterfactual": 0.45960494557331416,
"eval_groundtruth_f1_factual": 0.6888964775148831,
"eval_icace_cosine": 0.46091988682746887,
"eval_icace_l2": 0.5246635675430298,
"eval_icace_normdiff": 0.3013245463371277,
"eval_loss": 4.980366230010986,
"eval_runtime": 13.5743,
"eval_samples_per_second": 287.16,
"eval_steps_per_second": 4.494,
"step": 2050
},
{
"epoch": 3.53,
"learning_rate": 2.7686308492201042e-05,
"loss": 4.6722,
"step": 2060
},
{
"epoch": 3.57,
"learning_rate": 2.7469670710571928e-05,
"loss": 4.5946,
"step": 2080
},
{
"epoch": 3.6,
"learning_rate": 2.7253032928942808e-05,
"loss": 4.7488,
"step": 2100
},
{
"epoch": 3.6,
"eval_distillation_accuracy_counterfactual": 0.5507952796305798,
"eval_distillation_accuracy_factual": 0.8530015392508979,
"eval_distillation_f1_counterfactual": 0.4423387911912668,
"eval_distillation_f1_factual": 0.836190452372467,
"eval_groundtruth_accuracy_counterfactual": 0.5405336069779374,
"eval_groundtruth_f1_counterfactual": 0.43700640203504726,
"eval_groundtruth_f1_factual": 0.6881924736914886,
"eval_icace_cosine": 0.44613537192344666,
"eval_icace_l2": 0.5100580453872681,
"eval_icace_normdiff": 0.2910933494567871,
"eval_loss": 4.805124759674072,
"eval_runtime": 8.5385,
"eval_samples_per_second": 456.523,
"eval_steps_per_second": 7.144,
"step": 2100
}
],
"max_steps": 4616,
"num_train_epochs": 8,
"total_flos": 1.76716722432384e+16,
"trial_name": null,
"trial_params": null
}