AFRAgent_pure_multimodel / trainer_state.json
neeraj321's picture
Upload folder using huggingface_hub
f23f89b verified
{
"best_metric": 72.2228513192164,
"best_model_checkpoint": "experiments/all_data_any_res_adain_finetuning_blip_lr5e-05_bs128_ip512_op256_ep12/checkpoint-40190",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 56266,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.062204528489674046,
"grad_norm": 0.12522129714488983,
"learning_rate": 2.5917478747667426e-06,
"loss": 15.4626,
"step": 500
},
{
"epoch": 0.12440905697934809,
"grad_norm": 0.09409886598587036,
"learning_rate": 5.183495749533485e-06,
"loss": 0.0492,
"step": 1000
},
{
"epoch": 0.18661358546902215,
"grad_norm": 0.052524592727422714,
"learning_rate": 7.775243624300229e-06,
"loss": 0.0449,
"step": 1500
},
{
"epoch": 0.24881811395869619,
"grad_norm": 0.18614421784877777,
"learning_rate": 1.036699149906697e-05,
"loss": 0.0431,
"step": 2000
},
{
"epoch": 0.31102264244837025,
"grad_norm": 0.0732133612036705,
"learning_rate": 1.2958739373833714e-05,
"loss": 0.0421,
"step": 2500
},
{
"epoch": 0.3732271709380443,
"grad_norm": 0.05523424595594406,
"learning_rate": 1.5550487248600457e-05,
"loss": 0.0411,
"step": 3000
},
{
"epoch": 0.4354316994277183,
"grad_norm": 0.04952659457921982,
"learning_rate": 1.8142235123367197e-05,
"loss": 0.0404,
"step": 3500
},
{
"epoch": 0.49763622791739237,
"grad_norm": 0.05076267197728157,
"learning_rate": 2.073398299813394e-05,
"loss": 0.0396,
"step": 4000
},
{
"epoch": 0.5598407564070664,
"grad_norm": 0.04251039773225784,
"learning_rate": 2.3325730872900688e-05,
"loss": 0.0385,
"step": 4500
},
{
"epoch": 0.6220452848967405,
"grad_norm": 0.045916568487882614,
"learning_rate": 2.5917478747667428e-05,
"loss": 0.0382,
"step": 5000
},
{
"epoch": 0.6842498133864146,
"grad_norm": 0.0338948592543602,
"learning_rate": 2.850922662243417e-05,
"loss": 0.0376,
"step": 5500
},
{
"epoch": 0.7464543418760886,
"grad_norm": 0.03767935559153557,
"learning_rate": 3.1100974497200915e-05,
"loss": 0.0374,
"step": 6000
},
{
"epoch": 0.8086588703657627,
"grad_norm": 0.03969201073050499,
"learning_rate": 3.369272237196766e-05,
"loss": 0.0372,
"step": 6500
},
{
"epoch": 0.8708633988554366,
"grad_norm": 0.037294551730155945,
"learning_rate": 3.6284470246734395e-05,
"loss": 0.0368,
"step": 7000
},
{
"epoch": 0.9330679273451107,
"grad_norm": 0.04354941099882126,
"learning_rate": 3.887621812150114e-05,
"loss": 0.0364,
"step": 7500
},
{
"epoch": 0.9952724558347847,
"grad_norm": 0.039071835577487946,
"learning_rate": 4.146796599626788e-05,
"loss": 0.0363,
"step": 8000
},
{
"epoch": 1.0,
"eval_val_general_accuracy": 58.761243310941595,
"eval_val_general_action_correct": 5161,
"eval_val_general_loss": 0.03570643812417984,
"eval_val_general_runtime": 381.2285,
"eval_val_general_samples_per_second": 23.039,
"eval_val_general_steps_per_second": 0.092,
"eval_val_general_text_acc": 57.78207901628145,
"eval_val_general_text_correct": 5075,
"eval_val_general_type_acc": 83.9234885574405,
"eval_val_general_type_correct": 7371,
"step": 8038
},
{
"epoch": 1.0,
"eval_test_general_accuracy": 60.98969539123542,
"eval_test_general_action_correct": 5386,
"eval_test_general_loss": 0.03328056260943413,
"eval_test_general_runtime": 358.8188,
"eval_test_general_samples_per_second": 24.611,
"eval_test_general_steps_per_second": 0.098,
"eval_test_general_text_acc": 60.33291812931718,
"eval_test_general_text_correct": 5328,
"eval_test_general_type_acc": 84.04484203374476,
"eval_test_general_type_correct": 7422,
"step": 8038
},
{
"epoch": 1.0,
"eval_val_single_accuracy": 73.20165813216289,
"eval_val_single_action_correct": 6004,
"eval_val_single_loss": 0.0292816162109375,
"eval_val_single_overall_val_accuracy": 26.3925802886209,
"eval_val_single_runtime": 331.8893,
"eval_val_single_samples_per_second": 24.713,
"eval_val_single_steps_per_second": 0.099,
"eval_val_single_text_acc": 72.70178005364545,
"eval_val_single_text_correct": 5963,
"eval_val_single_type_acc": 87.3811265544989,
"eval_val_single_type_correct": 7167,
"step": 8038
},
{
"epoch": 1.0,
"eval_test_single_accuracy": 73.34963325183375,
"eval_test_single_action_correct": 6000,
"eval_test_single_loss": 0.02950906753540039,
"eval_test_single_overall_test_accuracy": 26.867865728613832,
"eval_test_single_runtime": 320.8764,
"eval_test_single_samples_per_second": 25.493,
"eval_test_single_steps_per_second": 0.1,
"eval_test_single_text_acc": 72.86063569682152,
"eval_test_single_text_correct": 5960,
"eval_test_single_type_acc": 87.70171149144255,
"eval_test_single_type_correct": 7174,
"step": 8038
},
{
"epoch": 1.057476984324459,
"grad_norm": 0.04436585307121277,
"learning_rate": 4.405971387103463e-05,
"loss": 0.036,
"step": 8500
},
{
"epoch": 1.1196815128141329,
"grad_norm": 0.036587439477443695,
"learning_rate": 4.6651461745801375e-05,
"loss": 0.0356,
"step": 9000
},
{
"epoch": 1.181886041303807,
"grad_norm": 0.036080844700336456,
"learning_rate": 4.924320962056811e-05,
"loss": 0.0356,
"step": 9500
},
{
"epoch": 1.244090569793481,
"grad_norm": 0.031715475022792816,
"learning_rate": 4.979610643935031e-05,
"loss": 0.0354,
"step": 10000
},
{
"epoch": 1.306295098283155,
"grad_norm": 0.033429063856601715,
"learning_rate": 4.950812118419537e-05,
"loss": 0.0354,
"step": 10500
},
{
"epoch": 1.368499626772829,
"grad_norm": 0.03252234682440758,
"learning_rate": 4.922013592904044e-05,
"loss": 0.035,
"step": 11000
},
{
"epoch": 1.430704155262503,
"grad_norm": 0.030717169865965843,
"learning_rate": 4.89321506738855e-05,
"loss": 0.0349,
"step": 11500
},
{
"epoch": 1.4929086837521772,
"grad_norm": 0.02983550727367401,
"learning_rate": 4.864416541873056e-05,
"loss": 0.0346,
"step": 12000
},
{
"epoch": 1.5551132122418512,
"grad_norm": 0.03211957961320877,
"learning_rate": 4.835618016357562e-05,
"loss": 0.0349,
"step": 12500
},
{
"epoch": 1.6173177407315253,
"grad_norm": 0.03446466848254204,
"learning_rate": 4.806819490842069e-05,
"loss": 0.0344,
"step": 13000
},
{
"epoch": 1.6795222692211993,
"grad_norm": 0.029267357662320137,
"learning_rate": 4.7780209653265756e-05,
"loss": 0.0346,
"step": 13500
},
{
"epoch": 1.7417267977108732,
"grad_norm": 0.02974924072623253,
"learning_rate": 4.7492224398110816e-05,
"loss": 0.0342,
"step": 14000
},
{
"epoch": 1.8039313262005474,
"grad_norm": 0.027496496215462685,
"learning_rate": 4.720423914295588e-05,
"loss": 0.0341,
"step": 14500
},
{
"epoch": 1.8661358546902216,
"grad_norm": 0.03153369575738907,
"learning_rate": 4.691625388780095e-05,
"loss": 0.0341,
"step": 15000
},
{
"epoch": 1.9283403831798955,
"grad_norm": 0.024351611733436584,
"learning_rate": 4.662826863264601e-05,
"loss": 0.0343,
"step": 15500
},
{
"epoch": 1.9905449116695695,
"grad_norm": 0.030573569238185883,
"learning_rate": 4.6340283377491076e-05,
"loss": 0.034,
"step": 16000
},
{
"epoch": 2.0,
"eval_val_general_accuracy": 65.2282819082318,
"eval_val_general_action_correct": 5729,
"eval_val_general_loss": 0.03412911668419838,
"eval_val_general_runtime": 376.6551,
"eval_val_general_samples_per_second": 23.318,
"eval_val_general_steps_per_second": 0.093,
"eval_val_general_text_acc": 64.26050324490492,
"eval_val_general_text_correct": 5644,
"eval_val_general_type_acc": 86.05260161675965,
"eval_val_general_type_correct": 7558,
"step": 16076
},
{
"epoch": 2.0,
"eval_test_general_accuracy": 66.52700713395991,
"eval_test_general_action_correct": 5875,
"eval_test_general_loss": 0.031758010387420654,
"eval_test_general_runtime": 351.859,
"eval_test_general_samples_per_second": 25.098,
"eval_test_general_steps_per_second": 0.099,
"eval_test_general_text_acc": 65.85890612614654,
"eval_test_general_text_correct": 5816,
"eval_test_general_type_acc": 85.89061261465292,
"eval_test_general_type_correct": 7585,
"step": 16076
},
{
"epoch": 2.0,
"eval_val_single_accuracy": 80.48037064130699,
"eval_val_single_action_correct": 6601,
"eval_val_single_loss": 0.02729242481291294,
"eval_val_single_overall_val_accuracy": 29.141730509907756,
"eval_val_single_runtime": 333.3923,
"eval_val_single_samples_per_second": 24.602,
"eval_val_single_steps_per_second": 0.099,
"eval_val_single_text_acc": 80.04145330407218,
"eval_val_single_text_correct": 6565,
"eval_val_single_type_acc": 90.27066569129481,
"eval_val_single_type_correct": 7404,
"step": 16076
},
{
"epoch": 2.0,
"eval_test_single_accuracy": 81.38141809290954,
"eval_test_single_action_correct": 6657,
"eval_test_single_loss": 0.027449607849121094,
"eval_test_single_overall_test_accuracy": 29.581685045373888,
"eval_test_single_runtime": 333.3531,
"eval_test_single_samples_per_second": 24.539,
"eval_test_single_steps_per_second": 0.096,
"eval_test_single_text_acc": 81.01466992665036,
"eval_test_single_text_correct": 6627,
"eval_test_single_type_acc": 90.79462102689486,
"eval_test_single_type_correct": 7427,
"step": 16076
},
{
"epoch": 2.0527494401592437,
"grad_norm": 0.027034949511289597,
"learning_rate": 4.605229812233614e-05,
"loss": 0.0336,
"step": 16500
},
{
"epoch": 2.114953968648918,
"grad_norm": 0.026598777621984482,
"learning_rate": 4.57643128671812e-05,
"loss": 0.0335,
"step": 17000
},
{
"epoch": 2.1771584971385916,
"grad_norm": 0.0320325531065464,
"learning_rate": 4.547632761202626e-05,
"loss": 0.0335,
"step": 17500
},
{
"epoch": 2.2393630256282657,
"grad_norm": 0.02739659883081913,
"learning_rate": 4.5188342356871336e-05,
"loss": 0.0333,
"step": 18000
},
{
"epoch": 2.30156755411794,
"grad_norm": 0.029264872893691063,
"learning_rate": 4.4900357101716396e-05,
"loss": 0.0333,
"step": 18500
},
{
"epoch": 2.363772082607614,
"grad_norm": 0.03222252428531647,
"learning_rate": 4.4612371846561455e-05,
"loss": 0.0334,
"step": 19000
},
{
"epoch": 2.425976611097288,
"grad_norm": 0.03381313756108284,
"learning_rate": 4.432438659140652e-05,
"loss": 0.0333,
"step": 19500
},
{
"epoch": 2.488181139586962,
"grad_norm": 0.028583131730556488,
"learning_rate": 4.403640133625159e-05,
"loss": 0.0333,
"step": 20000
},
{
"epoch": 2.550385668076636,
"grad_norm": 0.02700190059840679,
"learning_rate": 4.374841608109665e-05,
"loss": 0.0334,
"step": 20500
},
{
"epoch": 2.61259019656631,
"grad_norm": 0.026916461065411568,
"learning_rate": 4.346043082594171e-05,
"loss": 0.0332,
"step": 21000
},
{
"epoch": 2.674794725055984,
"grad_norm": 0.028127728030085564,
"learning_rate": 4.317244557078678e-05,
"loss": 0.0331,
"step": 21500
},
{
"epoch": 2.736999253545658,
"grad_norm": 0.02740449644625187,
"learning_rate": 4.288446031563184e-05,
"loss": 0.0333,
"step": 22000
},
{
"epoch": 2.799203782035332,
"grad_norm": 0.027998166158795357,
"learning_rate": 4.25964750604769e-05,
"loss": 0.0329,
"step": 22500
},
{
"epoch": 2.861408310525006,
"grad_norm": 0.03141350299119949,
"learning_rate": 4.230848980532197e-05,
"loss": 0.033,
"step": 23000
},
{
"epoch": 2.9236128390146803,
"grad_norm": 0.02853882871568203,
"learning_rate": 4.2020504550167035e-05,
"loss": 0.0329,
"step": 23500
},
{
"epoch": 2.9858173675043544,
"grad_norm": 0.027763281017541885,
"learning_rate": 4.1732519295012095e-05,
"loss": 0.0329,
"step": 24000
},
{
"epoch": 3.0,
"eval_val_general_accuracy": 68.01776158487988,
"eval_val_general_action_correct": 5974,
"eval_val_general_loss": 0.033568356186151505,
"eval_val_general_runtime": 381.1985,
"eval_val_general_samples_per_second": 23.04,
"eval_val_general_steps_per_second": 0.092,
"eval_val_general_text_acc": 67.09552544688603,
"eval_val_general_text_correct": 5893,
"eval_val_general_type_acc": 86.69019697142207,
"eval_val_general_type_correct": 7614,
"step": 24114
},
{
"epoch": 3.0,
"eval_test_general_accuracy": 69.84486468123656,
"eval_test_general_action_correct": 6168,
"eval_test_general_loss": 0.031217793002724648,
"eval_test_general_runtime": 355.8903,
"eval_test_general_samples_per_second": 24.814,
"eval_test_general_steps_per_second": 0.098,
"eval_test_general_text_acc": 69.2107349111086,
"eval_test_general_text_correct": 6112,
"eval_test_general_type_acc": 87.2834333597554,
"eval_test_general_type_correct": 7708,
"step": 24114
},
{
"epoch": 3.0,
"eval_val_single_accuracy": 83.57717629846378,
"eval_val_single_action_correct": 6855,
"eval_val_single_loss": 0.02634221874177456,
"eval_val_single_overall_val_accuracy": 30.318987576668736,
"eval_val_single_runtime": 330.8711,
"eval_val_single_samples_per_second": 24.789,
"eval_val_single_steps_per_second": 0.1,
"eval_val_single_text_acc": 83.15045110948549,
"eval_val_single_text_correct": 6820,
"eval_val_single_type_acc": 92.4530602292124,
"eval_val_single_type_correct": 7583,
"step": 24114
},
{
"epoch": 3.0,
"eval_test_single_accuracy": 84.21760391198045,
"eval_test_single_action_correct": 6889,
"eval_test_single_loss": 0.026479482650756836,
"eval_test_single_overall_test_accuracy": 30.812493718643402,
"eval_test_single_runtime": 322.2695,
"eval_test_single_samples_per_second": 25.382,
"eval_test_single_steps_per_second": 0.099,
"eval_test_single_text_acc": 83.80195599022005,
"eval_test_single_text_correct": 6855,
"eval_test_single_type_acc": 92.61613691931541,
"eval_test_single_type_correct": 7576,
"step": 24114
},
{
"epoch": 3.048021895994028,
"grad_norm": 0.028695425018668175,
"learning_rate": 4.144453403985716e-05,
"loss": 0.0325,
"step": 24500
},
{
"epoch": 3.1102264244837023,
"grad_norm": 0.02727818675339222,
"learning_rate": 4.115654878470223e-05,
"loss": 0.0322,
"step": 25000
},
{
"epoch": 3.1724309529733765,
"grad_norm": 0.03355926647782326,
"learning_rate": 4.086856352954729e-05,
"loss": 0.0326,
"step": 25500
},
{
"epoch": 3.2346354814630507,
"grad_norm": 0.029240937903523445,
"learning_rate": 4.0580578274392355e-05,
"loss": 0.0325,
"step": 26000
},
{
"epoch": 3.2968400099527244,
"grad_norm": 0.031840600073337555,
"learning_rate": 4.029259301923742e-05,
"loss": 0.0322,
"step": 26500
},
{
"epoch": 3.3590445384423986,
"grad_norm": 0.030460665002465248,
"learning_rate": 4.000460776408248e-05,
"loss": 0.0322,
"step": 27000
},
{
"epoch": 3.4212490669320728,
"grad_norm": 0.028305282816290855,
"learning_rate": 3.971662250892754e-05,
"loss": 0.0323,
"step": 27500
},
{
"epoch": 3.4834535954217465,
"grad_norm": 0.03181800991296768,
"learning_rate": 3.942863725377261e-05,
"loss": 0.0323,
"step": 28000
},
{
"epoch": 3.5456581239114207,
"grad_norm": 0.026895670220255852,
"learning_rate": 3.9140651998617674e-05,
"loss": 0.0322,
"step": 28500
},
{
"epoch": 3.607862652401095,
"grad_norm": 0.025613972917199135,
"learning_rate": 3.8852666743462734e-05,
"loss": 0.0323,
"step": 29000
},
{
"epoch": 3.670067180890769,
"grad_norm": 0.03223800286650658,
"learning_rate": 3.85646814883078e-05,
"loss": 0.0323,
"step": 29500
},
{
"epoch": 3.732271709380443,
"grad_norm": 0.028018802404403687,
"learning_rate": 3.827669623315287e-05,
"loss": 0.0322,
"step": 30000
},
{
"epoch": 3.794476237870117,
"grad_norm": 0.029745567589998245,
"learning_rate": 3.798871097799793e-05,
"loss": 0.0321,
"step": 30500
},
{
"epoch": 3.856680766359791,
"grad_norm": 0.02972617745399475,
"learning_rate": 3.7700725722842994e-05,
"loss": 0.0321,
"step": 31000
},
{
"epoch": 3.9188852948494652,
"grad_norm": 0.027803702279925346,
"learning_rate": 3.7412740467688054e-05,
"loss": 0.0323,
"step": 31500
},
{
"epoch": 3.981089823339139,
"grad_norm": 0.029762666672468185,
"learning_rate": 3.712475521253312e-05,
"loss": 0.0324,
"step": 32000
},
{
"epoch": 4.0,
"eval_val_general_accuracy": 69.29295229420471,
"eval_val_general_action_correct": 6086,
"eval_val_general_loss": 0.033421870321035385,
"eval_val_general_runtime": 353.2757,
"eval_val_general_samples_per_second": 24.862,
"eval_val_general_steps_per_second": 0.099,
"eval_val_general_text_acc": 68.39348741887737,
"eval_val_general_text_correct": 6007,
"eval_val_general_type_acc": 87.12285096208585,
"eval_val_general_type_correct": 7652,
"step": 32152
},
{
"epoch": 4.0,
"eval_test_general_accuracy": 71.30562790170988,
"eval_test_general_action_correct": 6297,
"eval_test_general_loss": 0.030959919095039368,
"eval_test_general_runtime": 353.0533,
"eval_test_general_samples_per_second": 25.013,
"eval_test_general_steps_per_second": 0.099,
"eval_test_general_text_acc": 70.66017438568679,
"eval_test_general_text_correct": 6240,
"eval_test_general_type_acc": 87.81564941682709,
"eval_test_general_type_correct": 7755,
"step": 32152
},
{
"epoch": 4.0,
"eval_val_single_accuracy": 85.67422579858571,
"eval_val_single_action_correct": 7027,
"eval_val_single_loss": 0.02561488375067711,
"eval_val_single_overall_val_accuracy": 30.993435618558085,
"eval_val_single_runtime": 337.1825,
"eval_val_single_samples_per_second": 24.325,
"eval_val_single_steps_per_second": 0.098,
"eval_val_single_text_acc": 85.2962692026335,
"eval_val_single_text_correct": 6996,
"eval_val_single_type_acc": 93.70885149963424,
"eval_val_single_type_correct": 7686,
"step": 32152
},
{
"epoch": 4.0,
"eval_test_single_accuracy": 86.44254278728606,
"eval_test_single_action_correct": 7071,
"eval_test_single_loss": 0.025772809982299805,
"eval_test_single_overall_test_accuracy": 31.54963413779919,
"eval_test_single_runtime": 325.0512,
"eval_test_single_samples_per_second": 25.165,
"eval_test_single_steps_per_second": 0.098,
"eval_test_single_text_acc": 86.06356968215158,
"eval_test_single_text_correct": 7040,
"eval_test_single_type_acc": 93.86308068459658,
"eval_test_single_type_correct": 7678,
"step": 32152
},
{
"epoch": 4.043294351828813,
"grad_norm": 0.031451448798179626,
"learning_rate": 3.683676995737819e-05,
"loss": 0.0315,
"step": 32500
},
{
"epoch": 4.105498880318487,
"grad_norm": 0.030802294611930847,
"learning_rate": 3.654878470222325e-05,
"loss": 0.0313,
"step": 33000
},
{
"epoch": 4.167703408808161,
"grad_norm": 0.027965443208813667,
"learning_rate": 3.6260799447068314e-05,
"loss": 0.0314,
"step": 33500
},
{
"epoch": 4.229907937297836,
"grad_norm": 0.02930346131324768,
"learning_rate": 3.5972814191913373e-05,
"loss": 0.0313,
"step": 34000
},
{
"epoch": 4.292112465787509,
"grad_norm": 0.03407023474574089,
"learning_rate": 3.568482893675844e-05,
"loss": 0.0314,
"step": 34500
},
{
"epoch": 4.354316994277183,
"grad_norm": 0.030333781614899635,
"learning_rate": 3.53968436816035e-05,
"loss": 0.0314,
"step": 35000
},
{
"epoch": 4.416521522766858,
"grad_norm": 0.03493532910943031,
"learning_rate": 3.510885842644857e-05,
"loss": 0.0316,
"step": 35500
},
{
"epoch": 4.4787260512565314,
"grad_norm": 0.03431040793657303,
"learning_rate": 3.482087317129363e-05,
"loss": 0.0312,
"step": 36000
},
{
"epoch": 4.540930579746205,
"grad_norm": 0.028342559933662415,
"learning_rate": 3.453288791613869e-05,
"loss": 0.0311,
"step": 36500
},
{
"epoch": 4.60313510823588,
"grad_norm": 0.03341212868690491,
"learning_rate": 3.424490266098376e-05,
"loss": 0.0313,
"step": 37000
},
{
"epoch": 4.6653396367255535,
"grad_norm": 0.03309859707951546,
"learning_rate": 3.3956917405828826e-05,
"loss": 0.0315,
"step": 37500
},
{
"epoch": 4.727544165215228,
"grad_norm": 0.0328318327665329,
"learning_rate": 3.3668932150673886e-05,
"loss": 0.0316,
"step": 38000
},
{
"epoch": 4.789748693704902,
"grad_norm": 0.029687820002436638,
"learning_rate": 3.338094689551895e-05,
"loss": 0.0315,
"step": 38500
},
{
"epoch": 4.851953222194576,
"grad_norm": 0.037615273147821426,
"learning_rate": 3.309296164036402e-05,
"loss": 0.0313,
"step": 39000
},
{
"epoch": 4.91415775068425,
"grad_norm": 0.03171864524483681,
"learning_rate": 3.280497638520908e-05,
"loss": 0.0315,
"step": 39500
},
{
"epoch": 4.976362279173924,
"grad_norm": 0.03344856947660446,
"learning_rate": 3.251699113005414e-05,
"loss": 0.0314,
"step": 40000
},
{
"epoch": 5.0,
"eval_val_general_accuracy": 70.19241716953205,
"eval_val_general_action_correct": 6165,
"eval_val_general_loss": 0.033489227294921875,
"eval_val_general_runtime": 357.9361,
"eval_val_general_samples_per_second": 24.538,
"eval_val_general_steps_per_second": 0.098,
"eval_val_general_text_acc": 69.28156666287146,
"eval_val_general_text_correct": 6085,
"eval_val_general_type_acc": 87.15700785608563,
"eval_val_general_type_correct": 7655,
"step": 40190
},
{
"epoch": 5.0,
"eval_test_general_accuracy": 72.2228513192164,
"eval_test_general_action_correct": 6378,
"eval_test_general_loss": 0.031051145866513252,
"eval_test_general_runtime": 357.8649,
"eval_test_general_samples_per_second": 24.677,
"eval_test_general_steps_per_second": 0.098,
"eval_test_general_text_acc": 71.56607405729815,
"eval_test_general_text_correct": 6320,
"eval_test_general_type_acc": 87.96285811346394,
"eval_test_general_type_correct": 7768,
"step": 40190
},
{
"epoch": 5.0,
"eval_val_single_accuracy": 87.4420872957815,
"eval_val_single_action_correct": 7172,
"eval_val_single_loss": 0.02487783692777157,
"eval_val_single_overall_val_accuracy": 31.526900893062713,
"eval_val_single_runtime": 336.6433,
"eval_val_single_samples_per_second": 24.364,
"eval_val_single_steps_per_second": 0.098,
"eval_val_single_text_acc": 87.0641306998293,
"eval_val_single_text_correct": 7141,
"eval_val_single_type_acc": 94.26969031943429,
"eval_val_single_type_correct": 7732,
"step": 40190
},
{
"epoch": 5.0,
"eval_test_single_accuracy": 88.53300733496332,
"eval_test_single_action_correct": 7242,
"eval_test_single_loss": 0.02492070198059082,
"eval_test_single_overall_test_accuracy": 32.15117173083594,
"eval_test_single_runtime": 333.0898,
"eval_test_single_samples_per_second": 24.558,
"eval_test_single_steps_per_second": 0.096,
"eval_test_single_text_acc": 88.17848410757946,
"eval_test_single_text_correct": 7213,
"eval_test_single_type_acc": 94.63325183374083,
"eval_test_single_type_correct": 7741,
"step": 40190
},
{
"epoch": 5.038566807663598,
"grad_norm": 0.03410176560282707,
"learning_rate": 3.2229005874899206e-05,
"loss": 0.0308,
"step": 40500
},
{
"epoch": 5.100771336153272,
"grad_norm": 0.03393740952014923,
"learning_rate": 3.194102061974427e-05,
"loss": 0.0303,
"step": 41000
},
{
"epoch": 5.162975864642946,
"grad_norm": 0.03690695762634277,
"learning_rate": 3.165303536458933e-05,
"loss": 0.0304,
"step": 41500
},
{
"epoch": 5.22518039313262,
"grad_norm": 0.031830355525016785,
"learning_rate": 3.13650501094344e-05,
"loss": 0.0303,
"step": 42000
},
{
"epoch": 5.287384921622294,
"grad_norm": 0.04112740978598595,
"learning_rate": 3.1077064854279466e-05,
"loss": 0.0302,
"step": 42500
},
{
"epoch": 5.349589450111968,
"grad_norm": 0.03433283418416977,
"learning_rate": 3.0789079599124526e-05,
"loss": 0.0304,
"step": 43000
},
{
"epoch": 5.411793978601642,
"grad_norm": 0.032931845635175705,
"learning_rate": 3.050109434396959e-05,
"loss": 0.0306,
"step": 43500
},
{
"epoch": 5.473998507091316,
"grad_norm": 0.035872191190719604,
"learning_rate": 3.0213109088814656e-05,
"loss": 0.0306,
"step": 44000
},
{
"epoch": 5.53620303558099,
"grad_norm": 0.03302635997533798,
"learning_rate": 2.992512383365972e-05,
"loss": 0.0305,
"step": 44500
},
{
"epoch": 5.598407564070664,
"grad_norm": 0.031577784568071365,
"learning_rate": 2.9637138578504782e-05,
"loss": 0.0306,
"step": 45000
},
{
"epoch": 5.6606120925603385,
"grad_norm": 0.03194281458854675,
"learning_rate": 2.934915332334985e-05,
"loss": 0.0305,
"step": 45500
},
{
"epoch": 5.722816621050012,
"grad_norm": 0.03472689166665077,
"learning_rate": 2.9061168068194912e-05,
"loss": 0.0305,
"step": 46000
},
{
"epoch": 5.785021149539687,
"grad_norm": 0.031870368868112564,
"learning_rate": 2.8773182813039972e-05,
"loss": 0.0306,
"step": 46500
},
{
"epoch": 5.8472256780293606,
"grad_norm": 0.031068740412592888,
"learning_rate": 2.8485197557885035e-05,
"loss": 0.0305,
"step": 47000
},
{
"epoch": 5.909430206519034,
"grad_norm": 0.03671320155262947,
"learning_rate": 2.81972123027301e-05,
"loss": 0.0304,
"step": 47500
},
{
"epoch": 5.971634735008709,
"grad_norm": 0.03440012410283089,
"learning_rate": 2.7909227047575165e-05,
"loss": 0.0305,
"step": 48000
},
{
"epoch": 6.0,
"eval_val_general_accuracy": 69.61174997153591,
"eval_val_general_action_correct": 6114,
"eval_val_general_loss": 0.03399135172367096,
"eval_val_general_runtime": 357.1857,
"eval_val_general_samples_per_second": 24.589,
"eval_val_general_steps_per_second": 0.098,
"eval_val_general_text_acc": 68.70089946487533,
"eval_val_general_text_correct": 6034,
"eval_val_general_type_acc": 87.15700785608563,
"eval_val_general_type_correct": 7655,
"step": 48228
},
{
"epoch": 6.0,
"eval_test_general_accuracy": 71.33959913939532,
"eval_test_general_action_correct": 6300,
"eval_test_general_loss": 0.03140591084957123,
"eval_test_general_runtime": 356.2452,
"eval_test_general_samples_per_second": 24.789,
"eval_test_general_steps_per_second": 0.098,
"eval_test_general_text_acc": 70.67149813158193,
"eval_test_general_text_correct": 6241,
"eval_test_general_type_acc": 87.44196580228738,
"eval_test_general_type_correct": 7722,
"step": 48228
},
{
"epoch": 6.0,
"eval_val_single_accuracy": 89.01487442087296,
"eval_val_single_action_correct": 7301,
"eval_val_single_loss": 0.024073051288723946,
"eval_val_single_overall_val_accuracy": 31.725324878481775,
"eval_val_single_runtime": 337.3699,
"eval_val_single_samples_per_second": 24.312,
"eval_val_single_steps_per_second": 0.098,
"eval_val_single_text_acc": 88.61253352840771,
"eval_val_single_text_correct": 7268,
"eval_val_single_type_acc": 94.78176054620823,
"eval_val_single_type_correct": 7774,
"step": 48228
},
{
"epoch": 6.0,
"eval_test_single_accuracy": 89.73105134474328,
"eval_test_single_action_correct": 7340,
"eval_test_single_loss": 0.02414107322692871,
"eval_test_single_overall_test_accuracy": 32.214130096827716,
"eval_test_single_runtime": 329.628,
"eval_test_single_samples_per_second": 24.816,
"eval_test_single_steps_per_second": 0.097,
"eval_test_single_text_acc": 89.3398533007335,
"eval_test_single_text_correct": 7308,
"eval_test_single_type_acc": 94.97555012224939,
"eval_test_single_type_correct": 7769,
"step": 48228
},
{
"epoch": 6.033839263498383,
"grad_norm": 0.03462919965386391,
"learning_rate": 2.7621241792420228e-05,
"loss": 0.0297,
"step": 48500
},
{
"epoch": 6.096043791988056,
"grad_norm": 0.0364200696349144,
"learning_rate": 2.7333256537265295e-05,
"loss": 0.0293,
"step": 49000
},
{
"epoch": 6.158248320477731,
"grad_norm": 0.04009445756673813,
"learning_rate": 2.7045271282110358e-05,
"loss": 0.0292,
"step": 49500
},
{
"epoch": 6.220452848967405,
"grad_norm": 0.04062647372484207,
"learning_rate": 2.6757286026955418e-05,
"loss": 0.0292,
"step": 50000
},
{
"epoch": 6.282657377457079,
"grad_norm": 0.03505484387278557,
"learning_rate": 2.6469300771800488e-05,
"loss": 0.0294,
"step": 50500
},
{
"epoch": 6.344861905946753,
"grad_norm": 0.03484097495675087,
"learning_rate": 2.6181315516645548e-05,
"loss": 0.0293,
"step": 51000
},
{
"epoch": 6.407066434436427,
"grad_norm": 0.03451770171523094,
"learning_rate": 2.589333026149061e-05,
"loss": 0.0293,
"step": 51500
},
{
"epoch": 6.469270962926101,
"grad_norm": 0.04329177364706993,
"learning_rate": 2.5605345006335678e-05,
"loss": 0.0293,
"step": 52000
},
{
"epoch": 6.531475491415775,
"grad_norm": 0.03902963548898697,
"learning_rate": 2.531735975118074e-05,
"loss": 0.0293,
"step": 52500
},
{
"epoch": 6.593680019905449,
"grad_norm": 0.0379670187830925,
"learning_rate": 2.5029374496025804e-05,
"loss": 0.0293,
"step": 53000
},
{
"epoch": 6.6558845483951234,
"grad_norm": 0.0427132174372673,
"learning_rate": 2.4741389240870868e-05,
"loss": 0.0295,
"step": 53500
},
{
"epoch": 6.718089076884797,
"grad_norm": 0.038951486349105835,
"learning_rate": 2.4453403985715934e-05,
"loss": 0.0294,
"step": 54000
},
{
"epoch": 6.780293605374471,
"grad_norm": 0.03940974548459053,
"learning_rate": 2.4165418730560997e-05,
"loss": 0.0294,
"step": 54500
},
{
"epoch": 6.8424981338641455,
"grad_norm": 0.04146928712725639,
"learning_rate": 2.387743347540606e-05,
"loss": 0.0294,
"step": 55000
},
{
"epoch": 6.904702662353819,
"grad_norm": 0.03553101047873497,
"learning_rate": 2.3589448220251124e-05,
"loss": 0.0294,
"step": 55500
},
{
"epoch": 6.966907190843493,
"grad_norm": 0.03867221623659134,
"learning_rate": 2.3301462965096187e-05,
"loss": 0.0293,
"step": 56000
},
{
"epoch": 7.0,
"eval_val_general_accuracy": 69.66867812820222,
"eval_val_general_action_correct": 6119,
"eval_val_general_loss": 0.034982189536094666,
"eval_val_general_runtime": 356.027,
"eval_val_general_samples_per_second": 24.669,
"eval_val_general_steps_per_second": 0.098,
"eval_val_general_text_acc": 68.74644199020837,
"eval_val_general_text_correct": 6038,
"eval_val_general_type_acc": 86.90652396675395,
"eval_val_general_type_correct": 7633,
"step": 56266
},
{
"epoch": 7.0,
"eval_test_general_accuracy": 70.62620314800135,
"eval_test_general_action_correct": 6237,
"eval_test_general_loss": 0.03225135803222656,
"eval_test_general_runtime": 355.1867,
"eval_test_general_samples_per_second": 24.863,
"eval_test_general_steps_per_second": 0.099,
"eval_test_general_text_acc": 69.9920733778734,
"eval_test_general_text_correct": 6181,
"eval_test_general_type_acc": 87.30608085154569,
"eval_test_general_type_correct": 7710,
"step": 56266
},
{
"epoch": 7.0,
"eval_val_single_accuracy": 89.92928554011218,
"eval_val_single_action_correct": 7376,
"eval_val_single_loss": 0.023164229467511177,
"eval_val_single_overall_val_accuracy": 31.919592733662874,
"eval_val_single_runtime": 335.5525,
"eval_val_single_samples_per_second": 24.443,
"eval_val_single_steps_per_second": 0.098,
"eval_val_single_text_acc": 89.55132894415996,
"eval_val_single_text_correct": 7345,
"eval_val_single_type_acc": 95.08656425262132,
"eval_val_single_type_correct": 7799,
"step": 56266
},
{
"epoch": 7.0,
"eval_test_single_accuracy": 91.05134474327629,
"eval_test_single_action_correct": 7448,
"eval_test_single_loss": 0.02320098876953125,
"eval_test_single_overall_test_accuracy": 32.335509578255525,
"eval_test_single_runtime": 329.0431,
"eval_test_single_samples_per_second": 24.86,
"eval_test_single_steps_per_second": 0.097,
"eval_test_single_text_acc": 90.6601466992665,
"eval_test_single_text_correct": 7416,
"eval_test_single_type_acc": 95.26894865525672,
"eval_test_single_type_correct": 7793,
"step": 56266
}
],
"logging_steps": 500,
"max_steps": 96456,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}