tea-base / trainer_state.json
zhang
Upload folder using huggingface_hub
1491531 verified
{
"best_metric": 0.34449518011161845,
"best_model_checkpoint": "/data_center/zhangyan/cache_dir/vtvqa/base/v58_final/checkpoint-30000",
"epoch": 13.42882721575649,
"eval_steps": 1000,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22381378692927484,
"grad_norm": 32.715675354003906,
"learning_rate": 2.5e-05,
"loss": 4.1862,
"step": 500
},
{
"epoch": 0.4476275738585497,
"grad_norm": 21.658960342407227,
"learning_rate": 5e-05,
"loss": 3.1109,
"step": 1000
},
{
"epoch": 0.4476275738585497,
"eval_anls": 0.19268535480716623,
"eval_loss": 2.574805974960327,
"eval_runtime": 1812.8357,
"eval_samples_per_second": 1.087,
"eval_steps_per_second": 1.087,
"eval_stvqa_acc": 0.12075088787417554,
"eval_textvqa_acc": 0.0013247646428772761,
"step": 1000
},
{
"epoch": 0.6714413607878246,
"grad_norm": 10.140119552612305,
"learning_rate": 4.913793103448276e-05,
"loss": 2.794,
"step": 1500
},
{
"epoch": 0.8952551477170994,
"grad_norm": 10.471025466918945,
"learning_rate": 4.827586206896552e-05,
"loss": 2.4305,
"step": 2000
},
{
"epoch": 0.8952551477170994,
"eval_anls": 0.2712405745659137,
"eval_loss": 2.0965969562530518,
"eval_runtime": 1835.2571,
"eval_samples_per_second": 1.074,
"eval_steps_per_second": 1.074,
"eval_stvqa_acc": 0.19583967529173008,
"eval_textvqa_acc": 0.002846834658097977,
"step": 2000
},
{
"epoch": 1.1190689346463742,
"grad_norm": 10.897268295288086,
"learning_rate": 4.741379310344828e-05,
"loss": 2.2143,
"step": 2500
},
{
"epoch": 1.3428827215756491,
"grad_norm": 11.765941619873047,
"learning_rate": 4.655172413793104e-05,
"loss": 2.0289,
"step": 3000
},
{
"epoch": 1.3428827215756491,
"eval_anls": 0.316135640478506,
"eval_loss": 1.9610668420791626,
"eval_runtime": 1823.8259,
"eval_samples_per_second": 1.081,
"eval_steps_per_second": 1.081,
"eval_stvqa_acc": 0.23997970573313038,
"eval_textvqa_acc": 0.0031568859574947865,
"step": 3000
},
{
"epoch": 1.5666965085049238,
"grad_norm": 7.180312156677246,
"learning_rate": 4.5689655172413794e-05,
"loss": 2.0053,
"step": 3500
},
{
"epoch": 1.7905102954341987,
"grad_norm": 6.897369861602783,
"learning_rate": 4.482758620689655e-05,
"loss": 1.9704,
"step": 4000
},
{
"epoch": 1.7905102954341987,
"eval_anls": 0.3412851164067496,
"eval_loss": 1.8678895235061646,
"eval_runtime": 1833.7166,
"eval_samples_per_second": 1.075,
"eval_steps_per_second": 1.075,
"eval_stvqa_acc": 0.25621511922881784,
"eval_textvqa_acc": 0.003241445402784825,
"step": 4000
},
{
"epoch": 2.0143240823634736,
"grad_norm": 15.418075561523438,
"learning_rate": 4.396551724137931e-05,
"loss": 1.9023,
"step": 4500
},
{
"epoch": 2.2381378692927485,
"grad_norm": 8.011098861694336,
"learning_rate": 4.3103448275862066e-05,
"loss": 1.6569,
"step": 5000
},
{
"epoch": 2.2381378692927485,
"eval_anls": 0.364142426818907,
"eval_loss": 1.8436781167984009,
"eval_runtime": 1870.6938,
"eval_samples_per_second": 1.054,
"eval_steps_per_second": 1.054,
"eval_stvqa_acc": 0.2810755961440893,
"eval_textvqa_acc": 0.002931394103388016,
"step": 5000
},
{
"epoch": 2.4619516562220234,
"grad_norm": 7.217813968658447,
"learning_rate": 4.224137931034483e-05,
"loss": 1.6383,
"step": 5500
},
{
"epoch": 2.6857654431512983,
"grad_norm": 8.526447296142578,
"learning_rate": 4.1379310344827587e-05,
"loss": 1.6479,
"step": 6000
},
{
"epoch": 2.6857654431512983,
"eval_anls": 0.37012127813893764,
"eval_loss": 1.7932401895523071,
"eval_runtime": 1848.1991,
"eval_samples_per_second": 1.066,
"eval_steps_per_second": 1.066,
"eval_stvqa_acc": 0.2876712328767123,
"eval_textvqa_acc": 0.0026777157675178984,
"step": 6000
},
{
"epoch": 2.909579230080573,
"grad_norm": 9.343942642211914,
"learning_rate": 4.0517241379310344e-05,
"loss": 1.624,
"step": 6500
},
{
"epoch": 3.1333930170098476,
"grad_norm": 5.952371597290039,
"learning_rate": 3.965517241379311e-05,
"loss": 1.4744,
"step": 7000
},
{
"epoch": 3.1333930170098476,
"eval_anls": 0.391052215824424,
"eval_loss": 1.8034956455230713,
"eval_runtime": 1815.7182,
"eval_samples_per_second": 1.086,
"eval_steps_per_second": 1.086,
"eval_stvqa_acc": 0.30492135971588025,
"eval_textvqa_acc": 0.0034105642933649027,
"step": 7000
},
{
"epoch": 3.3572068039391225,
"grad_norm": 6.397096157073975,
"learning_rate": 3.8793103448275865e-05,
"loss": 1.4023,
"step": 7500
},
{
"epoch": 3.5810205908683974,
"grad_norm": 11.454750061035156,
"learning_rate": 3.793103448275862e-05,
"loss": 1.4155,
"step": 8000
},
{
"epoch": 3.5810205908683974,
"eval_anls": 0.3882569968865909,
"eval_loss": 1.796372652053833,
"eval_runtime": 1820.5256,
"eval_samples_per_second": 1.083,
"eval_steps_per_second": 1.083,
"eval_stvqa_acc": 0.3008625063419584,
"eval_textvqa_acc": 0.003466937256891596,
"step": 8000
},
{
"epoch": 3.8048343777976723,
"grad_norm": 10.310056686401367,
"learning_rate": 3.7068965517241385e-05,
"loss": 1.4217,
"step": 8500
},
{
"epoch": 4.028648164726947,
"grad_norm": 8.272510528564453,
"learning_rate": 3.620689655172414e-05,
"loss": 1.4104,
"step": 9000
},
{
"epoch": 4.028648164726947,
"eval_anls": 0.39196723623184165,
"eval_loss": 1.7789984941482544,
"eval_runtime": 1830.6777,
"eval_samples_per_second": 1.077,
"eval_steps_per_second": 1.077,
"eval_stvqa_acc": 0.3084728564180619,
"eval_textvqa_acc": 0.0031005129939680937,
"step": 9000
},
{
"epoch": 4.252461951656222,
"grad_norm": 10.304888725280762,
"learning_rate": 3.53448275862069e-05,
"loss": 1.2361,
"step": 9500
},
{
"epoch": 4.476275738585497,
"grad_norm": 7.845999240875244,
"learning_rate": 3.4482758620689657e-05,
"loss": 1.2353,
"step": 10000
},
{
"epoch": 4.476275738585497,
"eval_anls": 0.4013811446389115,
"eval_loss": 1.7788571119308472,
"eval_runtime": 1825.7471,
"eval_samples_per_second": 1.08,
"eval_steps_per_second": 1.08,
"eval_stvqa_acc": 0.313039066463724,
"eval_textvqa_acc": 0.003495123738654942,
"step": 10000
},
{
"epoch": 4.700089525514771,
"grad_norm": 5.328579425811768,
"learning_rate": 3.3620689655172414e-05,
"loss": 1.2438,
"step": 10500
},
{
"epoch": 4.923903312444047,
"grad_norm": 11.987248420715332,
"learning_rate": 3.275862068965517e-05,
"loss": 1.2636,
"step": 11000
},
{
"epoch": 4.923903312444047,
"eval_anls": 0.4017189990556845,
"eval_loss": 1.758474349975586,
"eval_runtime": 1846.925,
"eval_samples_per_second": 1.067,
"eval_steps_per_second": 1.067,
"eval_stvqa_acc": 0.3145611364789447,
"eval_textvqa_acc": 0.0032696318845481715,
"step": 11000
},
{
"epoch": 5.147717099373321,
"grad_norm": 8.365703582763672,
"learning_rate": 3.1896551724137935e-05,
"loss": 1.1356,
"step": 11500
},
{
"epoch": 5.371530886302597,
"grad_norm": 6.653890132904053,
"learning_rate": 3.103448275862069e-05,
"loss": 1.1479,
"step": 12000
},
{
"epoch": 5.371530886302597,
"eval_anls": 0.40105794167271264,
"eval_loss": 1.7939367294311523,
"eval_runtime": 1859.7478,
"eval_samples_per_second": 1.06,
"eval_steps_per_second": 1.06,
"eval_stvqa_acc": 0.3145611364789447,
"eval_textvqa_acc": 0.0034387507751282493,
"step": 12000
},
{
"epoch": 5.595344673231871,
"grad_norm": 5.2766594886779785,
"learning_rate": 3.017241379310345e-05,
"loss": 1.102,
"step": 12500
},
{
"epoch": 5.819158460161146,
"grad_norm": 8.8541259765625,
"learning_rate": 2.9310344827586206e-05,
"loss": 1.1128,
"step": 13000
},
{
"epoch": 5.819158460161146,
"eval_anls": 0.40612595073357,
"eval_loss": 1.7984886169433594,
"eval_runtime": 1829.9404,
"eval_samples_per_second": 1.077,
"eval_steps_per_second": 1.077,
"eval_stvqa_acc": 0.3165905631659056,
"eval_textvqa_acc": 0.0035796831839449814,
"step": 13000
},
{
"epoch": 6.042972247090421,
"grad_norm": 5.202572345733643,
"learning_rate": 2.844827586206897e-05,
"loss": 1.1021,
"step": 13500
},
{
"epoch": 6.266786034019695,
"grad_norm": 9.060930252075195,
"learning_rate": 2.7586206896551727e-05,
"loss": 0.9918,
"step": 14000
},
{
"epoch": 6.266786034019695,
"eval_anls": 0.40779826322114054,
"eval_loss": 1.8251103162765503,
"eval_runtime": 1847.7131,
"eval_samples_per_second": 1.067,
"eval_steps_per_second": 1.067,
"eval_stvqa_acc": 0.3135464231354642,
"eval_textvqa_acc": 0.0033541913298382104,
"step": 14000
},
{
"epoch": 6.490599820948971,
"grad_norm": 6.045884609222412,
"learning_rate": 2.672413793103448e-05,
"loss": 0.9992,
"step": 14500
},
{
"epoch": 6.714413607878245,
"grad_norm": 6.779171466827393,
"learning_rate": 2.5862068965517244e-05,
"loss": 1.0211,
"step": 15000
},
{
"epoch": 6.714413607878245,
"eval_anls": 0.4161895876430021,
"eval_loss": 1.8089348077774048,
"eval_runtime": 1843.9319,
"eval_samples_per_second": 1.069,
"eval_steps_per_second": 1.069,
"eval_stvqa_acc": 0.3226788432267884,
"eval_textvqa_acc": 0.0032696318845481715,
"step": 15000
},
{
"epoch": 6.93822739480752,
"grad_norm": 6.09267520904541,
"learning_rate": 2.5e-05,
"loss": 1.0254,
"step": 15500
},
{
"epoch": 7.162041181736795,
"grad_norm": 5.587011814117432,
"learning_rate": 2.413793103448276e-05,
"loss": 0.9417,
"step": 16000
},
{
"epoch": 7.162041181736795,
"eval_anls": 0.41600295733233567,
"eval_loss": 1.8740063905715942,
"eval_runtime": 1852.6512,
"eval_samples_per_second": 1.064,
"eval_steps_per_second": 1.064,
"eval_stvqa_acc": 0.32318619989852865,
"eval_textvqa_acc": 0.0028186481763346305,
"step": 16000
},
{
"epoch": 7.38585496866607,
"grad_norm": 7.845240592956543,
"learning_rate": 2.327586206896552e-05,
"loss": 0.9246,
"step": 16500
},
{
"epoch": 7.609668755595345,
"grad_norm": 6.112607479095459,
"learning_rate": 2.2413793103448276e-05,
"loss": 0.9514,
"step": 17000
},
{
"epoch": 7.609668755595345,
"eval_anls": 0.4178276490299179,
"eval_loss": 1.8600742816925049,
"eval_runtime": 1824.9706,
"eval_samples_per_second": 1.08,
"eval_steps_per_second": 1.08,
"eval_stvqa_acc": 0.3302891933028919,
"eval_textvqa_acc": 0.003269631884548171,
"step": 17000
},
{
"epoch": 7.833482542524619,
"grad_norm": 5.07211971282959,
"learning_rate": 2.1551724137931033e-05,
"loss": 0.9365,
"step": 17500
},
{
"epoch": 8.057296329453894,
"grad_norm": 7.184695243835449,
"learning_rate": 2.0689655172413793e-05,
"loss": 0.8699,
"step": 18000
},
{
"epoch": 8.057296329453894,
"eval_anls": 0.4186198986318584,
"eval_loss": 1.9020802974700928,
"eval_runtime": 1818.6962,
"eval_samples_per_second": 1.084,
"eval_steps_per_second": 1.084,
"eval_stvqa_acc": 0.32927447995941145,
"eval_textvqa_acc": 0.0031850724392581326,
"step": 18000
},
{
"epoch": 8.28111011638317,
"grad_norm": 7.932966232299805,
"learning_rate": 1.9827586206896554e-05,
"loss": 0.837,
"step": 18500
},
{
"epoch": 8.504923903312443,
"grad_norm": 4.637597560882568,
"learning_rate": 1.896551724137931e-05,
"loss": 0.842,
"step": 19000
},
{
"epoch": 8.504923903312443,
"eval_anls": 0.41477135772440343,
"eval_loss": 1.8863146305084229,
"eval_runtime": 1837.3079,
"eval_samples_per_second": 1.073,
"eval_steps_per_second": 1.073,
"eval_stvqa_acc": 0.3267376966007103,
"eval_textvqa_acc": 0.0033260048480748643,
"step": 19000
},
{
"epoch": 8.728737690241719,
"grad_norm": 6.689283847808838,
"learning_rate": 1.810344827586207e-05,
"loss": 0.8536,
"step": 19500
},
{
"epoch": 8.952551477170994,
"grad_norm": 9.63552188873291,
"learning_rate": 1.7241379310344828e-05,
"loss": 0.8618,
"step": 20000
},
{
"epoch": 8.952551477170994,
"eval_anls": 0.42347759687703285,
"eval_loss": 1.89691960811615,
"eval_runtime": 1847.5675,
"eval_samples_per_second": 1.067,
"eval_steps_per_second": 1.067,
"eval_stvqa_acc": 0.33688483003551495,
"eval_textvqa_acc": 0.0033541913298382104,
"step": 20000
},
{
"epoch": 9.17636526410027,
"grad_norm": 5.308718204498291,
"learning_rate": 1.6379310344827585e-05,
"loss": 0.8086,
"step": 20500
},
{
"epoch": 9.400179051029543,
"grad_norm": 7.010397434234619,
"learning_rate": 1.5517241379310346e-05,
"loss": 0.7442,
"step": 21000
},
{
"epoch": 9.400179051029543,
"eval_anls": 0.4311970807329177,
"eval_loss": 1.964108943939209,
"eval_runtime": 1837.5547,
"eval_samples_per_second": 1.073,
"eval_steps_per_second": 1.073,
"eval_stvqa_acc": 0.3378995433789954,
"eval_textvqa_acc": 0.0035233102204182878,
"step": 21000
},
{
"epoch": 9.623992837958818,
"grad_norm": 7.734108924865723,
"learning_rate": 1.4655172413793103e-05,
"loss": 0.8036,
"step": 21500
},
{
"epoch": 9.847806624888094,
"grad_norm": 4.833765029907227,
"learning_rate": 1.3793103448275863e-05,
"loss": 0.7942,
"step": 22000
},
{
"epoch": 9.847806624888094,
"eval_anls": 0.4253442311091046,
"eval_loss": 1.9461325407028198,
"eval_runtime": 1824.2403,
"eval_samples_per_second": 1.08,
"eval_steps_per_second": 1.08,
"eval_stvqa_acc": 0.3404363267376966,
"eval_textvqa_acc": 0.0035233102204182878,
"step": 22000
},
{
"epoch": 10.071620411817367,
"grad_norm": 6.974804401397705,
"learning_rate": 1.2931034482758622e-05,
"loss": 0.7966,
"step": 22500
},
{
"epoch": 10.295434198746642,
"grad_norm": 11.843214988708496,
"learning_rate": 1.206896551724138e-05,
"loss": 0.7279,
"step": 23000
},
{
"epoch": 10.295434198746642,
"eval_anls": 0.4296521569606135,
"eval_loss": 1.9614735841751099,
"eval_runtime": 1839.5785,
"eval_samples_per_second": 1.071,
"eval_steps_per_second": 1.071,
"eval_stvqa_acc": 0.34145104008117705,
"eval_textvqa_acc": 0.0036360561474716733,
"step": 23000
},
{
"epoch": 10.519247985675918,
"grad_norm": 7.991017818450928,
"learning_rate": 1.1206896551724138e-05,
"loss": 0.7463,
"step": 23500
},
{
"epoch": 10.743061772605193,
"grad_norm": 9.638339042663574,
"learning_rate": 1.0344827586206897e-05,
"loss": 0.7572,
"step": 24000
},
{
"epoch": 10.743061772605193,
"eval_anls": 0.4312744122080826,
"eval_loss": 1.941930890083313,
"eval_runtime": 1821.5331,
"eval_samples_per_second": 1.082,
"eval_steps_per_second": 1.082,
"eval_stvqa_acc": 0.34145104008117705,
"eval_textvqa_acc": 0.0034669372568915955,
"step": 24000
},
{
"epoch": 10.966875559534467,
"grad_norm": 8.245118141174316,
"learning_rate": 9.482758620689655e-06,
"loss": 0.7312,
"step": 24500
},
{
"epoch": 11.190689346463742,
"grad_norm": 8.00062084197998,
"learning_rate": 8.620689655172414e-06,
"loss": 0.6966,
"step": 25000
},
{
"epoch": 11.190689346463742,
"eval_anls": 0.42783346979471903,
"eval_loss": 1.9731788635253906,
"eval_runtime": 1834.9865,
"eval_samples_per_second": 1.074,
"eval_steps_per_second": 1.074,
"eval_stvqa_acc": 0.34145104008117705,
"eval_textvqa_acc": 0.003551496702181635,
"step": 25000
},
{
"epoch": 11.414503133393017,
"grad_norm": 9.724451065063477,
"learning_rate": 7.758620689655173e-06,
"loss": 0.7131,
"step": 25500
},
{
"epoch": 11.638316920322293,
"grad_norm": 8.227492332458496,
"learning_rate": 6.896551724137932e-06,
"loss": 0.7277,
"step": 26000
},
{
"epoch": 11.638316920322293,
"eval_anls": 0.43080821443844364,
"eval_loss": 1.9889310598373413,
"eval_runtime": 1831.7239,
"eval_samples_per_second": 1.076,
"eval_steps_per_second": 1.076,
"eval_stvqa_acc": 0.3404363267376966,
"eval_textvqa_acc": 0.003551496702181635,
"step": 26000
},
{
"epoch": 11.862130707251566,
"grad_norm": 8.610105514526367,
"learning_rate": 6.03448275862069e-06,
"loss": 0.7039,
"step": 26500
},
{
"epoch": 12.085944494180842,
"grad_norm": 8.281418800354004,
"learning_rate": 5.172413793103448e-06,
"loss": 0.6983,
"step": 27000
},
{
"epoch": 12.085944494180842,
"eval_anls": 0.4299173295121815,
"eval_loss": 1.9830011129379272,
"eval_runtime": 1822.4224,
"eval_samples_per_second": 1.082,
"eval_steps_per_second": 1.082,
"eval_stvqa_acc": 0.3424657534246575,
"eval_textvqa_acc": 0.0036360561474716733,
"step": 27000
},
{
"epoch": 12.309758281110117,
"grad_norm": 7.353641986846924,
"learning_rate": 4.310344827586207e-06,
"loss": 0.6813,
"step": 27500
},
{
"epoch": 12.53357206803939,
"grad_norm": 10.350179672241211,
"learning_rate": 3.448275862068966e-06,
"loss": 0.694,
"step": 28000
},
{
"epoch": 12.53357206803939,
"eval_anls": 0.4278160827558073,
"eval_loss": 1.988090991973877,
"eval_runtime": 1832.1136,
"eval_samples_per_second": 1.076,
"eval_steps_per_second": 1.076,
"eval_stvqa_acc": 0.3419583967529173,
"eval_textvqa_acc": 0.003523310220418288,
"step": 28000
},
{
"epoch": 12.757385854968666,
"grad_norm": 6.440176010131836,
"learning_rate": 2.586206896551724e-06,
"loss": 0.6901,
"step": 28500
},
{
"epoch": 12.981199641897941,
"grad_norm": 5.959563732147217,
"learning_rate": 1.724137931034483e-06,
"loss": 0.6584,
"step": 29000
},
{
"epoch": 12.981199641897941,
"eval_anls": 0.42679352711220236,
"eval_loss": 2.003437042236328,
"eval_runtime": 1826.5505,
"eval_samples_per_second": 1.079,
"eval_steps_per_second": 1.079,
"eval_stvqa_acc": 0.34145104008117705,
"eval_textvqa_acc": 0.0034387507751282493,
"step": 29000
},
{
"epoch": 13.205013428827217,
"grad_norm": 7.324951648712158,
"learning_rate": 8.620689655172415e-07,
"loss": 0.6636,
"step": 29500
},
{
"epoch": 13.42882721575649,
"grad_norm": 7.893153190612793,
"learning_rate": 0.0,
"loss": 0.6673,
"step": 30000
},
{
"epoch": 13.42882721575649,
"eval_anls": 0.4290839479472131,
"eval_loss": 2.003530502319336,
"eval_runtime": 1825.1374,
"eval_samples_per_second": 1.08,
"eval_steps_per_second": 1.08,
"eval_stvqa_acc": 0.34449518011161845,
"eval_textvqa_acc": 0.003551496702181635,
"step": 30000
}
],
"logging_steps": 500,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.060915240465369e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}