{ "best_metric": 0.34449518011161845, "best_model_checkpoint": "/data_center/zhangyan/cache_dir/vtvqa/base/v58_final/checkpoint-30000", "epoch": 13.42882721575649, "eval_steps": 1000, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22381378692927484, "grad_norm": 32.715675354003906, "learning_rate": 2.5e-05, "loss": 4.1862, "step": 500 }, { "epoch": 0.4476275738585497, "grad_norm": 21.658960342407227, "learning_rate": 5e-05, "loss": 3.1109, "step": 1000 }, { "epoch": 0.4476275738585497, "eval_anls": 0.19268535480716623, "eval_loss": 2.574805974960327, "eval_runtime": 1812.8357, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "eval_stvqa_acc": 0.12075088787417554, "eval_textvqa_acc": 0.0013247646428772761, "step": 1000 }, { "epoch": 0.6714413607878246, "grad_norm": 10.140119552612305, "learning_rate": 4.913793103448276e-05, "loss": 2.794, "step": 1500 }, { "epoch": 0.8952551477170994, "grad_norm": 10.471025466918945, "learning_rate": 4.827586206896552e-05, "loss": 2.4305, "step": 2000 }, { "epoch": 0.8952551477170994, "eval_anls": 0.2712405745659137, "eval_loss": 2.0965969562530518, "eval_runtime": 1835.2571, "eval_samples_per_second": 1.074, "eval_steps_per_second": 1.074, "eval_stvqa_acc": 0.19583967529173008, "eval_textvqa_acc": 0.002846834658097977, "step": 2000 }, { "epoch": 1.1190689346463742, "grad_norm": 10.897268295288086, "learning_rate": 4.741379310344828e-05, "loss": 2.2143, "step": 2500 }, { "epoch": 1.3428827215756491, "grad_norm": 11.765941619873047, "learning_rate": 4.655172413793104e-05, "loss": 2.0289, "step": 3000 }, { "epoch": 1.3428827215756491, "eval_anls": 0.316135640478506, "eval_loss": 1.9610668420791626, "eval_runtime": 1823.8259, "eval_samples_per_second": 1.081, "eval_steps_per_second": 1.081, "eval_stvqa_acc": 0.23997970573313038, "eval_textvqa_acc": 0.0031568859574947865, "step": 3000 }, { "epoch": 1.5666965085049238, "grad_norm": 7.180312156677246, "learning_rate": 4.5689655172413794e-05, "loss": 2.0053, "step": 3500 }, { "epoch": 1.7905102954341987, "grad_norm": 6.897369861602783, "learning_rate": 4.482758620689655e-05, "loss": 1.9704, "step": 4000 }, { "epoch": 1.7905102954341987, "eval_anls": 0.3412851164067496, "eval_loss": 1.8678895235061646, "eval_runtime": 1833.7166, "eval_samples_per_second": 1.075, "eval_steps_per_second": 1.075, "eval_stvqa_acc": 0.25621511922881784, "eval_textvqa_acc": 0.003241445402784825, "step": 4000 }, { "epoch": 2.0143240823634736, "grad_norm": 15.418075561523438, "learning_rate": 4.396551724137931e-05, "loss": 1.9023, "step": 4500 }, { "epoch": 2.2381378692927485, "grad_norm": 8.011098861694336, "learning_rate": 4.3103448275862066e-05, "loss": 1.6569, "step": 5000 }, { "epoch": 2.2381378692927485, "eval_anls": 0.364142426818907, "eval_loss": 1.8436781167984009, "eval_runtime": 1870.6938, "eval_samples_per_second": 1.054, "eval_steps_per_second": 1.054, "eval_stvqa_acc": 0.2810755961440893, "eval_textvqa_acc": 0.002931394103388016, "step": 5000 }, { "epoch": 2.4619516562220234, "grad_norm": 7.217813968658447, "learning_rate": 4.224137931034483e-05, "loss": 1.6383, "step": 5500 }, { "epoch": 2.6857654431512983, "grad_norm": 8.526447296142578, "learning_rate": 4.1379310344827587e-05, "loss": 1.6479, "step": 6000 }, { "epoch": 2.6857654431512983, "eval_anls": 0.37012127813893764, "eval_loss": 1.7932401895523071, "eval_runtime": 1848.1991, "eval_samples_per_second": 1.066, "eval_steps_per_second": 1.066, "eval_stvqa_acc": 0.2876712328767123, "eval_textvqa_acc": 0.0026777157675178984, "step": 6000 }, { "epoch": 2.909579230080573, "grad_norm": 9.343942642211914, "learning_rate": 4.0517241379310344e-05, "loss": 1.624, "step": 6500 }, { "epoch": 3.1333930170098476, "grad_norm": 5.952371597290039, "learning_rate": 3.965517241379311e-05, "loss": 1.4744, "step": 7000 }, { "epoch": 3.1333930170098476, "eval_anls": 0.391052215824424, "eval_loss": 1.8034956455230713, "eval_runtime": 1815.7182, "eval_samples_per_second": 1.086, "eval_steps_per_second": 1.086, "eval_stvqa_acc": 0.30492135971588025, "eval_textvqa_acc": 0.0034105642933649027, "step": 7000 }, { "epoch": 3.3572068039391225, "grad_norm": 6.397096157073975, "learning_rate": 3.8793103448275865e-05, "loss": 1.4023, "step": 7500 }, { "epoch": 3.5810205908683974, "grad_norm": 11.454750061035156, "learning_rate": 3.793103448275862e-05, "loss": 1.4155, "step": 8000 }, { "epoch": 3.5810205908683974, "eval_anls": 0.3882569968865909, "eval_loss": 1.796372652053833, "eval_runtime": 1820.5256, "eval_samples_per_second": 1.083, "eval_steps_per_second": 1.083, "eval_stvqa_acc": 0.3008625063419584, "eval_textvqa_acc": 0.003466937256891596, "step": 8000 }, { "epoch": 3.8048343777976723, "grad_norm": 10.310056686401367, "learning_rate": 3.7068965517241385e-05, "loss": 1.4217, "step": 8500 }, { "epoch": 4.028648164726947, "grad_norm": 8.272510528564453, "learning_rate": 3.620689655172414e-05, "loss": 1.4104, "step": 9000 }, { "epoch": 4.028648164726947, "eval_anls": 0.39196723623184165, "eval_loss": 1.7789984941482544, "eval_runtime": 1830.6777, "eval_samples_per_second": 1.077, "eval_steps_per_second": 1.077, "eval_stvqa_acc": 0.3084728564180619, "eval_textvqa_acc": 0.0031005129939680937, "step": 9000 }, { "epoch": 4.252461951656222, "grad_norm": 10.304888725280762, "learning_rate": 3.53448275862069e-05, "loss": 1.2361, "step": 9500 }, { "epoch": 4.476275738585497, "grad_norm": 7.845999240875244, "learning_rate": 3.4482758620689657e-05, "loss": 1.2353, "step": 10000 }, { "epoch": 4.476275738585497, "eval_anls": 0.4013811446389115, "eval_loss": 1.7788571119308472, "eval_runtime": 1825.7471, "eval_samples_per_second": 1.08, "eval_steps_per_second": 1.08, "eval_stvqa_acc": 0.313039066463724, "eval_textvqa_acc": 0.003495123738654942, "step": 10000 }, { "epoch": 4.700089525514771, "grad_norm": 5.328579425811768, "learning_rate": 3.3620689655172414e-05, "loss": 1.2438, "step": 10500 }, { "epoch": 4.923903312444047, "grad_norm": 11.987248420715332, "learning_rate": 3.275862068965517e-05, "loss": 1.2636, "step": 11000 }, { "epoch": 4.923903312444047, "eval_anls": 0.4017189990556845, "eval_loss": 1.758474349975586, "eval_runtime": 1846.925, "eval_samples_per_second": 1.067, "eval_steps_per_second": 1.067, "eval_stvqa_acc": 0.3145611364789447, "eval_textvqa_acc": 0.0032696318845481715, "step": 11000 }, { "epoch": 5.147717099373321, "grad_norm": 8.365703582763672, "learning_rate": 3.1896551724137935e-05, "loss": 1.1356, "step": 11500 }, { "epoch": 5.371530886302597, "grad_norm": 6.653890132904053, "learning_rate": 3.103448275862069e-05, "loss": 1.1479, "step": 12000 }, { "epoch": 5.371530886302597, "eval_anls": 0.40105794167271264, "eval_loss": 1.7939367294311523, "eval_runtime": 1859.7478, "eval_samples_per_second": 1.06, "eval_steps_per_second": 1.06, "eval_stvqa_acc": 0.3145611364789447, "eval_textvqa_acc": 0.0034387507751282493, "step": 12000 }, { "epoch": 5.595344673231871, "grad_norm": 5.2766594886779785, "learning_rate": 3.017241379310345e-05, "loss": 1.102, "step": 12500 }, { "epoch": 5.819158460161146, "grad_norm": 8.8541259765625, "learning_rate": 2.9310344827586206e-05, "loss": 1.1128, "step": 13000 }, { "epoch": 5.819158460161146, "eval_anls": 0.40612595073357, "eval_loss": 1.7984886169433594, "eval_runtime": 1829.9404, "eval_samples_per_second": 1.077, "eval_steps_per_second": 1.077, "eval_stvqa_acc": 0.3165905631659056, "eval_textvqa_acc": 0.0035796831839449814, "step": 13000 }, { "epoch": 6.042972247090421, "grad_norm": 5.202572345733643, "learning_rate": 2.844827586206897e-05, "loss": 1.1021, "step": 13500 }, { "epoch": 6.266786034019695, "grad_norm": 9.060930252075195, "learning_rate": 2.7586206896551727e-05, "loss": 0.9918, "step": 14000 }, { "epoch": 6.266786034019695, "eval_anls": 0.40779826322114054, "eval_loss": 1.8251103162765503, "eval_runtime": 1847.7131, "eval_samples_per_second": 1.067, "eval_steps_per_second": 1.067, "eval_stvqa_acc": 0.3135464231354642, "eval_textvqa_acc": 0.0033541913298382104, "step": 14000 }, { "epoch": 6.490599820948971, "grad_norm": 6.045884609222412, "learning_rate": 2.672413793103448e-05, "loss": 0.9992, "step": 14500 }, { "epoch": 6.714413607878245, "grad_norm": 6.779171466827393, "learning_rate": 2.5862068965517244e-05, "loss": 1.0211, "step": 15000 }, { "epoch": 6.714413607878245, "eval_anls": 0.4161895876430021, "eval_loss": 1.8089348077774048, "eval_runtime": 1843.9319, "eval_samples_per_second": 1.069, "eval_steps_per_second": 1.069, "eval_stvqa_acc": 0.3226788432267884, "eval_textvqa_acc": 0.0032696318845481715, "step": 15000 }, { "epoch": 6.93822739480752, "grad_norm": 6.09267520904541, "learning_rate": 2.5e-05, "loss": 1.0254, "step": 15500 }, { "epoch": 7.162041181736795, "grad_norm": 5.587011814117432, "learning_rate": 2.413793103448276e-05, "loss": 0.9417, "step": 16000 }, { "epoch": 7.162041181736795, "eval_anls": 0.41600295733233567, "eval_loss": 1.8740063905715942, "eval_runtime": 1852.6512, "eval_samples_per_second": 1.064, "eval_steps_per_second": 1.064, "eval_stvqa_acc": 0.32318619989852865, "eval_textvqa_acc": 0.0028186481763346305, "step": 16000 }, { "epoch": 7.38585496866607, "grad_norm": 7.845240592956543, "learning_rate": 2.327586206896552e-05, "loss": 0.9246, "step": 16500 }, { "epoch": 7.609668755595345, "grad_norm": 6.112607479095459, "learning_rate": 2.2413793103448276e-05, "loss": 0.9514, "step": 17000 }, { "epoch": 7.609668755595345, "eval_anls": 0.4178276490299179, "eval_loss": 1.8600742816925049, "eval_runtime": 1824.9706, "eval_samples_per_second": 1.08, "eval_steps_per_second": 1.08, "eval_stvqa_acc": 0.3302891933028919, "eval_textvqa_acc": 0.003269631884548171, "step": 17000 }, { "epoch": 7.833482542524619, "grad_norm": 5.07211971282959, "learning_rate": 2.1551724137931033e-05, "loss": 0.9365, "step": 17500 }, { "epoch": 8.057296329453894, "grad_norm": 7.184695243835449, "learning_rate": 2.0689655172413793e-05, "loss": 0.8699, "step": 18000 }, { "epoch": 8.057296329453894, "eval_anls": 0.4186198986318584, "eval_loss": 1.9020802974700928, "eval_runtime": 1818.6962, "eval_samples_per_second": 1.084, "eval_steps_per_second": 1.084, "eval_stvqa_acc": 0.32927447995941145, "eval_textvqa_acc": 0.0031850724392581326, "step": 18000 }, { "epoch": 8.28111011638317, "grad_norm": 7.932966232299805, "learning_rate": 1.9827586206896554e-05, "loss": 0.837, "step": 18500 }, { "epoch": 8.504923903312443, "grad_norm": 4.637597560882568, "learning_rate": 1.896551724137931e-05, "loss": 0.842, "step": 19000 }, { "epoch": 8.504923903312443, "eval_anls": 0.41477135772440343, "eval_loss": 1.8863146305084229, "eval_runtime": 1837.3079, "eval_samples_per_second": 1.073, "eval_steps_per_second": 1.073, "eval_stvqa_acc": 0.3267376966007103, "eval_textvqa_acc": 0.0033260048480748643, "step": 19000 }, { "epoch": 8.728737690241719, "grad_norm": 6.689283847808838, "learning_rate": 1.810344827586207e-05, "loss": 0.8536, "step": 19500 }, { "epoch": 8.952551477170994, "grad_norm": 9.63552188873291, "learning_rate": 1.7241379310344828e-05, "loss": 0.8618, "step": 20000 }, { "epoch": 8.952551477170994, "eval_anls": 0.42347759687703285, "eval_loss": 1.89691960811615, "eval_runtime": 1847.5675, "eval_samples_per_second": 1.067, "eval_steps_per_second": 1.067, "eval_stvqa_acc": 0.33688483003551495, "eval_textvqa_acc": 0.0033541913298382104, "step": 20000 }, { "epoch": 9.17636526410027, "grad_norm": 5.308718204498291, "learning_rate": 1.6379310344827585e-05, "loss": 0.8086, "step": 20500 }, { "epoch": 9.400179051029543, "grad_norm": 7.010397434234619, "learning_rate": 1.5517241379310346e-05, "loss": 0.7442, "step": 21000 }, { "epoch": 9.400179051029543, "eval_anls": 0.4311970807329177, "eval_loss": 1.964108943939209, "eval_runtime": 1837.5547, "eval_samples_per_second": 1.073, "eval_steps_per_second": 1.073, "eval_stvqa_acc": 0.3378995433789954, "eval_textvqa_acc": 0.0035233102204182878, "step": 21000 }, { "epoch": 9.623992837958818, "grad_norm": 7.734108924865723, "learning_rate": 1.4655172413793103e-05, "loss": 0.8036, "step": 21500 }, { "epoch": 9.847806624888094, "grad_norm": 4.833765029907227, "learning_rate": 1.3793103448275863e-05, "loss": 0.7942, "step": 22000 }, { "epoch": 9.847806624888094, "eval_anls": 0.4253442311091046, "eval_loss": 1.9461325407028198, "eval_runtime": 1824.2403, "eval_samples_per_second": 1.08, "eval_steps_per_second": 1.08, "eval_stvqa_acc": 0.3404363267376966, "eval_textvqa_acc": 0.0035233102204182878, "step": 22000 }, { "epoch": 10.071620411817367, "grad_norm": 6.974804401397705, "learning_rate": 1.2931034482758622e-05, "loss": 0.7966, "step": 22500 }, { "epoch": 10.295434198746642, "grad_norm": 11.843214988708496, "learning_rate": 1.206896551724138e-05, "loss": 0.7279, "step": 23000 }, { "epoch": 10.295434198746642, "eval_anls": 0.4296521569606135, "eval_loss": 1.9614735841751099, "eval_runtime": 1839.5785, "eval_samples_per_second": 1.071, "eval_steps_per_second": 1.071, "eval_stvqa_acc": 0.34145104008117705, "eval_textvqa_acc": 0.0036360561474716733, "step": 23000 }, { "epoch": 10.519247985675918, "grad_norm": 7.991017818450928, "learning_rate": 1.1206896551724138e-05, "loss": 0.7463, "step": 23500 }, { "epoch": 10.743061772605193, "grad_norm": 9.638339042663574, "learning_rate": 1.0344827586206897e-05, "loss": 0.7572, "step": 24000 }, { "epoch": 10.743061772605193, "eval_anls": 0.4312744122080826, "eval_loss": 1.941930890083313, "eval_runtime": 1821.5331, "eval_samples_per_second": 1.082, "eval_steps_per_second": 1.082, "eval_stvqa_acc": 0.34145104008117705, "eval_textvqa_acc": 0.0034669372568915955, "step": 24000 }, { "epoch": 10.966875559534467, "grad_norm": 8.245118141174316, "learning_rate": 9.482758620689655e-06, "loss": 0.7312, "step": 24500 }, { "epoch": 11.190689346463742, "grad_norm": 8.00062084197998, "learning_rate": 8.620689655172414e-06, "loss": 0.6966, "step": 25000 }, { "epoch": 11.190689346463742, "eval_anls": 0.42783346979471903, "eval_loss": 1.9731788635253906, "eval_runtime": 1834.9865, "eval_samples_per_second": 1.074, "eval_steps_per_second": 1.074, "eval_stvqa_acc": 0.34145104008117705, "eval_textvqa_acc": 0.003551496702181635, "step": 25000 }, { "epoch": 11.414503133393017, "grad_norm": 9.724451065063477, "learning_rate": 7.758620689655173e-06, "loss": 0.7131, "step": 25500 }, { "epoch": 11.638316920322293, "grad_norm": 8.227492332458496, "learning_rate": 6.896551724137932e-06, "loss": 0.7277, "step": 26000 }, { "epoch": 11.638316920322293, "eval_anls": 0.43080821443844364, "eval_loss": 1.9889310598373413, "eval_runtime": 1831.7239, "eval_samples_per_second": 1.076, "eval_steps_per_second": 1.076, "eval_stvqa_acc": 0.3404363267376966, "eval_textvqa_acc": 0.003551496702181635, "step": 26000 }, { "epoch": 11.862130707251566, "grad_norm": 8.610105514526367, "learning_rate": 6.03448275862069e-06, "loss": 0.7039, "step": 26500 }, { "epoch": 12.085944494180842, "grad_norm": 8.281418800354004, "learning_rate": 5.172413793103448e-06, "loss": 0.6983, "step": 27000 }, { "epoch": 12.085944494180842, "eval_anls": 0.4299173295121815, "eval_loss": 1.9830011129379272, "eval_runtime": 1822.4224, "eval_samples_per_second": 1.082, "eval_steps_per_second": 1.082, "eval_stvqa_acc": 0.3424657534246575, "eval_textvqa_acc": 0.0036360561474716733, "step": 27000 }, { "epoch": 12.309758281110117, "grad_norm": 7.353641986846924, "learning_rate": 4.310344827586207e-06, "loss": 0.6813, "step": 27500 }, { "epoch": 12.53357206803939, "grad_norm": 10.350179672241211, "learning_rate": 3.448275862068966e-06, "loss": 0.694, "step": 28000 }, { "epoch": 12.53357206803939, "eval_anls": 0.4278160827558073, "eval_loss": 1.988090991973877, "eval_runtime": 1832.1136, "eval_samples_per_second": 1.076, "eval_steps_per_second": 1.076, "eval_stvqa_acc": 0.3419583967529173, "eval_textvqa_acc": 0.003523310220418288, "step": 28000 }, { "epoch": 12.757385854968666, "grad_norm": 6.440176010131836, "learning_rate": 2.586206896551724e-06, "loss": 0.6901, "step": 28500 }, { "epoch": 12.981199641897941, "grad_norm": 5.959563732147217, "learning_rate": 1.724137931034483e-06, "loss": 0.6584, "step": 29000 }, { "epoch": 12.981199641897941, "eval_anls": 0.42679352711220236, "eval_loss": 2.003437042236328, "eval_runtime": 1826.5505, "eval_samples_per_second": 1.079, "eval_steps_per_second": 1.079, "eval_stvqa_acc": 0.34145104008117705, "eval_textvqa_acc": 0.0034387507751282493, "step": 29000 }, { "epoch": 13.205013428827217, "grad_norm": 7.324951648712158, "learning_rate": 8.620689655172415e-07, "loss": 0.6636, "step": 29500 }, { "epoch": 13.42882721575649, "grad_norm": 7.893153190612793, "learning_rate": 0.0, "loss": 0.6673, "step": 30000 }, { "epoch": 13.42882721575649, "eval_anls": 0.4290839479472131, "eval_loss": 2.003530502319336, "eval_runtime": 1825.1374, "eval_samples_per_second": 1.08, "eval_steps_per_second": 1.08, "eval_stvqa_acc": 0.34449518011161845, "eval_textvqa_acc": 0.003551496702181635, "step": 30000 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 14, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.060915240465369e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }