Shawon16's picture
End of training
71e9292 verified
{
"best_metric": 0.9766666666666667,
"best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/MY DATA/VideoMAE_BdSLW60_SR_8_kineticsFinetuned_withoutAug/checkpoint-3716",
"epoch": 8.050053879310346,
"eval_steps": 500,
"global_step": 8361,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005387931034482759,
"grad_norm": 20.11014747619629,
"learning_rate": 2.6670258620689655e-06,
"loss": 16.4554,
"step": 100
},
{
"epoch": 0.010775862068965518,
"grad_norm": 26.971817016601562,
"learning_rate": 5.360991379310345e-06,
"loss": 16.1745,
"step": 200
},
{
"epoch": 0.016163793103448277,
"grad_norm": 41.51228713989258,
"learning_rate": 8.028017241379311e-06,
"loss": 15.3342,
"step": 300
},
{
"epoch": 0.021551724137931036,
"grad_norm": 51.878631591796875,
"learning_rate": 1.072198275862069e-05,
"loss": 13.6057,
"step": 400
},
{
"epoch": 0.02693965517241379,
"grad_norm": 52.984981536865234,
"learning_rate": 1.3415948275862069e-05,
"loss": 11.1741,
"step": 500
},
{
"epoch": 0.032327586206896554,
"grad_norm": 50.443359375,
"learning_rate": 1.610991379310345e-05,
"loss": 8.8003,
"step": 600
},
{
"epoch": 0.03771551724137931,
"grad_norm": 50.93900680541992,
"learning_rate": 1.880387931034483e-05,
"loss": 6.2116,
"step": 700
},
{
"epoch": 0.04310344827586207,
"grad_norm": 40.679100036621094,
"learning_rate": 2.149784482758621e-05,
"loss": 4.3287,
"step": 800
},
{
"epoch": 0.04849137931034483,
"grad_norm": 37.3469123840332,
"learning_rate": 2.4191810344827586e-05,
"loss": 2.859,
"step": 900
},
{
"epoch": 0.05005387931034483,
"eval_accuracy": 0.9183333333333333,
"eval_f1": 0.9087385884819826,
"eval_loss": 0.8475048542022705,
"eval_precision": 0.9240348890875206,
"eval_recall": 0.9183333333333333,
"eval_runtime": 192.0614,
"eval_samples_per_second": 3.124,
"eval_steps_per_second": 1.562,
"step": 929
},
{
"epoch": 1.0038254310344827,
"grad_norm": 19.420085906982422,
"learning_rate": 2.688577586206897e-05,
"loss": 1.5587,
"step": 1000
},
{
"epoch": 1.0092133620689656,
"grad_norm": 17.20052146911621,
"learning_rate": 2.9579741379310345e-05,
"loss": 0.9491,
"step": 1100
},
{
"epoch": 1.0146012931034483,
"grad_norm": 24.49296760559082,
"learning_rate": 3.2273706896551725e-05,
"loss": 0.6366,
"step": 1200
},
{
"epoch": 1.0199892241379311,
"grad_norm": 43.13790512084961,
"learning_rate": 3.496767241379311e-05,
"loss": 0.4445,
"step": 1300
},
{
"epoch": 1.0253771551724138,
"grad_norm": 1.7655223608016968,
"learning_rate": 3.7661637931034485e-05,
"loss": 0.2924,
"step": 1400
},
{
"epoch": 1.0307650862068964,
"grad_norm": 2.0372657775878906,
"learning_rate": 4.035560344827586e-05,
"loss": 0.1758,
"step": 1500
},
{
"epoch": 1.0361530172413793,
"grad_norm": 1.5986464023590088,
"learning_rate": 4.3049568965517245e-05,
"loss": 0.2277,
"step": 1600
},
{
"epoch": 1.041540948275862,
"grad_norm": 1.0040245056152344,
"learning_rate": 4.574353448275862e-05,
"loss": 0.1491,
"step": 1700
},
{
"epoch": 1.0469288793103448,
"grad_norm": 2.6459920406341553,
"learning_rate": 4.8437500000000005e-05,
"loss": 0.0555,
"step": 1800
},
{
"epoch": 1.0500538793103449,
"eval_accuracy": 0.9216666666666666,
"eval_f1": 0.9093368000230129,
"eval_loss": 0.30231231451034546,
"eval_precision": 0.9317002164502166,
"eval_recall": 0.9216666666666666,
"eval_runtime": 187.0802,
"eval_samples_per_second": 3.207,
"eval_steps_per_second": 1.604,
"step": 1858
},
{
"epoch": 2.002262931034483,
"grad_norm": 0.5840566158294678,
"learning_rate": 4.9874281609195405e-05,
"loss": 0.0647,
"step": 1900
},
{
"epoch": 2.0076508620689655,
"grad_norm": 0.16274438798427582,
"learning_rate": 4.95749521072797e-05,
"loss": 0.0725,
"step": 2000
},
{
"epoch": 2.013038793103448,
"grad_norm": 0.10402801632881165,
"learning_rate": 4.9275622605363985e-05,
"loss": 0.0139,
"step": 2100
},
{
"epoch": 2.0184267241379312,
"grad_norm": 0.0963917076587677,
"learning_rate": 4.897629310344828e-05,
"loss": 0.0206,
"step": 2200
},
{
"epoch": 2.023814655172414,
"grad_norm": 0.08095109462738037,
"learning_rate": 4.867696360153257e-05,
"loss": 0.0499,
"step": 2300
},
{
"epoch": 2.0292025862068965,
"grad_norm": 1.0376495122909546,
"learning_rate": 4.837763409961686e-05,
"loss": 0.0105,
"step": 2400
},
{
"epoch": 2.034590517241379,
"grad_norm": 0.027790505439043045,
"learning_rate": 4.808129789272031e-05,
"loss": 0.0226,
"step": 2500
},
{
"epoch": 2.0399784482758623,
"grad_norm": 0.031076578423380852,
"learning_rate": 4.778496168582376e-05,
"loss": 0.0359,
"step": 2600
},
{
"epoch": 2.045366379310345,
"grad_norm": 0.02143680490553379,
"learning_rate": 4.748563218390804e-05,
"loss": 0.0025,
"step": 2700
},
{
"epoch": 2.0500538793103447,
"eval_accuracy": 0.935,
"eval_f1": 0.927261710739568,
"eval_loss": 0.2999415695667267,
"eval_precision": 0.9378208874458874,
"eval_recall": 0.935,
"eval_runtime": 186.5359,
"eval_samples_per_second": 3.217,
"eval_steps_per_second": 1.608,
"step": 2787
},
{
"epoch": 3.000700431034483,
"grad_norm": 0.044046301394701004,
"learning_rate": 4.7186302681992336e-05,
"loss": 0.2036,
"step": 2800
},
{
"epoch": 3.0060883620689656,
"grad_norm": 0.02674109861254692,
"learning_rate": 4.688697318007663e-05,
"loss": 0.0227,
"step": 2900
},
{
"epoch": 3.011476293103448,
"grad_norm": 0.604284405708313,
"learning_rate": 4.658764367816092e-05,
"loss": 0.0018,
"step": 3000
},
{
"epoch": 3.016864224137931,
"grad_norm": 0.012123900465667248,
"learning_rate": 4.6288314176245215e-05,
"loss": 0.0013,
"step": 3100
},
{
"epoch": 3.022252155172414,
"grad_norm": 0.17276327311992645,
"learning_rate": 4.598898467432951e-05,
"loss": 0.0304,
"step": 3200
},
{
"epoch": 3.0276400862068966,
"grad_norm": 0.01406328845769167,
"learning_rate": 4.5689655172413794e-05,
"loss": 0.0529,
"step": 3300
},
{
"epoch": 3.0330280172413793,
"grad_norm": 0.026818769052624702,
"learning_rate": 4.539032567049809e-05,
"loss": 0.0059,
"step": 3400
},
{
"epoch": 3.038415948275862,
"grad_norm": 0.022273056209087372,
"learning_rate": 4.509099616858238e-05,
"loss": 0.0431,
"step": 3500
},
{
"epoch": 3.043803879310345,
"grad_norm": 0.008690926246345043,
"learning_rate": 4.4791666666666673e-05,
"loss": 0.001,
"step": 3600
},
{
"epoch": 3.0491918103448277,
"grad_norm": 0.009022524580359459,
"learning_rate": 4.449233716475096e-05,
"loss": 0.0009,
"step": 3700
},
{
"epoch": 3.0500538793103447,
"eval_accuracy": 0.9766666666666667,
"eval_f1": 0.9761724029000916,
"eval_loss": 0.08761004358530045,
"eval_precision": 0.9809054834054833,
"eval_recall": 0.9766666666666667,
"eval_runtime": 184.6188,
"eval_samples_per_second": 3.25,
"eval_steps_per_second": 1.625,
"step": 3716
},
{
"epoch": 4.004525862068966,
"grad_norm": 0.007579015102237463,
"learning_rate": 4.419300766283525e-05,
"loss": 0.0007,
"step": 3800
},
{
"epoch": 4.009913793103448,
"grad_norm": 0.010904570110142231,
"learning_rate": 4.3893678160919546e-05,
"loss": 0.0395,
"step": 3900
},
{
"epoch": 4.015301724137931,
"grad_norm": 0.006146453786641359,
"learning_rate": 4.359434865900383e-05,
"loss": 0.0006,
"step": 4000
},
{
"epoch": 4.020689655172414,
"grad_norm": 0.013002044521272182,
"learning_rate": 4.3295019157088125e-05,
"loss": 0.0011,
"step": 4100
},
{
"epoch": 4.026077586206896,
"grad_norm": 0.12733735144138336,
"learning_rate": 4.299568965517242e-05,
"loss": 0.0451,
"step": 4200
},
{
"epoch": 4.031465517241379,
"grad_norm": 11.122708320617676,
"learning_rate": 4.269636015325671e-05,
"loss": 0.305,
"step": 4300
},
{
"epoch": 4.0368534482758625,
"grad_norm": 0.13978660106658936,
"learning_rate": 4.2397030651341e-05,
"loss": 0.2215,
"step": 4400
},
{
"epoch": 4.042241379310345,
"grad_norm": 0.37221765518188477,
"learning_rate": 4.209770114942529e-05,
"loss": 0.1446,
"step": 4500
},
{
"epoch": 4.047629310344828,
"grad_norm": 20.41011619567871,
"learning_rate": 4.1801364942528733e-05,
"loss": 0.1355,
"step": 4600
},
{
"epoch": 4.050053879310345,
"eval_accuracy": 0.9266666666666666,
"eval_f1": 0.9146410798064111,
"eval_loss": 0.25398045778274536,
"eval_precision": 0.9182419154919156,
"eval_recall": 0.9266666666666666,
"eval_runtime": 176.3808,
"eval_samples_per_second": 3.402,
"eval_steps_per_second": 1.701,
"step": 4645
},
{
"epoch": 5.002963362068965,
"grad_norm": 0.017676187679171562,
"learning_rate": 4.1502035440613026e-05,
"loss": 0.1491,
"step": 4700
},
{
"epoch": 5.008351293103448,
"grad_norm": 1.5707539319992065,
"learning_rate": 4.120270593869732e-05,
"loss": 0.0298,
"step": 4800
},
{
"epoch": 5.013739224137931,
"grad_norm": 0.08283871412277222,
"learning_rate": 4.090337643678161e-05,
"loss": 0.1471,
"step": 4900
},
{
"epoch": 5.019127155172414,
"grad_norm": 0.0916813537478447,
"learning_rate": 4.0604046934865905e-05,
"loss": 0.1646,
"step": 5000
},
{
"epoch": 5.024515086206897,
"grad_norm": 0.012382575310766697,
"learning_rate": 4.03047174329502e-05,
"loss": 0.015,
"step": 5100
},
{
"epoch": 5.029903017241379,
"grad_norm": 0.008144189603626728,
"learning_rate": 4.000838122605364e-05,
"loss": 0.1365,
"step": 5200
},
{
"epoch": 5.035290948275862,
"grad_norm": 0.021491670981049538,
"learning_rate": 3.9709051724137935e-05,
"loss": 0.0302,
"step": 5300
},
{
"epoch": 5.040678879310345,
"grad_norm": 0.03770313411951065,
"learning_rate": 3.940972222222222e-05,
"loss": 0.1047,
"step": 5400
},
{
"epoch": 5.046066810344827,
"grad_norm": 0.0966072753071785,
"learning_rate": 3.9110392720306514e-05,
"loss": 0.099,
"step": 5500
},
{
"epoch": 5.050053879310345,
"eval_accuracy": 0.9633333333333334,
"eval_f1": 0.9578282624655623,
"eval_loss": 0.26155975461006165,
"eval_precision": 0.956979842282474,
"eval_recall": 0.9633333333333334,
"eval_runtime": 178.9094,
"eval_samples_per_second": 3.354,
"eval_steps_per_second": 1.677,
"step": 5574
},
{
"epoch": 6.001400862068966,
"grad_norm": 0.0034083151258528233,
"learning_rate": 3.881106321839081e-05,
"loss": 0.0444,
"step": 5600
},
{
"epoch": 6.006788793103448,
"grad_norm": 0.0034083956852555275,
"learning_rate": 3.851173371647509e-05,
"loss": 0.0439,
"step": 5700
},
{
"epoch": 6.012176724137931,
"grad_norm": 0.004689768888056278,
"learning_rate": 3.8212404214559386e-05,
"loss": 0.0007,
"step": 5800
},
{
"epoch": 6.017564655172414,
"grad_norm": 0.004528137389570475,
"learning_rate": 3.791307471264368e-05,
"loss": 0.0529,
"step": 5900
},
{
"epoch": 6.022952586206896,
"grad_norm": 0.005246564745903015,
"learning_rate": 3.7613745210727965e-05,
"loss": 0.0016,
"step": 6000
},
{
"epoch": 6.0283405172413795,
"grad_norm": 0.003063632408156991,
"learning_rate": 3.731441570881226e-05,
"loss": 0.0002,
"step": 6100
},
{
"epoch": 6.033728448275862,
"grad_norm": 0.002391215180978179,
"learning_rate": 3.701508620689655e-05,
"loss": 0.0002,
"step": 6200
},
{
"epoch": 6.039116379310345,
"grad_norm": 0.0016633198829367757,
"learning_rate": 3.6715756704980844e-05,
"loss": 0.0002,
"step": 6300
},
{
"epoch": 6.044504310344828,
"grad_norm": 0.013642443343997002,
"learning_rate": 3.641642720306514e-05,
"loss": 0.0002,
"step": 6400
},
{
"epoch": 6.04989224137931,
"grad_norm": 0.0030818418599665165,
"learning_rate": 3.611709770114943e-05,
"loss": 0.0656,
"step": 6500
},
{
"epoch": 6.050053879310345,
"eval_accuracy": 0.9616666666666667,
"eval_f1": 0.9597682298955686,
"eval_loss": 0.20607294142246246,
"eval_precision": 0.9674627687127687,
"eval_recall": 0.9616666666666667,
"eval_runtime": 184.33,
"eval_samples_per_second": 3.255,
"eval_steps_per_second": 1.628,
"step": 6503
},
{
"epoch": 7.005226293103449,
"grad_norm": 0.003281041979789734,
"learning_rate": 3.5817768199233717e-05,
"loss": 0.0108,
"step": 6600
},
{
"epoch": 7.010614224137931,
"grad_norm": 0.06187641620635986,
"learning_rate": 3.551843869731801e-05,
"loss": 0.0458,
"step": 6700
},
{
"epoch": 7.016002155172414,
"grad_norm": 0.6124621033668518,
"learning_rate": 3.52191091954023e-05,
"loss": 0.0931,
"step": 6800
},
{
"epoch": 7.021390086206897,
"grad_norm": 0.013154719024896622,
"learning_rate": 3.4919779693486596e-05,
"loss": 0.2289,
"step": 6900
},
{
"epoch": 7.026778017241379,
"grad_norm": 0.33358174562454224,
"learning_rate": 3.462045019157089e-05,
"loss": 0.0804,
"step": 7000
},
{
"epoch": 7.032165948275862,
"grad_norm": 0.010887747630476952,
"learning_rate": 3.4321120689655175e-05,
"loss": 0.0032,
"step": 7100
},
{
"epoch": 7.0375538793103445,
"grad_norm": 0.26187142729759216,
"learning_rate": 3.402179118773947e-05,
"loss": 0.0134,
"step": 7200
},
{
"epoch": 7.042941810344828,
"grad_norm": 0.01975095644593239,
"learning_rate": 3.372246168582376e-05,
"loss": 0.0507,
"step": 7300
},
{
"epoch": 7.048329741379311,
"grad_norm": 0.004032758995890617,
"learning_rate": 3.342313218390805e-05,
"loss": 0.0314,
"step": 7400
},
{
"epoch": 7.050053879310345,
"eval_accuracy": 0.9533333333333334,
"eval_f1": 0.9526073505957408,
"eval_loss": 0.20366963744163513,
"eval_precision": 0.9623829642579644,
"eval_recall": 0.9533333333333334,
"eval_runtime": 186.0707,
"eval_samples_per_second": 3.225,
"eval_steps_per_second": 1.612,
"step": 7432
},
{
"epoch": 8.003663793103449,
"grad_norm": 0.003311087377369404,
"learning_rate": 3.312380268199234e-05,
"loss": 0.0496,
"step": 7500
},
{
"epoch": 8.009051724137931,
"grad_norm": 0.03303457051515579,
"learning_rate": 3.282447318007663e-05,
"loss": 0.0004,
"step": 7600
},
{
"epoch": 8.014439655172414,
"grad_norm": 0.0018716267077252269,
"learning_rate": 3.252514367816092e-05,
"loss": 0.0002,
"step": 7700
},
{
"epoch": 8.019827586206896,
"grad_norm": 0.0035127492155879736,
"learning_rate": 3.222581417624521e-05,
"loss": 0.1371,
"step": 7800
},
{
"epoch": 8.02521551724138,
"grad_norm": 0.009622437879443169,
"learning_rate": 3.1926484674329505e-05,
"loss": 0.0445,
"step": 7900
},
{
"epoch": 8.030603448275862,
"grad_norm": 0.0022457086015492678,
"learning_rate": 3.163014846743295e-05,
"loss": 0.0073,
"step": 8000
},
{
"epoch": 8.035991379310344,
"grad_norm": 0.002641331171616912,
"learning_rate": 3.133081896551724e-05,
"loss": 0.0007,
"step": 8100
},
{
"epoch": 8.041379310344828,
"grad_norm": 0.004722919315099716,
"learning_rate": 3.1031489463601535e-05,
"loss": 0.0721,
"step": 8200
},
{
"epoch": 8.04676724137931,
"grad_norm": 0.0021432852372527122,
"learning_rate": 3.073215996168583e-05,
"loss": 0.0874,
"step": 8300
},
{
"epoch": 8.050053879310346,
"eval_accuracy": 0.95,
"eval_f1": 0.9457895216400736,
"eval_loss": 0.219703808426857,
"eval_precision": 0.9611800144300143,
"eval_recall": 0.95,
"eval_runtime": 186.2504,
"eval_samples_per_second": 3.221,
"eval_steps_per_second": 1.611,
"step": 8361
},
{
"epoch": 8.050053879310346,
"step": 8361,
"total_flos": 8.337990588259369e+19,
"train_loss": 1.2332628082536137,
"train_runtime": 25272.533,
"train_samples_per_second": 5.875,
"train_steps_per_second": 0.734
}
],
"logging_steps": 100,
"max_steps": 18560,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.337990588259369e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}