Shawon16's picture
End of training
0d88359 verified
{
"best_metric": 0.9975093399750934,
"best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/Timesformer_default_fold_10_10_epoch_noAug_batch8/checkpoint-4520",
"epoch": 9.099003322259136,
"eval_steps": 500,
"global_step": 9030,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01107419712070875,
"grad_norm": 12.269712448120117,
"learning_rate": 5.537098560354375e-06,
"loss": 4.1912,
"step": 100
},
{
"epoch": 0.0221483942414175,
"grad_norm": 13.763405799865723,
"learning_rate": 1.107419712070875e-05,
"loss": 3.8166,
"step": 200
},
{
"epoch": 0.03322259136212625,
"grad_norm": 16.058164596557617,
"learning_rate": 1.6611295681063124e-05,
"loss": 2.9676,
"step": 300
},
{
"epoch": 0.044296788482835,
"grad_norm": 14.037968635559082,
"learning_rate": 2.21483942414175e-05,
"loss": 2.0805,
"step": 400
},
{
"epoch": 0.05537098560354374,
"grad_norm": 15.45093059539795,
"learning_rate": 2.7685492801771873e-05,
"loss": 1.2662,
"step": 500
},
{
"epoch": 0.0664451827242525,
"grad_norm": 5.890250205993652,
"learning_rate": 3.322259136212625e-05,
"loss": 0.7708,
"step": 600
},
{
"epoch": 0.07751937984496124,
"grad_norm": 8.48812484741211,
"learning_rate": 3.875968992248062e-05,
"loss": 0.4761,
"step": 700
},
{
"epoch": 0.08859357696567,
"grad_norm": 3.3065390586853027,
"learning_rate": 4.4296788482835e-05,
"loss": 0.3097,
"step": 800
},
{
"epoch": 0.09966777408637874,
"grad_norm": 4.770102024078369,
"learning_rate": 4.983388704318937e-05,
"loss": 0.2337,
"step": 900
},
{
"epoch": 0.10011074197120709,
"eval_accuracy": 0.962640099626401,
"eval_loss": 0.18786028027534485,
"eval_runtime": 199.1827,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.507,
"step": 904
},
{
"epoch": 1.0106312292358803,
"grad_norm": 1.31089448928833,
"learning_rate": 4.940322382182848e-05,
"loss": 0.1491,
"step": 1000
},
{
"epoch": 1.0217054263565892,
"grad_norm": 0.30288809537887573,
"learning_rate": 4.878799064845577e-05,
"loss": 0.106,
"step": 1100
},
{
"epoch": 1.032779623477298,
"grad_norm": 0.8043572902679443,
"learning_rate": 4.8172757475083056e-05,
"loss": 0.1053,
"step": 1200
},
{
"epoch": 1.0438538205980066,
"grad_norm": 2.86224627494812,
"learning_rate": 4.755752430171035e-05,
"loss": 0.0642,
"step": 1300
},
{
"epoch": 1.0549280177187155,
"grad_norm": 1.9137568473815918,
"learning_rate": 4.694229112833764e-05,
"loss": 0.0543,
"step": 1400
},
{
"epoch": 1.0660022148394241,
"grad_norm": 0.646798849105835,
"learning_rate": 4.6327057954964936e-05,
"loss": 0.036,
"step": 1500
},
{
"epoch": 1.0770764119601328,
"grad_norm": 0.1015913337469101,
"learning_rate": 4.571182478159223e-05,
"loss": 0.0312,
"step": 1600
},
{
"epoch": 1.0881506090808417,
"grad_norm": 0.1494218111038208,
"learning_rate": 4.5096591608219516e-05,
"loss": 0.0345,
"step": 1700
},
{
"epoch": 1.0992248062015504,
"grad_norm": 0.09698224812746048,
"learning_rate": 4.448135843484681e-05,
"loss": 0.0508,
"step": 1800
},
{
"epoch": 1.100110741971207,
"eval_accuracy": 0.9937733499377335,
"eval_loss": 0.027358748018741608,
"eval_runtime": 196.0788,
"eval_samples_per_second": 4.095,
"eval_steps_per_second": 0.515,
"step": 1808
},
{
"epoch": 2.010188261351052,
"grad_norm": 0.015389556996524334,
"learning_rate": 4.38661252614741e-05,
"loss": 0.0149,
"step": 1900
},
{
"epoch": 2.0212624584717607,
"grad_norm": 0.03506704792380333,
"learning_rate": 4.325089208810139e-05,
"loss": 0.0056,
"step": 2000
},
{
"epoch": 2.0323366555924696,
"grad_norm": 0.01206329744309187,
"learning_rate": 4.263565891472868e-05,
"loss": 0.0027,
"step": 2100
},
{
"epoch": 2.0434108527131785,
"grad_norm": 0.12232516705989838,
"learning_rate": 4.2020425741355975e-05,
"loss": 0.0016,
"step": 2200
},
{
"epoch": 2.054485049833887,
"grad_norm": 0.007167478557676077,
"learning_rate": 4.140519256798327e-05,
"loss": 0.0103,
"step": 2300
},
{
"epoch": 2.065559246954596,
"grad_norm": 0.02364514209330082,
"learning_rate": 4.078995939461056e-05,
"loss": 0.0007,
"step": 2400
},
{
"epoch": 2.0766334440753047,
"grad_norm": 0.00806678831577301,
"learning_rate": 4.0174726221237855e-05,
"loss": 0.0007,
"step": 2500
},
{
"epoch": 2.087707641196013,
"grad_norm": 0.013363192789256573,
"learning_rate": 3.955949304786514e-05,
"loss": 0.005,
"step": 2600
},
{
"epoch": 2.098781838316722,
"grad_norm": 0.03008888103067875,
"learning_rate": 3.8944259874492434e-05,
"loss": 0.0191,
"step": 2700
},
{
"epoch": 2.100110741971207,
"eval_accuracy": 0.9925280199252802,
"eval_loss": 0.023914234712719917,
"eval_runtime": 202.6495,
"eval_samples_per_second": 3.963,
"eval_steps_per_second": 0.498,
"step": 2712
},
{
"epoch": 3.009745293466224,
"grad_norm": 0.0100622633472085,
"learning_rate": 3.832902670111973e-05,
"loss": 0.0038,
"step": 2800
},
{
"epoch": 3.0208194905869323,
"grad_norm": 0.010149353183805943,
"learning_rate": 3.7713793527747014e-05,
"loss": 0.0009,
"step": 2900
},
{
"epoch": 3.0318936877076412,
"grad_norm": 0.005930065177381039,
"learning_rate": 3.7098560354374314e-05,
"loss": 0.0005,
"step": 3000
},
{
"epoch": 3.04296788482835,
"grad_norm": 0.010781911201775074,
"learning_rate": 3.64833271810016e-05,
"loss": 0.0044,
"step": 3100
},
{
"epoch": 3.0540420819490586,
"grad_norm": 0.010279743000864983,
"learning_rate": 3.5868094007628894e-05,
"loss": 0.0121,
"step": 3200
},
{
"epoch": 3.0651162790697675,
"grad_norm": 0.013513376004993916,
"learning_rate": 3.525286083425619e-05,
"loss": 0.0025,
"step": 3300
},
{
"epoch": 3.0761904761904764,
"grad_norm": 0.05831762030720711,
"learning_rate": 3.463762766088347e-05,
"loss": 0.0012,
"step": 3400
},
{
"epoch": 3.087264673311185,
"grad_norm": 0.05180026963353157,
"learning_rate": 3.4022394487510767e-05,
"loss": 0.0272,
"step": 3500
},
{
"epoch": 3.0983388704318937,
"grad_norm": 0.016193652525544167,
"learning_rate": 3.340716131413806e-05,
"loss": 0.0169,
"step": 3600
},
{
"epoch": 3.100110741971207,
"eval_accuracy": 0.9912826899128269,
"eval_loss": 0.03048611246049404,
"eval_runtime": 186.7323,
"eval_samples_per_second": 4.3,
"eval_steps_per_second": 0.541,
"step": 3616
},
{
"epoch": 4.0093023255813955,
"grad_norm": 0.04684939235448837,
"learning_rate": 3.2791928140765346e-05,
"loss": 0.0018,
"step": 3700
},
{
"epoch": 4.020376522702104,
"grad_norm": 0.015213378705084324,
"learning_rate": 3.2176694967392646e-05,
"loss": 0.0084,
"step": 3800
},
{
"epoch": 4.0314507198228124,
"grad_norm": 0.008424785919487476,
"learning_rate": 3.156146179401994e-05,
"loss": 0.0063,
"step": 3900
},
{
"epoch": 4.042524916943521,
"grad_norm": 0.0028675757348537445,
"learning_rate": 3.0946228620647226e-05,
"loss": 0.0003,
"step": 4000
},
{
"epoch": 4.05359911406423,
"grad_norm": 0.009860913269221783,
"learning_rate": 3.033099544727452e-05,
"loss": 0.0018,
"step": 4100
},
{
"epoch": 4.064673311184939,
"grad_norm": 0.08612042665481567,
"learning_rate": 2.971576227390181e-05,
"loss": 0.0108,
"step": 4200
},
{
"epoch": 4.075747508305648,
"grad_norm": 0.015194080770015717,
"learning_rate": 2.91005291005291e-05,
"loss": 0.0023,
"step": 4300
},
{
"epoch": 4.086821705426357,
"grad_norm": 0.0368819497525692,
"learning_rate": 2.8485295927156392e-05,
"loss": 0.001,
"step": 4400
},
{
"epoch": 4.097895902547065,
"grad_norm": 0.014080989174544811,
"learning_rate": 2.787006275378369e-05,
"loss": 0.0008,
"step": 4500
},
{
"epoch": 4.100110741971207,
"eval_accuracy": 0.9975093399750934,
"eval_loss": 0.012555562891066074,
"eval_runtime": 182.9036,
"eval_samples_per_second": 4.39,
"eval_steps_per_second": 0.552,
"step": 4520
},
{
"epoch": 5.008859357696567,
"grad_norm": 0.026085326448082924,
"learning_rate": 2.725482958041098e-05,
"loss": 0.0019,
"step": 4600
},
{
"epoch": 5.019933554817276,
"grad_norm": 0.00664695305749774,
"learning_rate": 2.6639596407038268e-05,
"loss": 0.0033,
"step": 4700
},
{
"epoch": 5.0310077519379846,
"grad_norm": 0.0021630304399877787,
"learning_rate": 2.602436323366556e-05,
"loss": 0.0003,
"step": 4800
},
{
"epoch": 5.0420819490586934,
"grad_norm": 0.005731029435992241,
"learning_rate": 2.540913006029285e-05,
"loss": 0.0002,
"step": 4900
},
{
"epoch": 5.053156146179402,
"grad_norm": 0.0024967463687062263,
"learning_rate": 2.479389688692014e-05,
"loss": 0.0002,
"step": 5000
},
{
"epoch": 5.06423034330011,
"grad_norm": 0.003203805536031723,
"learning_rate": 2.4178663713547438e-05,
"loss": 0.0002,
"step": 5100
},
{
"epoch": 5.075304540420819,
"grad_norm": 0.001881565898656845,
"learning_rate": 2.3563430540174727e-05,
"loss": 0.0002,
"step": 5200
},
{
"epoch": 5.086378737541528,
"grad_norm": 0.0017551712226122618,
"learning_rate": 2.2948197366802017e-05,
"loss": 0.0002,
"step": 5300
},
{
"epoch": 5.097452934662237,
"grad_norm": 0.0013360229786485434,
"learning_rate": 2.233296419342931e-05,
"loss": 0.0001,
"step": 5400
},
{
"epoch": 5.100110741971207,
"eval_accuracy": 0.9962640099626401,
"eval_loss": 0.007705440279096365,
"eval_runtime": 177.5303,
"eval_samples_per_second": 4.523,
"eval_steps_per_second": 0.569,
"step": 5424
},
{
"epoch": 6.008416389811739,
"grad_norm": 0.0024486565962433815,
"learning_rate": 2.1717731020056604e-05,
"loss": 0.0001,
"step": 5500
},
{
"epoch": 6.019490586932448,
"grad_norm": 0.0011960830306634307,
"learning_rate": 2.1102497846683894e-05,
"loss": 0.0001,
"step": 5600
},
{
"epoch": 6.030564784053156,
"grad_norm": 0.002331072697415948,
"learning_rate": 2.0487264673311183e-05,
"loss": 0.0001,
"step": 5700
},
{
"epoch": 6.041638981173865,
"grad_norm": 0.0012679151259362698,
"learning_rate": 1.987203149993848e-05,
"loss": 0.0001,
"step": 5800
},
{
"epoch": 6.052713178294574,
"grad_norm": 0.0020030313171446323,
"learning_rate": 1.925679832656577e-05,
"loss": 0.0001,
"step": 5900
},
{
"epoch": 6.0637873754152825,
"grad_norm": 0.003349652513861656,
"learning_rate": 1.864156515319306e-05,
"loss": 0.015,
"step": 6000
},
{
"epoch": 6.074861572535991,
"grad_norm": 0.012170190922915936,
"learning_rate": 1.8026331979820353e-05,
"loss": 0.0042,
"step": 6100
},
{
"epoch": 6.0859357696567,
"grad_norm": 0.008038354106247425,
"learning_rate": 1.7411098806447646e-05,
"loss": 0.0002,
"step": 6200
},
{
"epoch": 6.097009966777408,
"grad_norm": 0.0046376571990549564,
"learning_rate": 1.6795865633074936e-05,
"loss": 0.0001,
"step": 6300
},
{
"epoch": 6.100110741971207,
"eval_accuracy": 0.9962640099626401,
"eval_loss": 0.022780200466513634,
"eval_runtime": 177.4915,
"eval_samples_per_second": 4.524,
"eval_steps_per_second": 0.569,
"step": 6328
},
{
"epoch": 7.00797342192691,
"grad_norm": 0.001398948603309691,
"learning_rate": 1.6180632459702226e-05,
"loss": 0.0001,
"step": 6400
},
{
"epoch": 7.019047619047619,
"grad_norm": 0.0018464713357388973,
"learning_rate": 1.556539928632952e-05,
"loss": 0.0001,
"step": 6500
},
{
"epoch": 7.030121816168328,
"grad_norm": 0.0012479424476623535,
"learning_rate": 1.4950166112956812e-05,
"loss": 0.0001,
"step": 6600
},
{
"epoch": 7.041196013289037,
"grad_norm": 0.0021251088473945856,
"learning_rate": 1.4334932939584104e-05,
"loss": 0.0001,
"step": 6700
},
{
"epoch": 7.052270210409746,
"grad_norm": 0.0013992477906867862,
"learning_rate": 1.3719699766211393e-05,
"loss": 0.0001,
"step": 6800
},
{
"epoch": 7.063344407530454,
"grad_norm": 0.002737658564001322,
"learning_rate": 1.3104466592838688e-05,
"loss": 0.0001,
"step": 6900
},
{
"epoch": 7.074418604651163,
"grad_norm": 0.003131190547719598,
"learning_rate": 1.2489233419465978e-05,
"loss": 0.0001,
"step": 7000
},
{
"epoch": 7.0854928017718715,
"grad_norm": 0.0008928414317779243,
"learning_rate": 1.187400024609327e-05,
"loss": 0.0001,
"step": 7100
},
{
"epoch": 7.09656699889258,
"grad_norm": 0.003003242425620556,
"learning_rate": 1.1258767072720563e-05,
"loss": 0.0001,
"step": 7200
},
{
"epoch": 7.100110741971207,
"eval_accuracy": 0.9975093399750934,
"eval_loss": 0.0162957850843668,
"eval_runtime": 175.2944,
"eval_samples_per_second": 4.581,
"eval_steps_per_second": 0.576,
"step": 7232
},
{
"epoch": 8.007530454042081,
"grad_norm": 0.0015305023407563567,
"learning_rate": 1.0643533899347853e-05,
"loss": 0.0001,
"step": 7300
},
{
"epoch": 8.018604651162791,
"grad_norm": 0.0009707214194349945,
"learning_rate": 1.0028300725975146e-05,
"loss": 0.0001,
"step": 7400
},
{
"epoch": 8.029678848283499,
"grad_norm": 0.0015318732475861907,
"learning_rate": 9.413067552602436e-06,
"loss": 0.0001,
"step": 7500
},
{
"epoch": 8.040753045404209,
"grad_norm": 0.0013183602131903172,
"learning_rate": 8.797834379229729e-06,
"loss": 0.0001,
"step": 7600
},
{
"epoch": 8.051827242524917,
"grad_norm": 0.0007716185064055026,
"learning_rate": 8.18260120585702e-06,
"loss": 0.0001,
"step": 7700
},
{
"epoch": 8.062901439645625,
"grad_norm": 0.0033044065348803997,
"learning_rate": 7.567368032484312e-06,
"loss": 0.0001,
"step": 7800
},
{
"epoch": 8.073975636766335,
"grad_norm": 0.0012280073715373874,
"learning_rate": 6.952134859111603e-06,
"loss": 0.0001,
"step": 7900
},
{
"epoch": 8.085049833887043,
"grad_norm": 0.0006749368621967733,
"learning_rate": 6.336901685738895e-06,
"loss": 0.0001,
"step": 8000
},
{
"epoch": 8.096124031007752,
"grad_norm": 0.0012479163706302643,
"learning_rate": 5.7216685123661875e-06,
"loss": 0.0001,
"step": 8100
},
{
"epoch": 8.100110741971207,
"eval_accuracy": 0.9975093399750934,
"eval_loss": 0.015746863558888435,
"eval_runtime": 167.1139,
"eval_samples_per_second": 4.805,
"eval_steps_per_second": 0.604,
"step": 8136
},
{
"epoch": 9.007087486157253,
"grad_norm": 0.0012506992788985372,
"learning_rate": 5.106435338993479e-06,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 9.018161683277963,
"grad_norm": 0.0007666904130019248,
"learning_rate": 4.4912021656207705e-06,
"loss": 0.0001,
"step": 8300
},
{
"epoch": 9.029235880398671,
"grad_norm": 0.001267068088054657,
"learning_rate": 3.875968992248062e-06,
"loss": 0.0001,
"step": 8400
},
{
"epoch": 9.04031007751938,
"grad_norm": 0.0007247941102832556,
"learning_rate": 3.260735818875354e-06,
"loss": 0.0001,
"step": 8500
},
{
"epoch": 9.051384274640089,
"grad_norm": 0.000805841526016593,
"learning_rate": 2.6455026455026455e-06,
"loss": 0.0001,
"step": 8600
},
{
"epoch": 9.062458471760797,
"grad_norm": 0.0007739869179204106,
"learning_rate": 2.0302694721299375e-06,
"loss": 0.0001,
"step": 8700
},
{
"epoch": 9.073532668881507,
"grad_norm": 0.0008207521168515086,
"learning_rate": 1.415036298757229e-06,
"loss": 0.0001,
"step": 8800
},
{
"epoch": 9.084606866002215,
"grad_norm": 0.0008044055430218577,
"learning_rate": 7.998031253845208e-07,
"loss": 0.0001,
"step": 8900
},
{
"epoch": 9.095681063122923,
"grad_norm": 0.0007077198824845254,
"learning_rate": 1.8456995201181249e-07,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 9.099003322259136,
"eval_accuracy": 0.9975093399750934,
"eval_loss": 0.01572454161942005,
"eval_runtime": 175.3925,
"eval_samples_per_second": 4.578,
"eval_steps_per_second": 0.576,
"step": 9030
},
{
"epoch": 9.099003322259136,
"step": 9030,
"total_flos": 6.328460909097596e+19,
"train_loss": 0.18755709112447544,
"train_runtime": 22812.8611,
"train_samples_per_second": 3.167,
"train_steps_per_second": 0.396
},
{
"epoch": 9.099003322259136,
"eval_accuracy": 0.7641065830721003,
"eval_loss": 0.8574168086051941,
"eval_runtime": 273.0523,
"eval_samples_per_second": 4.673,
"eval_steps_per_second": 0.586,
"step": 9030
}
],
"logging_steps": 100,
"max_steps": 9030,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.328460909097596e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}