quickdraw-mobilevit-small / trainer_state.json
VinayHajare's picture
doodle-dash
2f11503 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 5000,
"global_step": 87895,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05688605722737357,
"grad_norm": 2.3711910247802734,
"learning_rate": 0.0007909073326127766,
"loss": 2.6366,
"step": 1000
},
{
"epoch": 0.11377211445474714,
"grad_norm": 2.2273147106170654,
"learning_rate": 0.0007818055634563969,
"loss": 1.7361,
"step": 2000
},
{
"epoch": 0.17065817168212072,
"grad_norm": 2.9114110469818115,
"learning_rate": 0.000772703794300017,
"loss": 1.5903,
"step": 3000
},
{
"epoch": 0.22754422890949427,
"grad_norm": 1.7726603746414185,
"learning_rate": 0.0007636020251436373,
"loss": 1.5127,
"step": 4000
},
{
"epoch": 0.2844302861368678,
"grad_norm": 1.8174991607666016,
"learning_rate": 0.0007545002559872575,
"loss": 1.4609,
"step": 5000
},
{
"epoch": 0.2844302861368678,
"eval_accuracy": 0.653196,
"eval_loss": 1.3989018201828003,
"eval_runtime": 65.7885,
"eval_samples_per_second": 3800.055,
"eval_steps_per_second": 14.851,
"step": 5000
},
{
"epoch": 0.34131634336424144,
"grad_norm": 1.7002882957458496,
"learning_rate": 0.0007453984868308778,
"loss": 1.4214,
"step": 6000
},
{
"epoch": 0.398202400591615,
"grad_norm": 1.6060094833374023,
"learning_rate": 0.0007362967176744981,
"loss": 1.3803,
"step": 7000
},
{
"epoch": 0.45508845781898855,
"grad_norm": 2.100240468978882,
"learning_rate": 0.0007271949485181182,
"loss": 1.358,
"step": 8000
},
{
"epoch": 0.5119745150463622,
"grad_norm": 1.507076621055603,
"learning_rate": 0.0007180931793617385,
"loss": 1.3392,
"step": 9000
},
{
"epoch": 0.5688605722737357,
"grad_norm": 1.8028790950775146,
"learning_rate": 0.0007089914102053587,
"loss": 1.3211,
"step": 10000
},
{
"epoch": 0.5688605722737357,
"eval_accuracy": 0.680348,
"eval_loss": 1.2739007472991943,
"eval_runtime": 64.9042,
"eval_samples_per_second": 3851.83,
"eval_steps_per_second": 15.053,
"step": 10000
},
{
"epoch": 0.6257466295011093,
"grad_norm": 1.699574589729309,
"learning_rate": 0.000699889641048979,
"loss": 1.3131,
"step": 11000
},
{
"epoch": 0.6826326867284829,
"grad_norm": 1.6491554975509644,
"learning_rate": 0.0006907878718925991,
"loss": 1.2837,
"step": 12000
},
{
"epoch": 0.7395187439558564,
"grad_norm": 1.8563138246536255,
"learning_rate": 0.0006816861027362194,
"loss": 1.276,
"step": 13000
},
{
"epoch": 0.79640480118323,
"grad_norm": 1.5511844158172607,
"learning_rate": 0.0006725843335798396,
"loss": 1.2678,
"step": 14000
},
{
"epoch": 0.8532908584106036,
"grad_norm": 1.3686333894729614,
"learning_rate": 0.0006634825644234599,
"loss": 1.2531,
"step": 15000
},
{
"epoch": 0.8532908584106036,
"eval_accuracy": 0.694232,
"eval_loss": 1.2132482528686523,
"eval_runtime": 64.8716,
"eval_samples_per_second": 3853.765,
"eval_steps_per_second": 15.061,
"step": 15000
},
{
"epoch": 0.9101769156379771,
"grad_norm": 1.958629846572876,
"learning_rate": 0.00065438079526708,
"loss": 1.2457,
"step": 16000
},
{
"epoch": 0.9670629728653507,
"grad_norm": 1.528414011001587,
"learning_rate": 0.0006452790261107003,
"loss": 1.2338,
"step": 17000
},
{
"epoch": 1.0239490300927243,
"grad_norm": 1.2693781852722168,
"learning_rate": 0.0006361772569543205,
"loss": 1.2142,
"step": 18000
},
{
"epoch": 1.0808350873200978,
"grad_norm": 1.4573434591293335,
"learning_rate": 0.0006270754877979408,
"loss": 1.19,
"step": 19000
},
{
"epoch": 1.1377211445474713,
"grad_norm": 1.236939787864685,
"learning_rate": 0.0006179737186415609,
"loss": 1.1875,
"step": 20000
},
{
"epoch": 1.1377211445474713,
"eval_accuracy": 0.704068,
"eval_loss": 1.1761754751205444,
"eval_runtime": 65.8177,
"eval_samples_per_second": 3798.369,
"eval_steps_per_second": 14.844,
"step": 20000
},
{
"epoch": 1.194607201774845,
"grad_norm": 1.241289496421814,
"learning_rate": 0.0006088719494851812,
"loss": 1.1814,
"step": 21000
},
{
"epoch": 1.2514932590022185,
"grad_norm": 1.483782410621643,
"learning_rate": 0.0005997701803288014,
"loss": 1.1822,
"step": 22000
},
{
"epoch": 1.3083793162295922,
"grad_norm": 1.5755152702331543,
"learning_rate": 0.0005906684111724217,
"loss": 1.1767,
"step": 23000
},
{
"epoch": 1.3652653734569657,
"grad_norm": 1.333516001701355,
"learning_rate": 0.0005815666420160419,
"loss": 1.1731,
"step": 24000
},
{
"epoch": 1.4221514306843392,
"grad_norm": 1.8660708665847778,
"learning_rate": 0.0005724648728596621,
"loss": 1.157,
"step": 25000
},
{
"epoch": 1.4221514306843392,
"eval_accuracy": 0.711072,
"eval_loss": 1.145967960357666,
"eval_runtime": 63.5002,
"eval_samples_per_second": 3936.992,
"eval_steps_per_second": 15.386,
"step": 25000
},
{
"epoch": 1.4790374879117127,
"grad_norm": 1.3808480501174927,
"learning_rate": 0.0005633631037032824,
"loss": 1.1574,
"step": 26000
},
{
"epoch": 1.5359235451390862,
"grad_norm": 1.1691391468048096,
"learning_rate": 0.0005542613345469026,
"loss": 1.1554,
"step": 27000
},
{
"epoch": 1.59280960236646,
"grad_norm": 1.4390947818756104,
"learning_rate": 0.0005451595653905228,
"loss": 1.1497,
"step": 28000
},
{
"epoch": 1.6496956595938337,
"grad_norm": 1.3637901544570923,
"learning_rate": 0.000536057796234143,
"loss": 1.1452,
"step": 29000
},
{
"epoch": 1.7065817168212072,
"grad_norm": 1.2076903581619263,
"learning_rate": 0.0005269560270777633,
"loss": 1.144,
"step": 30000
},
{
"epoch": 1.7065817168212072,
"eval_accuracy": 0.716336,
"eval_loss": 1.11836576461792,
"eval_runtime": 64.1718,
"eval_samples_per_second": 3895.791,
"eval_steps_per_second": 15.225,
"step": 30000
},
{
"epoch": 1.7634677740485807,
"grad_norm": 1.349098801612854,
"learning_rate": 0.0005178542579213835,
"loss": 1.1383,
"step": 31000
},
{
"epoch": 1.8203538312759542,
"grad_norm": 1.4453612565994263,
"learning_rate": 0.0005087524887650037,
"loss": 1.1391,
"step": 32000
},
{
"epoch": 1.8772398885033277,
"grad_norm": 1.0392345190048218,
"learning_rate": 0.0004996507196086239,
"loss": 1.1328,
"step": 33000
},
{
"epoch": 1.9341259457307014,
"grad_norm": 1.1520024538040161,
"learning_rate": 0.0004905489504522442,
"loss": 1.1238,
"step": 34000
},
{
"epoch": 1.9910120029580751,
"grad_norm": 1.515512228012085,
"learning_rate": 0.0004814471812958644,
"loss": 1.1217,
"step": 35000
},
{
"epoch": 1.9910120029580751,
"eval_accuracy": 0.724676,
"eval_loss": 1.0880111455917358,
"eval_runtime": 64.3813,
"eval_samples_per_second": 3883.115,
"eval_steps_per_second": 15.175,
"step": 35000
},
{
"epoch": 2.0478980601854486,
"grad_norm": 1.4771007299423218,
"learning_rate": 0.00047234541213948464,
"loss": 1.0919,
"step": 36000
},
{
"epoch": 2.104784117412822,
"grad_norm": 1.3845994472503662,
"learning_rate": 0.00046324364298310487,
"loss": 1.0838,
"step": 37000
},
{
"epoch": 2.1616701746401956,
"grad_norm": 1.250450611114502,
"learning_rate": 0.00045414187382672515,
"loss": 1.0785,
"step": 38000
},
{
"epoch": 2.218556231867569,
"grad_norm": 1.5783060789108276,
"learning_rate": 0.0004450401046703453,
"loss": 1.0753,
"step": 39000
},
{
"epoch": 2.2754422890949426,
"grad_norm": 1.7228904962539673,
"learning_rate": 0.0004359383355139656,
"loss": 1.0831,
"step": 40000
},
{
"epoch": 2.2754422890949426,
"eval_accuracy": 0.727968,
"eval_loss": 1.0728965997695923,
"eval_runtime": 64.3156,
"eval_samples_per_second": 3887.084,
"eval_steps_per_second": 15.191,
"step": 40000
},
{
"epoch": 2.3323283463223166,
"grad_norm": 1.333543062210083,
"learning_rate": 0.00042683656635758577,
"loss": 1.0798,
"step": 41000
},
{
"epoch": 2.38921440354969,
"grad_norm": 1.3213781118392944,
"learning_rate": 0.00041773479720120594,
"loss": 1.0804,
"step": 42000
},
{
"epoch": 2.4461004607770636,
"grad_norm": 1.43584406375885,
"learning_rate": 0.0004086330280448262,
"loss": 1.0713,
"step": 43000
},
{
"epoch": 2.502986518004437,
"grad_norm": 1.2614803314208984,
"learning_rate": 0.0003995312588884465,
"loss": 1.0697,
"step": 44000
},
{
"epoch": 2.5598725752318106,
"grad_norm": 1.1319971084594727,
"learning_rate": 0.0003904294897320667,
"loss": 1.0761,
"step": 45000
},
{
"epoch": 2.5598725752318106,
"eval_accuracy": 0.731168,
"eval_loss": 1.0593221187591553,
"eval_runtime": 64.6765,
"eval_samples_per_second": 3865.393,
"eval_steps_per_second": 15.106,
"step": 45000
},
{
"epoch": 2.6167586324591845,
"grad_norm": 1.2045773267745972,
"learning_rate": 0.00038132772057568694,
"loss": 1.0723,
"step": 46000
},
{
"epoch": 2.673644689686558,
"grad_norm": 1.3462469577789307,
"learning_rate": 0.00037222595141930717,
"loss": 1.067,
"step": 47000
},
{
"epoch": 2.7305307469139315,
"grad_norm": 1.3573272228240967,
"learning_rate": 0.0003631241822629274,
"loss": 1.0636,
"step": 48000
},
{
"epoch": 2.787416804141305,
"grad_norm": 1.2870041131973267,
"learning_rate": 0.0003540224131065476,
"loss": 1.0655,
"step": 49000
},
{
"epoch": 2.8443028613686785,
"grad_norm": 1.3287382125854492,
"learning_rate": 0.0003449206439501678,
"loss": 1.0565,
"step": 50000
},
{
"epoch": 2.8443028613686785,
"eval_accuracy": 0.734552,
"eval_loss": 1.0479968786239624,
"eval_runtime": 65.2161,
"eval_samples_per_second": 3833.412,
"eval_steps_per_second": 14.981,
"step": 50000
},
{
"epoch": 2.901188918596052,
"grad_norm": 1.384717345237732,
"learning_rate": 0.000335818874793788,
"loss": 1.0529,
"step": 51000
},
{
"epoch": 2.9580749758234255,
"grad_norm": 1.1834776401519775,
"learning_rate": 0.0003267171056374083,
"loss": 1.0608,
"step": 52000
},
{
"epoch": 3.0149610330507994,
"grad_norm": 1.0646686553955078,
"learning_rate": 0.0003176153364810285,
"loss": 1.0417,
"step": 53000
},
{
"epoch": 3.071847090278173,
"grad_norm": 1.348777174949646,
"learning_rate": 0.00030851356732464874,
"loss": 1.0168,
"step": 54000
},
{
"epoch": 3.1287331475055464,
"grad_norm": 1.2929068803787231,
"learning_rate": 0.00029941179816826897,
"loss": 1.0149,
"step": 55000
},
{
"epoch": 3.1287331475055464,
"eval_accuracy": 0.73796,
"eval_loss": 1.0355563163757324,
"eval_runtime": 66.0157,
"eval_samples_per_second": 3786.979,
"eval_steps_per_second": 14.8,
"step": 55000
},
{
"epoch": 3.18561920473292,
"grad_norm": 1.3426847457885742,
"learning_rate": 0.0002903100290118892,
"loss": 1.0145,
"step": 56000
},
{
"epoch": 3.2425052619602934,
"grad_norm": 1.3112365007400513,
"learning_rate": 0.0002812082598555094,
"loss": 1.013,
"step": 57000
},
{
"epoch": 3.299391319187667,
"grad_norm": 1.3956024646759033,
"learning_rate": 0.00027210649069912964,
"loss": 1.0117,
"step": 58000
},
{
"epoch": 3.356277376415041,
"grad_norm": 1.2679752111434937,
"learning_rate": 0.00026300472154274987,
"loss": 1.0155,
"step": 59000
},
{
"epoch": 3.4131634336424144,
"grad_norm": 1.5014774799346924,
"learning_rate": 0.0002539029523863701,
"loss": 1.0102,
"step": 60000
},
{
"epoch": 3.4131634336424144,
"eval_accuracy": 0.74012,
"eval_loss": 1.0263450145721436,
"eval_runtime": 64.1919,
"eval_samples_per_second": 3894.574,
"eval_steps_per_second": 15.22,
"step": 60000
},
{
"epoch": 3.470049490869788,
"grad_norm": 1.4669406414031982,
"learning_rate": 0.0002448011832299904,
"loss": 1.0145,
"step": 61000
},
{
"epoch": 3.5269355480971614,
"grad_norm": 1.3615577220916748,
"learning_rate": 0.00023569941407361057,
"loss": 1.0173,
"step": 62000
},
{
"epoch": 3.583821605324535,
"grad_norm": 1.126437783241272,
"learning_rate": 0.00022659764491723082,
"loss": 1.0125,
"step": 63000
},
{
"epoch": 3.6407076625519084,
"grad_norm": 1.2467857599258423,
"learning_rate": 0.00021749587576085105,
"loss": 1.0133,
"step": 64000
},
{
"epoch": 3.697593719779282,
"grad_norm": 1.3474713563919067,
"learning_rate": 0.00020839410660447127,
"loss": 1.0014,
"step": 65000
},
{
"epoch": 3.697593719779282,
"eval_accuracy": 0.743688,
"eval_loss": 1.0122489929199219,
"eval_runtime": 64.6438,
"eval_samples_per_second": 3867.347,
"eval_steps_per_second": 15.114,
"step": 65000
},
{
"epoch": 3.754479777006656,
"grad_norm": 1.3319435119628906,
"learning_rate": 0.00019929233744809147,
"loss": 1.0034,
"step": 66000
},
{
"epoch": 3.8113658342340293,
"grad_norm": 1.9685286283493042,
"learning_rate": 0.00019019056829171172,
"loss": 0.995,
"step": 67000
},
{
"epoch": 3.868251891461403,
"grad_norm": 1.2180532217025757,
"learning_rate": 0.00018108879913533195,
"loss": 1.0069,
"step": 68000
},
{
"epoch": 3.9251379486887763,
"grad_norm": 1.3233805894851685,
"learning_rate": 0.00017198702997895217,
"loss": 0.9983,
"step": 69000
},
{
"epoch": 3.98202400591615,
"grad_norm": 1.7491425275802612,
"learning_rate": 0.0001628852608225724,
"loss": 0.9972,
"step": 70000
},
{
"epoch": 3.98202400591615,
"eval_accuracy": 0.745936,
"eval_loss": 1.0027811527252197,
"eval_runtime": 65.7257,
"eval_samples_per_second": 3803.688,
"eval_steps_per_second": 14.865,
"step": 70000
},
{
"epoch": 4.038910063143524,
"grad_norm": 1.1467124223709106,
"learning_rate": 0.00015378349166619262,
"loss": 0.9752,
"step": 71000
},
{
"epoch": 4.095796120370897,
"grad_norm": 1.2129188776016235,
"learning_rate": 0.00014468172250981285,
"loss": 0.9652,
"step": 72000
},
{
"epoch": 4.152682177598271,
"grad_norm": 1.3177002668380737,
"learning_rate": 0.00013557995335343307,
"loss": 0.9615,
"step": 73000
},
{
"epoch": 4.209568234825644,
"grad_norm": 1.1324489116668701,
"learning_rate": 0.0001264781841970533,
"loss": 0.9629,
"step": 74000
},
{
"epoch": 4.266454292053018,
"grad_norm": 1.2428852319717407,
"learning_rate": 0.00011737641504067354,
"loss": 0.9556,
"step": 75000
},
{
"epoch": 4.266454292053018,
"eval_accuracy": 0.747436,
"eval_loss": 0.9971279501914978,
"eval_runtime": 66.3789,
"eval_samples_per_second": 3766.258,
"eval_steps_per_second": 14.719,
"step": 75000
},
{
"epoch": 4.323340349280391,
"grad_norm": 1.4413901567459106,
"learning_rate": 0.00010827464588429376,
"loss": 0.9616,
"step": 76000
},
{
"epoch": 4.380226406507765,
"grad_norm": 1.312136173248291,
"learning_rate": 9.917287672791399e-05,
"loss": 0.9657,
"step": 77000
},
{
"epoch": 4.437112463735138,
"grad_norm": 1.3660274744033813,
"learning_rate": 9.007110757153423e-05,
"loss": 0.9613,
"step": 78000
},
{
"epoch": 4.493998520962512,
"grad_norm": 1.4278331995010376,
"learning_rate": 8.096933841515445e-05,
"loss": 0.9576,
"step": 79000
},
{
"epoch": 4.550884578189885,
"grad_norm": 1.20628821849823,
"learning_rate": 7.186756925877468e-05,
"loss": 0.9606,
"step": 80000
},
{
"epoch": 4.550884578189885,
"eval_accuracy": 0.749644,
"eval_loss": 0.990385890007019,
"eval_runtime": 65.0093,
"eval_samples_per_second": 3845.605,
"eval_steps_per_second": 15.029,
"step": 80000
},
{
"epoch": 4.607770635417259,
"grad_norm": 1.8617701530456543,
"learning_rate": 6.27658001023949e-05,
"loss": 0.954,
"step": 81000
},
{
"epoch": 4.664656692644633,
"grad_norm": 1.352597951889038,
"learning_rate": 5.366403094601513e-05,
"loss": 0.957,
"step": 82000
},
{
"epoch": 4.721542749872007,
"grad_norm": 1.4314864873886108,
"learning_rate": 4.4562261789635364e-05,
"loss": 0.9541,
"step": 83000
},
{
"epoch": 4.77842880709938,
"grad_norm": 1.2464176416397095,
"learning_rate": 3.5460492633255596e-05,
"loss": 0.9545,
"step": 84000
},
{
"epoch": 4.835314864326754,
"grad_norm": 1.4721029996871948,
"learning_rate": 2.6358723476875817e-05,
"loss": 0.9544,
"step": 85000
},
{
"epoch": 4.835314864326754,
"eval_accuracy": 0.750732,
"eval_loss": 0.9842203259468079,
"eval_runtime": 65.3657,
"eval_samples_per_second": 3824.637,
"eval_steps_per_second": 14.947,
"step": 85000
},
{
"epoch": 4.892200921554127,
"grad_norm": 1.383285403251648,
"learning_rate": 1.7256954320496046e-05,
"loss": 0.9556,
"step": 86000
},
{
"epoch": 4.949086978781501,
"grad_norm": 1.3051174879074097,
"learning_rate": 8.155185164116276e-06,
"loss": 0.9503,
"step": 87000
},
{
"epoch": 5.0,
"step": 87895,
"total_flos": 5.4597447576e+17,
"train_loss": 1.1272559640920097,
"train_runtime": 10316.2205,
"train_samples_per_second": 2181.031,
"train_steps_per_second": 8.52
}
],
"logging_steps": 1000,
"max_steps": 87895,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.4597447576e+17,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}