{
"best_metric": 0.37855324149131775,
"best_model_checkpoint": "xblock-large-patch3-224/checkpoint-1872",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1872,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04006410256410257,
"grad_norm": 8.251535415649414,
"learning_rate": 6.3829787234042555e-06,
"loss": 1.3524,
"step": 25
},
{
"epoch": 0.08012820512820513,
"grad_norm": 12.843944549560547,
"learning_rate": 1.3031914893617023e-05,
"loss": 0.8737,
"step": 50
},
{
"epoch": 0.1201923076923077,
"grad_norm": 15.241576194763184,
"learning_rate": 1.9414893617021276e-05,
"loss": 0.6916,
"step": 75
},
{
"epoch": 0.16025641025641027,
"grad_norm": 6.287885665893555,
"learning_rate": 2.6063829787234046e-05,
"loss": 0.8028,
"step": 100
},
{
"epoch": 0.20032051282051283,
"grad_norm": 4.597654819488525,
"learning_rate": 3.271276595744681e-05,
"loss": 0.7592,
"step": 125
},
{
"epoch": 0.2403846153846154,
"grad_norm": 3.4197440147399902,
"learning_rate": 3.936170212765958e-05,
"loss": 0.8431,
"step": 150
},
{
"epoch": 0.28044871794871795,
"grad_norm": 7.936722755432129,
"learning_rate": 4.601063829787234e-05,
"loss": 0.6737,
"step": 175
},
{
"epoch": 0.32051282051282054,
"grad_norm": 27.45574188232422,
"learning_rate": 4.970308788598575e-05,
"loss": 0.9851,
"step": 200
},
{
"epoch": 0.3605769230769231,
"grad_norm": 7.184787273406982,
"learning_rate": 4.896080760095012e-05,
"loss": 0.9612,
"step": 225
},
{
"epoch": 0.40064102564102566,
"grad_norm": 5.246083736419678,
"learning_rate": 4.821852731591449e-05,
"loss": 0.7614,
"step": 250
},
{
"epoch": 0.4407051282051282,
"grad_norm": 14.747861862182617,
"learning_rate": 4.7476247030878864e-05,
"loss": 0.9273,
"step": 275
},
{
"epoch": 0.4807692307692308,
"grad_norm": 10.44289779663086,
"learning_rate": 4.673396674584323e-05,
"loss": 0.8679,
"step": 300
},
{
"epoch": 0.5208333333333334,
"grad_norm": 11.92969799041748,
"learning_rate": 4.5991686460807604e-05,
"loss": 0.8816,
"step": 325
},
{
"epoch": 0.5608974358974359,
"grad_norm": 8.216315269470215,
"learning_rate": 4.524940617577197e-05,
"loss": 0.8063,
"step": 350
},
{
"epoch": 0.6009615384615384,
"grad_norm": 9.245816230773926,
"learning_rate": 4.4507125890736345e-05,
"loss": 0.8215,
"step": 375
},
{
"epoch": 0.6410256410256411,
"grad_norm": 8.622091293334961,
"learning_rate": 4.376484560570072e-05,
"loss": 0.9864,
"step": 400
},
{
"epoch": 0.6810897435897436,
"grad_norm": 4.5337419509887695,
"learning_rate": 4.3022565320665086e-05,
"loss": 0.6767,
"step": 425
},
{
"epoch": 0.7211538461538461,
"grad_norm": 6.817728519439697,
"learning_rate": 4.228028503562946e-05,
"loss": 0.8387,
"step": 450
},
{
"epoch": 0.7612179487179487,
"grad_norm": 10.498202323913574,
"learning_rate": 4.153800475059383e-05,
"loss": 0.7565,
"step": 475
},
{
"epoch": 0.8012820512820513,
"grad_norm": 13.513296127319336,
"learning_rate": 4.07957244655582e-05,
"loss": 0.6909,
"step": 500
},
{
"epoch": 0.8413461538461539,
"grad_norm": 13.574014663696289,
"learning_rate": 4.005344418052257e-05,
"loss": 0.757,
"step": 525
},
{
"epoch": 0.8814102564102564,
"grad_norm": 6.092533588409424,
"learning_rate": 3.9311163895486934e-05,
"loss": 0.8626,
"step": 550
},
{
"epoch": 0.9214743589743589,
"grad_norm": 14.241544723510742,
"learning_rate": 3.856888361045131e-05,
"loss": 0.6826,
"step": 575
},
{
"epoch": 0.9615384615384616,
"grad_norm": 4.413307189941406,
"learning_rate": 3.7826603325415675e-05,
"loss": 0.7986,
"step": 600
},
{
"epoch": 1.0,
"eval_accuracy": 0.844551282051282,
"eval_f1_macro": 0.8160973181841822,
"eval_f1_micro": 0.844551282051282,
"eval_f1_weighted": 0.8402659980370851,
"eval_loss": 0.44926634430885315,
"eval_precision_macro": 0.8683442897703473,
"eval_precision_micro": 0.844551282051282,
"eval_precision_weighted": 0.8585860519825896,
"eval_recall_macro": 0.7986028134342184,
"eval_recall_micro": 0.844551282051282,
"eval_recall_weighted": 0.844551282051282,
"eval_runtime": 1324.3117,
"eval_samples_per_second": 0.942,
"eval_steps_per_second": 0.059,
"step": 624
},
{
"epoch": 1.001602564102564,
"grad_norm": 4.381113052368164,
"learning_rate": 3.708432304038005e-05,
"loss": 0.861,
"step": 625
},
{
"epoch": 1.0416666666666667,
"grad_norm": 5.680240631103516,
"learning_rate": 3.6342042755344416e-05,
"loss": 0.6535,
"step": 650
},
{
"epoch": 1.0817307692307692,
"grad_norm": 5.435142993927002,
"learning_rate": 3.559976247030879e-05,
"loss": 0.7551,
"step": 675
},
{
"epoch": 1.1217948717948718,
"grad_norm": 18.080663681030273,
"learning_rate": 3.4857482185273164e-05,
"loss": 0.7618,
"step": 700
},
{
"epoch": 1.1618589743589745,
"grad_norm": 12.246950149536133,
"learning_rate": 3.411520190023753e-05,
"loss": 0.7952,
"step": 725
},
{
"epoch": 1.2019230769230769,
"grad_norm": 9.545283317565918,
"learning_rate": 3.3372921615201904e-05,
"loss": 0.6002,
"step": 750
},
{
"epoch": 1.2419871794871795,
"grad_norm": 8.819951057434082,
"learning_rate": 3.263064133016627e-05,
"loss": 0.7556,
"step": 775
},
{
"epoch": 1.282051282051282,
"grad_norm": 10.624255180358887,
"learning_rate": 3.1888361045130645e-05,
"loss": 0.7373,
"step": 800
},
{
"epoch": 1.3221153846153846,
"grad_norm": 7.088049411773682,
"learning_rate": 3.114608076009501e-05,
"loss": 0.6166,
"step": 825
},
{
"epoch": 1.3621794871794872,
"grad_norm": 7.568330764770508,
"learning_rate": 3.0403800475059386e-05,
"loss": 0.6451,
"step": 850
},
{
"epoch": 1.4022435897435899,
"grad_norm": 6.381913185119629,
"learning_rate": 2.9661520190023756e-05,
"loss": 0.6397,
"step": 875
},
{
"epoch": 1.4423076923076923,
"grad_norm": 9.472349166870117,
"learning_rate": 2.8919239904988127e-05,
"loss": 0.6227,
"step": 900
},
{
"epoch": 1.482371794871795,
"grad_norm": 4.770060062408447,
"learning_rate": 2.8176959619952497e-05,
"loss": 0.6374,
"step": 925
},
{
"epoch": 1.5224358974358974,
"grad_norm": 6.0947771072387695,
"learning_rate": 2.7434679334916867e-05,
"loss": 0.5955,
"step": 950
},
{
"epoch": 1.5625,
"grad_norm": 5.420393466949463,
"learning_rate": 2.6692399049881234e-05,
"loss": 0.7621,
"step": 975
},
{
"epoch": 1.6025641025641026,
"grad_norm": 8.125214576721191,
"learning_rate": 2.5950118764845605e-05,
"loss": 0.624,
"step": 1000
},
{
"epoch": 1.6426282051282053,
"grad_norm": 11.498839378356934,
"learning_rate": 2.5207838479809975e-05,
"loss": 0.6889,
"step": 1025
},
{
"epoch": 1.6826923076923077,
"grad_norm": 6.599166393280029,
"learning_rate": 2.446555819477435e-05,
"loss": 0.5743,
"step": 1050
},
{
"epoch": 1.7227564102564101,
"grad_norm": 18.760013580322266,
"learning_rate": 2.372327790973872e-05,
"loss": 0.6392,
"step": 1075
},
{
"epoch": 1.7628205128205128,
"grad_norm": 10.053047180175781,
"learning_rate": 2.2980997624703086e-05,
"loss": 0.7208,
"step": 1100
},
{
"epoch": 1.8028846153846154,
"grad_norm": 6.171570777893066,
"learning_rate": 2.223871733966746e-05,
"loss": 0.5817,
"step": 1125
},
{
"epoch": 1.842948717948718,
"grad_norm": 5.438927173614502,
"learning_rate": 2.149643705463183e-05,
"loss": 0.4576,
"step": 1150
},
{
"epoch": 1.8830128205128205,
"grad_norm": 8.946006774902344,
"learning_rate": 2.07541567695962e-05,
"loss": 0.695,
"step": 1175
},
{
"epoch": 1.9230769230769231,
"grad_norm": 7.02325439453125,
"learning_rate": 2.001187648456057e-05,
"loss": 0.6768,
"step": 1200
},
{
"epoch": 1.9631410256410255,
"grad_norm": 4.542496204376221,
"learning_rate": 1.926959619952494e-05,
"loss": 0.6592,
"step": 1225
},
{
"epoch": 2.0,
"eval_accuracy": 0.8717948717948718,
"eval_f1_macro": 0.8420194226887088,
"eval_f1_micro": 0.8717948717948718,
"eval_f1_weighted": 0.8664623444122613,
"eval_loss": 0.39590924978256226,
"eval_precision_macro": 0.8724657298009468,
"eval_precision_micro": 0.8717948717948718,
"eval_precision_weighted": 0.8825043036411662,
"eval_recall_macro": 0.8395777006090396,
"eval_recall_micro": 0.8717948717948718,
"eval_recall_weighted": 0.8717948717948718,
"eval_runtime": 1293.5897,
"eval_samples_per_second": 0.965,
"eval_steps_per_second": 0.06,
"step": 1248
},
{
"epoch": 2.003205128205128,
"grad_norm": 7.841921329498291,
"learning_rate": 1.8527315914489312e-05,
"loss": 0.6025,
"step": 1250
},
{
"epoch": 2.043269230769231,
"grad_norm": 9.250267028808594,
"learning_rate": 1.7785035629453682e-05,
"loss": 0.4597,
"step": 1275
},
{
"epoch": 2.0833333333333335,
"grad_norm": 3.1655519008636475,
"learning_rate": 1.7042755344418056e-05,
"loss": 0.3962,
"step": 1300
},
{
"epoch": 2.123397435897436,
"grad_norm": 5.661818981170654,
"learning_rate": 1.6300475059382423e-05,
"loss": 0.6639,
"step": 1325
},
{
"epoch": 2.1634615384615383,
"grad_norm": 3.282247304916382,
"learning_rate": 1.5558194774346793e-05,
"loss": 0.464,
"step": 1350
},
{
"epoch": 2.203525641025641,
"grad_norm": 3.829854965209961,
"learning_rate": 1.4815914489311164e-05,
"loss": 0.4943,
"step": 1375
},
{
"epoch": 2.2435897435897436,
"grad_norm": 5.670718193054199,
"learning_rate": 1.4073634204275534e-05,
"loss": 0.6288,
"step": 1400
},
{
"epoch": 2.2836538461538463,
"grad_norm": 10.35056209564209,
"learning_rate": 1.3331353919239906e-05,
"loss": 0.4846,
"step": 1425
},
{
"epoch": 2.323717948717949,
"grad_norm": 0.6631863117218018,
"learning_rate": 1.2589073634204277e-05,
"loss": 0.3622,
"step": 1450
},
{
"epoch": 2.363782051282051,
"grad_norm": 6.619789123535156,
"learning_rate": 1.1846793349168647e-05,
"loss": 0.5013,
"step": 1475
},
{
"epoch": 2.4038461538461537,
"grad_norm": 6.15729284286499,
"learning_rate": 1.1104513064133017e-05,
"loss": 0.4955,
"step": 1500
},
{
"epoch": 2.4439102564102564,
"grad_norm": 0.05698273330926895,
"learning_rate": 1.0362232779097388e-05,
"loss": 0.4693,
"step": 1525
},
{
"epoch": 2.483974358974359,
"grad_norm": 3.433866262435913,
"learning_rate": 9.619952494061758e-06,
"loss": 0.4221,
"step": 1550
},
{
"epoch": 2.5240384615384617,
"grad_norm": 6.545916557312012,
"learning_rate": 8.877672209026128e-06,
"loss": 0.6492,
"step": 1575
},
{
"epoch": 2.564102564102564,
"grad_norm": 12.447614669799805,
"learning_rate": 8.135391923990499e-06,
"loss": 0.4888,
"step": 1600
},
{
"epoch": 2.6041666666666665,
"grad_norm": 6.291492462158203,
"learning_rate": 7.393111638954869e-06,
"loss": 0.4994,
"step": 1625
},
{
"epoch": 2.644230769230769,
"grad_norm": 5.843363285064697,
"learning_rate": 6.6508313539192404e-06,
"loss": 0.6511,
"step": 1650
},
{
"epoch": 2.684294871794872,
"grad_norm": 6.965985298156738,
"learning_rate": 5.908551068883611e-06,
"loss": 0.4697,
"step": 1675
},
{
"epoch": 2.7243589743589745,
"grad_norm": 7.191352367401123,
"learning_rate": 5.166270783847981e-06,
"loss": 0.4224,
"step": 1700
},
{
"epoch": 2.7644230769230766,
"grad_norm": 14.876649856567383,
"learning_rate": 4.4239904988123516e-06,
"loss": 0.6627,
"step": 1725
},
{
"epoch": 2.8044871794871797,
"grad_norm": 6.894955158233643,
"learning_rate": 3.681710213776722e-06,
"loss": 0.5652,
"step": 1750
},
{
"epoch": 2.844551282051282,
"grad_norm": 5.6652984619140625,
"learning_rate": 2.9394299287410927e-06,
"loss": 0.6411,
"step": 1775
},
{
"epoch": 2.8846153846153846,
"grad_norm": 8.991388320922852,
"learning_rate": 2.197149643705463e-06,
"loss": 0.4483,
"step": 1800
},
{
"epoch": 2.9246794871794872,
"grad_norm": 8.376466751098633,
"learning_rate": 1.4548693586698337e-06,
"loss": 0.4551,
"step": 1825
},
{
"epoch": 2.96474358974359,
"grad_norm": 3.4173240661621094,
"learning_rate": 7.422802850356294e-07,
"loss": 0.4227,
"step": 1850
},
{
"epoch": 3.0,
"eval_accuracy": 0.8886217948717948,
"eval_f1_macro": 0.86094260720702,
"eval_f1_micro": 0.8886217948717948,
"eval_f1_weighted": 0.883196165156119,
"eval_loss": 0.37855324149131775,
"eval_precision_macro": 0.8961444617693151,
"eval_precision_micro": 0.8886217948717948,
"eval_precision_weighted": 0.8922651559280282,
"eval_recall_macro": 0.8524486181675118,
"eval_recall_micro": 0.8886217948717948,
"eval_recall_weighted": 0.8886217948717948,
"eval_runtime": 1323.0713,
"eval_samples_per_second": 0.943,
"eval_steps_per_second": 0.059,
"step": 1872
}
],
"logging_steps": 25,
"max_steps": 1872,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 4.1009193984929587e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}