howdyaendra's picture
Upload folder using huggingface_hub
327ffdb verified
{
"best_metric": 0.4469132423400879,
"best_model_checkpoint": "xblock-base-patch1-224/checkpoint-2253",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2253,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 9.94556713104248,
"learning_rate": 5.088495575221239e-06,
"loss": 1.5614,
"step": 25
},
{
"epoch": 0.07,
"grad_norm": 9.733675956726074,
"learning_rate": 1.0619469026548673e-05,
"loss": 1.4735,
"step": 50
},
{
"epoch": 0.1,
"grad_norm": 9.123199462890625,
"learning_rate": 1.6150442477876106e-05,
"loss": 1.3293,
"step": 75
},
{
"epoch": 0.13,
"grad_norm": 10.894118309020996,
"learning_rate": 2.1681415929203542e-05,
"loss": 1.2442,
"step": 100
},
{
"epoch": 0.17,
"grad_norm": 8.819581985473633,
"learning_rate": 2.721238938053097e-05,
"loss": 1.0742,
"step": 125
},
{
"epoch": 0.2,
"grad_norm": 13.125239372253418,
"learning_rate": 3.274336283185841e-05,
"loss": 1.1571,
"step": 150
},
{
"epoch": 0.23,
"grad_norm": 8.423856735229492,
"learning_rate": 3.827433628318584e-05,
"loss": 1.1193,
"step": 175
},
{
"epoch": 0.27,
"grad_norm": 11.360130310058594,
"learning_rate": 4.380530973451328e-05,
"loss": 1.0605,
"step": 200
},
{
"epoch": 0.3,
"grad_norm": 7.3759379386901855,
"learning_rate": 4.9336283185840707e-05,
"loss": 0.9883,
"step": 225
},
{
"epoch": 0.33,
"grad_norm": 12.12755298614502,
"learning_rate": 4.9457326097681306e-05,
"loss": 0.9764,
"step": 250
},
{
"epoch": 0.37,
"grad_norm": 5.0697712898254395,
"learning_rate": 4.884065120868278e-05,
"loss": 1.1213,
"step": 275
},
{
"epoch": 0.4,
"grad_norm": 8.178693771362305,
"learning_rate": 4.822397631968427e-05,
"loss": 0.9683,
"step": 300
},
{
"epoch": 0.43,
"grad_norm": 7.6824846267700195,
"learning_rate": 4.7607301430685744e-05,
"loss": 1.0408,
"step": 325
},
{
"epoch": 0.47,
"grad_norm": 10.855768203735352,
"learning_rate": 4.699062654168723e-05,
"loss": 0.9092,
"step": 350
},
{
"epoch": 0.5,
"grad_norm": 6.187016487121582,
"learning_rate": 4.6373951652688704e-05,
"loss": 1.0058,
"step": 375
},
{
"epoch": 0.53,
"grad_norm": 8.00668716430664,
"learning_rate": 4.575727676369018e-05,
"loss": 0.8086,
"step": 400
},
{
"epoch": 0.57,
"grad_norm": 5.1038336753845215,
"learning_rate": 4.5140601874691665e-05,
"loss": 0.9027,
"step": 425
},
{
"epoch": 0.6,
"grad_norm": 10.569095611572266,
"learning_rate": 4.452392698569314e-05,
"loss": 0.8234,
"step": 450
},
{
"epoch": 0.63,
"grad_norm": 8.256200790405273,
"learning_rate": 4.3907252096694626e-05,
"loss": 0.9163,
"step": 475
},
{
"epoch": 0.67,
"grad_norm": 11.042557716369629,
"learning_rate": 4.32905772076961e-05,
"loss": 0.8243,
"step": 500
},
{
"epoch": 0.7,
"grad_norm": 18.462459564208984,
"learning_rate": 4.267390231869759e-05,
"loss": 0.8703,
"step": 525
},
{
"epoch": 0.73,
"grad_norm": 12.024648666381836,
"learning_rate": 4.2057227429699064e-05,
"loss": 0.991,
"step": 550
},
{
"epoch": 0.77,
"grad_norm": 11.26354694366455,
"learning_rate": 4.144055254070055e-05,
"loss": 0.8402,
"step": 575
},
{
"epoch": 0.8,
"grad_norm": 8.839094161987305,
"learning_rate": 4.0823877651702024e-05,
"loss": 0.7653,
"step": 600
},
{
"epoch": 0.83,
"grad_norm": 10.799356460571289,
"learning_rate": 4.020720276270351e-05,
"loss": 0.7787,
"step": 625
},
{
"epoch": 0.87,
"grad_norm": 12.935748100280762,
"learning_rate": 3.9590527873704985e-05,
"loss": 0.7738,
"step": 650
},
{
"epoch": 0.9,
"grad_norm": 4.829887866973877,
"learning_rate": 3.897385298470647e-05,
"loss": 0.8329,
"step": 675
},
{
"epoch": 0.93,
"grad_norm": 4.532620429992676,
"learning_rate": 3.8357178095707946e-05,
"loss": 0.9689,
"step": 700
},
{
"epoch": 0.97,
"grad_norm": 8.8261079788208,
"learning_rate": 3.774050320670943e-05,
"loss": 0.7615,
"step": 725
},
{
"epoch": 1.0,
"grad_norm": 3.9906632900238037,
"learning_rate": 3.712382831771091e-05,
"loss": 0.8284,
"step": 750
},
{
"epoch": 1.0,
"eval_accuracy": 0.7503328894806924,
"eval_f1_macro": 0.7379493476306923,
"eval_f1_micro": 0.7503328894806925,
"eval_f1_weighted": 0.7478927601803307,
"eval_loss": 0.6315314769744873,
"eval_precision_macro": 0.7619988736851466,
"eval_precision_micro": 0.7503328894806924,
"eval_precision_weighted": 0.7668596523942972,
"eval_recall_macro": 0.7356084437086093,
"eval_recall_micro": 0.7503328894806924,
"eval_recall_weighted": 0.7503328894806924,
"eval_runtime": 103.3083,
"eval_samples_per_second": 14.539,
"eval_steps_per_second": 0.91,
"step": 751
},
{
"epoch": 1.03,
"grad_norm": 7.72317361831665,
"learning_rate": 3.650715342871239e-05,
"loss": 0.8196,
"step": 775
},
{
"epoch": 1.07,
"grad_norm": 6.5625152587890625,
"learning_rate": 3.589047853971386e-05,
"loss": 0.7794,
"step": 800
},
{
"epoch": 1.1,
"grad_norm": 5.599926948547363,
"learning_rate": 3.5273803650715344e-05,
"loss": 0.6632,
"step": 825
},
{
"epoch": 1.13,
"grad_norm": 9.425518989562988,
"learning_rate": 3.465712876171682e-05,
"loss": 0.7074,
"step": 850
},
{
"epoch": 1.17,
"grad_norm": 8.80082893371582,
"learning_rate": 3.4040453872718305e-05,
"loss": 0.7325,
"step": 875
},
{
"epoch": 1.2,
"grad_norm": 11.81970500946045,
"learning_rate": 3.342377898371978e-05,
"loss": 0.8574,
"step": 900
},
{
"epoch": 1.23,
"grad_norm": 8.871489524841309,
"learning_rate": 3.2807104094721266e-05,
"loss": 0.8407,
"step": 925
},
{
"epoch": 1.26,
"grad_norm": 7.296131610870361,
"learning_rate": 3.219042920572274e-05,
"loss": 0.6962,
"step": 950
},
{
"epoch": 1.3,
"grad_norm": 8.161062240600586,
"learning_rate": 3.1573754316724227e-05,
"loss": 0.8163,
"step": 975
},
{
"epoch": 1.33,
"grad_norm": 14.00735092163086,
"learning_rate": 3.0957079427725704e-05,
"loss": 0.6706,
"step": 1000
},
{
"epoch": 1.36,
"grad_norm": 10.980608940124512,
"learning_rate": 3.0340404538727184e-05,
"loss": 0.6639,
"step": 1025
},
{
"epoch": 1.4,
"grad_norm": 6.72366189956665,
"learning_rate": 2.9723729649728664e-05,
"loss": 0.7,
"step": 1050
},
{
"epoch": 1.43,
"grad_norm": 24.283390045166016,
"learning_rate": 2.9107054760730145e-05,
"loss": 0.7156,
"step": 1075
},
{
"epoch": 1.46,
"grad_norm": 4.696579933166504,
"learning_rate": 2.8490379871731625e-05,
"loss": 0.5984,
"step": 1100
},
{
"epoch": 1.5,
"grad_norm": 6.687375545501709,
"learning_rate": 2.7873704982733105e-05,
"loss": 0.6752,
"step": 1125
},
{
"epoch": 1.53,
"grad_norm": 12.651863098144531,
"learning_rate": 2.7257030093734586e-05,
"loss": 0.8265,
"step": 1150
},
{
"epoch": 1.56,
"grad_norm": 5.703587532043457,
"learning_rate": 2.6640355204736066e-05,
"loss": 0.6061,
"step": 1175
},
{
"epoch": 1.6,
"grad_norm": 12.219403266906738,
"learning_rate": 2.6023680315737543e-05,
"loss": 0.5414,
"step": 1200
},
{
"epoch": 1.63,
"grad_norm": 13.479424476623535,
"learning_rate": 2.5407005426739024e-05,
"loss": 0.6115,
"step": 1225
},
{
"epoch": 1.66,
"grad_norm": 2.6645803451538086,
"learning_rate": 2.4790330537740504e-05,
"loss": 0.7061,
"step": 1250
},
{
"epoch": 1.7,
"grad_norm": 7.649036884307861,
"learning_rate": 2.4173655648741984e-05,
"loss": 0.5908,
"step": 1275
},
{
"epoch": 1.73,
"grad_norm": 7.877263069152832,
"learning_rate": 2.3556980759743465e-05,
"loss": 0.5877,
"step": 1300
},
{
"epoch": 1.76,
"grad_norm": 3.582003355026245,
"learning_rate": 2.2940305870744945e-05,
"loss": 0.6062,
"step": 1325
},
{
"epoch": 1.8,
"grad_norm": 11.514890670776367,
"learning_rate": 2.2323630981746425e-05,
"loss": 0.5983,
"step": 1350
},
{
"epoch": 1.83,
"grad_norm": 14.150575637817383,
"learning_rate": 2.1706956092747906e-05,
"loss": 0.6691,
"step": 1375
},
{
"epoch": 1.86,
"grad_norm": 5.117061138153076,
"learning_rate": 2.1090281203749386e-05,
"loss": 0.6876,
"step": 1400
},
{
"epoch": 1.9,
"grad_norm": 6.246029376983643,
"learning_rate": 2.0473606314750867e-05,
"loss": 0.6186,
"step": 1425
},
{
"epoch": 1.93,
"grad_norm": 8.04295825958252,
"learning_rate": 1.9856931425752347e-05,
"loss": 0.7033,
"step": 1450
},
{
"epoch": 1.96,
"grad_norm": 6.798038482666016,
"learning_rate": 1.9240256536753827e-05,
"loss": 0.6698,
"step": 1475
},
{
"epoch": 2.0,
"grad_norm": 7.464223384857178,
"learning_rate": 1.8623581647755304e-05,
"loss": 0.6421,
"step": 1500
},
{
"epoch": 2.0,
"eval_accuracy": 0.8062583222370173,
"eval_f1_macro": 0.8030885441145199,
"eval_f1_micro": 0.8062583222370173,
"eval_f1_weighted": 0.8081177448606712,
"eval_loss": 0.49981948733329773,
"eval_precision_macro": 0.8105725964724615,
"eval_precision_micro": 0.8062583222370173,
"eval_precision_weighted": 0.8226218541008891,
"eval_recall_macro": 0.8098096026490066,
"eval_recall_micro": 0.8062583222370173,
"eval_recall_weighted": 0.8062583222370173,
"eval_runtime": 108.6859,
"eval_samples_per_second": 13.82,
"eval_steps_per_second": 0.865,
"step": 1502
},
{
"epoch": 2.03,
"grad_norm": 7.260876655578613,
"learning_rate": 1.8006906758756785e-05,
"loss": 0.6168,
"step": 1525
},
{
"epoch": 2.06,
"grad_norm": 10.966588973999023,
"learning_rate": 1.7390231869758265e-05,
"loss": 0.6235,
"step": 1550
},
{
"epoch": 2.1,
"grad_norm": 7.024848937988281,
"learning_rate": 1.6773556980759745e-05,
"loss": 0.5583,
"step": 1575
},
{
"epoch": 2.13,
"grad_norm": 7.464731693267822,
"learning_rate": 1.6156882091761226e-05,
"loss": 0.5755,
"step": 1600
},
{
"epoch": 2.16,
"grad_norm": 3.144723892211914,
"learning_rate": 1.5540207202762706e-05,
"loss": 0.5972,
"step": 1625
},
{
"epoch": 2.2,
"grad_norm": 10.064537048339844,
"learning_rate": 1.4923532313764185e-05,
"loss": 0.583,
"step": 1650
},
{
"epoch": 2.23,
"grad_norm": 8.008367538452148,
"learning_rate": 1.4306857424765665e-05,
"loss": 0.5201,
"step": 1675
},
{
"epoch": 2.26,
"grad_norm": 6.358066082000732,
"learning_rate": 1.3690182535767144e-05,
"loss": 0.5662,
"step": 1700
},
{
"epoch": 2.3,
"grad_norm": 6.239820957183838,
"learning_rate": 1.3073507646768624e-05,
"loss": 0.5854,
"step": 1725
},
{
"epoch": 2.33,
"grad_norm": 6.083053112030029,
"learning_rate": 1.2456832757770105e-05,
"loss": 0.5655,
"step": 1750
},
{
"epoch": 2.36,
"grad_norm": 17.246747970581055,
"learning_rate": 1.1840157868771585e-05,
"loss": 0.6351,
"step": 1775
},
{
"epoch": 2.4,
"grad_norm": 11.279265403747559,
"learning_rate": 1.1223482979773065e-05,
"loss": 0.5639,
"step": 1800
},
{
"epoch": 2.43,
"grad_norm": 6.290694713592529,
"learning_rate": 1.0606808090774544e-05,
"loss": 0.5125,
"step": 1825
},
{
"epoch": 2.46,
"grad_norm": 12.703798294067383,
"learning_rate": 9.990133201776024e-06,
"loss": 0.6839,
"step": 1850
},
{
"epoch": 2.5,
"grad_norm": 5.460929870605469,
"learning_rate": 9.373458312777505e-06,
"loss": 0.5766,
"step": 1875
},
{
"epoch": 2.53,
"grad_norm": 8.471376419067383,
"learning_rate": 8.756783423778985e-06,
"loss": 0.5011,
"step": 1900
},
{
"epoch": 2.56,
"grad_norm": 5.83112096786499,
"learning_rate": 8.140108534780466e-06,
"loss": 0.4984,
"step": 1925
},
{
"epoch": 2.6,
"grad_norm": 7.202915668487549,
"learning_rate": 7.523433645781943e-06,
"loss": 0.6013,
"step": 1950
},
{
"epoch": 2.63,
"grad_norm": 7.630007266998291,
"learning_rate": 6.906758756783424e-06,
"loss": 0.4976,
"step": 1975
},
{
"epoch": 2.66,
"grad_norm": 2.946850061416626,
"learning_rate": 6.290083867784904e-06,
"loss": 0.4218,
"step": 2000
},
{
"epoch": 2.7,
"grad_norm": 12.109963417053223,
"learning_rate": 5.6734089787863845e-06,
"loss": 0.5818,
"step": 2025
},
{
"epoch": 2.73,
"grad_norm": 12.913744926452637,
"learning_rate": 5.056734089787865e-06,
"loss": 0.5638,
"step": 2050
},
{
"epoch": 2.76,
"grad_norm": 21.241952896118164,
"learning_rate": 4.464726196349285e-06,
"loss": 0.6451,
"step": 2075
},
{
"epoch": 2.8,
"grad_norm": 10.033452987670898,
"learning_rate": 3.848051307350765e-06,
"loss": 0.5067,
"step": 2100
},
{
"epoch": 2.83,
"grad_norm": 15.357681274414062,
"learning_rate": 3.231376418352245e-06,
"loss": 0.6292,
"step": 2125
},
{
"epoch": 2.86,
"grad_norm": 10.886502265930176,
"learning_rate": 2.614701529353725e-06,
"loss": 0.6365,
"step": 2150
},
{
"epoch": 2.9,
"grad_norm": 6.1179986000061035,
"learning_rate": 1.998026640355205e-06,
"loss": 0.5717,
"step": 2175
},
{
"epoch": 2.93,
"grad_norm": 8.245763778686523,
"learning_rate": 1.381351751356685e-06,
"loss": 0.5669,
"step": 2200
},
{
"epoch": 2.96,
"grad_norm": 9.85698413848877,
"learning_rate": 7.646768623581648e-07,
"loss": 0.4571,
"step": 2225
},
{
"epoch": 3.0,
"grad_norm": 16.406970977783203,
"learning_rate": 1.480019733596448e-07,
"loss": 0.5549,
"step": 2250
},
{
"epoch": 3.0,
"eval_accuracy": 0.829560585885486,
"eval_f1_macro": 0.8236777117298302,
"eval_f1_micro": 0.829560585885486,
"eval_f1_weighted": 0.8289271724966029,
"eval_loss": 0.4469132423400879,
"eval_precision_macro": 0.8243514221166717,
"eval_precision_micro": 0.829560585885486,
"eval_precision_weighted": 0.8313607282611274,
"eval_recall_macro": 0.8260057947019868,
"eval_recall_micro": 0.829560585885486,
"eval_recall_weighted": 0.829560585885486,
"eval_runtime": 107.4762,
"eval_samples_per_second": 13.975,
"eval_steps_per_second": 0.875,
"step": 2253
}
],
"logging_steps": 25,
"max_steps": 2253,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.3962756971819336e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}