{
"best_metric": 0.9025482535362244,
"best_model_checkpoint": "xblock-large-patch1-224/checkpoint-498",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 498,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": Infinity,
"learning_rate": 7.000000000000001e-06,
"loss": 2.4849,
"step": 8
},
{
"epoch": 0.1,
"grad_norm": 11.997095108032227,
"learning_rate": 1.5e-05,
"loss": 2.3188,
"step": 16
},
{
"epoch": 0.14,
"grad_norm": 12.830357551574707,
"learning_rate": 2.2000000000000003e-05,
"loss": 2.1147,
"step": 24
},
{
"epoch": 0.19,
"grad_norm": 11.86018180847168,
"learning_rate": 3e-05,
"loss": 1.9589,
"step": 32
},
{
"epoch": 0.24,
"grad_norm": 13.479438781738281,
"learning_rate": 3.8e-05,
"loss": 1.7637,
"step": 40
},
{
"epoch": 0.29,
"grad_norm": 13.69570255279541,
"learning_rate": 4.600000000000001e-05,
"loss": 1.8184,
"step": 48
},
{
"epoch": 0.34,
"grad_norm": 9.349321365356445,
"learning_rate": 4.955357142857143e-05,
"loss": 2.0031,
"step": 56
},
{
"epoch": 0.39,
"grad_norm": 11.169551849365234,
"learning_rate": 4.866071428571429e-05,
"loss": 1.7263,
"step": 64
},
{
"epoch": 0.43,
"grad_norm": 9.97819995880127,
"learning_rate": 4.7767857142857144e-05,
"loss": 1.9287,
"step": 72
},
{
"epoch": 0.48,
"grad_norm": 11.742727279663086,
"learning_rate": 4.6875e-05,
"loss": 1.814,
"step": 80
},
{
"epoch": 0.53,
"grad_norm": 8.304338455200195,
"learning_rate": 4.598214285714286e-05,
"loss": 1.4417,
"step": 88
},
{
"epoch": 0.58,
"grad_norm": 10.799261093139648,
"learning_rate": 4.5089285714285714e-05,
"loss": 1.7538,
"step": 96
},
{
"epoch": 0.63,
"grad_norm": 8.281989097595215,
"learning_rate": 4.419642857142857e-05,
"loss": 1.7161,
"step": 104
},
{
"epoch": 0.67,
"grad_norm": 7.088228225708008,
"learning_rate": 4.3303571428571435e-05,
"loss": 1.7526,
"step": 112
},
{
"epoch": 0.72,
"grad_norm": 9.898828506469727,
"learning_rate": 4.2410714285714285e-05,
"loss": 1.8377,
"step": 120
},
{
"epoch": 0.77,
"grad_norm": 9.417756080627441,
"learning_rate": 4.151785714285715e-05,
"loss": 1.6742,
"step": 128
},
{
"epoch": 0.82,
"grad_norm": 10.03836441040039,
"learning_rate": 4.0625000000000005e-05,
"loss": 1.5157,
"step": 136
},
{
"epoch": 0.87,
"grad_norm": 10.27881908416748,
"learning_rate": 3.9732142857142855e-05,
"loss": 1.5828,
"step": 144
},
{
"epoch": 0.92,
"grad_norm": 10.77905559539795,
"learning_rate": 3.883928571428572e-05,
"loss": 1.3418,
"step": 152
},
{
"epoch": 0.96,
"grad_norm": 12.170005798339844,
"learning_rate": 3.794642857142857e-05,
"loss": 1.8624,
"step": 160
},
{
"epoch": 1.0,
"eval_accuracy": 0.5271084337349398,
"eval_f1_macro": 0.4518532713560705,
"eval_f1_micro": 0.5271084337349398,
"eval_f1_weighted": 0.496654440865482,
"eval_loss": 1.4107117652893066,
"eval_precision_macro": 0.5016472507129397,
"eval_precision_micro": 0.5271084337349398,
"eval_precision_weighted": 0.5568483249244561,
"eval_recall_macro": 0.47663109756097566,
"eval_recall_micro": 0.5271084337349398,
"eval_recall_weighted": 0.5271084337349398,
"eval_runtime": 316.798,
"eval_samples_per_second": 1.048,
"eval_steps_per_second": 0.066,
"step": 166
},
{
"epoch": 1.01,
"grad_norm": 8.142753601074219,
"learning_rate": 3.716517857142857e-05,
"loss": 1.5166,
"step": 168
},
{
"epoch": 1.06,
"grad_norm": 5.322903633117676,
"learning_rate": 3.627232142857143e-05,
"loss": 1.1869,
"step": 176
},
{
"epoch": 1.11,
"grad_norm": 9.545618057250977,
"learning_rate": 3.5379464285714287e-05,
"loss": 1.3475,
"step": 184
},
{
"epoch": 1.16,
"grad_norm": 13.654799461364746,
"learning_rate": 3.448660714285715e-05,
"loss": 1.4239,
"step": 192
},
{
"epoch": 1.2,
"grad_norm": 8.991721153259277,
"learning_rate": 3.359375e-05,
"loss": 1.3054,
"step": 200
},
{
"epoch": 1.25,
"grad_norm": 12.191709518432617,
"learning_rate": 3.270089285714286e-05,
"loss": 1.1966,
"step": 208
},
{
"epoch": 1.3,
"grad_norm": 9.003186225891113,
"learning_rate": 3.1808035714285713e-05,
"loss": 1.2069,
"step": 216
},
{
"epoch": 1.35,
"grad_norm": 12.034103393554688,
"learning_rate": 3.091517857142857e-05,
"loss": 1.4924,
"step": 224
},
{
"epoch": 1.4,
"grad_norm": 12.357869148254395,
"learning_rate": 3.013392857142857e-05,
"loss": 1.4235,
"step": 232
},
{
"epoch": 1.45,
"grad_norm": 13.986096382141113,
"learning_rate": 2.9241071428571432e-05,
"loss": 1.3803,
"step": 240
},
{
"epoch": 1.49,
"grad_norm": 10.213234901428223,
"learning_rate": 2.8348214285714285e-05,
"loss": 1.2811,
"step": 248
},
{
"epoch": 1.54,
"grad_norm": 11.94521713256836,
"learning_rate": 2.7455357142857145e-05,
"loss": 1.3474,
"step": 256
},
{
"epoch": 1.59,
"grad_norm": 7.413544654846191,
"learning_rate": 2.6562500000000002e-05,
"loss": 1.2188,
"step": 264
},
{
"epoch": 1.64,
"grad_norm": 6.420960426330566,
"learning_rate": 2.5669642857142855e-05,
"loss": 1.195,
"step": 272
},
{
"epoch": 1.69,
"grad_norm": 7.711160659790039,
"learning_rate": 2.4776785714285715e-05,
"loss": 1.4389,
"step": 280
},
{
"epoch": 1.73,
"grad_norm": 7.766310214996338,
"learning_rate": 2.3883928571428572e-05,
"loss": 1.5062,
"step": 288
},
{
"epoch": 1.78,
"grad_norm": 11.04636001586914,
"learning_rate": 2.299107142857143e-05,
"loss": 1.2456,
"step": 296
},
{
"epoch": 1.83,
"grad_norm": 7.171872138977051,
"learning_rate": 2.2098214285714286e-05,
"loss": 0.7718,
"step": 304
},
{
"epoch": 1.88,
"grad_norm": 9.676796913146973,
"learning_rate": 2.1205357142857142e-05,
"loss": 1.0983,
"step": 312
},
{
"epoch": 1.93,
"grad_norm": 9.70329761505127,
"learning_rate": 2.0312500000000002e-05,
"loss": 1.0594,
"step": 320
},
{
"epoch": 1.98,
"grad_norm": 10.712843894958496,
"learning_rate": 1.941964285714286e-05,
"loss": 1.2865,
"step": 328
},
{
"epoch": 2.0,
"eval_accuracy": 0.6295180722891566,
"eval_f1_macro": 0.6326430342148868,
"eval_f1_micro": 0.6295180722891566,
"eval_f1_weighted": 0.6385426615207972,
"eval_loss": 1.0817334651947021,
"eval_precision_macro": 0.6845586183973281,
"eval_precision_micro": 0.6295180722891566,
"eval_precision_weighted": 0.6861824846026167,
"eval_recall_macro": 0.6314774629363941,
"eval_recall_micro": 0.6295180722891566,
"eval_recall_weighted": 0.6295180722891566,
"eval_runtime": 311.4659,
"eval_samples_per_second": 1.066,
"eval_steps_per_second": 0.067,
"step": 332
},
{
"epoch": 2.02,
"grad_norm": 12.722149848937988,
"learning_rate": 1.8526785714285716e-05,
"loss": 1.0814,
"step": 336
},
{
"epoch": 2.07,
"grad_norm": 7.002964019775391,
"learning_rate": 1.7633928571428573e-05,
"loss": 0.8135,
"step": 344
},
{
"epoch": 2.12,
"grad_norm": 7.798354625701904,
"learning_rate": 1.674107142857143e-05,
"loss": 0.9007,
"step": 352
},
{
"epoch": 2.17,
"grad_norm": 5.395328998565674,
"learning_rate": 1.5848214285714286e-05,
"loss": 0.8786,
"step": 360
},
{
"epoch": 2.22,
"grad_norm": 11.069001197814941,
"learning_rate": 1.4955357142857143e-05,
"loss": 1.0465,
"step": 368
},
{
"epoch": 2.27,
"grad_norm": 11.216327667236328,
"learning_rate": 1.4062500000000001e-05,
"loss": 1.1112,
"step": 376
},
{
"epoch": 2.31,
"grad_norm": 10.935038566589355,
"learning_rate": 1.3169642857142858e-05,
"loss": 0.8433,
"step": 384
},
{
"epoch": 2.36,
"grad_norm": 18.220169067382812,
"learning_rate": 1.2276785714285715e-05,
"loss": 0.812,
"step": 392
},
{
"epoch": 2.41,
"grad_norm": 13.36108112335205,
"learning_rate": 1.1383928571428572e-05,
"loss": 1.0622,
"step": 400
},
{
"epoch": 2.46,
"grad_norm": 12.954853057861328,
"learning_rate": 1.049107142857143e-05,
"loss": 0.8341,
"step": 408
},
{
"epoch": 2.51,
"grad_norm": 15.882329940795898,
"learning_rate": 9.598214285714287e-06,
"loss": 0.8285,
"step": 416
},
{
"epoch": 2.55,
"grad_norm": 7.79279899597168,
"learning_rate": 8.705357142857143e-06,
"loss": 0.9739,
"step": 424
},
{
"epoch": 2.6,
"grad_norm": 11.043404579162598,
"learning_rate": 7.8125e-06,
"loss": 0.9797,
"step": 432
},
{
"epoch": 2.65,
"grad_norm": 7.065421104431152,
"learning_rate": 6.919642857142858e-06,
"loss": 0.8766,
"step": 440
},
{
"epoch": 2.7,
"grad_norm": 7.5092878341674805,
"learning_rate": 6.0267857142857145e-06,
"loss": 0.637,
"step": 448
},
{
"epoch": 2.75,
"grad_norm": 10.960742950439453,
"learning_rate": 5.133928571428571e-06,
"loss": 0.9343,
"step": 456
},
{
"epoch": 2.8,
"grad_norm": 23.01622772216797,
"learning_rate": 4.241071428571429e-06,
"loss": 1.0866,
"step": 464
},
{
"epoch": 2.84,
"grad_norm": 2.7104849815368652,
"learning_rate": 3.348214285714286e-06,
"loss": 0.7754,
"step": 472
},
{
"epoch": 2.89,
"grad_norm": 16.19709014892578,
"learning_rate": 2.455357142857143e-06,
"loss": 0.9792,
"step": 480
},
{
"epoch": 2.94,
"grad_norm": 9.475327491760254,
"learning_rate": 1.5625e-06,
"loss": 0.8772,
"step": 488
},
{
"epoch": 2.99,
"grad_norm": 7.728980541229248,
"learning_rate": 6.696428571428571e-07,
"loss": 0.7722,
"step": 496
},
{
"epoch": 3.0,
"eval_accuracy": 0.6716867469879518,
"eval_f1_macro": 0.6858654529218409,
"eval_f1_micro": 0.6716867469879518,
"eval_f1_weighted": 0.676828467951081,
"eval_loss": 0.9025482535362244,
"eval_precision_macro": 0.7239086041672248,
"eval_precision_micro": 0.6716867469879518,
"eval_precision_weighted": 0.7046011538585282,
"eval_recall_macro": 0.6707409732185557,
"eval_recall_micro": 0.6716867469879518,
"eval_recall_weighted": 0.6716867469879518,
"eval_runtime": 302.7239,
"eval_samples_per_second": 1.097,
"eval_steps_per_second": 0.069,
"step": 498
}
],
"logging_steps": 8,
"max_steps": 498,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.0897396284801761e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}