dacunaq's picture
End of training
5f4b17f verified
{
"best_global_step": 96,
"best_metric": 0.9411764705882353,
"best_model_checkpoint": "vit-base-patch16-384-finetuned-humid-binary-2/checkpoint-96",
"epoch": 50.0,
"eval_steps": 500,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.7205882352941176,
"eval_f1_dry": 0.6885245901639344,
"eval_f1_humid": 0.7466666666666667,
"eval_f1_macro": 0.7175956284153006,
"eval_loss": 0.614932119846344,
"eval_precision_dry": 0.65625,
"eval_precision_humid": 0.7777777777777778,
"eval_precision_macro": 0.7170138888888888,
"eval_recall_dry": 0.7241379310344828,
"eval_recall_humid": 0.717948717948718,
"eval_recall_macro": 0.7210433244916004,
"eval_runtime": 2.2698,
"eval_samples_per_second": 29.959,
"eval_steps_per_second": 2.203,
"step": 6
},
{
"epoch": 1.6666666666666665,
"grad_norm": 5.209638595581055,
"learning_rate": 1.5e-05,
"loss": 0.6101,
"step": 10
},
{
"epoch": 2.0,
"eval_accuracy": 0.8823529411764706,
"eval_f1_dry": 0.8620689655172413,
"eval_f1_humid": 0.8974358974358975,
"eval_f1_macro": 0.8797524314765695,
"eval_loss": 0.3864850401878357,
"eval_precision_dry": 0.8620689655172413,
"eval_precision_humid": 0.8974358974358975,
"eval_precision_macro": 0.8797524314765695,
"eval_recall_dry": 0.8620689655172413,
"eval_recall_humid": 0.8974358974358975,
"eval_recall_macro": 0.8797524314765695,
"eval_runtime": 2.2426,
"eval_samples_per_second": 30.322,
"eval_steps_per_second": 2.23,
"step": 12
},
{
"epoch": 3.0,
"eval_accuracy": 0.8382352941176471,
"eval_f1_dry": 0.8307692307692308,
"eval_f1_humid": 0.8450704225352113,
"eval_f1_macro": 0.837919826652221,
"eval_loss": 0.35972756147384644,
"eval_precision_dry": 0.75,
"eval_precision_humid": 0.9375,
"eval_precision_macro": 0.84375,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.7692307692307693,
"eval_recall_macro": 0.850132625994695,
"eval_runtime": 2.2171,
"eval_samples_per_second": 30.671,
"eval_steps_per_second": 2.255,
"step": 18
},
{
"epoch": 3.3333333333333335,
"grad_norm": 3.4679043292999268,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.2613,
"step": 20
},
{
"epoch": 4.0,
"eval_accuracy": 0.8235294117647058,
"eval_f1_dry": 0.8235294117647058,
"eval_f1_humid": 0.8235294117647058,
"eval_f1_macro": 0.8235294117647058,
"eval_loss": 0.5013691186904907,
"eval_precision_dry": 0.717948717948718,
"eval_precision_humid": 0.9655172413793104,
"eval_precision_macro": 0.8417329796640142,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.717948717948718,
"eval_recall_macro": 0.8417329796640142,
"eval_runtime": 2.2748,
"eval_samples_per_second": 29.893,
"eval_steps_per_second": 2.198,
"step": 24
},
{
"epoch": 5.0,
"grad_norm": 4.22995662689209,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.1505,
"step": 30
},
{
"epoch": 5.0,
"eval_accuracy": 0.8529411764705882,
"eval_f1_dry": 0.84375,
"eval_f1_humid": 0.8611111111111112,
"eval_f1_macro": 0.8524305555555556,
"eval_loss": 0.43828415870666504,
"eval_precision_dry": 0.7714285714285715,
"eval_precision_humid": 0.9393939393939394,
"eval_precision_macro": 0.8554112554112554,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.7948717948717948,
"eval_recall_macro": 0.8629531388152077,
"eval_runtime": 2.1723,
"eval_samples_per_second": 31.303,
"eval_steps_per_second": 2.302,
"step": 30
},
{
"epoch": 6.0,
"eval_accuracy": 0.8529411764705882,
"eval_f1_dry": 0.8484848484848485,
"eval_f1_humid": 0.8571428571428571,
"eval_f1_macro": 0.8528138528138528,
"eval_loss": 0.4248289167881012,
"eval_precision_dry": 0.7567567567567568,
"eval_precision_humid": 0.967741935483871,
"eval_precision_macro": 0.8622493461203139,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.7692307692307693,
"eval_recall_macro": 0.8673740053050398,
"eval_runtime": 2.3123,
"eval_samples_per_second": 29.408,
"eval_steps_per_second": 2.162,
"step": 36
},
{
"epoch": 6.666666666666667,
"grad_norm": 6.771641254425049,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.1298,
"step": 40
},
{
"epoch": 7.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8727272727272727,
"eval_f1_humid": 0.9135802469135802,
"eval_f1_macro": 0.8931537598204264,
"eval_loss": 0.39324793219566345,
"eval_precision_dry": 0.9230769230769231,
"eval_precision_humid": 0.8809523809523809,
"eval_precision_macro": 0.902014652014652,
"eval_recall_dry": 0.8275862068965517,
"eval_recall_humid": 0.9487179487179487,
"eval_recall_macro": 0.8881520778072503,
"eval_runtime": 2.2011,
"eval_samples_per_second": 30.894,
"eval_steps_per_second": 2.272,
"step": 42
},
{
"epoch": 8.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8852459016393442,
"eval_f1_humid": 0.9066666666666666,
"eval_f1_macro": 0.8959562841530054,
"eval_loss": 0.3932932913303375,
"eval_precision_dry": 0.84375,
"eval_precision_humid": 0.9444444444444444,
"eval_precision_macro": 0.8940972222222222,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8717948717948718,
"eval_recall_macro": 0.9014146772767462,
"eval_runtime": 2.1675,
"eval_samples_per_second": 31.373,
"eval_steps_per_second": 2.307,
"step": 48
},
{
"epoch": 8.333333333333334,
"grad_norm": 2.7544643878936768,
"learning_rate": 4.648148148148148e-05,
"loss": 0.1033,
"step": 50
},
{
"epoch": 9.0,
"eval_accuracy": 0.7941176470588235,
"eval_f1_dry": 0.8055555555555556,
"eval_f1_humid": 0.78125,
"eval_f1_macro": 0.7934027777777778,
"eval_loss": 0.7160156965255737,
"eval_precision_dry": 0.6744186046511628,
"eval_precision_humid": 1.0,
"eval_precision_macro": 0.8372093023255813,
"eval_recall_dry": 1.0,
"eval_recall_humid": 0.6410256410256411,
"eval_recall_macro": 0.8205128205128205,
"eval_runtime": 2.1872,
"eval_samples_per_second": 31.09,
"eval_steps_per_second": 2.286,
"step": 54
},
{
"epoch": 10.0,
"grad_norm": 2.3722591400146484,
"learning_rate": 4.462962962962963e-05,
"loss": 0.1135,
"step": 60
},
{
"epoch": 10.0,
"eval_accuracy": 0.9264705882352942,
"eval_f1_dry": 0.9152542372881356,
"eval_f1_humid": 0.935064935064935,
"eval_f1_macro": 0.9251595861765354,
"eval_loss": 0.40363243222236633,
"eval_precision_dry": 0.9,
"eval_precision_humid": 0.9473684210526315,
"eval_precision_macro": 0.9236842105263158,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.9230769230769231,
"eval_recall_macro": 0.927055702917772,
"eval_runtime": 2.1789,
"eval_samples_per_second": 31.209,
"eval_steps_per_second": 2.295,
"step": 60
},
{
"epoch": 11.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.8928571428571429,
"eval_f1_humid": 0.925,
"eval_f1_macro": 0.9089285714285715,
"eval_loss": 0.4228282868862152,
"eval_precision_dry": 0.9259259259259259,
"eval_precision_humid": 0.9024390243902439,
"eval_precision_macro": 0.9141824751580849,
"eval_recall_dry": 0.8620689655172413,
"eval_recall_humid": 0.9487179487179487,
"eval_recall_macro": 0.905393457117595,
"eval_runtime": 2.2933,
"eval_samples_per_second": 29.651,
"eval_steps_per_second": 2.18,
"step": 66
},
{
"epoch": 11.666666666666666,
"grad_norm": 0.39826828241348267,
"learning_rate": 4.277777777777778e-05,
"loss": 0.0331,
"step": 70
},
{
"epoch": 12.0,
"eval_accuracy": 0.8823529411764706,
"eval_f1_dry": 0.875,
"eval_f1_humid": 0.8888888888888888,
"eval_f1_macro": 0.8819444444444444,
"eval_loss": 0.4820828139781952,
"eval_precision_dry": 0.8,
"eval_precision_humid": 0.9696969696969697,
"eval_precision_macro": 0.8848484848484849,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8205128205128205,
"eval_recall_macro": 0.8930150309460654,
"eval_runtime": 2.2848,
"eval_samples_per_second": 29.762,
"eval_steps_per_second": 2.188,
"step": 72
},
{
"epoch": 13.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.9,
"eval_f1_humid": 0.9210526315789473,
"eval_f1_macro": 0.9105263157894736,
"eval_loss": 0.4474344849586487,
"eval_precision_dry": 0.8709677419354839,
"eval_precision_humid": 0.9459459459459459,
"eval_precision_macro": 0.9084568439407149,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8974358974358975,
"eval_recall_macro": 0.9142351900972591,
"eval_runtime": 2.3596,
"eval_samples_per_second": 28.819,
"eval_steps_per_second": 2.119,
"step": 78
},
{
"epoch": 13.333333333333334,
"grad_norm": 4.4798173904418945,
"learning_rate": 4.092592592592593e-05,
"loss": 0.0142,
"step": 80
},
{
"epoch": 14.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.896551724137931,
"eval_f1_humid": 0.9230769230769231,
"eval_f1_macro": 0.9098143236074271,
"eval_loss": 0.4478212893009186,
"eval_precision_dry": 0.896551724137931,
"eval_precision_humid": 0.9230769230769231,
"eval_precision_macro": 0.9098143236074271,
"eval_recall_dry": 0.896551724137931,
"eval_recall_humid": 0.9230769230769231,
"eval_recall_macro": 0.9098143236074271,
"eval_runtime": 2.1905,
"eval_samples_per_second": 31.043,
"eval_steps_per_second": 2.283,
"step": 84
},
{
"epoch": 15.0,
"grad_norm": 0.2734902501106262,
"learning_rate": 3.9074074074074076e-05,
"loss": 0.0074,
"step": 90
},
{
"epoch": 15.0,
"eval_accuracy": 0.8823529411764706,
"eval_f1_dry": 0.875,
"eval_f1_humid": 0.8888888888888888,
"eval_f1_macro": 0.8819444444444444,
"eval_loss": 0.6301909685134888,
"eval_precision_dry": 0.8,
"eval_precision_humid": 0.9696969696969697,
"eval_precision_macro": 0.8848484848484849,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8205128205128205,
"eval_recall_macro": 0.8930150309460654,
"eval_runtime": 2.2,
"eval_samples_per_second": 30.909,
"eval_steps_per_second": 2.273,
"step": 90
},
{
"epoch": 16.0,
"eval_accuracy": 0.9411764705882353,
"eval_f1_dry": 0.9310344827586207,
"eval_f1_humid": 0.9487179487179487,
"eval_f1_macro": 0.9398762157382847,
"eval_loss": 0.4611157774925232,
"eval_precision_dry": 0.9310344827586207,
"eval_precision_humid": 0.9487179487179487,
"eval_precision_macro": 0.9398762157382847,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.9487179487179487,
"eval_recall_macro": 0.9398762157382847,
"eval_runtime": 2.2029,
"eval_samples_per_second": 30.868,
"eval_steps_per_second": 2.27,
"step": 96
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.033265743404626846,
"learning_rate": 3.722222222222222e-05,
"loss": 0.0114,
"step": 100
},
{
"epoch": 17.0,
"eval_accuracy": 0.8823529411764706,
"eval_f1_dry": 0.875,
"eval_f1_humid": 0.8888888888888888,
"eval_f1_macro": 0.8819444444444444,
"eval_loss": 0.6472479104995728,
"eval_precision_dry": 0.8,
"eval_precision_humid": 0.9696969696969697,
"eval_precision_macro": 0.8848484848484849,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8205128205128205,
"eval_recall_macro": 0.8930150309460654,
"eval_runtime": 2.1737,
"eval_samples_per_second": 31.282,
"eval_steps_per_second": 2.3,
"step": 102
},
{
"epoch": 18.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.8928571428571429,
"eval_f1_humid": 0.925,
"eval_f1_macro": 0.9089285714285715,
"eval_loss": 0.4689759910106659,
"eval_precision_dry": 0.9259259259259259,
"eval_precision_humid": 0.9024390243902439,
"eval_precision_macro": 0.9141824751580849,
"eval_recall_dry": 0.8620689655172413,
"eval_recall_humid": 0.9487179487179487,
"eval_recall_macro": 0.905393457117595,
"eval_runtime": 2.1917,
"eval_samples_per_second": 31.027,
"eval_steps_per_second": 2.281,
"step": 108
},
{
"epoch": 18.333333333333332,
"grad_norm": 0.9351687431335449,
"learning_rate": 3.537037037037037e-05,
"loss": 0.0054,
"step": 110
},
{
"epoch": 19.0,
"eval_accuracy": 0.8676470588235294,
"eval_f1_dry": 0.8615384615384616,
"eval_f1_humid": 0.8732394366197183,
"eval_f1_macro": 0.8673889490790899,
"eval_loss": 0.7610320448875427,
"eval_precision_dry": 0.7777777777777778,
"eval_precision_humid": 0.96875,
"eval_precision_macro": 0.8732638888888888,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.7948717948717948,
"eval_recall_macro": 0.8801945181255526,
"eval_runtime": 2.1632,
"eval_samples_per_second": 31.435,
"eval_steps_per_second": 2.311,
"step": 114
},
{
"epoch": 20.0,
"grad_norm": 0.03664712235331535,
"learning_rate": 3.351851851851852e-05,
"loss": 0.0024,
"step": 120
},
{
"epoch": 20.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.9,
"eval_f1_humid": 0.9210526315789473,
"eval_f1_macro": 0.9105263157894736,
"eval_loss": 0.5395424365997314,
"eval_precision_dry": 0.8709677419354839,
"eval_precision_humid": 0.9459459459459459,
"eval_precision_macro": 0.9084568439407149,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8974358974358975,
"eval_recall_macro": 0.9142351900972591,
"eval_runtime": 2.1855,
"eval_samples_per_second": 31.114,
"eval_steps_per_second": 2.288,
"step": 120
},
{
"epoch": 21.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8852459016393442,
"eval_f1_humid": 0.9066666666666666,
"eval_f1_macro": 0.8959562841530054,
"eval_loss": 0.6274784207344055,
"eval_precision_dry": 0.84375,
"eval_precision_humid": 0.9444444444444444,
"eval_precision_macro": 0.8940972222222222,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8717948717948718,
"eval_recall_macro": 0.9014146772767462,
"eval_runtime": 2.2125,
"eval_samples_per_second": 30.734,
"eval_steps_per_second": 2.26,
"step": 126
},
{
"epoch": 21.666666666666668,
"grad_norm": 0.053658369928598404,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.0024,
"step": 130
},
{
"epoch": 22.0,
"eval_accuracy": 0.8676470588235294,
"eval_f1_dry": 0.8615384615384616,
"eval_f1_humid": 0.8732394366197183,
"eval_f1_macro": 0.8673889490790899,
"eval_loss": 0.7972272634506226,
"eval_precision_dry": 0.7777777777777778,
"eval_precision_humid": 0.96875,
"eval_precision_macro": 0.8732638888888888,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.7948717948717948,
"eval_recall_macro": 0.8801945181255526,
"eval_runtime": 2.2057,
"eval_samples_per_second": 30.829,
"eval_steps_per_second": 2.267,
"step": 132
},
{
"epoch": 23.0,
"eval_accuracy": 0.9117647058823529,
"eval_f1_dry": 0.9,
"eval_f1_humid": 0.9210526315789473,
"eval_f1_macro": 0.9105263157894736,
"eval_loss": 0.5465890169143677,
"eval_precision_dry": 0.8709677419354839,
"eval_precision_humid": 0.9459459459459459,
"eval_precision_macro": 0.9084568439407149,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8974358974358975,
"eval_recall_macro": 0.9142351900972591,
"eval_runtime": 2.2093,
"eval_samples_per_second": 30.779,
"eval_steps_per_second": 2.263,
"step": 138
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.01843937672674656,
"learning_rate": 2.981481481481482e-05,
"loss": 0.0009,
"step": 140
},
{
"epoch": 24.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8852459016393442,
"eval_f1_humid": 0.9066666666666666,
"eval_f1_macro": 0.8959562841530054,
"eval_loss": 0.5303817987442017,
"eval_precision_dry": 0.84375,
"eval_precision_humid": 0.9444444444444444,
"eval_precision_macro": 0.8940972222222222,
"eval_recall_dry": 0.9310344827586207,
"eval_recall_humid": 0.8717948717948718,
"eval_recall_macro": 0.9014146772767462,
"eval_runtime": 2.1789,
"eval_samples_per_second": 31.208,
"eval_steps_per_second": 2.295,
"step": 144
},
{
"epoch": 25.0,
"grad_norm": 0.0267292782664299,
"learning_rate": 2.7962962962962965e-05,
"loss": 0.0012,
"step": 150
},
{
"epoch": 25.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.656761646270752,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.288,
"eval_samples_per_second": 29.721,
"eval_steps_per_second": 2.185,
"step": 150
},
{
"epoch": 26.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.613365650177002,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.3183,
"eval_samples_per_second": 29.332,
"eval_steps_per_second": 2.157,
"step": 156
},
{
"epoch": 26.666666666666668,
"grad_norm": 0.007465701084583998,
"learning_rate": 2.6111111111111114e-05,
"loss": 0.0004,
"step": 160
},
{
"epoch": 27.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6093538999557495,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2556,
"eval_samples_per_second": 30.148,
"eval_steps_per_second": 2.217,
"step": 162
},
{
"epoch": 28.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.615250825881958,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1738,
"eval_samples_per_second": 31.282,
"eval_steps_per_second": 2.3,
"step": 168
},
{
"epoch": 28.333333333333332,
"grad_norm": 0.01064391154795885,
"learning_rate": 2.425925925925926e-05,
"loss": 0.0003,
"step": 170
},
{
"epoch": 29.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6319212913513184,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1953,
"eval_samples_per_second": 30.976,
"eval_steps_per_second": 2.278,
"step": 174
},
{
"epoch": 30.0,
"grad_norm": 0.006854104809463024,
"learning_rate": 2.240740740740741e-05,
"loss": 0.0002,
"step": 180
},
{
"epoch": 30.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6456788182258606,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1711,
"eval_samples_per_second": 31.321,
"eval_steps_per_second": 2.303,
"step": 180
},
{
"epoch": 31.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6529943346977234,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2044,
"eval_samples_per_second": 30.848,
"eval_steps_per_second": 2.268,
"step": 186
},
{
"epoch": 31.666666666666668,
"grad_norm": 0.006000952795147896,
"learning_rate": 2.0555555555555555e-05,
"loss": 0.0002,
"step": 190
},
{
"epoch": 32.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6579628586769104,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1932,
"eval_samples_per_second": 31.005,
"eval_steps_per_second": 2.28,
"step": 192
},
{
"epoch": 33.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6649767160415649,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1922,
"eval_samples_per_second": 31.019,
"eval_steps_per_second": 2.281,
"step": 198
},
{
"epoch": 33.333333333333336,
"grad_norm": 0.0073044863529503345,
"learning_rate": 1.8703703703703704e-05,
"loss": 0.0002,
"step": 200
},
{
"epoch": 34.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.677439272403717,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2369,
"eval_samples_per_second": 30.4,
"eval_steps_per_second": 2.235,
"step": 204
},
{
"epoch": 35.0,
"grad_norm": 0.00664471136406064,
"learning_rate": 1.6851851851851853e-05,
"loss": 0.0002,
"step": 210
},
{
"epoch": 35.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6827277541160583,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2152,
"eval_samples_per_second": 30.697,
"eval_steps_per_second": 2.257,
"step": 210
},
{
"epoch": 36.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6850156188011169,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1962,
"eval_samples_per_second": 30.963,
"eval_steps_per_second": 2.277,
"step": 216
},
{
"epoch": 36.666666666666664,
"grad_norm": 0.003711260389536619,
"learning_rate": 1.5e-05,
"loss": 0.0001,
"step": 220
},
{
"epoch": 37.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.684583842754364,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2135,
"eval_samples_per_second": 30.721,
"eval_steps_per_second": 2.259,
"step": 222
},
{
"epoch": 38.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6853307485580444,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2406,
"eval_samples_per_second": 30.348,
"eval_steps_per_second": 2.231,
"step": 228
},
{
"epoch": 38.333333333333336,
"grad_norm": 0.003823125036433339,
"learning_rate": 1.3148148148148148e-05,
"loss": 0.0001,
"step": 230
},
{
"epoch": 39.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6853978633880615,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1744,
"eval_samples_per_second": 31.273,
"eval_steps_per_second": 2.299,
"step": 234
},
{
"epoch": 40.0,
"grad_norm": 0.0067051672376692295,
"learning_rate": 1.1296296296296297e-05,
"loss": 0.0001,
"step": 240
},
{
"epoch": 40.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6844778656959534,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2084,
"eval_samples_per_second": 30.791,
"eval_steps_per_second": 2.264,
"step": 240
},
{
"epoch": 41.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6846556663513184,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2966,
"eval_samples_per_second": 29.61,
"eval_steps_per_second": 2.177,
"step": 246
},
{
"epoch": 41.666666666666664,
"grad_norm": 0.005940043367445469,
"learning_rate": 9.444444444444445e-06,
"loss": 0.0001,
"step": 250
},
{
"epoch": 42.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6859800815582275,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2361,
"eval_samples_per_second": 30.41,
"eval_steps_per_second": 2.236,
"step": 252
},
{
"epoch": 43.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6874513030052185,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1797,
"eval_samples_per_second": 31.196,
"eval_steps_per_second": 2.294,
"step": 258
},
{
"epoch": 43.333333333333336,
"grad_norm": 0.003746297210454941,
"learning_rate": 7.592592592592593e-06,
"loss": 0.0001,
"step": 260
},
{
"epoch": 44.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6894810199737549,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2822,
"eval_samples_per_second": 29.796,
"eval_steps_per_second": 2.191,
"step": 264
},
{
"epoch": 45.0,
"grad_norm": 0.010260913521051407,
"learning_rate": 5.740740740740741e-06,
"loss": 0.0001,
"step": 270
},
{
"epoch": 45.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6902977824211121,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2179,
"eval_samples_per_second": 30.66,
"eval_steps_per_second": 2.254,
"step": 270
},
{
"epoch": 46.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.691429078578949,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2021,
"eval_samples_per_second": 30.88,
"eval_steps_per_second": 2.271,
"step": 276
},
{
"epoch": 46.666666666666664,
"grad_norm": 0.003823869628831744,
"learning_rate": 3.888888888888889e-06,
"loss": 0.0001,
"step": 280
},
{
"epoch": 47.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6919637322425842,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1849,
"eval_samples_per_second": 31.123,
"eval_steps_per_second": 2.288,
"step": 282
},
{
"epoch": 48.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6921000480651855,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.1918,
"eval_samples_per_second": 31.025,
"eval_steps_per_second": 2.281,
"step": 288
},
{
"epoch": 48.333333333333336,
"grad_norm": 0.004158890340477228,
"learning_rate": 2.0370370370370375e-06,
"loss": 0.0001,
"step": 290
},
{
"epoch": 49.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.6922561526298523,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2198,
"eval_samples_per_second": 30.634,
"eval_steps_per_second": 2.252,
"step": 294
},
{
"epoch": 50.0,
"grad_norm": 0.003357668872922659,
"learning_rate": 1.851851851851852e-07,
"loss": 0.0001,
"step": 300
},
{
"epoch": 50.0,
"eval_accuracy": 0.8970588235294118,
"eval_f1_dry": 0.8888888888888888,
"eval_f1_humid": 0.9041095890410958,
"eval_f1_macro": 0.8964992389649924,
"eval_loss": 0.692406177520752,
"eval_precision_dry": 0.8235294117647058,
"eval_precision_humid": 0.9705882352941176,
"eval_precision_macro": 0.8970588235294117,
"eval_recall_dry": 0.9655172413793104,
"eval_recall_humid": 0.8461538461538461,
"eval_recall_macro": 0.9058355437665783,
"eval_runtime": 2.2412,
"eval_samples_per_second": 30.341,
"eval_steps_per_second": 2.231,
"step": 300
},
{
"epoch": 50.0,
"step": 300,
"total_flos": 4.3301918339334144e+18,
"train_loss": 0.04831398472228708,
"train_runtime": 1645.2014,
"train_samples_per_second": 11.518,
"train_steps_per_second": 0.182
}
],
"logging_steps": 10,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.3301918339334144e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}