large-algae-vit-rgb / trainer_state.json
samitizerxu's picture
End of training
28d9b91
{
"best_metric": 0.6226525821596244,
"best_model_checkpoint": "large-algae-vit-rgb/checkpoint-2280",
"epoch": 20.0,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 2.0833333333333334e-06,
"loss": 1.7796,
"step": 10
},
{
"epoch": 0.17,
"learning_rate": 4.166666666666667e-06,
"loss": 1.6643,
"step": 20
},
{
"epoch": 0.25,
"learning_rate": 6.25e-06,
"loss": 1.4798,
"step": 30
},
{
"epoch": 0.33,
"learning_rate": 8.333333333333334e-06,
"loss": 1.3417,
"step": 40
},
{
"epoch": 0.42,
"learning_rate": 1.0416666666666668e-05,
"loss": 1.2473,
"step": 50
},
{
"epoch": 0.5,
"learning_rate": 1.25e-05,
"loss": 1.1897,
"step": 60
},
{
"epoch": 0.58,
"learning_rate": 1.4583333333333335e-05,
"loss": 1.1835,
"step": 70
},
{
"epoch": 0.67,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.1931,
"step": 80
},
{
"epoch": 0.75,
"learning_rate": 1.8750000000000002e-05,
"loss": 1.1324,
"step": 90
},
{
"epoch": 0.83,
"learning_rate": 2.0833333333333336e-05,
"loss": 1.1343,
"step": 100
},
{
"epoch": 0.92,
"learning_rate": 2.2916666666666667e-05,
"loss": 1.124,
"step": 110
},
{
"epoch": 1.0,
"learning_rate": 2.5e-05,
"loss": 1.1433,
"step": 120
},
{
"epoch": 1.0,
"eval_accuracy": 0.5575117370892019,
"eval_loss": 1.0966269969940186,
"eval_runtime": 22.9114,
"eval_samples_per_second": 74.374,
"eval_steps_per_second": 2.357,
"step": 120
},
{
"epoch": 1.08,
"learning_rate": 2.7083333333333332e-05,
"loss": 1.1211,
"step": 130
},
{
"epoch": 1.17,
"learning_rate": 2.916666666666667e-05,
"loss": 1.0712,
"step": 140
},
{
"epoch": 1.25,
"learning_rate": 3.125e-05,
"loss": 1.0919,
"step": 150
},
{
"epoch": 1.33,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.0908,
"step": 160
},
{
"epoch": 1.42,
"learning_rate": 3.541666666666667e-05,
"loss": 1.0912,
"step": 170
},
{
"epoch": 1.5,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.1271,
"step": 180
},
{
"epoch": 1.58,
"learning_rate": 3.958333333333333e-05,
"loss": 1.0785,
"step": 190
},
{
"epoch": 1.67,
"learning_rate": 4.166666666666667e-05,
"loss": 1.0361,
"step": 200
},
{
"epoch": 1.75,
"learning_rate": 4.375e-05,
"loss": 1.1093,
"step": 210
},
{
"epoch": 1.83,
"learning_rate": 4.5833333333333334e-05,
"loss": 1.0827,
"step": 220
},
{
"epoch": 1.92,
"learning_rate": 4.791666666666667e-05,
"loss": 1.0748,
"step": 230
},
{
"epoch": 2.0,
"learning_rate": 5e-05,
"loss": 1.0507,
"step": 240
},
{
"epoch": 2.0,
"eval_accuracy": 0.5856807511737089,
"eval_loss": 1.0356614589691162,
"eval_runtime": 22.7063,
"eval_samples_per_second": 75.045,
"eval_steps_per_second": 2.378,
"step": 240
},
{
"epoch": 2.08,
"learning_rate": 4.976851851851852e-05,
"loss": 1.0658,
"step": 250
},
{
"epoch": 2.17,
"learning_rate": 4.9537037037037035e-05,
"loss": 1.0552,
"step": 260
},
{
"epoch": 2.25,
"learning_rate": 4.930555555555556e-05,
"loss": 1.0831,
"step": 270
},
{
"epoch": 2.33,
"learning_rate": 4.9074074074074075e-05,
"loss": 1.0378,
"step": 280
},
{
"epoch": 2.42,
"learning_rate": 4.8842592592592595e-05,
"loss": 1.0336,
"step": 290
},
{
"epoch": 2.5,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.0519,
"step": 300
},
{
"epoch": 2.58,
"learning_rate": 4.837962962962963e-05,
"loss": 1.0065,
"step": 310
},
{
"epoch": 2.67,
"learning_rate": 4.814814814814815e-05,
"loss": 1.0432,
"step": 320
},
{
"epoch": 2.75,
"learning_rate": 4.791666666666667e-05,
"loss": 1.0496,
"step": 330
},
{
"epoch": 2.83,
"learning_rate": 4.768518518518519e-05,
"loss": 1.0635,
"step": 340
},
{
"epoch": 2.92,
"learning_rate": 4.745370370370371e-05,
"loss": 1.0402,
"step": 350
},
{
"epoch": 3.0,
"learning_rate": 4.722222222222222e-05,
"loss": 1.0104,
"step": 360
},
{
"epoch": 3.0,
"eval_accuracy": 0.5921361502347418,
"eval_loss": 1.0168485641479492,
"eval_runtime": 22.8587,
"eval_samples_per_second": 74.545,
"eval_steps_per_second": 2.362,
"step": 360
},
{
"epoch": 3.08,
"learning_rate": 4.699074074074074e-05,
"loss": 0.9874,
"step": 370
},
{
"epoch": 3.17,
"learning_rate": 4.675925925925926e-05,
"loss": 1.0564,
"step": 380
},
{
"epoch": 3.25,
"learning_rate": 4.652777777777778e-05,
"loss": 1.002,
"step": 390
},
{
"epoch": 3.33,
"learning_rate": 4.62962962962963e-05,
"loss": 1.0306,
"step": 400
},
{
"epoch": 3.42,
"learning_rate": 4.6064814814814814e-05,
"loss": 1.0384,
"step": 410
},
{
"epoch": 3.5,
"learning_rate": 4.5833333333333334e-05,
"loss": 1.0109,
"step": 420
},
{
"epoch": 3.58,
"learning_rate": 4.5601851851851854e-05,
"loss": 1.0797,
"step": 430
},
{
"epoch": 3.67,
"learning_rate": 4.5370370370370374e-05,
"loss": 1.0382,
"step": 440
},
{
"epoch": 3.75,
"learning_rate": 4.5138888888888894e-05,
"loss": 1.0057,
"step": 450
},
{
"epoch": 3.83,
"learning_rate": 4.490740740740741e-05,
"loss": 1.0405,
"step": 460
},
{
"epoch": 3.92,
"learning_rate": 4.467592592592593e-05,
"loss": 1.015,
"step": 470
},
{
"epoch": 4.0,
"learning_rate": 4.4444444444444447e-05,
"loss": 1.0353,
"step": 480
},
{
"epoch": 4.0,
"eval_accuracy": 0.5856807511737089,
"eval_loss": 1.0344651937484741,
"eval_runtime": 22.8799,
"eval_samples_per_second": 74.476,
"eval_steps_per_second": 2.36,
"step": 480
},
{
"epoch": 4.08,
"learning_rate": 4.4212962962962966e-05,
"loss": 1.0024,
"step": 490
},
{
"epoch": 4.17,
"learning_rate": 4.3981481481481486e-05,
"loss": 0.9605,
"step": 500
},
{
"epoch": 4.25,
"learning_rate": 4.375e-05,
"loss": 1.0132,
"step": 510
},
{
"epoch": 4.33,
"learning_rate": 4.351851851851852e-05,
"loss": 0.9964,
"step": 520
},
{
"epoch": 4.42,
"learning_rate": 4.328703703703704e-05,
"loss": 1.0063,
"step": 530
},
{
"epoch": 4.5,
"learning_rate": 4.305555555555556e-05,
"loss": 1.0355,
"step": 540
},
{
"epoch": 4.58,
"learning_rate": 4.282407407407408e-05,
"loss": 1.0556,
"step": 550
},
{
"epoch": 4.67,
"learning_rate": 4.259259259259259e-05,
"loss": 0.9885,
"step": 560
},
{
"epoch": 4.75,
"learning_rate": 4.236111111111111e-05,
"loss": 0.9804,
"step": 570
},
{
"epoch": 4.83,
"learning_rate": 4.212962962962963e-05,
"loss": 1.0036,
"step": 580
},
{
"epoch": 4.92,
"learning_rate": 4.1898148148148145e-05,
"loss": 1.0063,
"step": 590
},
{
"epoch": 5.0,
"learning_rate": 4.166666666666667e-05,
"loss": 0.9629,
"step": 600
},
{
"epoch": 5.0,
"eval_accuracy": 0.6015258215962441,
"eval_loss": 0.983875036239624,
"eval_runtime": 22.9086,
"eval_samples_per_second": 74.383,
"eval_steps_per_second": 2.357,
"step": 600
},
{
"epoch": 5.08,
"learning_rate": 4.1435185185185185e-05,
"loss": 0.9983,
"step": 610
},
{
"epoch": 5.17,
"learning_rate": 4.1203703703703705e-05,
"loss": 1.0425,
"step": 620
},
{
"epoch": 5.25,
"learning_rate": 4.0972222222222225e-05,
"loss": 1.0025,
"step": 630
},
{
"epoch": 5.33,
"learning_rate": 4.074074074074074e-05,
"loss": 0.9785,
"step": 640
},
{
"epoch": 5.42,
"learning_rate": 4.0509259259259265e-05,
"loss": 1.0057,
"step": 650
},
{
"epoch": 5.5,
"learning_rate": 4.027777777777778e-05,
"loss": 0.9692,
"step": 660
},
{
"epoch": 5.58,
"learning_rate": 4.00462962962963e-05,
"loss": 0.9976,
"step": 670
},
{
"epoch": 5.67,
"learning_rate": 3.981481481481482e-05,
"loss": 0.9917,
"step": 680
},
{
"epoch": 5.75,
"learning_rate": 3.958333333333333e-05,
"loss": 0.9818,
"step": 690
},
{
"epoch": 5.83,
"learning_rate": 3.935185185185186e-05,
"loss": 0.996,
"step": 700
},
{
"epoch": 5.92,
"learning_rate": 3.912037037037037e-05,
"loss": 0.9892,
"step": 710
},
{
"epoch": 6.0,
"learning_rate": 3.888888888888889e-05,
"loss": 0.9684,
"step": 720
},
{
"epoch": 6.0,
"eval_accuracy": 0.6068075117370892,
"eval_loss": 0.9672098159790039,
"eval_runtime": 22.7827,
"eval_samples_per_second": 74.794,
"eval_steps_per_second": 2.37,
"step": 720
},
{
"epoch": 6.08,
"learning_rate": 3.865740740740741e-05,
"loss": 0.9977,
"step": 730
},
{
"epoch": 6.17,
"learning_rate": 3.8425925925925924e-05,
"loss": 0.9858,
"step": 740
},
{
"epoch": 6.25,
"learning_rate": 3.8194444444444444e-05,
"loss": 1.0029,
"step": 750
},
{
"epoch": 6.33,
"learning_rate": 3.7962962962962964e-05,
"loss": 0.9715,
"step": 760
},
{
"epoch": 6.42,
"learning_rate": 3.7731481481481484e-05,
"loss": 0.9484,
"step": 770
},
{
"epoch": 6.5,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.9948,
"step": 780
},
{
"epoch": 6.58,
"learning_rate": 3.726851851851852e-05,
"loss": 0.9935,
"step": 790
},
{
"epoch": 6.67,
"learning_rate": 3.7037037037037037e-05,
"loss": 1.0129,
"step": 800
},
{
"epoch": 6.75,
"learning_rate": 3.6805555555555556e-05,
"loss": 0.94,
"step": 810
},
{
"epoch": 6.83,
"learning_rate": 3.6574074074074076e-05,
"loss": 0.9886,
"step": 820
},
{
"epoch": 6.92,
"learning_rate": 3.6342592592592596e-05,
"loss": 0.9441,
"step": 830
},
{
"epoch": 7.0,
"learning_rate": 3.611111111111111e-05,
"loss": 0.9727,
"step": 840
},
{
"epoch": 7.0,
"eval_accuracy": 0.6132629107981221,
"eval_loss": 0.9590299129486084,
"eval_runtime": 22.8896,
"eval_samples_per_second": 74.444,
"eval_steps_per_second": 2.359,
"step": 840
},
{
"epoch": 7.08,
"learning_rate": 3.587962962962963e-05,
"loss": 0.9784,
"step": 850
},
{
"epoch": 7.17,
"learning_rate": 3.564814814814815e-05,
"loss": 0.9698,
"step": 860
},
{
"epoch": 7.25,
"learning_rate": 3.541666666666667e-05,
"loss": 0.9599,
"step": 870
},
{
"epoch": 7.33,
"learning_rate": 3.518518518518519e-05,
"loss": 0.961,
"step": 880
},
{
"epoch": 7.42,
"learning_rate": 3.49537037037037e-05,
"loss": 0.9919,
"step": 890
},
{
"epoch": 7.5,
"learning_rate": 3.472222222222222e-05,
"loss": 0.9915,
"step": 900
},
{
"epoch": 7.58,
"learning_rate": 3.449074074074074e-05,
"loss": 0.9777,
"step": 910
},
{
"epoch": 7.67,
"learning_rate": 3.425925925925926e-05,
"loss": 0.9584,
"step": 920
},
{
"epoch": 7.75,
"learning_rate": 3.402777777777778e-05,
"loss": 0.9627,
"step": 930
},
{
"epoch": 7.83,
"learning_rate": 3.3796296296296295e-05,
"loss": 0.9634,
"step": 940
},
{
"epoch": 7.92,
"learning_rate": 3.3564814814814815e-05,
"loss": 0.9518,
"step": 950
},
{
"epoch": 8.0,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.9626,
"step": 960
},
{
"epoch": 8.0,
"eval_accuracy": 0.6126760563380281,
"eval_loss": 0.9426460266113281,
"eval_runtime": 22.8408,
"eval_samples_per_second": 74.603,
"eval_steps_per_second": 2.364,
"step": 960
},
{
"epoch": 8.08,
"learning_rate": 3.3101851851851855e-05,
"loss": 0.9318,
"step": 970
},
{
"epoch": 8.17,
"learning_rate": 3.2870370370370375e-05,
"loss": 0.993,
"step": 980
},
{
"epoch": 8.25,
"learning_rate": 3.263888888888889e-05,
"loss": 0.9247,
"step": 990
},
{
"epoch": 8.33,
"learning_rate": 3.240740740740741e-05,
"loss": 0.9803,
"step": 1000
},
{
"epoch": 8.42,
"learning_rate": 3.217592592592593e-05,
"loss": 0.9353,
"step": 1010
},
{
"epoch": 8.5,
"learning_rate": 3.194444444444444e-05,
"loss": 0.9683,
"step": 1020
},
{
"epoch": 8.58,
"learning_rate": 3.171296296296297e-05,
"loss": 0.9368,
"step": 1030
},
{
"epoch": 8.67,
"learning_rate": 3.148148148148148e-05,
"loss": 0.9811,
"step": 1040
},
{
"epoch": 8.75,
"learning_rate": 3.125e-05,
"loss": 0.9497,
"step": 1050
},
{
"epoch": 8.83,
"learning_rate": 3.101851851851852e-05,
"loss": 0.9227,
"step": 1060
},
{
"epoch": 8.92,
"learning_rate": 3.0787037037037034e-05,
"loss": 0.9876,
"step": 1070
},
{
"epoch": 9.0,
"learning_rate": 3.055555555555556e-05,
"loss": 0.9857,
"step": 1080
},
{
"epoch": 9.0,
"eval_accuracy": 0.607981220657277,
"eval_loss": 0.9669484496116638,
"eval_runtime": 22.8277,
"eval_samples_per_second": 74.646,
"eval_steps_per_second": 2.366,
"step": 1080
},
{
"epoch": 9.08,
"learning_rate": 3.0324074074074077e-05,
"loss": 0.8884,
"step": 1090
},
{
"epoch": 9.17,
"learning_rate": 3.0092592592592593e-05,
"loss": 0.9424,
"step": 1100
},
{
"epoch": 9.25,
"learning_rate": 2.9861111111111113e-05,
"loss": 0.9701,
"step": 1110
},
{
"epoch": 9.33,
"learning_rate": 2.962962962962963e-05,
"loss": 0.9577,
"step": 1120
},
{
"epoch": 9.42,
"learning_rate": 2.9398148148148146e-05,
"loss": 0.9542,
"step": 1130
},
{
"epoch": 9.5,
"learning_rate": 2.916666666666667e-05,
"loss": 0.9483,
"step": 1140
},
{
"epoch": 9.58,
"learning_rate": 2.8935185185185186e-05,
"loss": 0.9406,
"step": 1150
},
{
"epoch": 9.67,
"learning_rate": 2.8703703703703706e-05,
"loss": 0.9335,
"step": 1160
},
{
"epoch": 9.75,
"learning_rate": 2.8472222222222223e-05,
"loss": 0.9205,
"step": 1170
},
{
"epoch": 9.83,
"learning_rate": 2.824074074074074e-05,
"loss": 0.9193,
"step": 1180
},
{
"epoch": 9.92,
"learning_rate": 2.8009259259259263e-05,
"loss": 0.956,
"step": 1190
},
{
"epoch": 10.0,
"learning_rate": 2.777777777777778e-05,
"loss": 0.9321,
"step": 1200
},
{
"epoch": 10.0,
"eval_accuracy": 0.6109154929577465,
"eval_loss": 0.9396657347679138,
"eval_runtime": 22.8434,
"eval_samples_per_second": 74.595,
"eval_steps_per_second": 2.364,
"step": 1200
},
{
"epoch": 10.08,
"learning_rate": 2.75462962962963e-05,
"loss": 0.9324,
"step": 1210
},
{
"epoch": 10.17,
"learning_rate": 2.7314814814814816e-05,
"loss": 0.9112,
"step": 1220
},
{
"epoch": 10.25,
"learning_rate": 2.7083333333333332e-05,
"loss": 0.9613,
"step": 1230
},
{
"epoch": 10.33,
"learning_rate": 2.6851851851851855e-05,
"loss": 0.9083,
"step": 1240
},
{
"epoch": 10.42,
"learning_rate": 2.6620370370370372e-05,
"loss": 0.9691,
"step": 1250
},
{
"epoch": 10.5,
"learning_rate": 2.6388888888888892e-05,
"loss": 0.9256,
"step": 1260
},
{
"epoch": 10.58,
"learning_rate": 2.615740740740741e-05,
"loss": 0.929,
"step": 1270
},
{
"epoch": 10.67,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.9056,
"step": 1280
},
{
"epoch": 10.75,
"learning_rate": 2.5694444444444445e-05,
"loss": 0.979,
"step": 1290
},
{
"epoch": 10.83,
"learning_rate": 2.5462962962962965e-05,
"loss": 0.9242,
"step": 1300
},
{
"epoch": 10.92,
"learning_rate": 2.5231481481481485e-05,
"loss": 0.9118,
"step": 1310
},
{
"epoch": 11.0,
"learning_rate": 2.5e-05,
"loss": 0.9052,
"step": 1320
},
{
"epoch": 11.0,
"eval_accuracy": 0.602112676056338,
"eval_loss": 0.9402132034301758,
"eval_runtime": 23.1769,
"eval_samples_per_second": 73.522,
"eval_steps_per_second": 2.33,
"step": 1320
},
{
"epoch": 11.08,
"learning_rate": 2.4768518518518518e-05,
"loss": 0.9046,
"step": 1330
},
{
"epoch": 11.17,
"learning_rate": 2.4537037037037038e-05,
"loss": 0.9401,
"step": 1340
},
{
"epoch": 11.25,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.8835,
"step": 1350
},
{
"epoch": 11.33,
"learning_rate": 2.4074074074074074e-05,
"loss": 0.8835,
"step": 1360
},
{
"epoch": 11.42,
"learning_rate": 2.3842592592592594e-05,
"loss": 0.8896,
"step": 1370
},
{
"epoch": 11.5,
"learning_rate": 2.361111111111111e-05,
"loss": 0.9183,
"step": 1380
},
{
"epoch": 11.58,
"learning_rate": 2.337962962962963e-05,
"loss": 0.9436,
"step": 1390
},
{
"epoch": 11.67,
"learning_rate": 2.314814814814815e-05,
"loss": 0.8846,
"step": 1400
},
{
"epoch": 11.75,
"learning_rate": 2.2916666666666667e-05,
"loss": 0.9532,
"step": 1410
},
{
"epoch": 11.83,
"learning_rate": 2.2685185185185187e-05,
"loss": 0.9215,
"step": 1420
},
{
"epoch": 11.92,
"learning_rate": 2.2453703703703703e-05,
"loss": 0.933,
"step": 1430
},
{
"epoch": 12.0,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.9457,
"step": 1440
},
{
"epoch": 12.0,
"eval_accuracy": 0.6214788732394366,
"eval_loss": 0.9181148409843445,
"eval_runtime": 22.9806,
"eval_samples_per_second": 74.15,
"eval_steps_per_second": 2.35,
"step": 1440
},
{
"epoch": 12.08,
"learning_rate": 2.1990740740740743e-05,
"loss": 0.8826,
"step": 1450
},
{
"epoch": 12.17,
"learning_rate": 2.175925925925926e-05,
"loss": 0.908,
"step": 1460
},
{
"epoch": 12.25,
"learning_rate": 2.152777777777778e-05,
"loss": 0.916,
"step": 1470
},
{
"epoch": 12.33,
"learning_rate": 2.1296296296296296e-05,
"loss": 0.8875,
"step": 1480
},
{
"epoch": 12.42,
"learning_rate": 2.1064814814814816e-05,
"loss": 0.897,
"step": 1490
},
{
"epoch": 12.5,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.9158,
"step": 1500
},
{
"epoch": 12.58,
"learning_rate": 2.0601851851851853e-05,
"loss": 0.9398,
"step": 1510
},
{
"epoch": 12.67,
"learning_rate": 2.037037037037037e-05,
"loss": 0.9385,
"step": 1520
},
{
"epoch": 12.75,
"learning_rate": 2.013888888888889e-05,
"loss": 0.8998,
"step": 1530
},
{
"epoch": 12.83,
"learning_rate": 1.990740740740741e-05,
"loss": 0.9058,
"step": 1540
},
{
"epoch": 12.92,
"learning_rate": 1.967592592592593e-05,
"loss": 0.8997,
"step": 1550
},
{
"epoch": 13.0,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.9101,
"step": 1560
},
{
"epoch": 13.0,
"eval_accuracy": 0.6185446009389671,
"eval_loss": 0.9350312352180481,
"eval_runtime": 23.0031,
"eval_samples_per_second": 74.077,
"eval_steps_per_second": 2.348,
"step": 1560
},
{
"epoch": 13.08,
"learning_rate": 1.9212962962962962e-05,
"loss": 0.9129,
"step": 1570
},
{
"epoch": 13.17,
"learning_rate": 1.8981481481481482e-05,
"loss": 0.8878,
"step": 1580
},
{
"epoch": 13.25,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.8675,
"step": 1590
},
{
"epoch": 13.33,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.8817,
"step": 1600
},
{
"epoch": 13.42,
"learning_rate": 1.8287037037037038e-05,
"loss": 0.8981,
"step": 1610
},
{
"epoch": 13.5,
"learning_rate": 1.8055555555555555e-05,
"loss": 0.9016,
"step": 1620
},
{
"epoch": 13.58,
"learning_rate": 1.7824074074074075e-05,
"loss": 0.8743,
"step": 1630
},
{
"epoch": 13.67,
"learning_rate": 1.7592592592592595e-05,
"loss": 0.8778,
"step": 1640
},
{
"epoch": 13.75,
"learning_rate": 1.736111111111111e-05,
"loss": 0.8631,
"step": 1650
},
{
"epoch": 13.83,
"learning_rate": 1.712962962962963e-05,
"loss": 0.9453,
"step": 1660
},
{
"epoch": 13.92,
"learning_rate": 1.6898148148148148e-05,
"loss": 0.9266,
"step": 1670
},
{
"epoch": 14.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.8772,
"step": 1680
},
{
"epoch": 14.0,
"eval_accuracy": 0.6050469483568075,
"eval_loss": 0.9537418484687805,
"eval_runtime": 22.909,
"eval_samples_per_second": 74.381,
"eval_steps_per_second": 2.357,
"step": 1680
},
{
"epoch": 14.08,
"learning_rate": 1.6435185185185187e-05,
"loss": 0.8748,
"step": 1690
},
{
"epoch": 14.17,
"learning_rate": 1.6203703703703704e-05,
"loss": 0.8794,
"step": 1700
},
{
"epoch": 14.25,
"learning_rate": 1.597222222222222e-05,
"loss": 0.8645,
"step": 1710
},
{
"epoch": 14.33,
"learning_rate": 1.574074074074074e-05,
"loss": 0.902,
"step": 1720
},
{
"epoch": 14.42,
"learning_rate": 1.550925925925926e-05,
"loss": 0.9094,
"step": 1730
},
{
"epoch": 14.5,
"learning_rate": 1.527777777777778e-05,
"loss": 0.9081,
"step": 1740
},
{
"epoch": 14.58,
"learning_rate": 1.5046296296296297e-05,
"loss": 0.8869,
"step": 1750
},
{
"epoch": 14.67,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.8552,
"step": 1760
},
{
"epoch": 14.75,
"learning_rate": 1.4583333333333335e-05,
"loss": 0.9357,
"step": 1770
},
{
"epoch": 14.83,
"learning_rate": 1.4351851851851853e-05,
"loss": 0.8996,
"step": 1780
},
{
"epoch": 14.92,
"learning_rate": 1.412037037037037e-05,
"loss": 0.9254,
"step": 1790
},
{
"epoch": 15.0,
"learning_rate": 1.388888888888889e-05,
"loss": 0.8865,
"step": 1800
},
{
"epoch": 15.0,
"eval_accuracy": 0.6126760563380281,
"eval_loss": 0.9256580471992493,
"eval_runtime": 22.8459,
"eval_samples_per_second": 74.587,
"eval_steps_per_second": 2.364,
"step": 1800
},
{
"epoch": 15.08,
"learning_rate": 1.3657407407407408e-05,
"loss": 0.8656,
"step": 1810
},
{
"epoch": 15.17,
"learning_rate": 1.3425925925925928e-05,
"loss": 0.8727,
"step": 1820
},
{
"epoch": 15.25,
"learning_rate": 1.3194444444444446e-05,
"loss": 0.8891,
"step": 1830
},
{
"epoch": 15.33,
"learning_rate": 1.2962962962962962e-05,
"loss": 0.8763,
"step": 1840
},
{
"epoch": 15.42,
"learning_rate": 1.2731481481481482e-05,
"loss": 0.883,
"step": 1850
},
{
"epoch": 15.5,
"learning_rate": 1.25e-05,
"loss": 0.9007,
"step": 1860
},
{
"epoch": 15.58,
"learning_rate": 1.2268518518518519e-05,
"loss": 0.8868,
"step": 1870
},
{
"epoch": 15.67,
"learning_rate": 1.2037037037037037e-05,
"loss": 0.8687,
"step": 1880
},
{
"epoch": 15.75,
"learning_rate": 1.1805555555555555e-05,
"loss": 0.8334,
"step": 1890
},
{
"epoch": 15.83,
"learning_rate": 1.1574074074074075e-05,
"loss": 0.8756,
"step": 1900
},
{
"epoch": 15.92,
"learning_rate": 1.1342592592592593e-05,
"loss": 0.8799,
"step": 1910
},
{
"epoch": 16.0,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.8454,
"step": 1920
},
{
"epoch": 16.0,
"eval_accuracy": 0.6214788732394366,
"eval_loss": 0.9159719347953796,
"eval_runtime": 22.7638,
"eval_samples_per_second": 74.856,
"eval_steps_per_second": 2.372,
"step": 1920
},
{
"epoch": 16.08,
"learning_rate": 1.087962962962963e-05,
"loss": 0.9168,
"step": 1930
},
{
"epoch": 16.17,
"learning_rate": 1.0648148148148148e-05,
"loss": 0.8289,
"step": 1940
},
{
"epoch": 16.25,
"learning_rate": 1.0416666666666668e-05,
"loss": 0.8544,
"step": 1950
},
{
"epoch": 16.33,
"learning_rate": 1.0185185185185185e-05,
"loss": 0.8349,
"step": 1960
},
{
"epoch": 16.42,
"learning_rate": 9.953703703703704e-06,
"loss": 0.9044,
"step": 1970
},
{
"epoch": 16.5,
"learning_rate": 9.722222222222223e-06,
"loss": 0.8508,
"step": 1980
},
{
"epoch": 16.58,
"learning_rate": 9.490740740740741e-06,
"loss": 0.8482,
"step": 1990
},
{
"epoch": 16.67,
"learning_rate": 9.259259259259259e-06,
"loss": 0.8412,
"step": 2000
},
{
"epoch": 16.75,
"learning_rate": 9.027777777777777e-06,
"loss": 0.8827,
"step": 2010
},
{
"epoch": 16.83,
"learning_rate": 8.796296296296297e-06,
"loss": 0.9122,
"step": 2020
},
{
"epoch": 16.92,
"learning_rate": 8.564814814814816e-06,
"loss": 0.8558,
"step": 2030
},
{
"epoch": 17.0,
"learning_rate": 8.333333333333334e-06,
"loss": 0.8909,
"step": 2040
},
{
"epoch": 17.0,
"eval_accuracy": 0.613849765258216,
"eval_loss": 0.9154210090637207,
"eval_runtime": 22.6154,
"eval_samples_per_second": 75.347,
"eval_steps_per_second": 2.388,
"step": 2040
},
{
"epoch": 17.08,
"learning_rate": 8.101851851851852e-06,
"loss": 0.8449,
"step": 2050
},
{
"epoch": 17.17,
"learning_rate": 7.87037037037037e-06,
"loss": 0.8799,
"step": 2060
},
{
"epoch": 17.25,
"learning_rate": 7.63888888888889e-06,
"loss": 0.8713,
"step": 2070
},
{
"epoch": 17.33,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.8708,
"step": 2080
},
{
"epoch": 17.42,
"learning_rate": 7.1759259259259266e-06,
"loss": 0.8545,
"step": 2090
},
{
"epoch": 17.5,
"learning_rate": 6.944444444444445e-06,
"loss": 0.853,
"step": 2100
},
{
"epoch": 17.58,
"learning_rate": 6.712962962962964e-06,
"loss": 0.8293,
"step": 2110
},
{
"epoch": 17.67,
"learning_rate": 6.481481481481481e-06,
"loss": 0.8409,
"step": 2120
},
{
"epoch": 17.75,
"learning_rate": 6.25e-06,
"loss": 0.8404,
"step": 2130
},
{
"epoch": 17.83,
"learning_rate": 6.0185185185185185e-06,
"loss": 0.836,
"step": 2140
},
{
"epoch": 17.92,
"learning_rate": 5.787037037037038e-06,
"loss": 0.8787,
"step": 2150
},
{
"epoch": 18.0,
"learning_rate": 5.555555555555556e-06,
"loss": 0.8473,
"step": 2160
},
{
"epoch": 18.0,
"eval_accuracy": 0.6185446009389671,
"eval_loss": 0.9096030592918396,
"eval_runtime": 22.3019,
"eval_samples_per_second": 76.406,
"eval_steps_per_second": 2.421,
"step": 2160
},
{
"epoch": 18.08,
"learning_rate": 5.324074074074074e-06,
"loss": 0.8474,
"step": 2170
},
{
"epoch": 18.17,
"learning_rate": 5.092592592592592e-06,
"loss": 0.8388,
"step": 2180
},
{
"epoch": 18.25,
"learning_rate": 4.861111111111111e-06,
"loss": 0.8502,
"step": 2190
},
{
"epoch": 18.33,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.8149,
"step": 2200
},
{
"epoch": 18.42,
"learning_rate": 4.398148148148149e-06,
"loss": 0.8442,
"step": 2210
},
{
"epoch": 18.5,
"learning_rate": 4.166666666666667e-06,
"loss": 0.8348,
"step": 2220
},
{
"epoch": 18.58,
"learning_rate": 3.935185185185185e-06,
"loss": 0.8841,
"step": 2230
},
{
"epoch": 18.67,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.8669,
"step": 2240
},
{
"epoch": 18.75,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.8684,
"step": 2250
},
{
"epoch": 18.83,
"learning_rate": 3.2407407407407406e-06,
"loss": 0.8048,
"step": 2260
},
{
"epoch": 18.92,
"learning_rate": 3.0092592592592593e-06,
"loss": 0.8316,
"step": 2270
},
{
"epoch": 19.0,
"learning_rate": 2.777777777777778e-06,
"loss": 0.8979,
"step": 2280
},
{
"epoch": 19.0,
"eval_accuracy": 0.6226525821596244,
"eval_loss": 0.9150213599205017,
"eval_runtime": 22.6094,
"eval_samples_per_second": 75.367,
"eval_steps_per_second": 2.388,
"step": 2280
},
{
"epoch": 19.08,
"learning_rate": 2.546296296296296e-06,
"loss": 0.8163,
"step": 2290
},
{
"epoch": 19.17,
"learning_rate": 2.3148148148148148e-06,
"loss": 0.823,
"step": 2300
},
{
"epoch": 19.25,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.8712,
"step": 2310
},
{
"epoch": 19.33,
"learning_rate": 1.8518518518518519e-06,
"loss": 0.8384,
"step": 2320
},
{
"epoch": 19.42,
"learning_rate": 1.6203703703703703e-06,
"loss": 0.855,
"step": 2330
},
{
"epoch": 19.5,
"learning_rate": 1.388888888888889e-06,
"loss": 0.8418,
"step": 2340
},
{
"epoch": 19.58,
"learning_rate": 1.1574074074074074e-06,
"loss": 0.8504,
"step": 2350
},
{
"epoch": 19.67,
"learning_rate": 9.259259259259259e-07,
"loss": 0.8736,
"step": 2360
},
{
"epoch": 19.75,
"learning_rate": 6.944444444444445e-07,
"loss": 0.7959,
"step": 2370
},
{
"epoch": 19.83,
"learning_rate": 4.6296296296296297e-07,
"loss": 0.8431,
"step": 2380
},
{
"epoch": 19.92,
"learning_rate": 2.3148148148148148e-07,
"loss": 0.8712,
"step": 2390
},
{
"epoch": 20.0,
"learning_rate": 0.0,
"loss": 0.8337,
"step": 2400
},
{
"epoch": 20.0,
"eval_accuracy": 0.6220657276995305,
"eval_loss": 0.9112741351127625,
"eval_runtime": 23.2097,
"eval_samples_per_second": 73.418,
"eval_steps_per_second": 2.327,
"step": 2400
},
{
"epoch": 20.0,
"step": 2400,
"total_flos": 2.3761445690374963e+19,
"train_loss": 0.9564308677117029,
"train_runtime": 12406.5537,
"train_samples_per_second": 24.714,
"train_steps_per_second": 0.193
}
],
"max_steps": 2400,
"num_train_epochs": 20,
"total_flos": 2.3761445690374963e+19,
"trial_name": null,
"trial_params": null
}