Matcha_clips_224_fintuned_5 / trainer_state.json
miladfa7's picture
Training in progress, epoch 0
1fc0236 verified
{
"best_global_step": 460,
"best_metric": 0.782051282051282,
"best_model_checkpoint": "./Models/Matcha_clips_224_fintuned_5/checkpoint-460",
"epoch": 6.142857142857143,
"eval_steps": 500,
"global_step": 805,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.024844720496894408,
"grad_norm": 11.83416748046875,
"learning_rate": 1.1728395061728396e-05,
"loss": 1.7545,
"step": 20
},
{
"epoch": 0.049689440993788817,
"grad_norm": 14.771072387695312,
"learning_rate": 2.4074074074074074e-05,
"loss": 1.0393,
"step": 40
},
{
"epoch": 0.07453416149068323,
"grad_norm": 3.698537826538086,
"learning_rate": 3.6419753086419754e-05,
"loss": 0.6203,
"step": 60
},
{
"epoch": 0.09937888198757763,
"grad_norm": 1.2401633262634277,
"learning_rate": 4.876543209876544e-05,
"loss": 0.3541,
"step": 80
},
{
"epoch": 0.12422360248447205,
"grad_norm": 27.550607681274414,
"learning_rate": 4.875690607734807e-05,
"loss": 0.4705,
"step": 100
},
{
"epoch": 0.14285714285714285,
"eval_accuracy": 0.7317073170731707,
"eval_loss": 1.0046272277832031,
"eval_runtime": 6.2101,
"eval_samples_per_second": 13.204,
"eval_steps_per_second": 3.382,
"step": 115
},
{
"epoch": 1.0062111801242235,
"grad_norm": 0.25678861141204834,
"learning_rate": 4.737569060773481e-05,
"loss": 0.224,
"step": 120
},
{
"epoch": 1.031055900621118,
"grad_norm": 0.9883336424827576,
"learning_rate": 4.599447513812155e-05,
"loss": 0.1258,
"step": 140
},
{
"epoch": 1.0559006211180124,
"grad_norm": 0.18707028031349182,
"learning_rate": 4.461325966850829e-05,
"loss": 0.2609,
"step": 160
},
{
"epoch": 1.0807453416149069,
"grad_norm": 0.08376210927963257,
"learning_rate": 4.323204419889503e-05,
"loss": 0.049,
"step": 180
},
{
"epoch": 1.1055900621118013,
"grad_norm": 0.08791894465684891,
"learning_rate": 4.1850828729281773e-05,
"loss": 0.1955,
"step": 200
},
{
"epoch": 1.1304347826086956,
"grad_norm": 0.12012392282485962,
"learning_rate": 4.046961325966851e-05,
"loss": 0.1493,
"step": 220
},
{
"epoch": 1.1428571428571428,
"eval_accuracy": 0.7560975609756098,
"eval_loss": 1.226146936416626,
"eval_runtime": 6.1393,
"eval_samples_per_second": 13.357,
"eval_steps_per_second": 3.421,
"step": 230
},
{
"epoch": 2.012422360248447,
"grad_norm": 0.09424888342618942,
"learning_rate": 3.9088397790055245e-05,
"loss": 0.1158,
"step": 240
},
{
"epoch": 2.0372670807453415,
"grad_norm": 0.05555570125579834,
"learning_rate": 3.770718232044199e-05,
"loss": 0.0235,
"step": 260
},
{
"epoch": 2.062111801242236,
"grad_norm": 0.02632896415889263,
"learning_rate": 3.632596685082873e-05,
"loss": 0.0072,
"step": 280
},
{
"epoch": 2.0869565217391304,
"grad_norm": 1.1457473039627075,
"learning_rate": 3.4944751381215476e-05,
"loss": 0.0063,
"step": 300
},
{
"epoch": 2.111801242236025,
"grad_norm": 1.7765306234359741,
"learning_rate": 3.3563535911602215e-05,
"loss": 0.0898,
"step": 320
},
{
"epoch": 2.1366459627329193,
"grad_norm": 0.04399551451206207,
"learning_rate": 3.218232044198895e-05,
"loss": 0.0965,
"step": 340
},
{
"epoch": 2.142857142857143,
"eval_accuracy": 0.7692307692307693,
"eval_loss": 1.1611318588256836,
"eval_runtime": 5.888,
"eval_samples_per_second": 13.247,
"eval_steps_per_second": 3.397,
"step": 345
},
{
"epoch": 3.018633540372671,
"grad_norm": 0.022856663912534714,
"learning_rate": 3.0801104972375693e-05,
"loss": 0.0181,
"step": 360
},
{
"epoch": 3.0434782608695654,
"grad_norm": 0.019706225022673607,
"learning_rate": 2.9419889502762433e-05,
"loss": 0.002,
"step": 380
},
{
"epoch": 3.0683229813664594,
"grad_norm": 0.028400592505931854,
"learning_rate": 2.8038674033149172e-05,
"loss": 0.0045,
"step": 400
},
{
"epoch": 3.093167701863354,
"grad_norm": 0.01030923891812563,
"learning_rate": 2.6657458563535914e-05,
"loss": 0.0011,
"step": 420
},
{
"epoch": 3.1180124223602483,
"grad_norm": 0.10795993357896805,
"learning_rate": 2.5276243093922653e-05,
"loss": 0.1063,
"step": 440
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.020210983231663704,
"learning_rate": 2.3895027624309393e-05,
"loss": 0.0018,
"step": 460
},
{
"epoch": 3.142857142857143,
"eval_accuracy": 0.782051282051282,
"eval_loss": 1.2064043283462524,
"eval_runtime": 5.8576,
"eval_samples_per_second": 13.316,
"eval_steps_per_second": 3.414,
"step": 460
},
{
"epoch": 4.024844720496894,
"grad_norm": 0.029298782348632812,
"learning_rate": 2.2513812154696135e-05,
"loss": 0.0012,
"step": 480
},
{
"epoch": 4.049689440993789,
"grad_norm": 0.014873038977384567,
"learning_rate": 2.1132596685082874e-05,
"loss": 0.0009,
"step": 500
},
{
"epoch": 4.074534161490683,
"grad_norm": 0.011718147434294224,
"learning_rate": 1.9751381215469613e-05,
"loss": 0.0008,
"step": 520
},
{
"epoch": 4.099378881987578,
"grad_norm": 0.010744371451437473,
"learning_rate": 1.8370165745856356e-05,
"loss": 0.0012,
"step": 540
},
{
"epoch": 4.124223602484472,
"grad_norm": 0.014341841451823711,
"learning_rate": 1.6988950276243095e-05,
"loss": 0.012,
"step": 560
},
{
"epoch": 4.142857142857143,
"eval_accuracy": 0.782051282051282,
"eval_loss": 1.1255377531051636,
"eval_runtime": 6.0116,
"eval_samples_per_second": 12.975,
"eval_steps_per_second": 3.327,
"step": 575
},
{
"epoch": 5.0062111801242235,
"grad_norm": 0.016358228400349617,
"learning_rate": 1.5607734806629834e-05,
"loss": 0.0007,
"step": 580
},
{
"epoch": 5.031055900621118,
"grad_norm": 0.03938188776373863,
"learning_rate": 1.4226519337016575e-05,
"loss": 0.0007,
"step": 600
},
{
"epoch": 5.055900621118012,
"grad_norm": 0.00977695919573307,
"learning_rate": 1.2845303867403316e-05,
"loss": 0.0007,
"step": 620
},
{
"epoch": 5.080745341614906,
"grad_norm": 0.014728990383446217,
"learning_rate": 1.1464088397790055e-05,
"loss": 0.0008,
"step": 640
},
{
"epoch": 5.105590062111801,
"grad_norm": 0.016683587804436684,
"learning_rate": 1.0082872928176797e-05,
"loss": 0.0006,
"step": 660
},
{
"epoch": 5.130434782608695,
"grad_norm": 0.038530658930540085,
"learning_rate": 8.701657458563537e-06,
"loss": 0.0009,
"step": 680
},
{
"epoch": 5.142857142857143,
"eval_accuracy": 0.782051282051282,
"eval_loss": 1.0889217853546143,
"eval_runtime": 6.0113,
"eval_samples_per_second": 12.976,
"eval_steps_per_second": 3.327,
"step": 690
},
{
"epoch": 6.012422360248447,
"grad_norm": 0.007233364041894674,
"learning_rate": 7.320441988950276e-06,
"loss": 0.0006,
"step": 700
},
{
"epoch": 6.037267080745342,
"grad_norm": 0.01591545157134533,
"learning_rate": 5.939226519337017e-06,
"loss": 0.0006,
"step": 720
},
{
"epoch": 6.062111801242236,
"grad_norm": 0.010462045669555664,
"learning_rate": 4.5580110497237574e-06,
"loss": 0.0006,
"step": 740
},
{
"epoch": 6.086956521739131,
"grad_norm": 0.006039341911673546,
"learning_rate": 3.1767955801104974e-06,
"loss": 0.0006,
"step": 760
},
{
"epoch": 6.111801242236025,
"grad_norm": 0.010201127268373966,
"learning_rate": 1.7955801104972376e-06,
"loss": 0.0006,
"step": 780
},
{
"epoch": 6.136645962732919,
"grad_norm": 0.013513385318219662,
"learning_rate": 4.143646408839779e-07,
"loss": 0.0007,
"step": 800
},
{
"epoch": 6.142857142857143,
"eval_accuracy": 0.7702702702702703,
"eval_loss": 1.14006769657135,
"eval_runtime": 5.5871,
"eval_samples_per_second": 13.245,
"eval_steps_per_second": 3.401,
"step": 805
},
{
"epoch": 6.142857142857143,
"step": 805,
"total_flos": 4.012506890622075e+18,
"train_loss": 0.1426203187803813,
"train_runtime": 889.4005,
"train_samples_per_second": 3.62,
"train_steps_per_second": 0.905
},
{
"epoch": 6.142857142857143,
"eval_accuracy": 0.8028169014084507,
"eval_loss": 1.103053331375122,
"eval_runtime": 4.9334,
"eval_samples_per_second": 14.392,
"eval_steps_per_second": 3.649,
"step": 805
}
],
"logging_steps": 20,
"max_steps": 805,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.012506890622075e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}