videomae_v1_rwf-2000 / trainer_state.json
DanJoshua's picture
End of training
a6109ed verified
{
"best_metric": 0.875,
"best_model_checkpoint": "videomae_v1_rwf-2000/checkpoint-254",
"epoch": 5.041831357048748,
"eval_steps": 500,
"global_step": 508,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004940711462450593,
"grad_norm": 12.13394832611084,
"learning_rate": 1.990118577075099e-05,
"loss": 6.5747,
"step": 10
},
{
"epoch": 0.009881422924901186,
"grad_norm": 11.626209259033203,
"learning_rate": 1.9802371541501976e-05,
"loss": 4.4884,
"step": 20
},
{
"epoch": 0.014822134387351778,
"grad_norm": 10.6156644821167,
"learning_rate": 1.9703557312252965e-05,
"loss": 3.1466,
"step": 30
},
{
"epoch": 0.019762845849802372,
"grad_norm": 9.512907028198242,
"learning_rate": 1.9604743083003953e-05,
"loss": 2.3691,
"step": 40
},
{
"epoch": 0.024703557312252964,
"grad_norm": 9.6091947555542,
"learning_rate": 1.9505928853754942e-05,
"loss": 2.0846,
"step": 50
},
{
"epoch": 0.029644268774703556,
"grad_norm": 9.88412094116211,
"learning_rate": 1.940711462450593e-05,
"loss": 1.8647,
"step": 60
},
{
"epoch": 0.03458498023715415,
"grad_norm": 8.997875213623047,
"learning_rate": 1.930830039525692e-05,
"loss": 1.6523,
"step": 70
},
{
"epoch": 0.039525691699604744,
"grad_norm": 13.033836364746094,
"learning_rate": 1.920948616600791e-05,
"loss": 1.4589,
"step": 80
},
{
"epoch": 0.041501976284584984,
"eval_accuracy": 0.7,
"eval_f1": 0.6992481203007519,
"eval_loss": 1.434981107711792,
"eval_precision": 0.702020202020202,
"eval_runtime": 83.5513,
"eval_samples_per_second": 1.915,
"eval_steps_per_second": 0.323,
"step": 84
},
{
"epoch": 1.002635046113307,
"grad_norm": 7.341766357421875,
"learning_rate": 1.9110671936758897e-05,
"loss": 1.3173,
"step": 90
},
{
"epoch": 1.0075757575757576,
"grad_norm": 9.149868965148926,
"learning_rate": 1.9011857707509883e-05,
"loss": 1.3309,
"step": 100
},
{
"epoch": 1.0125164690382082,
"grad_norm": 6.38141393661499,
"learning_rate": 1.891304347826087e-05,
"loss": 1.067,
"step": 110
},
{
"epoch": 1.0174571805006587,
"grad_norm": 7.950096130371094,
"learning_rate": 1.881422924901186e-05,
"loss": 0.9792,
"step": 120
},
{
"epoch": 1.0223978919631094,
"grad_norm": 9.717514991760254,
"learning_rate": 1.871541501976285e-05,
"loss": 1.0362,
"step": 130
},
{
"epoch": 1.02733860342556,
"grad_norm": 6.719062328338623,
"learning_rate": 1.8616600790513834e-05,
"loss": 0.8712,
"step": 140
},
{
"epoch": 1.0322793148880105,
"grad_norm": 6.9779510498046875,
"learning_rate": 1.8517786561264823e-05,
"loss": 0.8797,
"step": 150
},
{
"epoch": 1.0372200263504612,
"grad_norm": 8.313773155212402,
"learning_rate": 1.8418972332015812e-05,
"loss": 0.7575,
"step": 160
},
{
"epoch": 1.0416666666666667,
"eval_accuracy": 0.78125,
"eval_f1": 0.7738196211478655,
"eval_loss": 0.8168264627456665,
"eval_precision": 0.8237992444684294,
"eval_runtime": 82.516,
"eval_samples_per_second": 1.939,
"eval_steps_per_second": 0.327,
"step": 169
},
{
"epoch": 2.0003293807641636,
"grad_norm": 8.686193466186523,
"learning_rate": 1.83201581027668e-05,
"loss": 0.8684,
"step": 170
},
{
"epoch": 2.005270092226614,
"grad_norm": 6.766555309295654,
"learning_rate": 1.8221343873517786e-05,
"loss": 0.8991,
"step": 180
},
{
"epoch": 2.0102108036890645,
"grad_norm": 7.252024173736572,
"learning_rate": 1.8122529644268775e-05,
"loss": 0.6937,
"step": 190
},
{
"epoch": 2.015151515151515,
"grad_norm": 8.432581901550293,
"learning_rate": 1.8023715415019763e-05,
"loss": 0.6598,
"step": 200
},
{
"epoch": 2.020092226613966,
"grad_norm": 8.45376205444336,
"learning_rate": 1.7924901185770752e-05,
"loss": 0.5522,
"step": 210
},
{
"epoch": 2.0250329380764165,
"grad_norm": 12.200201034545898,
"learning_rate": 1.782608695652174e-05,
"loss": 0.66,
"step": 220
},
{
"epoch": 2.0299736495388667,
"grad_norm": 7.500545501708984,
"learning_rate": 1.772727272727273e-05,
"loss": 0.5698,
"step": 230
},
{
"epoch": 2.0349143610013174,
"grad_norm": 5.759820461273193,
"learning_rate": 1.762845849802372e-05,
"loss": 0.494,
"step": 240
},
{
"epoch": 2.039855072463768,
"grad_norm": 6.814697265625,
"learning_rate": 1.7529644268774707e-05,
"loss": 0.6272,
"step": 250
},
{
"epoch": 2.0418313570487485,
"eval_accuracy": 0.875,
"eval_f1": 0.8740355849472525,
"eval_loss": 0.5021969079971313,
"eval_precision": 0.8868471953578337,
"eval_runtime": 80.0519,
"eval_samples_per_second": 1.999,
"eval_steps_per_second": 0.337,
"step": 254
},
{
"epoch": 3.0029644268774702,
"grad_norm": 4.731899738311768,
"learning_rate": 1.7430830039525693e-05,
"loss": 0.4245,
"step": 260
},
{
"epoch": 3.007905138339921,
"grad_norm": 15.187234878540039,
"learning_rate": 1.733201581027668e-05,
"loss": 0.6465,
"step": 270
},
{
"epoch": 3.0128458498023716,
"grad_norm": 8.394930839538574,
"learning_rate": 1.723320158102767e-05,
"loss": 0.3855,
"step": 280
},
{
"epoch": 3.0177865612648223,
"grad_norm": 13.709633827209473,
"learning_rate": 1.713438735177866e-05,
"loss": 0.2623,
"step": 290
},
{
"epoch": 3.022727272727273,
"grad_norm": 13.21254825592041,
"learning_rate": 1.7035573122529644e-05,
"loss": 0.566,
"step": 300
},
{
"epoch": 3.027667984189723,
"grad_norm": 17.71151351928711,
"learning_rate": 1.6936758893280633e-05,
"loss": 0.3897,
"step": 310
},
{
"epoch": 3.032608695652174,
"grad_norm": 9.521211624145508,
"learning_rate": 1.6837944664031622e-05,
"loss": 0.405,
"step": 320
},
{
"epoch": 3.0375494071146245,
"grad_norm": 10.889870643615723,
"learning_rate": 1.673913043478261e-05,
"loss": 0.4146,
"step": 330
},
{
"epoch": 3.041501976284585,
"eval_accuracy": 0.86875,
"eval_f1": 0.8686217008797653,
"eval_loss": 0.4903731942176819,
"eval_precision": 0.8701960784313727,
"eval_runtime": 76.9154,
"eval_samples_per_second": 2.08,
"eval_steps_per_second": 0.351,
"step": 338
},
{
"epoch": 4.000658761528327,
"grad_norm": 19.58037567138672,
"learning_rate": 1.6640316205533596e-05,
"loss": 0.3677,
"step": 340
},
{
"epoch": 4.005599472990777,
"grad_norm": 11.428667068481445,
"learning_rate": 1.6541501976284585e-05,
"loss": 0.2998,
"step": 350
},
{
"epoch": 4.010540184453228,
"grad_norm": 17.140522003173828,
"learning_rate": 1.6442687747035574e-05,
"loss": 0.404,
"step": 360
},
{
"epoch": 4.015480895915679,
"grad_norm": 13.671965599060059,
"learning_rate": 1.6343873517786562e-05,
"loss": 0.3015,
"step": 370
},
{
"epoch": 4.020421607378129,
"grad_norm": 20.311363220214844,
"learning_rate": 1.624505928853755e-05,
"loss": 0.3093,
"step": 380
},
{
"epoch": 4.02536231884058,
"grad_norm": 18.359455108642578,
"learning_rate": 1.614624505928854e-05,
"loss": 0.5012,
"step": 390
},
{
"epoch": 4.03030303030303,
"grad_norm": 11.816179275512695,
"learning_rate": 1.604743083003953e-05,
"loss": 0.3386,
"step": 400
},
{
"epoch": 4.0352437417654805,
"grad_norm": 20.613216400146484,
"learning_rate": 1.5948616600790514e-05,
"loss": 0.3747,
"step": 410
},
{
"epoch": 4.040184453227932,
"grad_norm": 1.1029576063156128,
"learning_rate": 1.5849802371541503e-05,
"loss": 0.2696,
"step": 420
},
{
"epoch": 4.041666666666667,
"eval_accuracy": 0.85,
"eval_f1": 0.85,
"eval_loss": 0.47424378991127014,
"eval_precision": 0.85,
"eval_runtime": 80.2032,
"eval_samples_per_second": 1.995,
"eval_steps_per_second": 0.337,
"step": 423
},
{
"epoch": 5.003293807641634,
"grad_norm": 24.193086624145508,
"learning_rate": 1.575098814229249e-05,
"loss": 0.2924,
"step": 430
},
{
"epoch": 5.008234519104084,
"grad_norm": 13.23393440246582,
"learning_rate": 1.565217391304348e-05,
"loss": 0.3003,
"step": 440
},
{
"epoch": 5.013175230566535,
"grad_norm": 6.054862022399902,
"learning_rate": 1.5553359683794466e-05,
"loss": 0.2834,
"step": 450
},
{
"epoch": 5.018115942028985,
"grad_norm": 5.4301042556762695,
"learning_rate": 1.5454545454545454e-05,
"loss": 0.2696,
"step": 460
},
{
"epoch": 5.0230566534914365,
"grad_norm": 7.172131061553955,
"learning_rate": 1.5355731225296443e-05,
"loss": 0.2452,
"step": 470
},
{
"epoch": 5.027997364953887,
"grad_norm": 24.706287384033203,
"learning_rate": 1.5256916996047434e-05,
"loss": 0.2949,
"step": 480
},
{
"epoch": 5.032938076416337,
"grad_norm": 37.07192611694336,
"learning_rate": 1.5158102766798419e-05,
"loss": 0.3073,
"step": 490
},
{
"epoch": 5.037878787878788,
"grad_norm": 16.38689422607422,
"learning_rate": 1.5059288537549408e-05,
"loss": 0.3349,
"step": 500
},
{
"epoch": 5.041831357048748,
"eval_accuracy": 0.875,
"eval_f1": 0.8748239712095135,
"eval_loss": 0.4332951009273529,
"eval_precision": 0.8771213073538654,
"eval_runtime": 79.3756,
"eval_samples_per_second": 2.016,
"eval_steps_per_second": 0.34,
"step": 508
},
{
"epoch": 5.041831357048748,
"step": 508,
"total_flos": 1.1541819109070832e+19,
"train_loss": 0.9273178417851605,
"train_runtime": 7038.3754,
"train_samples_per_second": 5.176,
"train_steps_per_second": 0.288
},
{
"epoch": 5.041831357048748,
"eval_accuracy": 0.845,
"eval_f1": 0.8447195246413834,
"eval_loss": 0.5974410772323608,
"eval_precision": 0.8475107652791416,
"eval_runtime": 395.5147,
"eval_samples_per_second": 2.023,
"eval_steps_per_second": 0.339,
"step": 508
}
],
"logging_steps": 10,
"max_steps": 2024,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1541819109070832e+19,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}