{
"best_metric": 0.9125,
"best_model_checkpoint": "mvit_v2_s_Kinetics400_transf_c_rwf2000/checkpoint-570",
"epoch": 5.1,
"eval_steps": 500,
"global_step": 1140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005263157894736842,
"grad_norm": 27.18143653869629,
"learning_rate": 9.947368421052632e-06,
"loss": 6.767,
"step": 10
},
{
"epoch": 0.010526315789473684,
"grad_norm": 28.846113204956055,
"learning_rate": 9.894736842105264e-06,
"loss": 5.9538,
"step": 20
},
{
"epoch": 0.015789473684210527,
"grad_norm": 27.071590423583984,
"learning_rate": 9.842105263157896e-06,
"loss": 5.1204,
"step": 30
},
{
"epoch": 0.021052631578947368,
"grad_norm": 43.807559967041016,
"learning_rate": 9.789473684210527e-06,
"loss": 4.4247,
"step": 40
},
{
"epoch": 0.02631578947368421,
"grad_norm": 29.54512596130371,
"learning_rate": 9.736842105263159e-06,
"loss": 3.7804,
"step": 50
},
{
"epoch": 0.031578947368421054,
"grad_norm": 24.903718948364258,
"learning_rate": 9.68421052631579e-06,
"loss": 3.0124,
"step": 60
},
{
"epoch": 0.03684210526315789,
"grad_norm": 28.422218322753906,
"learning_rate": 9.631578947368422e-06,
"loss": 2.417,
"step": 70
},
{
"epoch": 0.042105263157894736,
"grad_norm": 16.815492630004883,
"learning_rate": 9.578947368421054e-06,
"loss": 1.2391,
"step": 80
},
{
"epoch": 0.04736842105263158,
"grad_norm": 31.41044044494629,
"learning_rate": 9.526315789473684e-06,
"loss": 1.3617,
"step": 90
},
{
"epoch": 0.05263157894736842,
"grad_norm": 22.144113540649414,
"learning_rate": 9.473684210526315e-06,
"loss": 1.2047,
"step": 100
},
{
"epoch": 0.05789473684210526,
"grad_norm": 12.424088478088379,
"learning_rate": 9.421052631578949e-06,
"loss": 0.7313,
"step": 110
},
{
"epoch": 0.06315789473684211,
"grad_norm": 35.89997100830078,
"learning_rate": 9.36842105263158e-06,
"loss": 0.5971,
"step": 120
},
{
"epoch": 0.06842105263157895,
"grad_norm": 10.456808090209961,
"learning_rate": 9.315789473684212e-06,
"loss": 0.61,
"step": 130
},
{
"epoch": 0.07368421052631578,
"grad_norm": 27.438720703125,
"learning_rate": 9.263157894736842e-06,
"loss": 0.77,
"step": 140
},
{
"epoch": 0.07894736842105263,
"grad_norm": 10.961087226867676,
"learning_rate": 9.210526315789474e-06,
"loss": 0.6213,
"step": 150
},
{
"epoch": 0.08421052631578947,
"grad_norm": 20.819063186645508,
"learning_rate": 9.157894736842105e-06,
"loss": 0.6471,
"step": 160
},
{
"epoch": 0.08947368421052632,
"grad_norm": 18.45960235595703,
"learning_rate": 9.105263157894739e-06,
"loss": 0.5697,
"step": 170
},
{
"epoch": 0.09473684210526316,
"grad_norm": 10.544190406799316,
"learning_rate": 9.05263157894737e-06,
"loss": 0.6217,
"step": 180
},
{
"epoch": 0.1,
"grad_norm": 4.165653705596924,
"learning_rate": 9e-06,
"loss": 0.4345,
"step": 190
},
{
"epoch": 0.1,
"eval_accuracy": 0.725,
"eval_f1": 0.7046484309447895,
"eval_loss": 0.6745138764381409,
"eval_precision": 0.8106125970664365,
"eval_runtime": 72.1409,
"eval_samples_per_second": 2.218,
"eval_steps_per_second": 0.277,
"step": 190
},
{
"epoch": 1.0052631578947369,
"grad_norm": 20.22187042236328,
"learning_rate": 8.947368421052632e-06,
"loss": 0.7393,
"step": 200
},
{
"epoch": 1.0105263157894737,
"grad_norm": 11.406439781188965,
"learning_rate": 8.894736842105264e-06,
"loss": 0.431,
"step": 210
},
{
"epoch": 1.0157894736842106,
"grad_norm": 15.169920921325684,
"learning_rate": 8.842105263157895e-06,
"loss": 0.5438,
"step": 220
},
{
"epoch": 1.0210526315789474,
"grad_norm": 11.786495208740234,
"learning_rate": 8.789473684210527e-06,
"loss": 0.831,
"step": 230
},
{
"epoch": 1.0263157894736843,
"grad_norm": 14.316608428955078,
"learning_rate": 8.736842105263158e-06,
"loss": 0.6173,
"step": 240
},
{
"epoch": 1.0315789473684212,
"grad_norm": 22.38058090209961,
"learning_rate": 8.68421052631579e-06,
"loss": 0.5205,
"step": 250
},
{
"epoch": 1.0368421052631578,
"grad_norm": 6.769970417022705,
"learning_rate": 8.631578947368422e-06,
"loss": 0.5761,
"step": 260
},
{
"epoch": 1.0421052631578946,
"grad_norm": 23.546852111816406,
"learning_rate": 8.578947368421053e-06,
"loss": 0.4213,
"step": 270
},
{
"epoch": 1.0473684210526315,
"grad_norm": 13.848394393920898,
"learning_rate": 8.526315789473685e-06,
"loss": 0.4926,
"step": 280
},
{
"epoch": 1.0526315789473684,
"grad_norm": 6.40794563293457,
"learning_rate": 8.473684210526317e-06,
"loss": 0.3957,
"step": 290
},
{
"epoch": 1.0578947368421052,
"grad_norm": 6.262490749359131,
"learning_rate": 8.421052631578948e-06,
"loss": 0.3729,
"step": 300
},
{
"epoch": 1.063157894736842,
"grad_norm": 9.696098327636719,
"learning_rate": 8.36842105263158e-06,
"loss": 0.4236,
"step": 310
},
{
"epoch": 1.068421052631579,
"grad_norm": 10.287137985229492,
"learning_rate": 8.315789473684212e-06,
"loss": 0.4084,
"step": 320
},
{
"epoch": 1.0736842105263158,
"grad_norm": 23.936748504638672,
"learning_rate": 8.263157894736843e-06,
"loss": 0.3948,
"step": 330
},
{
"epoch": 1.0789473684210527,
"grad_norm": 19.638160705566406,
"learning_rate": 8.210526315789475e-06,
"loss": 0.5494,
"step": 340
},
{
"epoch": 1.0842105263157895,
"grad_norm": 7.03140926361084,
"learning_rate": 8.157894736842106e-06,
"loss": 0.5816,
"step": 350
},
{
"epoch": 1.0894736842105264,
"grad_norm": 19.78676414489746,
"learning_rate": 8.105263157894736e-06,
"loss": 0.7327,
"step": 360
},
{
"epoch": 1.0947368421052632,
"grad_norm": 24.11279296875,
"learning_rate": 8.052631578947368e-06,
"loss": 0.392,
"step": 370
},
{
"epoch": 1.1,
"grad_norm": 9.393863677978516,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5054,
"step": 380
},
{
"epoch": 1.1,
"eval_accuracy": 0.85625,
"eval_f1": 0.8537302754481498,
"eval_loss": 0.3967539668083191,
"eval_precision": 0.8826145326397047,
"eval_runtime": 73.1162,
"eval_samples_per_second": 2.188,
"eval_steps_per_second": 0.274,
"step": 380
},
{
"epoch": 2.0052631578947366,
"grad_norm": 6.710114479064941,
"learning_rate": 7.947368421052633e-06,
"loss": 0.3862,
"step": 390
},
{
"epoch": 2.0105263157894737,
"grad_norm": 11.278571128845215,
"learning_rate": 7.894736842105265e-06,
"loss": 0.3496,
"step": 400
},
{
"epoch": 2.0157894736842104,
"grad_norm": 11.26418685913086,
"learning_rate": 7.842105263157895e-06,
"loss": 0.4601,
"step": 410
},
{
"epoch": 2.0210526315789474,
"grad_norm": 10.958210945129395,
"learning_rate": 7.789473684210526e-06,
"loss": 0.4051,
"step": 420
},
{
"epoch": 2.026315789473684,
"grad_norm": 2.237288475036621,
"learning_rate": 7.736842105263158e-06,
"loss": 0.3642,
"step": 430
},
{
"epoch": 2.031578947368421,
"grad_norm": 8.63414192199707,
"learning_rate": 7.68421052631579e-06,
"loss": 0.5683,
"step": 440
},
{
"epoch": 2.036842105263158,
"grad_norm": 13.12728500366211,
"learning_rate": 7.631578947368423e-06,
"loss": 0.3092,
"step": 450
},
{
"epoch": 2.042105263157895,
"grad_norm": 1.1393234729766846,
"learning_rate": 7.578947368421054e-06,
"loss": 0.4077,
"step": 460
},
{
"epoch": 2.0473684210526315,
"grad_norm": 1.78622567653656,
"learning_rate": 7.526315789473685e-06,
"loss": 0.3897,
"step": 470
},
{
"epoch": 2.0526315789473686,
"grad_norm": 10.598691940307617,
"learning_rate": 7.473684210526316e-06,
"loss": 0.3625,
"step": 480
},
{
"epoch": 2.057894736842105,
"grad_norm": 12.993045806884766,
"learning_rate": 7.421052631578948e-06,
"loss": 0.4397,
"step": 490
},
{
"epoch": 2.0631578947368423,
"grad_norm": 25.146648406982422,
"learning_rate": 7.368421052631579e-06,
"loss": 0.5304,
"step": 500
},
{
"epoch": 2.068421052631579,
"grad_norm": 12.983936309814453,
"learning_rate": 7.315789473684212e-06,
"loss": 0.4668,
"step": 510
},
{
"epoch": 2.0736842105263156,
"grad_norm": 9.129647254943848,
"learning_rate": 7.263157894736843e-06,
"loss": 0.4969,
"step": 520
},
{
"epoch": 2.0789473684210527,
"grad_norm": 8.563615798950195,
"learning_rate": 7.210526315789474e-06,
"loss": 0.4993,
"step": 530
},
{
"epoch": 2.0842105263157893,
"grad_norm": 7.202339172363281,
"learning_rate": 7.157894736842106e-06,
"loss": 0.3251,
"step": 540
},
{
"epoch": 2.0894736842105264,
"grad_norm": 4.147830963134766,
"learning_rate": 7.1052631578947375e-06,
"loss": 0.4315,
"step": 550
},
{
"epoch": 2.094736842105263,
"grad_norm": 7.6200270652771,
"learning_rate": 7.052631578947369e-06,
"loss": 0.3396,
"step": 560
},
{
"epoch": 2.1,
"grad_norm": 13.409224510192871,
"learning_rate": 7e-06,
"loss": 0.2138,
"step": 570
},
{
"epoch": 2.1,
"eval_accuracy": 0.9125,
"eval_f1": 0.9122807017543861,
"eval_loss": 0.28845420479774475,
"eval_precision": 0.9166666666666667,
"eval_runtime": 71.7969,
"eval_samples_per_second": 2.229,
"eval_steps_per_second": 0.279,
"step": 570
},
{
"epoch": 3.0052631578947366,
"grad_norm": 3.221128463745117,
"learning_rate": 6.947368421052632e-06,
"loss": 0.3898,
"step": 580
},
{
"epoch": 3.0105263157894737,
"grad_norm": 2.5701775550842285,
"learning_rate": 6.894736842105264e-06,
"loss": 0.3812,
"step": 590
},
{
"epoch": 3.0157894736842104,
"grad_norm": 1.1917780637741089,
"learning_rate": 6.842105263157896e-06,
"loss": 0.3651,
"step": 600
},
{
"epoch": 3.0210526315789474,
"grad_norm": 14.28803825378418,
"learning_rate": 6.789473684210527e-06,
"loss": 0.398,
"step": 610
},
{
"epoch": 3.026315789473684,
"grad_norm": 11.742090225219727,
"learning_rate": 6.736842105263158e-06,
"loss": 0.3914,
"step": 620
},
{
"epoch": 3.031578947368421,
"grad_norm": 7.811966419219971,
"learning_rate": 6.68421052631579e-06,
"loss": 0.232,
"step": 630
},
{
"epoch": 3.036842105263158,
"grad_norm": 17.144683837890625,
"learning_rate": 6.631578947368421e-06,
"loss": 0.3637,
"step": 640
},
{
"epoch": 3.042105263157895,
"grad_norm": 19.564409255981445,
"learning_rate": 6.578947368421054e-06,
"loss": 0.5103,
"step": 650
},
{
"epoch": 3.0473684210526315,
"grad_norm": 8.233375549316406,
"learning_rate": 6.526315789473685e-06,
"loss": 0.3698,
"step": 660
},
{
"epoch": 3.0526315789473686,
"grad_norm": 18.26775360107422,
"learning_rate": 6.473684210526316e-06,
"loss": 0.3242,
"step": 670
},
{
"epoch": 3.057894736842105,
"grad_norm": 15.07589340209961,
"learning_rate": 6.421052631578948e-06,
"loss": 0.338,
"step": 680
},
{
"epoch": 3.0631578947368423,
"grad_norm": 3.9075160026550293,
"learning_rate": 6.3684210526315795e-06,
"loss": 0.3227,
"step": 690
},
{
"epoch": 3.068421052631579,
"grad_norm": 2.0749523639678955,
"learning_rate": 6.31578947368421e-06,
"loss": 0.4136,
"step": 700
},
{
"epoch": 3.0736842105263156,
"grad_norm": 15.651928901672363,
"learning_rate": 6.263157894736842e-06,
"loss": 0.4005,
"step": 710
},
{
"epoch": 3.0789473684210527,
"grad_norm": 22.100845336914062,
"learning_rate": 6.2105263157894745e-06,
"loss": 0.4285,
"step": 720
},
{
"epoch": 3.0842105263157893,
"grad_norm": 2.6055493354797363,
"learning_rate": 6.157894736842106e-06,
"loss": 0.2041,
"step": 730
},
{
"epoch": 3.0894736842105264,
"grad_norm": 13.344124794006348,
"learning_rate": 6.105263157894738e-06,
"loss": 0.3862,
"step": 740
},
{
"epoch": 3.094736842105263,
"grad_norm": 0.9060586094856262,
"learning_rate": 6.0526315789473685e-06,
"loss": 0.388,
"step": 750
},
{
"epoch": 3.1,
"grad_norm": 14.1149320602417,
"learning_rate": 6e-06,
"loss": 0.4265,
"step": 760
},
{
"epoch": 3.1,
"eval_accuracy": 0.9125,
"eval_f1": 0.9125,
"eval_loss": 0.25910043716430664,
"eval_precision": 0.9125,
"eval_runtime": 72.0159,
"eval_samples_per_second": 2.222,
"eval_steps_per_second": 0.278,
"step": 760
},
{
"epoch": 4.005263157894737,
"grad_norm": 8.575451850891113,
"learning_rate": 5.947368421052632e-06,
"loss": 0.4956,
"step": 770
},
{
"epoch": 4.010526315789473,
"grad_norm": 9.764039993286133,
"learning_rate": 5.8947368421052634e-06,
"loss": 0.3357,
"step": 780
},
{
"epoch": 4.015789473684211,
"grad_norm": 8.81169605255127,
"learning_rate": 5.842105263157896e-06,
"loss": 0.2737,
"step": 790
},
{
"epoch": 4.021052631578947,
"grad_norm": 1.05990469455719,
"learning_rate": 5.789473684210527e-06,
"loss": 0.3039,
"step": 800
},
{
"epoch": 4.026315789473684,
"grad_norm": 19.7120418548584,
"learning_rate": 5.736842105263158e-06,
"loss": 0.2567,
"step": 810
},
{
"epoch": 4.031578947368421,
"grad_norm": 3.668038845062256,
"learning_rate": 5.68421052631579e-06,
"loss": 0.3245,
"step": 820
},
{
"epoch": 4.036842105263158,
"grad_norm": 17.96670913696289,
"learning_rate": 5.631578947368422e-06,
"loss": 0.2581,
"step": 830
},
{
"epoch": 4.042105263157895,
"grad_norm": 31.538679122924805,
"learning_rate": 5.578947368421052e-06,
"loss": 0.4507,
"step": 840
},
{
"epoch": 4.0473684210526315,
"grad_norm": 14.32706356048584,
"learning_rate": 5.526315789473685e-06,
"loss": 0.2924,
"step": 850
},
{
"epoch": 4.052631578947368,
"grad_norm": 10.178019523620605,
"learning_rate": 5.4736842105263165e-06,
"loss": 0.3634,
"step": 860
},
{
"epoch": 4.057894736842106,
"grad_norm": 20.643692016601562,
"learning_rate": 5.421052631578948e-06,
"loss": 0.3867,
"step": 870
},
{
"epoch": 4.063157894736842,
"grad_norm": 5.015390396118164,
"learning_rate": 5.36842105263158e-06,
"loss": 0.1877,
"step": 880
},
{
"epoch": 4.068421052631579,
"grad_norm": 6.897916793823242,
"learning_rate": 5.315789473684211e-06,
"loss": 0.3742,
"step": 890
},
{
"epoch": 4.073684210526316,
"grad_norm": 16.30086326599121,
"learning_rate": 5.263157894736842e-06,
"loss": 0.5215,
"step": 900
},
{
"epoch": 4.078947368421052,
"grad_norm": 33.295650482177734,
"learning_rate": 5.210526315789474e-06,
"loss": 0.4182,
"step": 910
},
{
"epoch": 4.08421052631579,
"grad_norm": 2.538074493408203,
"learning_rate": 5.157894736842106e-06,
"loss": 0.2224,
"step": 920
},
{
"epoch": 4.089473684210526,
"grad_norm": 1.320330262184143,
"learning_rate": 5.105263157894738e-06,
"loss": 0.2025,
"step": 930
},
{
"epoch": 4.094736842105263,
"grad_norm": 10.492331504821777,
"learning_rate": 5.052631578947369e-06,
"loss": 0.251,
"step": 940
},
{
"epoch": 4.1,
"grad_norm": 12.127493858337402,
"learning_rate": 5e-06,
"loss": 0.4049,
"step": 950
},
{
"epoch": 4.1,
"eval_accuracy": 0.8875,
"eval_f1": 0.8863636363636364,
"eval_loss": 0.3837929666042328,
"eval_precision": 0.9036458333333333,
"eval_runtime": 71.9918,
"eval_samples_per_second": 2.222,
"eval_steps_per_second": 0.278,
"step": 950
},
{
"epoch": 5.005263157894737,
"grad_norm": 5.86998987197876,
"learning_rate": 4.947368421052632e-06,
"loss": 0.4153,
"step": 960
},
{
"epoch": 5.010526315789473,
"grad_norm": 7.648462295532227,
"learning_rate": 4.894736842105264e-06,
"loss": 0.2841,
"step": 970
},
{
"epoch": 5.015789473684211,
"grad_norm": 3.2707762718200684,
"learning_rate": 4.842105263157895e-06,
"loss": 0.131,
"step": 980
},
{
"epoch": 5.021052631578947,
"grad_norm": 25.58846664428711,
"learning_rate": 4.789473684210527e-06,
"loss": 0.4913,
"step": 990
},
{
"epoch": 5.026315789473684,
"grad_norm": 13.411537170410156,
"learning_rate": 4.736842105263158e-06,
"loss": 0.2683,
"step": 1000
},
{
"epoch": 5.031578947368421,
"grad_norm": 17.36676597595215,
"learning_rate": 4.68421052631579e-06,
"loss": 0.5226,
"step": 1010
},
{
"epoch": 5.036842105263158,
"grad_norm": 7.734870433807373,
"learning_rate": 4.631578947368421e-06,
"loss": 0.1967,
"step": 1020
},
{
"epoch": 5.042105263157895,
"grad_norm": 5.902388095855713,
"learning_rate": 4.578947368421053e-06,
"loss": 0.3835,
"step": 1030
},
{
"epoch": 5.0473684210526315,
"grad_norm": 14.54670238494873,
"learning_rate": 4.526315789473685e-06,
"loss": 0.2073,
"step": 1040
},
{
"epoch": 5.052631578947368,
"grad_norm": 23.170896530151367,
"learning_rate": 4.473684210526316e-06,
"loss": 0.2461,
"step": 1050
},
{
"epoch": 5.057894736842106,
"grad_norm": 15.245760917663574,
"learning_rate": 4.4210526315789476e-06,
"loss": 0.5014,
"step": 1060
},
{
"epoch": 5.063157894736842,
"grad_norm": 2.622382164001465,
"learning_rate": 4.368421052631579e-06,
"loss": 0.3083,
"step": 1070
},
{
"epoch": 5.068421052631579,
"grad_norm": 14.459348678588867,
"learning_rate": 4.315789473684211e-06,
"loss": 0.2886,
"step": 1080
},
{
"epoch": 5.073684210526316,
"grad_norm": 13.719648361206055,
"learning_rate": 4.2631578947368425e-06,
"loss": 0.3146,
"step": 1090
},
{
"epoch": 5.078947368421052,
"grad_norm": 14.046276092529297,
"learning_rate": 4.210526315789474e-06,
"loss": 0.4636,
"step": 1100
},
{
"epoch": 5.08421052631579,
"grad_norm": 1.9393348693847656,
"learning_rate": 4.157894736842106e-06,
"loss": 0.264,
"step": 1110
},
{
"epoch": 5.089473684210526,
"grad_norm": 18.79676628112793,
"learning_rate": 4.105263157894737e-06,
"loss": 0.3153,
"step": 1120
},
{
"epoch": 5.094736842105263,
"grad_norm": 28.437938690185547,
"learning_rate": 4.052631578947368e-06,
"loss": 0.4312,
"step": 1130
},
{
"epoch": 5.1,
"grad_norm": 21.330198287963867,
"learning_rate": 4.000000000000001e-06,
"loss": 0.3757,
"step": 1140
},
{
"epoch": 5.1,
"eval_accuracy": 0.875,
"eval_f1": 0.8733976895078335,
"eval_loss": 0.4311452805995941,
"eval_precision": 0.8949967083607637,
"eval_runtime": 71.9264,
"eval_samples_per_second": 2.224,
"eval_steps_per_second": 0.278,
"step": 1140
},
{
"epoch": 5.1,
"step": 1140,
"total_flos": 0.0,
"train_loss": 0.6870630094879552,
"train_runtime": 5648.6059,
"train_samples_per_second": 2.691,
"train_steps_per_second": 0.336
},
{
"epoch": 5.1,
"eval_accuracy": 0.88625,
"eval_f1": 0.886120283885864,
"eval_loss": 0.2998928725719452,
"eval_precision": 0.8880179065868865,
"eval_runtime": 375.1017,
"eval_samples_per_second": 2.133,
"eval_steps_per_second": 0.267,
"step": 1140
}
],
"logging_steps": 10,
"max_steps": 1900,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.005
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}